From: Adam Dickmeiss Date: Wed, 4 May 2005 08:31:43 +0000 (+0000) Subject: Added support for character set conversion of query terms. The XML X-Git-Tag: YAZPROXY.ERE~8 X-Git-Url: http://git.indexdata.com/?p=yazproxy-moved-to-github.git;a=commitdiff_plain;h=6699ea1b1b538de074500e2c740152135dfa75c5 Added support for character set conversion of query terms. The XML configuration element query-charset specifies the character set encoding for the target. In SRW/SRU mode, the proxy will convert from UTF-8 to this encoding. The next logical step will be to honor Z39.50 character set negotiation, so that Z39.50 queries are converted as well (we do not know the encoding for Z39.50 sessions unless they say so). --- diff --git a/etc/config.xml b/etc/config.xml index 36933d7..08985e9 100644 --- a/etc/config.xml +++ b/etc/config.xml @@ -1,5 +1,5 @@ - + 0 pqf.properties + ISO-8859-1 60 diff --git a/include/yazproxy/proxy.h b/include/yazproxy/proxy.h index b356d79..f15171e 100644 --- a/include/yazproxy/proxy.h +++ b/include/yazproxy/proxy.h @@ -1,4 +1,4 @@ -/* $Id: proxy.h,v 1.14 2005-02-22 10:08:19 adam Exp $ +/* $Id: proxy.h,v 1.15 2005-05-04 08:31:44 adam Exp $ Copyright (c) 1998-2005, Index Data. This file is part of the yaz-proxy. 
@@ -41,6 +41,7 @@ class Yaz_Proxy; class Yaz_usemarcon; class Yaz_ProxyConfig; class Yaz_ProxyClient; +class Yaz_CharsetConverter; enum YAZ_Proxy_MARCXML_mode { none, @@ -98,6 +99,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc { Z_APDU *handle_query_validation(Z_APDU *apdu); Z_APDU *handle_query_transformation(Z_APDU *apdu); + Z_APDU *handle_query_charset_conversion(Z_APDU *apdu); Z_APDU *handle_syntax_validation(Z_APDU *apdu); @@ -169,6 +171,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc { char *m_usemarcon_ini_stage1; char *m_usemarcon_ini_stage2; Yaz_usemarcon *m_usemarcon; + Yaz_CharsetConverter *m_charset_converter; public: Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable, Yaz_Proxy *parent = 0); @@ -186,6 +189,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc { const char *option(const char *name, const char *value); void set_default_target(const char *target); void set_proxy_negotiation (const char *charset, const char *lang); + void set_query_charset(const char *charset); char *get_proxy_target() { return m_proxyTarget; }; char *get_session_str() { return m_session_str; }; void set_max_clients(int m) { m_max_clients = m; }; diff --git a/src/Makefile.am b/src/Makefile.am index 58e9a21..3436bed 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.5 2005-02-11 15:19:08 adam Exp $ +## $Id: Makefile.am,v 1.6 2005-05-04 08:31:44 adam Exp $ AM_CXXFLAGS = $(YAZPPINC) -I$(srcdir)/../include $(XSLT_CFLAGS) $(USEMARCONINC) @@ -6,7 +6,7 @@ lib_LTLIBRARIES = libyazproxy.la libyazproxy_la_LDFLAGS=-version-info 1:0:0 libyazproxy_la_SOURCES= yaz-proxy.cpp yaz-proxy-config.cpp yaz-bw.cpp \ - proxyp.h yaz-usemarcon.cpp + proxyp.h yaz-usemarcon.cpp charset-converter.cpp bin_PROGRAMS = yazproxy check_PROGRAMS = cdetails diff --git a/src/proxyp.h b/src/proxyp.h index 3485885..7455bbd 100644 --- a/src/proxyp.h +++ b/src/proxyp.h @@ -1,4 +1,4 @@ -/* $Id: proxyp.h,v 1.3 2005-02-21 14:27:32 adam Exp $ +/* $Id: proxyp.h,v 1.4 
2005-05-04 08:31:44 adam Exp $ Copyright (c) 1998-2005, Index Data. This file is part of the yaz-proxy. @@ -52,6 +52,26 @@ class Yaz_usemarcon { #endif }; +class Yaz_CharsetConverter { +public: + Yaz_CharsetConverter(); + ~Yaz_CharsetConverter(); + void set_target_query_charset(const char *s); + void set_client_query_charset(const char *org); + void convert_type_1(Z_RPNQuery *q, ODR o); +private: + void convert_type_1(char *buf_in, int len_in, + char **buf_out, int *len_out, + ODR o); + void convert_type_1(Z_Term *q, ODR o); + void convert_type_1(Z_RPNStructure *q, ODR o); + void convert_type_1(Z_Operand *q, ODR o); + char *m_target_query_charset; + char *m_client_query_charset; + yaz_iconv_t m_ct; + WRBUF m_wrbuf; +}; + class Yaz_ProxyConfig { public: Yaz_ProxyConfig(); @@ -73,7 +93,8 @@ public: const char **cql2rpn, const char **authentication, const char **negotiation_charset, - const char **negotiation_lang); + const char **negotiation_lang, + const char **query_charset); void get_generic_info(int *log_mask, int *max_clients); @@ -85,7 +106,8 @@ public: int *pre_init, const char **cql2rpn, const char **negotiation_charset, - const char **negotiation_lang); + const char **negotiation_lang, + const char **query_charset); const char *check_mime_type(const char *path); int check_query(ODR odr, const char *name, Z_Query *query, char **addinfo); diff --git a/src/yaz-proxy-config.cpp b/src/yaz-proxy-config.cpp index 29865e0..11ad189 100644 --- a/src/yaz-proxy-config.cpp +++ b/src/yaz-proxy-config.cpp @@ -1,4 +1,4 @@ -/* $Id: yaz-proxy-config.cpp,v 1.17 2005-02-21 14:27:32 adam Exp $ +/* $Id: yaz-proxy-config.cpp,v 1.18 2005-05-04 08:31:44 adam Exp $ Copyright (c) 1998-2005, Index Data. This file is part of the yaz-proxy. 
@@ -122,7 +122,8 @@ class Yaz_ProxyConfigP { int *keepalive_limit_bw, int *keepalive_limit_pdu, int *pre_init, const char **cql2rpn, const char **negotiation_charset, - const char **negotiation_lang); + const char **negotiation_lang, + const char **query_charset); void return_limit(xmlNodePtr ptr, int *limit_bw, int *limit_pdu, int *limit_req); int check_type_1(ODR odr, xmlNodePtr ptr, Z_RPNQuery *query, @@ -328,7 +329,8 @@ void Yaz_ProxyConfigP::return_target_info(xmlNodePtr ptr, int *pre_init, const char **cql2rpn, const char **negotiation_charset, - const char **negotiation_lang) + const char **negotiation_lang, + const char **query_charset) { *pre_init = 0; int no_url = 0; @@ -393,6 +395,13 @@ void Yaz_ProxyConfigP::return_target_info(xmlNodePtr ptr, *cql2rpn = t; } if (ptr->type == XML_ELEMENT_NODE + && !strcmp((const char *) ptr->name, "query-charset")) + { + const char *t = get_text(ptr); + if (t && query_charset) + *query_charset = t; + } + if (ptr->type == XML_ELEMENT_NODE && !strcmp((const char *) ptr->name, "negotiation-charset")) { const char *t = get_text(ptr); @@ -519,8 +528,8 @@ int Yaz_ProxyConfigP::check_type_1_attributes(ODR odr, xmlNodePtr ptrl, #if HAVE_XSLT int Yaz_ProxyConfigP::check_type_1_structure(ODR odr, xmlNodePtr ptr, - Z_RPNStructure *q, - char **addinfo) + Z_RPNStructure *q, + char **addinfo) { if (q->which == Z_RPNStructure_complex) { @@ -1021,7 +1030,8 @@ int Yaz_ProxyConfig::get_target_no(int no, const char **cql2rpn, const char **authentication, const char **negotiation_charset, - const char **negotiation_lang) + const char **negotiation_lang, + const char **query_charset) { #if HAVE_XSLT xmlNodePtr ptr; @@ -1049,7 +1059,7 @@ int Yaz_ProxyConfig::get_target_no(int no, target_idletime, client_idletime, keepalive_limit_bw, keepalive_limit_pdu, pre_init, cql2rpn, - negotiation_charset, negotiation_lang); + negotiation_charset, negotiation_lang, query_charset); return 1; } i++; @@ -1248,7 +1258,8 @@ void 
Yaz_ProxyConfig::get_target_info(const char *name, int *pre_init, const char **cql2rpn, const char **negotiation_charset, - const char **negotiation_lang) + const char **negotiation_lang, + const char **query_charset) { #if HAVE_XSLT xmlNodePtr ptr; @@ -1285,7 +1296,8 @@ void Yaz_ProxyConfig::get_target_info(const char *name, target_idletime, client_idletime, keepalive_limit_bw, keepalive_limit_pdu, pre_init, cql2rpn, - negotiation_charset, negotiation_lang); + negotiation_charset, negotiation_lang, + query_charset); } #else *url = name; diff --git a/src/yaz-proxy.cpp b/src/yaz-proxy.cpp index 2d418a1..f174362 100644 --- a/src/yaz-proxy.cpp +++ b/src/yaz-proxy.cpp @@ -1,4 +1,4 @@ -/* $Id: yaz-proxy.cpp,v 1.24 2005-02-22 10:08:20 adam Exp $ +/* $Id: yaz-proxy.cpp,v 1.25 2005-05-04 08:31:44 adam Exp $ Copyright (c) 1998-2005, Index Data. This file is part of the yaz-proxy. @@ -119,6 +119,7 @@ Yaz_Proxy::Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable, m_default_target = 0; m_proxy_negotiation_charset = 0; m_proxy_negotiation_lang = 0; + m_charset_converter = new Yaz_CharsetConverter; m_max_clients = 150; m_log_mask = 0; m_seed = time(0); @@ -198,6 +199,7 @@ Yaz_Proxy::~Yaz_Proxy() xfree(m_default_target); xfree(m_proxy_negotiation_charset); xfree(m_proxy_negotiation_lang); + delete m_charset_converter; xfree(m_optimize); #if HAVE_XSLT @@ -306,12 +308,12 @@ IYaz_PDU_Observer *Yaz_Proxy::sessionNotify(IYaz_PDU_Observable new_proxy->set_APDU_yazlog(1); else new_proxy->set_APDU_yazlog(0); - new_proxy->set_proxy_negotiation(m_proxy_negotiation_charset, - m_proxy_negotiation_lang); sprintf(new_proxy->m_session_str, "%ld:%d ", (long) time(0), m_session_no); m_session_no++; yaz_log (YLOG_LOG, "%sNew session %s", new_proxy->m_session_str, the_PDU_Observable->getpeername()); + new_proxy->set_proxy_negotiation(m_proxy_negotiation_charset, + m_proxy_negotiation_lang); return new_proxy; } @@ -424,6 +426,7 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char 
*cookie, const char *cql2rpn_fname = 0; const char *negotiation_charset = 0; const char *negotiation_lang = 0; + const char *query_charset = 0; url[0] = m_default_target; url[1] = 0; if (cfg) @@ -438,7 +441,8 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char *cookie, &pre_init, &cql2rpn_fname, &negotiation_charset, - &negotiation_lang); + &negotiation_lang, + &query_charset); } if (client_idletime != -1) { @@ -453,6 +457,7 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char *cookie, set_proxy_negotiation(negotiation_charset, negotiation_lang); } + m_charset_converter->set_target_query_charset(query_charset); if (!url[0]) { yaz_log(YLOG_LOG, "%sNo default target", m_session_str); @@ -1800,6 +1805,24 @@ Z_APDU *Yaz_Proxy::handle_query_transformation(Z_APDU *apdu) return apdu; } +Z_APDU *Yaz_Proxy::handle_query_charset_conversion(Z_APDU *apdu) +{ + if (apdu->which == Z_APDU_searchRequest && + apdu->u.searchRequest->query) + { + if (apdu->u.searchRequest->query->which == Z_Query_type_1 + || apdu->u.searchRequest->query->which == Z_Query_type_101) + { + if (m_http_version) + m_charset_converter->set_client_query_charset("UTF-8"); + Z_RPNQuery *rpnquery = apdu->u.searchRequest->query->u.type_1; + m_charset_converter->convert_type_1(rpnquery, odr_encode()); + } + } + return apdu; +} + + Z_APDU *Yaz_Proxy::handle_query_validation(Z_APDU *apdu) { if (apdu->which == Z_APDU_searchRequest) @@ -2067,7 +2090,6 @@ void Yaz_Proxy::srw_get_client(const char *db, const char **backend_db) int Yaz_Proxy::file_access(Z_HTTP_Request *hreq) { struct stat sbuf; - yaz_log(YLOG_LOG, "file_access"); if (strcmp(hreq->method, "GET")) return 0; if (hreq->path[0] != '/') @@ -2614,10 +2636,14 @@ void Yaz_Proxy::handle_incoming_Z_PDU(Z_APDU *apdu) apdu = handle_query_transformation(apdu); if (apdu) + apdu = handle_query_charset_conversion(apdu); + + if (apdu) apdu = handle_query_validation(apdu); if (apdu) apdu = result_set_optimize(apdu); + if (!apdu) { 
m_client->timeout(m_target_idletime); // mark it active even @@ -2832,7 +2858,8 @@ void Yaz_Proxy::pre_init() &cql2rpn, &authentication, &negotiation_charset, - &negotiation_lang) ; i++) + &negotiation_lang, + 0) ; i++) { if (pre_init) {