Added support for character set conversion of query terms. The XML
author Adam Dickmeiss <adam@indexdata.dk>
Wed, 4 May 2005 08:31:43 +0000 (08:31 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Wed, 4 May 2005 08:31:43 +0000 (08:31 +0000)
configuration element query-charset specifies the character set encoding
for the target. In SRW/SRU mode, the proxy will convert from UTF-8 to
this encoding. The next logical step will be to honor Z39.50 character
set negotiation, so that Z39.50 queries will be converted as well
(we do not know the encoding for Z39.50 sessions unless they say so).

etc/config.xml
include/yazproxy/proxy.h
src/Makefile.am
src/proxyp.h
src/yaz-proxy-config.cpp
src/yaz-proxy.cpp

index 36933d7..08985e9 100644 (file)
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
 <?xml version="1.0"?>
-<!-- $Id: config.xml,v 1.10 2005-03-14 13:09:59 adam Exp $ -->
+<!-- $Id: config.xml,v 1.11 2005-05-04 08:31:43 adam Exp $ -->
 <proxy xmlns="http://indexdata.dk/yazproxy/schema/0.9/"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
 <proxy xmlns="http://indexdata.dk/yazproxy/schema/0.9/"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
@@ -29,6 +29,7 @@
     <preinit>0</preinit>
     <xi:include href="explain.xml"/>
     <cql2rpn>pqf.properties</cql2rpn>
     <preinit>0</preinit>
     <xi:include href="explain.xml"/>
     <cql2rpn>pqf.properties</cql2rpn>
+    <query-charset>ISO-8859-1</query-charset>
   </target>
   <target name="*">
     <target-timeout>60</target-timeout>
   </target>
   <target name="*">
     <target-timeout>60</target-timeout>
index b356d79..f15171e 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: proxy.h,v 1.14 2005-02-22 10:08:19 adam Exp $
+/* $Id: proxy.h,v 1.15 2005-05-04 08:31:44 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -41,6 +41,7 @@ class Yaz_Proxy;
 class Yaz_usemarcon;
 class Yaz_ProxyConfig;
 class Yaz_ProxyClient;
 class Yaz_usemarcon;
 class Yaz_ProxyConfig;
 class Yaz_ProxyClient;
+class Yaz_CharsetConverter;
 
 enum YAZ_Proxy_MARCXML_mode {
     none,
 
 enum YAZ_Proxy_MARCXML_mode {
     none,
@@ -98,6 +99,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc {
 
     Z_APDU *handle_query_validation(Z_APDU *apdu);
     Z_APDU *handle_query_transformation(Z_APDU *apdu);
 
     Z_APDU *handle_query_validation(Z_APDU *apdu);
     Z_APDU *handle_query_transformation(Z_APDU *apdu);
+    Z_APDU *handle_query_charset_conversion(Z_APDU *apdu);
 
     Z_APDU *handle_syntax_validation(Z_APDU *apdu);
 
 
     Z_APDU *handle_syntax_validation(Z_APDU *apdu);
 
@@ -169,6 +171,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc {
     char *m_usemarcon_ini_stage1;
     char *m_usemarcon_ini_stage2;
     Yaz_usemarcon *m_usemarcon;
     char *m_usemarcon_ini_stage1;
     char *m_usemarcon_ini_stage2;
     Yaz_usemarcon *m_usemarcon;
+    Yaz_CharsetConverter *m_charset_converter;
  public:
     Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable,
              Yaz_Proxy *parent = 0);
  public:
     Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable,
              Yaz_Proxy *parent = 0);
@@ -186,6 +189,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc {
     const char *option(const char *name, const char *value);
     void set_default_target(const char *target);
     void set_proxy_negotiation (const char *charset, const char *lang);
     const char *option(const char *name, const char *value);
     void set_default_target(const char *target);
     void set_proxy_negotiation (const char *charset, const char *lang);
+    void set_query_charset(const char *charset);
     char *get_proxy_target() { return m_proxyTarget; };
     char *get_session_str() { return m_session_str; };
     void set_max_clients(int m) { m_max_clients = m; };
     char *get_proxy_target() { return m_proxyTarget; };
     char *get_session_str() { return m_session_str; };
     void set_max_clients(int m) { m_max_clients = m; };
index 58e9a21..3436bed 100644 (file)
@@ -1,4 +1,4 @@
-## $Id: Makefile.am,v 1.5 2005-02-11 15:19:08 adam Exp $
+## $Id: Makefile.am,v 1.6 2005-05-04 08:31:44 adam Exp $
 
 AM_CXXFLAGS = $(YAZPPINC) -I$(srcdir)/../include $(XSLT_CFLAGS) $(USEMARCONINC)
 
 
 AM_CXXFLAGS = $(YAZPPINC) -I$(srcdir)/../include $(XSLT_CFLAGS) $(USEMARCONINC)
 
@@ -6,7 +6,7 @@ lib_LTLIBRARIES = libyazproxy.la
 libyazproxy_la_LDFLAGS=-version-info 1:0:0
 
 libyazproxy_la_SOURCES= yaz-proxy.cpp yaz-proxy-config.cpp yaz-bw.cpp \
 libyazproxy_la_LDFLAGS=-version-info 1:0:0
 
 libyazproxy_la_SOURCES= yaz-proxy.cpp yaz-proxy-config.cpp yaz-bw.cpp \
- proxyp.h yaz-usemarcon.cpp
+ proxyp.h yaz-usemarcon.cpp charset-converter.cpp
 
 bin_PROGRAMS = yazproxy
 check_PROGRAMS = cdetails
 
 bin_PROGRAMS = yazproxy
 check_PROGRAMS = cdetails
index 3485885..7455bbd 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: proxyp.h,v 1.3 2005-02-21 14:27:32 adam Exp $
+/* $Id: proxyp.h,v 1.4 2005-05-04 08:31:44 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -52,6 +52,26 @@ class Yaz_usemarcon {
 #endif
 };
 
 #endif
 };
 
+class Yaz_CharsetConverter {
+public:
+    Yaz_CharsetConverter();
+    ~Yaz_CharsetConverter();
+    void set_target_query_charset(const char *s);
+    void set_client_query_charset(const char *org);
+    void convert_type_1(Z_RPNQuery *q, ODR o);
+private:
+    void convert_type_1(char *buf_in, int len_in,
+                       char **buf_out, int *len_out,
+                       ODR o);
+    void convert_type_1(Z_Term *q, ODR o);
+    void convert_type_1(Z_RPNStructure *q, ODR o);
+    void convert_type_1(Z_Operand *q, ODR o);
+    char *m_target_query_charset;
+    char *m_client_query_charset;
+    yaz_iconv_t m_ct;
+    WRBUF m_wrbuf;
+};
+
 class Yaz_ProxyConfig {
 public:
     Yaz_ProxyConfig();
 class Yaz_ProxyConfig {
 public:
     Yaz_ProxyConfig();
@@ -73,7 +93,8 @@ public:
                      const char **cql2rpn,
                      const char **authentication,
                      const char **negotiation_charset,
                      const char **cql2rpn,
                      const char **authentication,
                      const char **negotiation_charset,
-                     const char **negotiation_lang);
+                     const char **negotiation_lang,
+                     const char **query_charset);
     
     void get_generic_info(int *log_mask, int *max_clients);
 
     
     void get_generic_info(int *log_mask, int *max_clients);
 
@@ -85,7 +106,8 @@ public:
                         int *pre_init,
                         const char **cql2rpn,
                         const char **negotiation_charset,
                         int *pre_init,
                         const char **cql2rpn,
                         const char **negotiation_charset,
-                        const char **negotiation_lang);
+                        const char **negotiation_lang,
+                        const char **query_charset);
 
     const char *check_mime_type(const char *path);
     int check_query(ODR odr, const char *name, Z_Query *query, char **addinfo);
 
     const char *check_mime_type(const char *path);
     int check_query(ODR odr, const char *name, Z_Query *query, char **addinfo);
index 29865e0..11ad189 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: yaz-proxy-config.cpp,v 1.17 2005-02-21 14:27:32 adam Exp $
+/* $Id: yaz-proxy-config.cpp,v 1.18 2005-05-04 08:31:44 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -122,7 +122,8 @@ class Yaz_ProxyConfigP {
                            int *keepalive_limit_bw, int *keepalive_limit_pdu,
                            int *pre_init, const char **cql2rpn,
                            const char **negotiation_charset,
                            int *keepalive_limit_bw, int *keepalive_limit_pdu,
                            int *pre_init, const char **cql2rpn,
                            const char **negotiation_charset,
-                           const char **negotiation_lang);
+                           const char **negotiation_lang,
+                           const char **query_charset);
     void return_limit(xmlNodePtr ptr,
                      int *limit_bw, int *limit_pdu, int *limit_req);
     int check_type_1(ODR odr, xmlNodePtr ptr, Z_RPNQuery *query,
     void return_limit(xmlNodePtr ptr,
                      int *limit_bw, int *limit_pdu, int *limit_req);
     int check_type_1(ODR odr, xmlNodePtr ptr, Z_RPNQuery *query,
@@ -328,7 +329,8 @@ void Yaz_ProxyConfigP::return_target_info(xmlNodePtr ptr,
                                          int *pre_init,
                                          const char **cql2rpn,
                                          const char **negotiation_charset,
                                          int *pre_init,
                                          const char **cql2rpn,
                                          const char **negotiation_charset,
-                                         const char **negotiation_lang)
+                                         const char **negotiation_lang,
+                                         const char **query_charset)
 {
     *pre_init = 0;
     int no_url = 0;
 {
     *pre_init = 0;
     int no_url = 0;
@@ -393,6 +395,13 @@ void Yaz_ProxyConfigP::return_target_info(xmlNodePtr ptr,
                *cql2rpn = t;
        }
        if (ptr->type == XML_ELEMENT_NODE 
                *cql2rpn = t;
        }
        if (ptr->type == XML_ELEMENT_NODE 
+           && !strcmp((const char *) ptr->name, "query-charset"))
+       {
+           const char *t = get_text(ptr);
+           if (t && query_charset)
+               *query_charset = t;
+       }
+       if (ptr->type == XML_ELEMENT_NODE 
            && !strcmp((const char *) ptr->name, "negotiation-charset"))
        {
            const char *t = get_text(ptr);
            && !strcmp((const char *) ptr->name, "negotiation-charset"))
        {
            const char *t = get_text(ptr);
@@ -519,8 +528,8 @@ int Yaz_ProxyConfigP::check_type_1_attributes(ODR odr, xmlNodePtr ptrl,
 
 #if HAVE_XSLT
 int Yaz_ProxyConfigP::check_type_1_structure(ODR odr, xmlNodePtr ptr,
 
 #if HAVE_XSLT
 int Yaz_ProxyConfigP::check_type_1_structure(ODR odr, xmlNodePtr ptr,
-                                           Z_RPNStructure *q,
-                                           char **addinfo)
+                                            Z_RPNStructure *q,
+                                            char **addinfo)
 {
     if (q->which == Z_RPNStructure_complex)
     {
 {
     if (q->which == Z_RPNStructure_complex)
     {
@@ -1021,7 +1030,8 @@ int Yaz_ProxyConfig::get_target_no(int no,
                                   const char **cql2rpn,
                                   const char **authentication,
                                   const char **negotiation_charset,
                                   const char **cql2rpn,
                                   const char **authentication,
                                   const char **negotiation_charset,
-                                  const char **negotiation_lang)
+                                  const char **negotiation_lang,
+                                  const char **query_charset)
 {
 #if HAVE_XSLT
     xmlNodePtr ptr;
 {
 #if HAVE_XSLT
     xmlNodePtr ptr;
@@ -1049,7 +1059,7 @@ int Yaz_ProxyConfig::get_target_no(int no,
                    target_idletime, client_idletime,
                    keepalive_limit_bw, keepalive_limit_pdu,
                    pre_init, cql2rpn,
                    target_idletime, client_idletime,
                    keepalive_limit_bw, keepalive_limit_pdu,
                    pre_init, cql2rpn,
-                   negotiation_charset, negotiation_lang);
+                   negotiation_charset, negotiation_lang, query_charset);
                return 1;
            }
            i++;
                return 1;
            }
            i++;
@@ -1248,7 +1258,8 @@ void Yaz_ProxyConfig::get_target_info(const char *name,
                                      int *pre_init,
                                      const char **cql2rpn,
                                      const char **negotiation_charset,
                                      int *pre_init,
                                      const char **cql2rpn,
                                      const char **negotiation_charset,
-                                     const char **negotiation_lang)
+                                     const char **negotiation_lang,
+                                     const char **query_charset)
 {
 #if HAVE_XSLT
     xmlNodePtr ptr;
 {
 #if HAVE_XSLT
     xmlNodePtr ptr;
@@ -1285,7 +1296,8 @@ void Yaz_ProxyConfig::get_target_info(const char *name,
                                 target_idletime, client_idletime,
                                 keepalive_limit_bw, keepalive_limit_pdu,
                                 pre_init, cql2rpn,
                                 target_idletime, client_idletime,
                                 keepalive_limit_bw, keepalive_limit_pdu,
                                 pre_init, cql2rpn,
-                                negotiation_charset, negotiation_lang);
+                                negotiation_charset, negotiation_lang,
+                                query_charset);
     }
 #else
     *url = name;
     }
 #else
     *url = name;
index 2d418a1..f174362 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: yaz-proxy.cpp,v 1.24 2005-02-22 10:08:20 adam Exp $
+/* $Id: yaz-proxy.cpp,v 1.25 2005-05-04 08:31:44 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -119,6 +119,7 @@ Yaz_Proxy::Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable,
     m_default_target = 0;
     m_proxy_negotiation_charset = 0;
     m_proxy_negotiation_lang = 0;
     m_default_target = 0;
     m_proxy_negotiation_charset = 0;
     m_proxy_negotiation_lang = 0;
+    m_charset_converter = new Yaz_CharsetConverter;
     m_max_clients = 150;
     m_log_mask = 0;
     m_seed = time(0);
     m_max_clients = 150;
     m_log_mask = 0;
     m_seed = time(0);
@@ -198,6 +199,7 @@ Yaz_Proxy::~Yaz_Proxy()
     xfree(m_default_target);
     xfree(m_proxy_negotiation_charset);
     xfree(m_proxy_negotiation_lang);
     xfree(m_default_target);
     xfree(m_proxy_negotiation_charset);
     xfree(m_proxy_negotiation_lang);
+    delete m_charset_converter;
     xfree(m_optimize);
 
 #if HAVE_XSLT
     xfree(m_optimize);
 
 #if HAVE_XSLT
@@ -306,12 +308,12 @@ IYaz_PDU_Observer *Yaz_Proxy::sessionNotify(IYaz_PDU_Observable
        new_proxy->set_APDU_yazlog(1);
     else
        new_proxy->set_APDU_yazlog(0);
        new_proxy->set_APDU_yazlog(1);
     else
        new_proxy->set_APDU_yazlog(0);
-    new_proxy->set_proxy_negotiation(m_proxy_negotiation_charset,
-       m_proxy_negotiation_lang);
     sprintf(new_proxy->m_session_str, "%ld:%d ", (long) time(0), m_session_no);
     m_session_no++;
     yaz_log (YLOG_LOG, "%sNew session %s", new_proxy->m_session_str,
             the_PDU_Observable->getpeername());
     sprintf(new_proxy->m_session_str, "%ld:%d ", (long) time(0), m_session_no);
     m_session_no++;
     yaz_log (YLOG_LOG, "%sNew session %s", new_proxy->m_session_str,
             the_PDU_Observable->getpeername());
+    new_proxy->set_proxy_negotiation(m_proxy_negotiation_charset,
+       m_proxy_negotiation_lang);
     return new_proxy;
 }
 
     return new_proxy;
 }
 
@@ -424,6 +426,7 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char *cookie,
        const char *cql2rpn_fname = 0;
        const char *negotiation_charset = 0;
        const char *negotiation_lang = 0;
        const char *cql2rpn_fname = 0;
        const char *negotiation_charset = 0;
        const char *negotiation_lang = 0;
+       const char *query_charset = 0;
        url[0] = m_default_target;
        url[1] = 0;
        if (cfg)
        url[0] = m_default_target;
        url[1] = 0;
        if (cfg)
@@ -438,7 +441,8 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char *cookie,
                                 &pre_init,
                                 &cql2rpn_fname,
                                 &negotiation_charset,
                                 &pre_init,
                                 &cql2rpn_fname,
                                 &negotiation_charset,
-                                &negotiation_lang);
+                                &negotiation_lang,
+                                &query_charset);
        }
        if (client_idletime != -1)
        {
        }
        if (client_idletime != -1)
        {
@@ -453,6 +457,7 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char *cookie,
            set_proxy_negotiation(negotiation_charset,
                negotiation_lang);
        }
            set_proxy_negotiation(negotiation_charset,
                negotiation_lang);
        }
+       m_charset_converter->set_target_query_charset(query_charset);
        if (!url[0])
        {
            yaz_log(YLOG_LOG, "%sNo default target", m_session_str);
        if (!url[0])
        {
            yaz_log(YLOG_LOG, "%sNo default target", m_session_str);
@@ -1800,6 +1805,24 @@ Z_APDU *Yaz_Proxy::handle_query_transformation(Z_APDU *apdu)
     return apdu;
 }
 
     return apdu;
 }
 
+Z_APDU *Yaz_Proxy::handle_query_charset_conversion(Z_APDU *apdu)
+{
+    if (apdu->which == Z_APDU_searchRequest &&
+       apdu->u.searchRequest->query)
+    {
+       if (apdu->u.searchRequest->query->which == Z_Query_type_1
+           || apdu->u.searchRequest->query->which == Z_Query_type_101)
+       {
+           if (m_http_version)
+               m_charset_converter->set_client_query_charset("UTF-8");
+           Z_RPNQuery *rpnquery = apdu->u.searchRequest->query->u.type_1;
+           m_charset_converter->convert_type_1(rpnquery, odr_encode());
+       }
+    }
+    return apdu;
+}
+
+
 Z_APDU *Yaz_Proxy::handle_query_validation(Z_APDU *apdu)
 {
     if (apdu->which == Z_APDU_searchRequest)
 Z_APDU *Yaz_Proxy::handle_query_validation(Z_APDU *apdu)
 {
     if (apdu->which == Z_APDU_searchRequest)
@@ -2067,7 +2090,6 @@ void Yaz_Proxy::srw_get_client(const char *db, const char **backend_db)
 int Yaz_Proxy::file_access(Z_HTTP_Request *hreq)
 {
     struct stat sbuf;
 int Yaz_Proxy::file_access(Z_HTTP_Request *hreq)
 {
     struct stat sbuf;
-    yaz_log(YLOG_LOG, "file_access");
     if (strcmp(hreq->method, "GET"))
        return 0;
     if (hreq->path[0] != '/')
     if (strcmp(hreq->method, "GET"))
        return 0;
     if (hreq->path[0] != '/')
@@ -2614,10 +2636,14 @@ void Yaz_Proxy::handle_incoming_Z_PDU(Z_APDU *apdu)
        apdu = handle_query_transformation(apdu);
 
     if (apdu)
        apdu = handle_query_transformation(apdu);
 
     if (apdu)
+       apdu = handle_query_charset_conversion(apdu);
+
+    if (apdu)
        apdu = handle_query_validation(apdu);
 
     if (apdu)
        apdu = result_set_optimize(apdu);
        apdu = handle_query_validation(apdu);
 
     if (apdu)
        apdu = result_set_optimize(apdu);
+
     if (!apdu)
     {
        m_client->timeout(m_target_idletime);  // mark it active even 
     if (!apdu)
     {
        m_client->timeout(m_target_idletime);  // mark it active even 
@@ -2832,7 +2858,8 @@ void Yaz_Proxy::pre_init()
                                          &cql2rpn,
                                          &authentication,
                                          &negotiation_charset,
                                          &cql2rpn,
                                          &authentication,
                                          &negotiation_charset,
-                                         &negotiation_lang) ; i++)
+                                         &negotiation_lang,
+                                         0) ; i++)
     {
        if (pre_init)
        {
     {
        if (pre_init)
        {