Added support for Z39.50 character set negotiation. This allows
authorAdam Dickmeiss <adam@indexdata.dk>
Wed, 18 May 2005 20:15:22 +0000 (20:15 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Wed, 18 May 2005 20:15:22 +0000 (20:15 +0000)
the proxy to act as a Z39.50 server supporting character set negotiation
for backends not supporting it. New config element target-charset
specifies encoding for target, and MUST be specified in order
for any conversion to take place. Conversion also takes place for
SRW/SRU clients, which are implicitly negotiated to UTF-8.

NEWS
etc/config.xml
etc/yazproxy.xsd
include/yazproxy/proxy.h
src/charset-converter.cpp
src/proxyp.h
src/yaz-proxy-config.cpp
src/yaz-proxy.cpp

diff --git a/NEWS b/NEWS
index 6e192db..8266f4e 100644 (file)
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,10 @@
+Added support for Z39.50 character set negotiation. This allows
+the proxy to act as a Z39.50 server supporting character set negotiation
+for backends not supporting it. New config element target-charset
+specifies encoding for target, and MUST be specified in order
+for any conversion to take place. Conversion also takes place for
+SRW/SRU clients, which are implicitly negotiated to UTF-8.
+
 --- 1.0 2005/03/14
 
 Added Support OPAC to XML conversion. Triggered by backendtype="opac"
index 08985e9..b7721cc 100644 (file)
@@ -1,5 +1,5 @@
 <?xml version="1.0"?>
-<!-- $Id: config.xml,v 1.11 2005-05-04 08:31:43 adam Exp $ -->
+<!-- $Id: config.xml,v 1.12 2005-05-18 20:15:22 adam Exp $ -->
 <proxy xmlns="http://indexdata.dk/yazproxy/schema/0.9/"
  xmlns:xi="http://www.w3.org/2001/XInclude"
  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
     <attribute type="1" value="1-11,13-1016"/>
     <attribute type="1" value="*" error="114"/>
     <syntax type="opac"/>
-    <syntax type="usmarc"/>
+    <syntax type="usmarc" backendcharset="iso-8859-1"/>
     <syntax type="none"/>
+    <syntax type="sutrs" backendcharset="iso-8859-1"/>
     <syntax type="xml" marcxml="1"/>
     <syntax type="*" error="238"/>
     <preinit>0</preinit>
     <xi:include href="explain.xml"/>
+    <target-charset>iso-8859-1</target-charset>
     <cql2rpn>pqf.properties</cql2rpn>
-    <query-charset>ISO-8859-1</query-charset>
   </target>
   <target name="*">
     <target-timeout>60</target-timeout>
     <client-timeout>30</client-timeout>
     <!-- everything else -->
   </target>
-  <max-clients>50</max-clients>
+  <max-clients>2</max-clients>
   <log>client-requests server-requests</log>
 </proxy>
index 4e30805..b601c54 100644 (file)
@@ -1,6 +1,6 @@
 <?xml version="1.0"?>
 <!-- XML Schema for YAZ proxy config file.
-    $Id: yazproxy.xsd,v 1.8 2005-02-21 14:27:31 adam Exp $
+    $Id: yazproxy.xsd,v 1.9 2005-05-18 20:15:22 adam Exp $
 -->
 <xs:schema
   xmlns:xs="http://www.w3.org/2001/XMLSchema"
@@ -34,6 +34,7 @@
      <xs:element ref="preinit" minOccurs="0"/>
      <xs:element ref="exp:explain" minOccurs="0"/>
      <xs:element ref="cql2rpn" minOccurs="0"/>
+     <xs:element ref="target-charset" minOccurs="0"/>
      <xs:element ref="target-authentication" minOccurs="0"/>
      <xs:element ref="client-authentication" minOccurs="0"/>
      <xs:element ref="negotiation-charset" minOccurs="0"/>
@@ -52,6 +53,7 @@
  <xs:element name="pdu" type="xs:integer"/>
  <xs:element name="retrieve" type="xs:integer"/>
  <xs:element name="preinit" type="xs:integer"/>
+ <xs:element name="target-charset" type="xs:string"/>
  <xs:element name="cql2rpn" type="xs:string"/>
  <xs:element name="target-authentication">
    <xs:complexType>
index f15171e..854191c 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: proxy.h,v 1.15 2005-05-04 08:31:44 adam Exp $
+/* $Id: proxy.h,v 1.16 2005-05-18 20:15:22 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -99,7 +99,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc {
 
     Z_APDU *handle_query_validation(Z_APDU *apdu);
     Z_APDU *handle_query_transformation(Z_APDU *apdu);
-    Z_APDU *handle_query_charset_conversion(Z_APDU *apdu);
+    Z_APDU *handle_target_charset_conversion(Z_APDU *apdu);
 
     Z_APDU *handle_syntax_validation(Z_APDU *apdu);
 
@@ -121,6 +121,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc {
     int m_frontend_type;
     void convert_to_frontend_type(Z_NamePlusRecordList *p);
     void convert_to_marcxml(Z_NamePlusRecordList *p, const char *charset);
+    void convert_records_charset(Z_NamePlusRecordList *p, const char *charset);
     int convert_xsl(Z_NamePlusRecordList *p, Z_APDU *apdu);
     void convert_xsl_delay();
     Z_APDU *m_initRequest_apdu;
@@ -189,7 +190,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc {
     const char *option(const char *name, const char *value);
     void set_default_target(const char *target);
     void set_proxy_negotiation (const char *charset, const char *lang);
-    void set_query_charset(const char *charset);
+    void set_target_charset(const char *charset);
     char *get_proxy_target() { return m_proxyTarget; };
     char *get_session_str() { return m_session_str; };
     void set_max_clients(int m) { m_max_clients = m; };
index ad615f6..5bb4e03 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: charset-converter.cpp,v 1.2 2005-05-06 06:55:54 adam Exp $
+/* $Id: charset-converter.cpp,v 1.3 2005-05-18 20:15:22 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -28,6 +28,7 @@ Yaz_CharsetConverter::Yaz_CharsetConverter()
     m_wrbuf = wrbuf_alloc();
     m_target_query_charset = 0;
     m_client_query_charset = 0;
+    m_client_charset_selected = 0;
 }
 
 Yaz_CharsetConverter::~Yaz_CharsetConverter()
@@ -37,6 +38,11 @@ Yaz_CharsetConverter::~Yaz_CharsetConverter()
     xfree(m_client_query_charset);
 }
 
+const char *Yaz_CharsetConverter::get_target_query_charset()
+{
+    return m_target_query_charset;
+}
+
 void Yaz_CharsetConverter::set_target_query_charset(const char *s)
 {
     xfree(m_target_query_charset);
@@ -53,6 +59,21 @@ void Yaz_CharsetConverter::set_client_query_charset(const char *s)
        m_client_query_charset = xstrdup(s);
 }
 
+const char *Yaz_CharsetConverter::get_client_query_charset()
+{
+    return m_client_query_charset;
+}
+
+void Yaz_CharsetConverter::set_client_charset_selected(int sel)
+{
+    m_client_charset_selected = sel;
+}
+
+int Yaz_CharsetConverter::get_client_charset_selected()
+{
+    return m_client_charset_selected;
+}
+
 void Yaz_CharsetConverter::convert_type_1(char *buf_in, int len_in,
                                          char **buf_out, int *len_out,
                                          ODR o)
@@ -111,6 +132,7 @@ void Yaz_CharsetConverter::convert_type_1(Z_RPNStructure *q, ODR o)
        break;
     }
 }
+
 void Yaz_CharsetConverter::convert_type_1(Z_RPNQuery *q, ODR o)
 {
     if (m_target_query_charset && m_client_query_charset)
index 7455bbd..f36d182 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: proxyp.h,v 1.4 2005-05-04 08:31:44 adam Exp $
+/* $Id: proxyp.h,v 1.5 2005-05-18 20:15:22 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -58,7 +58,11 @@ public:
     ~Yaz_CharsetConverter();
     void set_target_query_charset(const char *s);
     void set_client_query_charset(const char *org);
+    const char *get_client_query_charset(void);
+    const char *get_target_query_charset(void);
     void convert_type_1(Z_RPNQuery *q, ODR o);
+    void set_client_charset_selected(int sel);
+    int get_client_charset_selected();
 private:
     void convert_type_1(char *buf_in, int len_in,
                        char **buf_out, int *len_out,
@@ -68,6 +72,7 @@ private:
     void convert_type_1(Z_Operand *q, ODR o);
     char *m_target_query_charset;
     char *m_client_query_charset;
+    int m_client_charset_selected;
     yaz_iconv_t m_ct;
     WRBUF m_wrbuf;
 };
index 11ad189..76259cf 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: yaz-proxy-config.cpp,v 1.18 2005-05-04 08:31:44 adam Exp $
+/* $Id: yaz-proxy-config.cpp,v 1.19 2005-05-18 20:15:22 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -123,7 +123,7 @@ class Yaz_ProxyConfigP {
                            int *pre_init, const char **cql2rpn,
                            const char **negotiation_charset,
                            const char **negotiation_lang,
-                           const char **query_charset);
+                           const char **target_charset);
     void return_limit(xmlNodePtr ptr,
                      int *limit_bw, int *limit_pdu, int *limit_req);
     int check_type_1(ODR odr, xmlNodePtr ptr, Z_RPNQuery *query,
@@ -330,7 +330,7 @@ void Yaz_ProxyConfigP::return_target_info(xmlNodePtr ptr,
                                          const char **cql2rpn,
                                          const char **negotiation_charset,
                                          const char **negotiation_lang,
-                                         const char **query_charset)
+                                         const char **target_charset)
 {
     *pre_init = 0;
     int no_url = 0;
@@ -395,11 +395,11 @@ void Yaz_ProxyConfigP::return_target_info(xmlNodePtr ptr,
                *cql2rpn = t;
        }
        if (ptr->type == XML_ELEMENT_NODE 
-           && !strcmp((const char *) ptr->name, "query-charset"))
+           && !strcmp((const char *) ptr->name, "target-charset"))
        {
            const char *t = get_text(ptr);
-           if (t && query_charset)
-               *query_charset = t;
+           if (t && target_charset)
+               *target_charset = t;
        }
        if (ptr->type == XML_ELEMENT_NODE 
            && !strcmp((const char *) ptr->name, "negotiation-charset"))
@@ -1031,7 +1031,7 @@ int Yaz_ProxyConfig::get_target_no(int no,
                                   const char **authentication,
                                   const char **negotiation_charset,
                                   const char **negotiation_lang,
-                                  const char **query_charset)
+                                  const char **target_charset)
 {
 #if HAVE_XSLT
     xmlNodePtr ptr;
@@ -1059,7 +1059,7 @@ int Yaz_ProxyConfig::get_target_no(int no,
                    target_idletime, client_idletime,
                    keepalive_limit_bw, keepalive_limit_pdu,
                    pre_init, cql2rpn,
-                   negotiation_charset, negotiation_lang, query_charset);
+                   negotiation_charset, negotiation_lang, target_charset);
                return 1;
            }
            i++;
@@ -1259,7 +1259,7 @@ void Yaz_ProxyConfig::get_target_info(const char *name,
                                      const char **cql2rpn,
                                      const char **negotiation_charset,
                                      const char **negotiation_lang,
-                                     const char **query_charset)
+                                     const char **target_charset)
 {
 #if HAVE_XSLT
     xmlNodePtr ptr;
@@ -1297,7 +1297,7 @@ void Yaz_ProxyConfig::get_target_info(const char *name,
                                 keepalive_limit_bw, keepalive_limit_pdu,
                                 pre_init, cql2rpn,
                                 negotiation_charset, negotiation_lang,
-                                query_charset);
+                                target_charset);
     }
 #else
     *url = name;
index f174362..594435e 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: yaz-proxy.cpp,v 1.25 2005-05-04 08:31:44 adam Exp $
+/* $Id: yaz-proxy.cpp,v 1.26 2005-05-18 20:15:23 adam Exp $
    Copyright (c) 1998-2005, Index Data.
 
 This file is part of the yaz-proxy.
@@ -453,7 +453,6 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char *cookie,
            m_cql2rpn.set_pqf_file(cql2rpn_fname);
        if (negotiation_charset || negotiation_lang)
        {
-           yaz_log(YLOG_LOG, "set_proxy_negotiation...");
            set_proxy_negotiation(negotiation_charset,
                negotiation_lang);
        }
@@ -842,6 +841,74 @@ void Yaz_Proxy::convert_to_frontend_type(Z_NamePlusRecordList *p)
     }
 }
 
+void Yaz_Proxy::convert_records_charset(Z_NamePlusRecordList *p,
+                                       const char *backend_charset)
+{
+    yaz_log(YLOG_LOG, "%sconvert_to_marc", m_session_str);
+    int sel =  m_charset_converter->get_client_charset_selected();
+    const char *client_record_charset =
+       m_charset_converter->get_client_query_charset();
+    if (sel && backend_charset && client_record_charset &&
+       strcmp(backend_charset, client_record_charset))
+    {
+       int i;
+       yaz_iconv_t cd = yaz_iconv_open(client_record_charset,
+                                       backend_charset);
+       yaz_marc_t mt = yaz_marc_create();
+       yaz_marc_xml(mt, YAZ_MARC_ISO2709);
+       yaz_marc_iconv(mt, cd);
+       for (i = 0; i < p->num_records; i++)
+       {
+           Z_NamePlusRecord *npr = p->records[i];
+           if (npr->which == Z_NamePlusRecord_databaseRecord)
+           {
+               Z_External *r = npr->u.databaseRecord;
+               oident *ent = oid_getentbyoid(r->direct_reference);
+               if (!ent || ent->value == VAL_NONE)
+                   continue;
+
+               if (ent->value == VAL_SUTRS)
+               {
+                   WRBUF w = wrbuf_alloc();
+
+                   wrbuf_iconv_write(w, cd,  (char*) r->u.octet_aligned->buf,
+                                     r->u.octet_aligned->len);
+                   npr->u.databaseRecord =
+                       z_ext_record(odr_encode(), ent->value, wrbuf_buf(w),
+                                    wrbuf_len(w));
+                   wrbuf_free(w, 1);
+               }
+               else if (ent->value == VAL_TEXT_XML)
+               {
+                   ;
+               }
+               else if (r->which == Z_External_octet)
+               {
+                   int rlen;
+                   char *result;
+                   if (yaz_marc_decode_buf(mt,
+                                           (char*) r->u.octet_aligned->buf,
+                                           r->u.octet_aligned->len,
+                                           &result, &rlen))
+                   {
+                       npr->u.databaseRecord =
+                           z_ext_record(odr_encode(), ent->value, result, rlen);
+                       yaz_log(YLOG_LOG, "%sRecoding MARC record",
+                               m_session_str);
+                   }
+               }
+           }
+       }
+       if (cd)
+           yaz_iconv_close(cd);
+       yaz_marc_destroy(mt);
+    }
+    else
+    {
+       yaz_log(YLOG_LOG, "%sSkipping marc convert", m_session_str);
+    }
+}
+
 void Yaz_Proxy::convert_to_marcxml(Z_NamePlusRecordList *p,
                                   const char *backend_charset)
 {
@@ -1200,6 +1267,9 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu)
                if (m_marcxml_mode == marcxml)
                    convert_to_marcxml(p->u.databaseOrSurDiagnostics,
                                       m_backend_charset);
+               else
+                   convert_records_charset(p->u.databaseOrSurDiagnostics,
+                                           m_backend_charset);
                if (convert_xsl(p->u.databaseOrSurDiagnostics, apdu))
                    return 0;
                    
@@ -1245,6 +1315,9 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu)
            if (m_marcxml_mode == marcxml)
                convert_to_marcxml(p->u.databaseOrSurDiagnostics,
                                   m_backend_charset);
+           else
+               convert_records_charset(p->u.databaseOrSurDiagnostics,
+                                       m_backend_charset);
            if (convert_xsl(p->u.databaseOrSurDiagnostics, apdu))
                return 0;
        }
@@ -1635,11 +1708,13 @@ void Yaz_Proxy::handle_charset_lang_negotiation(Z_APDU *apdu)
 {
     if (apdu->which == Z_APDU_initRequest)
     {
+       yaz_log(YLOG_LOG, "%shandle_charset_lang_negotiation", 
+               m_session_str);
        if (m_initRequest_options &&
            !ODR_MASK_GET(m_initRequest_options, Z_Options_negotiationModel) &&
            (m_proxy_negotiation_charset || m_proxy_negotiation_lang))
        {
-           // There is not negotiation proposal from
+           // There is no negotiation proposal from
            // client's side. OK. The proxy negotiation
            // in use, only.
            Z_InitRequest *initRequest = apdu->u.initRequest;
@@ -1661,6 +1736,66 @@ void Yaz_Proxy::handle_charset_lang_negotiation(Z_APDU *apdu)
                    1);
            }
        }
+       else if (m_initRequest_options &&
+                ODR_MASK_GET(m_initRequest_options,
+                             Z_Options_negotiationModel) &&
+                m_charset_converter->get_target_query_charset())
+       {
+           yaz_log(YLOG_LOG, "%sManaged charset negotiation: charset=%s",
+                   m_session_str,
+                   m_charset_converter->get_target_query_charset());
+           Z_InitRequest *initRequest = apdu->u.initRequest;
+           Z_CharSetandLanguageNegotiation *negotiation =
+               yaz_get_charneg_record (initRequest->otherInfo);        
+           if (negotiation &&
+               negotiation->which == Z_CharSetandLanguageNegotiation_proposal)
+           {
+               NMEM nmem = nmem_create();
+               char **charsets = 0;
+               int num_charsets = 0;
+               char **langs = 0;
+               int num_langs = 0;
+               int selected = 0;
+               yaz_get_proposal_charneg (nmem, negotiation,
+                                         &charsets, &num_charsets,
+                                         &langs, &num_langs, &selected);
+               int i;
+               for (i = 0; i<num_charsets; i++)
+                   yaz_log(YLOG_LOG, "%scharset %s", m_session_str,
+                           charsets[i]);
+               for (i = 0; i<num_langs; i++)
+                   yaz_log(YLOG_LOG, "%slang %s", m_session_str,
+                           langs[i]);
+
+               const char *t_charset =
+                   m_charset_converter->get_target_query_charset();
+               // sweep through charsets and pick the first supported
+               // conversion
+               for (i = 0; i<num_charsets; i++)
+               {
+                   const char *c_charset = charsets[i];
+                   if (!odr_set_charset(odr_decode(), t_charset, c_charset))
+                       break;
+               }
+               if (i != num_charsets)
+               {
+                   // got one .. set up ODR for reverse direction
+                   const char *c_charset = charsets[i];
+                   odr_set_charset(odr_encode(), c_charset, t_charset);
+                   m_charset_converter->set_client_query_charset(c_charset);
+                   m_charset_converter->set_client_charset_selected(selected);
+               }
+               nmem_destroy(nmem);
+               ODR_MASK_CLEAR(m_initRequest_options, 
+                              Z_Options_negotiationModel);
+               yaz_del_charneg_record(&initRequest->otherInfo);
+           }
+           else
+           {
+               yaz_log(YLOG_WARN, "%sUnable to decode charset package",
+                       m_session_str);
+           }
+       }
     }
     else if (apdu->which == Z_APDU_initResponse)
     {
@@ -1708,35 +1843,42 @@ void Yaz_Proxy::handle_charset_lang_negotiation(Z_APDU *apdu)
                ODR_MASK_CLEAR(initResponse->options, Z_Options_negotiationModel);
                
                // Delete negotiation (charneg-3) entry.
-               Z_OtherInformation *p = *otherInfo;
-               for (int i=0; i<p->num_elements; i++)
+               yaz_del_charneg_record(otherInfo);
+           }
+       }
+       else
+       {
+           if (m_proxy_negotiation_charset || m_proxy_negotiation_lang)
+           {
+               yaz_log(YLOG_LOG, "%sTarget did not honor negotiation",
+                       m_session_str);
+           }
+           else if (m_charset_converter->get_client_query_charset())
+           {
+               Z_OtherInformation **otherInfo;  
+               Z_OtherInformationUnit *oi;
+               get_otherInfoAPDU(apdu, &otherInfo);
+               oi = update_otherInformation(otherInfo, 1, NULL, 0, 0);
+               if (oi)
                {
-                   if (p->list[i]->which == Z_OtherInfo_externallyDefinedInfo)
-                   {
-                       Z_External *pext =
-                           p->list[i]->information.externallyDefinedInfo;
-                       struct oident *e = oid_getentbyoid(pext->direct_reference);
-                   
-                       if (e && e->value == VAL_CHARNEG3 && e->oclass == CLASS_NEGOT &&
-                           pext->which == Z_External_charSetandLanguageNegotiation)
-                       {
-                           (p->num_elements)--;                        
-                           if(p->num_elements == 0)
-                           {
-                               *otherInfo = 0;
-                           }
-                           else
-                           {
-                               for (int j=i; j<p->num_elements;j++)
-                                   p->list[j] = p->list[j+1];
-                           }
-                       }
-                   }
-               }    
+                   ODR_MASK_SET(initResponse->options,
+                                Z_Options_negotiationModel);
+                   ODR_MASK_SET(m_initRequest_options,
+                                Z_Options_negotiationModel);
+
+                   oi->which = Z_OtherInfo_externallyDefinedInfo;    
+                   oi->information.externallyDefinedInfo =
+                       yaz_set_response_charneg(
+                           odr_encode(),
+                           m_charset_converter->get_client_query_charset(),
+                           0 /* no lang */,
+                           m_charset_converter->get_client_charset_selected());
+               }
            }
        }
     }
 }
+
 Z_Records *Yaz_Proxy::create_nonSurrogateDiagnostics(ODR odr,
                                                     int error,
                                                     const char *addinfo)
@@ -1805,7 +1947,7 @@ Z_APDU *Yaz_Proxy::handle_query_transformation(Z_APDU *apdu)
     return apdu;
 }
 
-Z_APDU *Yaz_Proxy::handle_query_charset_conversion(Z_APDU *apdu)
+Z_APDU *Yaz_Proxy::handle_target_charset_conversion(Z_APDU *apdu)
 {
     if (apdu->which == Z_APDU_searchRequest &&
        apdu->u.searchRequest->query)
@@ -2636,7 +2778,7 @@ void Yaz_Proxy::handle_incoming_Z_PDU(Z_APDU *apdu)
        apdu = handle_query_transformation(apdu);
 
     if (apdu)
-       apdu = handle_query_charset_conversion(apdu);
+       apdu = handle_target_charset_conversion(apdu);
 
     if (apdu)
        apdu = handle_query_validation(apdu);