USMARC to MARCXML conversion. Check for reconfigure in more places
authorAdam Dickmeiss <adam@indexdata.dk>
Fri, 10 Oct 2003 17:58:28 +0000 (17:58 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Fri, 10 Oct 2003 17:58:28 +0000 (17:58 +0000)
TODO
doc/proxy.xml
include/yaz++/proxy.h
src/config.xml
src/yaz-proxy-config.cpp
src/yaz-proxy.cpp
src/yaz-z-cache.cpp

diff --git a/TODO b/TODO
index 184382f..066e9f9 100644 (file)
--- a/TODO
+++ b/TODO
@@ -1,4 +1,6 @@
 
+Control the various optimizations with config..
+
 Handle invalidate sessions (-1 hit, control-number search).
 
 MARCXML support.
index 90529f0..9d04a65 100644 (file)
    </para>
   </section>
   <section id="proxy-keepalive">
-   <title>Keep-alive Facility for Stateless Clients</title>
+   <title>Keep-alive Facility</title>
    <para>
-    Stateless clients such as web gateways may generate a cookie for a Z39.50
-    session which is sent to the proxy as part of PDUs. 
-    In this case, the proxy will keep alive its Z39.50 session
-    to the backend target even when the connection from the client
-    to the proxy is closed. When the client contacts the
-    proxy again, and re-issues the same cookie, the proxy reuses the
-    Z39.50 connection with the backend target.
+   The keep-alive is a facility where the proxy keeps the connection to the
+   backend - even if the client closes the connection to the proxy.
    </para>
    <para>
-    There is no
-    guarantee that the Z39.50 connection to the backend
-    target is kept forever: the proxy will shut it down after certain
-    idle time.
-    So in effect, the connection from the client's
-    point of view should be considered stateless, and the keep-alive
-    facility should be treated only as a performance booster.
+   If a new or another client connects to the proxy again and requests the
+   same backend it will be reassigned to this backend. In this case, the
+   proxy sends an initialize response directly to the client and an
+   initialize handshake with the backend is omitted.
    </para>
    <para>
-    Cookies may be passed in an
+   When a client reconnects, query and record caching works better, if the
+   proxy assigns it to the same backend as before. And the result set
+   (if any) is re-used. To achieve this, Index Data defined a session
+   cookie which identifies the backend session.
+   </para>
+   <para>
+   The cookie is defined by the client and is sent as part of the
+   Initialize Request and passed in an
     <link linkend="otherinfo-encoding"><literal>otherInfo</literal></link>
      element with OID <literal>1.2.840.10003.10.1000.81.2</literal>.
    </para>
+   <para>
+   Clients that do not send a cookie as part of the initialize request
+   may still get better performance, since the init handshake is saved.
+   </para>
   </section>
 
-  <section id="proxy-cache">
+  <section id="query-cache">
    <title>Query Caching</title>
    <para>
     Simple stateless clients often send identical Z39.50 searches
     You can enable/disable query caching using option -o.
    </para>
   </section>
+  
 
-  <section id="proxy-optimizations">
+  <section id="record-cache">
+  <title>Record Caching</title>
+  <para>
+  As an option, the proxy may also cache result set records for the
+  last search.
+  The proxy takes into account the Record Syntax and CompSpec.
+  The CompSpec includes simple element set names as well.
+  </para>
+  </section>
+
+  <section id="query-validation">
+  <title>Query Validation</title>
+  <para>
+  </para>
+  </section>
+
+  <section id="record-validation">
+  <title>Record Syntax Validation</title>
+  <para>
+  </para>
+  </section>
+
+  <section id="other-optimizations">
    <title>Other Optimizations</title>
-   <para>
-    We've had some plans to support caching of result set records,
+     <para>
+     We've had some plans to support global caching of result set records,
     but this has not yet been implemented.
    </para>
   </section>
index 4cd619c..1d0c12d 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 1998-2003, Index Data.
  * See the file LICENSE for details.
  * 
- * $Id: proxy.h,v 1.14 2003-10-09 12:11:09 adam Exp $
+ * $Id: proxy.h,v 1.15 2003-10-10 17:58:29 adam Exp $
  */
 
 #include <yaz++/z-assoc.h>
@@ -30,11 +30,11 @@ public:
                         int *target_idletime, int *client_idletime,
                         int *max_clients,
                         int *keepalive_limit_bw, int *keepalive_limit_pdu);
-    void operator=(const Yaz_ProxyConfig &conf);
     int check_query(ODR odr, const char *name, Z_Query *query, char **addinfo);
     int check_syntax(ODR odr, const char *name,
                     Odr_oid *syntax, char **addinfo);
 private:
+    void operator=(const Yaz_ProxyConfig &conf);
 #if HAVE_XML2
     xmlDocPtr m_docPtr;
     xmlNodePtr m_proxyPtr;
@@ -159,7 +159,7 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc {
     char *m_optimize;
     int m_session_no;         // sequence for each client session
     char m_session_str[30];  // session string (time:session_no)
-    Yaz_ProxyConfig m_config;
+    Yaz_ProxyConfig *m_config;
     char *m_config_fname;
     int m_bytes_sent;
     int m_bytes_recv;
@@ -178,9 +178,11 @@ class YAZ_EXPORT Yaz_Proxy : public Yaz_Z_Assoc {
     Z_APDU *handle_syntax_validation(Z_APDU *apdu);
     const char *load_balance(const char **url);
     int m_reconfig_flag;
-    void check_reconfigure();
+    Yaz_ProxyConfig *check_reconfigure();
     int m_request_no;
     int m_invalid_session;
+    int m_marcxml_flag;
+    void convert_to_marcxml(Z_NamePlusRecordList *p);
  public:
     Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable);
     ~Yaz_Proxy();
index 82954da..efe5126 100644 (file)
     <target-timeout>180</target-timeout>
     <client-timeout>120</client-timeout>
     <syntax type="usmarc"/>
+    <syntax type="xml" marcxml="1"/>
     <syntax type="*" error="238"/>
   </target>
   <target name="localhost">
     <url>localhost:9999</url>
-    <url>localhost:9998</url>
     <keepalive/> <!-- keepalive enabled -->
     <limit><!-- limits .. -->
       <bandwidth>50000</bandwidth>
-      <pdu>11</pdu>
+      <pdu>60</pdu>
       <retrieve>10</retrieve>
     </limit>
     <attribute type="1" value="10,1023-9000" error="114"/>
     <syntax type="usmarc"/>
     <syntax type="grs1"/>
-    <syntax type="xml"/>
+    <syntax type="xml" marcxml="1"/>
     <syntax type="*" error="238"/>
   </target>
   <target name="*">
index 84a901a..056354b 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 1998-2003, Index Data.
  * See the file LICENSE for details.
  * 
- * $Id: yaz-proxy-config.cpp,v 1.7 2003-10-09 12:11:10 adam Exp $
+ * $Id: yaz-proxy-config.cpp,v 1.8 2003-10-10 17:58:29 adam Exp $
  */
 
 #include <ctype.h>
@@ -26,15 +26,6 @@ Yaz_ProxyConfig::~Yaz_ProxyConfig()
 #endif
 }
 
-void Yaz_ProxyConfig::operator=(const Yaz_ProxyConfig &conf)
-{
-#if HAVE_XML2
-    m_docPtr = conf.m_docPtr;
-    m_proxyPtr = conf.m_proxyPtr;
-#endif
-    m_copy = 1;
-}
-
 int Yaz_ProxyConfig::read_xml(const char *fname)
 {
 #if HAVE_XML2
@@ -348,6 +339,7 @@ int Yaz_ProxyConfig::check_syntax(ODR odr, const char *name,
            int match = 0;  // if we match record syntax
            const char *match_type = 0;
            const char *match_error = 0;
+           const char *match_marcxml = 0;
            struct _xmlAttr *attr;
            for (attr = ptr->properties; attr; attr = attr->next)
            {
@@ -357,6 +349,9 @@ int Yaz_ProxyConfig::check_syntax(ODR odr, const char *name,
                if (!strcmp((const char *) attr->name, "error") &&
                    attr->children && attr->children->type == XML_TEXT_NODE)
                    match_error = (const char *) attr->children->content;
+               if (!strcmp((const char *) attr->name, "marcxml") &&
+                   attr->children && attr->children->type == XML_TEXT_NODE)
+                   match_marcxml = (const char *) attr->children->content;
            }
            if (match_type)
            {
@@ -377,6 +372,10 @@ int Yaz_ProxyConfig::check_syntax(ODR odr, const char *name,
            }
            if (match)
            {
+               if (match_marcxml)
+               {
+                   return -1;
+               }
                if (match_error)
                {
                    if (syntax)
index d80d24e..a559383 100644 (file)
@@ -2,12 +2,14 @@
  * Copyright (c) 1998-2003, Index Data.
  * See the file LICENSE for details.
  * 
- * $Id: yaz-proxy.cpp,v 1.55 2003-10-10 12:37:26 adam Exp $
+ * $Id: yaz-proxy.cpp,v 1.56 2003-10-10 17:58:29 adam Exp $
  */
 
 #include <assert.h>
 #include <time.h>
 
+#include <yaz/marcdisp.h>
+#include <yaz/yaz-iconv.h>
 #include <yaz/log.h>
 #include <yaz/diagbib1.h>
 #include <yaz++/proxy.h>
@@ -79,6 +81,8 @@ Yaz_Proxy::Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable) :
     m_config_fname = 0;
     m_request_no = 0;
     m_invalid_session = 0;
+    m_config = 0;
+    m_marcxml_flag = 0;
 }
 
 Yaz_Proxy::~Yaz_Proxy()
@@ -89,15 +93,16 @@ Yaz_Proxy::~Yaz_Proxy()
     xfree (m_default_target);
     xfree (m_proxy_authentication);
     xfree (m_optimize);
-    if (m_parent)
-       m_parent->check_reconfigure();
+    delete m_config;
 }
 
 int Yaz_Proxy::set_config(const char *config)
 {
+    delete m_config;
+    m_config = new Yaz_ProxyConfig();
     xfree(m_config_fname);
     m_config_fname = xstrdup(config);
-    int r = m_config.read_xml(config);
+    int r = m_config->read_xml(config);
     return r;
 }
 
@@ -117,16 +122,20 @@ void Yaz_Proxy::set_proxy_authentication (const char *auth)
        m_proxy_authentication = (char *) xstrdup (auth);
 }
 
-void Yaz_Proxy::check_reconfigure()
+Yaz_ProxyConfig *Yaz_Proxy::check_reconfigure()
 {
+    if (m_parent)
+       return m_parent->check_reconfigure();
+
+    Yaz_ProxyConfig *cfg = m_config;
     if (m_reconfig_flag)
     {
        yaz_log(LOG_LOG, "reconfigure");
        yaz_log_reopen();
-       if (m_config_fname)
+       if (m_config_fname && cfg)
        {
            yaz_log(LOG_LOG, "reconfigure config %s", m_config_fname);
-           int r = m_config.read_xml(m_config_fname);
+           int r = cfg->read_xml(m_config_fname);
            if (r)
                yaz_log(LOG_WARN, "reconfigure failed");
        }
@@ -134,6 +143,7 @@ void Yaz_Proxy::check_reconfigure()
            yaz_log(LOG_LOG, "reconfigure");
        m_reconfig_flag = 0;
     }
+    return cfg;
 }
 
 IYaz_PDU_Observer *Yaz_Proxy::sessionNotify(IYaz_PDU_Observable
@@ -142,7 +152,7 @@ IYaz_PDU_Observer *Yaz_Proxy::sessionNotify(IYaz_PDU_Observable
     check_reconfigure();
     Yaz_Proxy *new_proxy = new Yaz_Proxy(the_PDU_Observable);
     new_proxy->m_parent = this;
-    new_proxy->m_config = m_config;
+    new_proxy->m_config = 0;
     new_proxy->m_config_fname = 0;
     new_proxy->timeout(m_client_idletime);
     new_proxy->m_target_idletime = m_target_idletime;
@@ -231,19 +241,21 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu)
     {
        const char *url[MAX_ZURL_PLEX];
        const char *proxy_host = get_proxy(oi);
+       Yaz_ProxyConfig *cfg = check_reconfigure();
        if (proxy_host)
        {
            xfree(m_default_target);
            m_default_target = xstrdup(proxy_host);
            proxy_host = m_default_target;
        }
-       
        int client_idletime = -1;
-       m_config.get_target_info(proxy_host, url, &m_bw_max,
+       if (cfg)
+           cfg->get_target_info(proxy_host, url, &m_bw_max,
                                 &m_pdu_max, &m_max_record_retrieve,
                                 &m_target_idletime, &client_idletime,
                                 &parent->m_max_clients,
-                                &m_keepalive_limit_bw, &m_keepalive_limit_pdu);
+                                &m_keepalive_limit_bw,
+                                &m_keepalive_limit_pdu);
        if (client_idletime != -1)
        {
            m_client_idletime = client_idletime;
@@ -489,6 +501,67 @@ void Yaz_Proxy::display_diagrecs(Z_DiagRec **pp, int num)
     }
 }
 
+void Yaz_Proxy::convert_to_marcxml(Z_NamePlusRecordList *p)
+{
+    int i;
+
+    yaz_marc_t mt = yaz_marc_create();
+    yaz_marc_xml(mt, YAZ_MARC_MARCXML);
+    for (i = 0; i < p->num_records; i++)
+    {
+       Z_NamePlusRecord *npr = p->records[i];
+       if (npr->which == Z_NamePlusRecord_databaseRecord)
+       {
+           Z_External *r = npr->u.databaseRecord;
+           if (r->which == Z_External_octet)
+           {
+               int rlen;
+               char *result;
+               if (yaz_marc_decode_buf(mt, (char*) r->u.octet_aligned->buf,
+                                       r->u.octet_aligned->len,
+                                       &result, &rlen))
+               {
+                   yaz_iconv_t cd = yaz_iconv_open("UTF-8", "MARC-8");
+                   WRBUF wrbuf = wrbuf_alloc();
+                   
+                   char outbuf[120];
+                   size_t inbytesleft = rlen;
+                   const char *inp = result;
+                   while (cd && inbytesleft)
+                   {
+                       size_t outbytesleft = sizeof(outbuf);
+                       char *outp = outbuf;
+                       size_t r;
+                       
+                       r = yaz_iconv (cd, (char**) &inp,
+                                      &inbytesleft,
+                                      &outp, &outbytesleft);
+                       if (r == (size_t) (-1))
+                       {
+                           int e = yaz_iconv_error(cd);
+                           if (e != YAZ_ICONV_E2BIG)
+                           {
+                               yaz_log(LOG_WARN, "conversion failure");
+                               break;
+                           }
+                       }
+                       wrbuf_write(wrbuf, outbuf, outp - outbuf);
+                   }
+                   if (cd)
+                       yaz_iconv_close(cd);
+
+                   npr->u.databaseRecord = z_ext_record(odr_encode(),
+                                                        VAL_TEXT_XML,
+                                                        wrbuf_buf(wrbuf),
+                                                        wrbuf_len(wrbuf));
+                   wrbuf_free(wrbuf, 1);
+               }
+           }
+       }
+    }
+    yaz_marc_destroy(mt);
+}
+
 int Yaz_Proxy::send_to_client(Z_APDU *apdu)
 {
     int len = 0;
@@ -506,6 +579,8 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu)
        }
        else
        {
+           if (m_marcxml_flag && p && p->which == Z_Records_DBOSD)
+               convert_to_marcxml(p->u.databaseOrSurDiagnostics);
            if (sr->resultCount)
            {
                yaz_log(LOG_LOG, "%s%d hits", m_session_str,
@@ -527,6 +602,8 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu)
 
            display_diagrecs(&dr_p, 1);
        }
+       if (m_marcxml_flag && p && p->which == Z_Records_DBOSD)
+           convert_to_marcxml(p->u.databaseOrSurDiagnostics);
     }
     int r = send_Z_PDU(apdu, &len);
     yaz_log (LOG_DEBUG, "%sSending %s to client %d bytes", m_session_str,
@@ -597,7 +674,7 @@ Z_APDU *Yaz_Proxy::result_set_optimize(Z_APDU *apdu)
     
     this_query->set_Z_Query(sr->query);
 
-    char query_str[80];
+    char query_str[120];
     this_query->print(query_str, sizeof(query_str)-1);
     yaz_log(LOG_LOG, "%sQuery %s", m_session_str, query_str);
 
@@ -845,10 +922,13 @@ Z_APDU *Yaz_Proxy::handle_query_validation(Z_APDU *apdu)
     if (apdu->which == Z_APDU_searchRequest)
     {
        Z_SearchRequest *sr = apdu->u.searchRequest;
-       int err;
+       int err = 0;
        char *addinfo = 0;
-       err = m_config.check_query(odr_encode(), m_default_target, sr->query,
-                                  &addinfo);
+
+       Yaz_ProxyConfig *cfg = check_reconfigure();
+       if (cfg)
+           err = cfg->check_query(odr_encode(), m_default_target,
+                                  sr->query, &addinfo);
        if (err)
        {
            Z_APDU *new_apdu = create_Z_PDU(Z_APDU_searchResponse);
@@ -868,17 +948,29 @@ Z_APDU *Yaz_Proxy::handle_query_validation(Z_APDU *apdu)
 
 Z_APDU *Yaz_Proxy::handle_syntax_validation(Z_APDU *apdu)
 {
+    m_marcxml_flag = 0;
     if (apdu->which == Z_APDU_searchRequest)
     {
        Z_SearchRequest *sr = apdu->u.searchRequest;
        if (*sr->smallSetUpperBound > 0 || *sr->largeSetLowerBound > 1)
        {
-           int err;
+           int err = 0;
            char *addinfo = 0;
-           err = m_config.check_syntax(odr_encode(), m_default_target,
+           Yaz_ProxyConfig *cfg = check_reconfigure();
+
+           if (cfg)
+               err = cfg->check_syntax(odr_encode(),
+                                       m_default_target,
                                        sr->preferredRecordSyntax,
                                        &addinfo);
-           if (err)
+           if (err == -1)
+           {
+               sr->preferredRecordSyntax =
+                   yaz_oidval_to_z3950oid(odr_decode(), CLASS_RECSYN,
+                                          VAL_USMARC);
+               m_marcxml_flag = 1;
+           }
+           else if (err)
            {
                Z_APDU *new_apdu = create_Z_PDU(Z_APDU_searchResponse);
                
@@ -896,12 +988,22 @@ Z_APDU *Yaz_Proxy::handle_syntax_validation(Z_APDU *apdu)
     else if (apdu->which == Z_APDU_presentRequest)
     {
        Z_PresentRequest *pr = apdu->u.presentRequest;
-       int err;
+       int err = 0;
        char *addinfo = 0;
-       err = m_config.check_syntax(odr_encode(), m_default_target,
+       Yaz_ProxyConfig *cfg = check_reconfigure();
+
+       if (cfg)
+           err = cfg->check_syntax(odr_encode(), m_default_target,
                                    pr->preferredRecordSyntax,
                                    &addinfo);
-       if (err)
+       if (err == -1)
+       {
+           pr->preferredRecordSyntax =
+               yaz_oidval_to_z3950oid(odr_decode(), CLASS_RECSYN,
+                                      VAL_USMARC);
+           m_marcxml_flag = 1;
+       }
+       else if (err)
        {
            Z_APDU *new_apdu = create_Z_PDU(Z_APDU_presentResponse);
            
@@ -1044,7 +1146,7 @@ void Yaz_Proxy::shutdown()
 const char *Yaz_ProxyClient::get_session_str() 
 {
     if (!m_server)
-       return "0";
+       return "0 ";
     return m_server->get_session_str();
 }
 
index 1468b7d..975aee9 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (c) 2002-2003, Index Data.
  * See the file LICENSE for details.
  * 
- * $Id: yaz-z-cache.cpp,v 1.6 2003-10-08 08:52:59 adam Exp $
+ * $Id: yaz-z-cache.cpp,v 1.7 2003-10-10 17:58:30 adam Exp $
  */
 
 #include <yaz/log.h>
@@ -87,7 +87,10 @@ void Yaz_RecordCache::add (ODR o, Z_NamePlusRecordList *npr, int start,
                           int hits)
 {
     if (nmem_total(m_mem) > m_max_size)
+    {
+       yaz_log(LOG_LOG, "cache size");
        return;
+    }
     // Build appropriate compspec for this response
     Z_RecordComposition *comp = 0;
     if (hits == -1 && m_presentRequest)
@@ -116,7 +119,11 @@ void Yaz_RecordCache::add (ODR o, Z_NamePlusRecordList *npr, int start,
     {
        Yaz_RecordCache_Entry *entry = (Yaz_RecordCache_Entry *)
            nmem_malloc(m_mem, sizeof(*entry));
-       entry->m_record = npr->records[i];
+       entry->m_record = (Z_NamePlusRecord *)
+           nmem_malloc(m_mem, sizeof(*entry->m_record));
+       entry->m_record->databaseName = npr->records[i]->databaseName;
+       entry->m_record->which = npr->records[i]->which;
+       entry->m_record->u.databaseRecord  = npr->records[i]->u.databaseRecord;
        entry->m_comp = comp;
        entry->m_offset = i + start;
        entry->m_next = m_entries;
@@ -150,7 +157,6 @@ int Yaz_RecordCache::match (Yaz_RecordCache_Entry *entry,
     odr_destroy(o2);
     if (!match)
        return 0;
-
     if (!syntax)
        return 0;
     // See if offset, OID match..
@@ -159,6 +165,14 @@ int Yaz_RecordCache::match (Yaz_RecordCache_Entry *entry,
        !oid_oidcmp(entry->m_record->u.databaseRecord->direct_reference,
                    syntax))
        return 1;
+#if 0
+    char mstr1[100];
+    oid_to_dotstring(entry->m_record->u.databaseRecord->direct_reference, mstr1);
+    char mstr2[100];
+    oid_to_dotstring(syntax, mstr2);
+    yaz_log(LOG_LOG, "match fail 3 d=%s s=%s", mstr1, mstr2);
+#endif
+
     return 0;
 }
 
@@ -191,7 +205,12 @@ int Yaz_RecordCache::lookup (ODR o, Z_NamePlusRecordList **npr,
                break;
        if (!entry)
            return 0;
-       (*npr)->records[i] = entry->m_record;
+       (*npr)->records[i] = (Z_NamePlusRecord *)
+           odr_malloc(o, sizeof(Z_NamePlusRecord));
+       (*npr)->records[i]->databaseName = entry->m_record->databaseName;
+       (*npr)->records[i]->which = entry->m_record->which;
+       (*npr)->records[i]->u.databaseRecord =
+           entry->m_record->u.databaseRecord;
     }
     return 1;
 }