Properly initialize libxml2
[yazpp-moved-to-github.git] / src / yaz-proxy.cpp
index a4257a3..4dbcc43 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (c) 1998-2003, Index Data.
+ * Copyright (c) 1998-2004, Index Data.
  * See the file LICENSE for details.
  * 
- * $Id: yaz-proxy.cpp,v 1.74 2003-12-22 19:01:34 adam Exp $
+ * $Id: yaz-proxy.cpp,v 1.87 2004-01-12 21:02:42 adam Exp $
  */
 
 #include <assert.h>
 #include <yaz++/proxy.h>
 #include <yaz/pquery.h>
 
+#if HAVE_XSLT
+#include <libxslt/xsltutils.h>
+#include <libxslt/transform.h>
+#endif
+
 static const char *apdu_name(Z_APDU *apdu)
 {
     switch (apdu->which)
@@ -101,6 +106,9 @@ Yaz_Proxy::Yaz_Proxy(IYaz_PDU_Observable *the_PDU_Observable,
     m_invalid_session = 0;
     m_config = 0;
     m_marcxml_flag = 0;
+    m_stylesheet_schema = 0;
+    m_s2z_stylesheet = 0;
+    m_schema = 0;
     m_initRequest_apdu = 0;
     m_initRequest_mem = 0;
     m_apdu_invalid_session = 0;
@@ -128,6 +136,8 @@ Yaz_Proxy::~Yaz_Proxy()
     xfree (m_default_target);
     xfree (m_proxy_authentication);
     xfree (m_optimize);
+    xfree (m_stylesheet_schema);
+    xfree (m_schema);
     if (m_s2z_odr_init)
        odr_destroy(m_s2z_odr_init);
     if (m_s2z_odr_search)
@@ -283,7 +293,6 @@ Yaz_ProxyClient *Yaz_Proxy::get_client(Z_APDU *apdu, const char *cookie,
 {
     assert (m_parent);
     Yaz_Proxy *parent = m_parent;
-    Z_OtherInformation **oi;
     Yaz_ProxyClient *c = m_client;
     
     if (!m_proxyTarget)
@@ -572,6 +581,44 @@ void Yaz_Proxy::display_diagrecs(Z_DiagRec **pp, int num)
     }
 }
 
+/*
+ * Run every octet-aligned database record in p through the XSLT
+ * stylesheet file named by m_stylesheet_schema and replace the record
+ * with the serialized result, typed as VAL_TEXT_XML and allocated on
+ * odr_encode().
+ *
+ * Nothing is done when no stylesheet is set or when the build lacks
+ * XSLT support (HAVE_XSLT).  If the stylesheet cannot be loaded, or a
+ * record cannot be parsed or transformed, the affected record is left
+ * untouched rather than passing null pointers on to libxslt/libxml2.
+ */
+void Yaz_Proxy::convert_xsl(Z_NamePlusRecordList *p)
+{
+#if HAVE_XSLT
+    if (!m_stylesheet_schema)
+       return;
+
+    xsltStylesheetPtr xsp =
+       xsltParseStylesheetFile((const xmlChar *) m_stylesheet_schema);
+    if (!xsp)
+    {
+       // Missing or malformed stylesheet: no record can be converted.
+       yaz_log(LOG_LOG, "%sUnable to load stylesheet %s", m_session_str,
+               m_stylesheet_schema);
+       return;
+    }
+
+    int i;
+    for (i = 0; i < p->num_records; i++)
+    {
+       Z_NamePlusRecord *npr = p->records[i];
+       if (npr->which != Z_NamePlusRecord_databaseRecord)
+           continue;
+       Z_External *r = npr->u.databaseRecord;
+       if (r->which != Z_External_octet)
+           continue;
+
+       xmlDocPtr doc = xmlParseMemory((char*) r->u.octet_aligned->buf,
+                                      r->u.octet_aligned->len);
+       if (!doc)
+           continue;           // not parseable as XML: keep record as is
+
+       xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0);
+       if (res)
+       {
+           xmlChar *out_buf = 0;
+           int out_len = 0;
+           xmlDocDumpMemory(res, &out_buf, &out_len);
+           if (out_buf)
+           {
+               // The libxml2 buffer is released right after this call,
+               // as before; NOTE(review): relies on z_ext_record
+               // copying the bytes onto the ODR stream -- confirm.
+               npr->u.databaseRecord =
+                   z_ext_record(odr_encode(), VAL_TEXT_XML,
+                                (char*) out_buf, out_len);
+               xmlFree(out_buf);
+           }
+           xmlFreeDoc(res);
+       }
+       xmlFreeDoc(doc);
+    }
+    xsltFreeStylesheet(xsp);
+#endif
+}
+
 void Yaz_Proxy::convert_to_marcxml(Z_NamePlusRecordList *p)
 {
     int i;
@@ -652,7 +699,6 @@ void Yaz_Proxy::logtime()
 int Yaz_Proxy::send_http_response(int code)
 {
     ODR o = odr_encode();
-    const char *ctype = "text/xml";
     Z_GDU *gdu = z_get_HTTP_Response(o, code);
     Z_HTTP_Response *hres = gdu->u.HTTP_Response;
     if (m_http_version)
@@ -682,7 +728,7 @@ int Yaz_Proxy::send_srw_response(Z_SRW_PDU *srw_pdu)
         z_HTTP_header_add(o, &hres->headers, "Connection", "Keep-Alive");
 
     static Z_SOAP_Handler soap_handlers[2] = {
-#if HAVE_XML2
+#if HAVE_XSLT
        {"http://www.loc.gov/zing/srw/", 0,
         (Z_SOAP_fun) yaz_srw_codec},
 #endif
@@ -697,9 +743,9 @@ int Yaz_Proxy::send_srw_response(Z_SRW_PDU *srw_pdu)
     soap_package->u.generic->ns = soap_handlers[0].ns;
     soap_package->u.generic->p = (void *) srw_pdu;
     soap_package->ns = m_soap_ns;
-    int ret = z_soap_codec_enc(o, &soap_package,
-                              &hres->content_buf, &hres->content_len,
-                              soap_handlers, 0);
+    z_soap_codec_enc_xsl(o, &soap_package,
+                        &hres->content_buf, &hres->content_len,
+                        soap_handlers, 0, m_s2z_stylesheet);
     if (m_log_mask & PROXY_LOG_REQ_CLIENT)
     {
        yaz_log (LOG_LOG, "%sSending %s to client", m_session_str,
@@ -711,7 +757,7 @@ int Yaz_Proxy::send_srw_response(Z_SRW_PDU *srw_pdu)
     return r;
 }
 
-int Yaz_Proxy::send_to_srw_client_error(int srw_error)
+int Yaz_Proxy::send_to_srw_client_error(int srw_error, const char *add)
 {
     ODR o = odr_encode();
     Z_SRW_PDU *srw_pdu = yaz_srw_get(o, Z_SRW_searchRetrieve_response);
@@ -721,7 +767,7 @@ int Yaz_Proxy::send_to_srw_client_error(int srw_error)
     srw_res->diagnostics = (Z_SRW_diagnostic *)
        odr_malloc(o, sizeof(*srw_res->diagnostics));
     srw_res->diagnostics[0].code =  odr_intdup(o, srw_error);
-    srw_res->diagnostics[0].details = 0;
+    srw_res->diagnostics[0].details = add ? odr_strdup(o, add) : 0;
     return send_srw_response(srw_pdu);
 }
 
@@ -770,7 +816,7 @@ int Yaz_Proxy::send_to_srw_client_ok(int hits, Z_Records *records, int start)
            oident *ent = oid_getentbyoid(r->direct_reference);
            if (r->which == Z_External_octet && ent->value == VAL_TEXT_XML)
            {
-               srw_res->records[i].recordSchema = "http://www.loc.gov/marcxml/";
+               srw_res->records[i].recordSchema = m_schema;
                srw_res->records[i].recordPacking = m_s2z_packing;
                srw_res->records[i].recordData_buf = (char*) 
                    r->u.octet_aligned->buf;
@@ -799,7 +845,8 @@ int Yaz_Proxy::send_to_srw_client_ok(int hits, Z_Records *records, int start)
     
 }
 
-int Yaz_Proxy::send_srw_explain()
+int Yaz_Proxy::send_srw_explain_response(Z_SRW_diagnostic *diagnostics,
+                                       int num_diagnostics)
 {
     Z_SRW_PDU *res = yaz_srw_get(odr_encode(), Z_SRW_explain_response);
     Z_SRW_explainResponse *er = res->u.explain_response;
@@ -808,7 +855,6 @@ int Yaz_Proxy::send_srw_explain()
     if (cfg)
     {
        int len;
-       assert (m_proxyTarget);
        char *b = cfg->get_explain(odr_encode(), 0 /* target */,
                                   0 /* db */, &len);
        if (b)
@@ -818,6 +864,8 @@ int Yaz_Proxy::send_srw_explain()
            er->record.recordPacking = m_s2z_packing;
        }
     }
+    er->diagnostics = diagnostics;
+    er->num_diagnostics = num_diagnostics;
     return send_srw_response(res);
 }
 
@@ -830,11 +878,11 @@ int Yaz_Proxy::send_PDU_convert(Z_APDU *apdu, int *len)
            Z_InitResponse *res = apdu->u.initResponse;
            if (*res->result == 0)
            {
-               send_to_srw_client_error(3);
+               send_to_srw_client_error(3, 0);
            }
            else if (!m_s2z_search_apdu)
            {
-               send_srw_explain();
+               send_srw_explain_response(0, 0);
            }
            else
            {
@@ -852,6 +900,16 @@ int Yaz_Proxy::send_PDU_convert(Z_APDU *apdu, int *len)
            }
            else if (m_s2z_present_apdu)
            {
+               // adjust 
+               Z_PresentRequest *pr = m_s2z_present_apdu->u.presentRequest;
+               
+               if (*pr->resultSetStartPoint <= m_s2z_hit_count)
+               {
+                   if (*pr->numberOfRecordsRequested+ *pr->resultSetStartPoint
+                       > m_s2z_hit_count)
+                       *pr->numberOfRecordsRequested =
+                           1 + m_s2z_hit_count - *pr->resultSetStartPoint;
+               }
                handle_incoming_Z_PDU(m_s2z_present_apdu);
            }
            else
@@ -900,8 +958,12 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu)
        }
        else
        {
-           if (m_marcxml_flag && p && p->which == Z_Records_DBOSD)
-               convert_to_marcxml(p->u.databaseOrSurDiagnostics);
+           if (p && p->which == Z_Records_DBOSD)
+           {
+               if (m_marcxml_flag)
+                   convert_to_marcxml(p->u.databaseOrSurDiagnostics);
+               convert_xsl(p->u.databaseOrSurDiagnostics);
+           }
            if (sr->resultCount)
            {
                yaz_log(LOG_LOG, "%s%d hits", m_session_str,
@@ -932,8 +994,12 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu)
                *sr->presentStatus = Z_PresentStatus_failure;
            display_diagrecs(&dr_p, 1);
        }
-       if (m_marcxml_flag && p && p->which == Z_Records_DBOSD)
-           convert_to_marcxml(p->u.databaseOrSurDiagnostics);
+       if (p && p->which == Z_Records_DBOSD)
+       {
+           if (m_marcxml_flag)
+               convert_to_marcxml(p->u.databaseOrSurDiagnostics);
+           convert_xsl(p->u.databaseOrSurDiagnostics);
+       }
     }
     int r = send_PDU_convert(apdu, &len);
     if (r)
@@ -964,11 +1030,12 @@ int Yaz_Proxy::send_to_client(Z_APDU *apdu)
 int Yaz_ProxyClient::send_to_target(Z_APDU *apdu)
 {
     int len = 0;
+    const char *apdu_name_tmp = apdu_name(apdu); // name is fetched before send_Z_PDU() runs; NOTE(review): presumably apdu is not safe to inspect afterwards -- confirm
     int r = send_Z_PDU(apdu, &len);
     if (m_root->get_log_mask() & PROXY_LOG_REQ_SERVER)
        yaz_log (LOG_LOG, "%sSending %s to %s %d bytes",
                 get_session_str(),
-                apdu_name(apdu), get_hostname(), len);
+                apdu_name_tmp, get_hostname(), len); // logs the name captured before the send
     m_bytes_sent += len;
     return r;
 }
@@ -1307,16 +1374,32 @@ Z_APDU *Yaz_Proxy::handle_query_transformation(Z_APDU *apdu)
     {
        Z_RPNQuery *rpnquery = 0;
        Z_SearchRequest *sr = apdu->u.searchRequest;
+       char *addinfo = 0;
        
        yaz_log(LOG_LOG, "%sCQL: %s", m_session_str,
                sr->query->u.type_104->u.cql);
 
        int r = m_cql2rpn.query_transform(sr->query->u.type_104->u.cql,
-                                         &rpnquery, odr_encode());
+                                         &rpnquery, odr_encode(),
+                                         &addinfo);
        if (r == -3)
            yaz_log(LOG_LOG, "%sNo CQL to RPN table", m_session_str);
        else if (r)
+       {
            yaz_log(LOG_LOG, "%sCQL Conversion error %d", m_session_str, r);
+           Z_APDU *new_apdu = create_Z_PDU(Z_APDU_searchResponse);
+
+           new_apdu->u.searchResponse->referenceId = sr->referenceId;
+           new_apdu->u.searchResponse->records =
+               create_nonSurrogateDiagnostics(odr_encode(),
+                                              yaz_diag_srw_to_bib1(r),
+                                              addinfo);
+           *new_apdu->u.searchResponse->searchStatus = 0;
+
+           send_to_client(new_apdu);
+
+           return 0;
+       }
        else
        {
            sr->query->which = Z_Query_type_1;
@@ -1365,17 +1448,24 @@ Z_APDU *Yaz_Proxy::handle_syntax_validation(Z_APDU *apdu)
        int err = 0;
        char *addinfo = 0;
        Yaz_ProxyConfig *cfg = check_reconfigure();
-       
+
+       Z_RecordComposition rc_temp, *rc = 0;
+       if (sr->smallSetElementSetNames)
+       {
+           rc_temp.which = Z_RecordComp_simple;
+           rc_temp.u.simple = sr->smallSetElementSetNames;
+           rc = &rc_temp;
+       }
+           
        if (cfg)
            err = cfg->check_syntax(odr_encode(),
                                    m_default_target,
-                                   sr->preferredRecordSyntax,
-                                   &addinfo);
+                                   sr->preferredRecordSyntax, rc,
+                                   &addinfo, &m_stylesheet_schema, &m_schema);
        if (err == -1)
        {
            sr->preferredRecordSyntax =
-               yaz_oidval_to_z3950oid(odr_decode(), CLASS_RECSYN,
-                                      VAL_USMARC);
+               yaz_oidval_to_z3950oid(odr_encode(), CLASS_RECSYN, VAL_USMARC);
            m_marcxml_flag = 1;
        }
        else if (err)
@@ -1402,12 +1492,12 @@ Z_APDU *Yaz_Proxy::handle_syntax_validation(Z_APDU *apdu)
        if (cfg)
            err = cfg->check_syntax(odr_encode(), m_default_target,
                                    pr->preferredRecordSyntax,
-                                   &addinfo);
+                                   pr->recordComposition,
+                                   &addinfo, &m_stylesheet_schema, &m_schema);
        if (err == -1)
        {
            pr->preferredRecordSyntax =
-               yaz_oidval_to_z3950oid(odr_decode(), CLASS_RECSYN,
-                                      VAL_USMARC);
+               yaz_oidval_to_z3950oid(odr_decode(), CLASS_RECSYN, VAL_USMARC);
            m_marcxml_flag = 1;
        }
        else if (err)
@@ -1428,10 +1518,20 @@ Z_APDU *Yaz_Proxy::handle_syntax_validation(Z_APDU *apdu)
     return apdu;
 }
 
+/*
+ * Wrap a schema name in a generic Z_ElementSetNames.  Both the
+ * structure and the copy of the name are allocated on ODR stream o.
+ * Returns 0 when schema is null.
+ */
+Z_ElementSetNames *Yaz_Proxy::mk_esn_from_schema(ODR o, const char *schema)
+{
+    Z_ElementSetNames *result = 0;
+    if (schema)
+    {
+       result = (Z_ElementSetNames *) odr_malloc(o, sizeof(*result));
+       result->which = Z_ElementSetNames_generic;
+       result->u.generic = odr_strdup(o, schema);
+    }
+    return result;
+}
+
 void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
 {
-    Z_SRW_PDU *srw_pdu = 0;
-    char *soap_ns = 0;
+
     if (m_s2z_odr_init)
     {
        odr_destroy(m_s2z_odr_init);
@@ -1464,19 +1564,55 @@ void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
         m_http_version = "1.1";
     }
 
-    if (yaz_check_for_srw(hreq, &srw_pdu, &soap_ns, odr_decode()) == 0
-       || yaz_check_for_sru(hreq, &srw_pdu, &soap_ns, odr_decode()) == 0)
+    Z_SRW_PDU *srw_pdu = 0;
+    Z_SOAP *soap_package = 0;
+    char *charset = 0;
+    Z_SRW_diagnostic *diagnostic = 0;
+    int num_diagnostic = 0;
+    if (yaz_srw_decode(hreq, &srw_pdu, &soap_package, odr_decode(),
+                      &charset) == 0
+       || yaz_sru_decode(hreq, &srw_pdu, &soap_package, odr_decode(),
+                         &charset, &diagnostic, &num_diagnostic) == 0)
     {
        m_s2z_odr_init = odr_createmem(ODR_ENCODE);
        m_s2z_odr_search = odr_createmem(ODR_ENCODE);
-       m_soap_ns = odr_strdup(m_s2z_odr_search, soap_ns);
+       m_soap_ns = odr_strdup(m_s2z_odr_search, soap_package->ns);
        m_s2z_init_apdu = 0;
        m_s2z_search_apdu = 0;
        m_s2z_present_apdu = 0;
+
+       m_s2z_stylesheet = 0;
+       
        if (srw_pdu->which == Z_SRW_searchRetrieve_request)
        {
            Z_SRW_searchRetrieveRequest *srw_req = srw_pdu->u.request;
 
+           // recordXPath unsupported.
+           if (srw_req->recordXPath)
+            {
+               yaz_add_srw_diagnostic(odr_decode(),
+                                      &diagnostic, &num_diagnostic,
+                                      72, 0);
+            }
+           // must have a query
+           if (!srw_req->query.cql)
+           {
+               yaz_add_srw_diagnostic(odr_decode(),
+                                      &diagnostic, &num_diagnostic,
+                                      7, "query");
+           }
+           // sort unsupported
+           if (srw_req->sort_type != Z_SRW_sort_type_none)
+           {
+               yaz_add_srw_diagnostic(odr_decode(),
+                                      &diagnostic, &num_diagnostic,
+                                      80, 0);
+           }
+           // save stylesheet
+           if (srw_req->stylesheet)
+               m_s2z_stylesheet =
+                   odr_strdup(m_s2z_odr_init, srw_req->stylesheet);
+                                             
            // set packing for response records ..
            if (srw_req->recordPacking &&
                !strcmp(srw_req->recordPacking, "xml"))
@@ -1484,6 +1620,19 @@ void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
            else
                m_s2z_packing = Z_SRW_recordPacking_string;
 
+           if (num_diagnostic)
+           {
+               Z_SRW_PDU *srw_pdu =
+                   yaz_srw_get(odr_encode(),
+                               Z_SRW_searchRetrieve_response);
+               Z_SRW_searchRetrieveResponse *srw_res = srw_pdu->u.response;
+               
+               srw_res->diagnostics = diagnostic;
+               srw_res->num_diagnostics = num_diagnostic;
+               send_srw_response(srw_pdu);
+               return;
+           }
+
            // prepare search PDU
            m_s2z_search_apdu = zget_APDU(m_s2z_odr_search,
                                          Z_APDU_searchRequest);
@@ -1532,7 +1681,7 @@ void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
                    yaz_log(LOG_LOG, "%*s^\n", off+4, "");
                    yaz_log(LOG_LOG, "Bad PQF: %s (code %d)\n", pqf_msg, code);
                    
-                   send_to_srw_client_error(10);
+                   send_to_srw_client_error(10, 0);
                    return;
                }
                query->which = Z_Query_type_1;
@@ -1542,7 +1691,7 @@ void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
            }
            else
            {
-               send_to_srw_client_error(11);
+               send_to_srw_client_error(7, "query");
                return;
            }
 
@@ -1556,14 +1705,24 @@ void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
                start = *srw_req->startRecord;
            if (max > 0)
            {
-               if (start <= 1)  // Z39.50 piggyback
+                // Some backends, such as Voyager, don't honor piggyback,
+               // so we always use present (hence the "0 &&").
+               if (0 && start <= 1)  // Z39.50 piggyback
                {
                    *z_searchRequest->smallSetUpperBound = max;
                    *z_searchRequest->mediumSetPresentNumber = max;
                    *z_searchRequest->largeSetLowerBound = 2000000000; // 2e9
+
                    z_searchRequest->preferredRecordSyntax =
                        yaz_oidval_to_z3950oid(m_s2z_odr_search, CLASS_RECSYN,
                                               VAL_TEXT_XML);
+                   if (srw_req->recordSchema)
+                   {
+                       z_searchRequest->smallSetElementSetNames =
+                           z_searchRequest->mediumSetElementSetNames =
+                           mk_esn_from_schema(m_s2z_odr_search,
+                                              srw_req->recordSchema);
+                   }
                }
                else   // Z39.50 present
                {
@@ -1576,6 +1735,18 @@ void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
                    z_presentRequest->preferredRecordSyntax =
                        yaz_oidval_to_z3950oid(m_s2z_odr_search, CLASS_RECSYN,
                                               VAL_TEXT_XML);
+                   z_presentRequest->recordComposition =
+                       (Z_RecordComposition *)
+                       odr_malloc(m_s2z_odr_search,
+                                  sizeof(Z_RecordComposition));
+                   if (srw_req->recordSchema)
+                   {
+                       z_presentRequest->recordComposition->which = 
+                           Z_RecordComp_simple;                    
+                       z_presentRequest->recordComposition->u.simple =
+                           mk_esn_from_schema(m_s2z_odr_search,
+                                              srw_req->recordSchema);
+                   }
                }
            }
            if (!m_client)
@@ -1598,16 +1769,26 @@ void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
        else if (srw_pdu->which == Z_SRW_explain_request)
        {
            Z_SRW_explainRequest *srw_req = srw_pdu->u.explain_request;
-           
+
+           // save stylesheet
+           if (srw_req->stylesheet)
+               m_s2z_stylesheet =
+                   odr_strdup(m_s2z_odr_init, srw_req->stylesheet);
+
            if (srw_req->recordPacking &&
                !strcmp(srw_req->recordPacking, "xml"))
                m_s2z_packing = Z_SRW_recordPacking_XML;
            else
                m_s2z_packing = Z_SRW_recordPacking_string;
 
+           if (num_diagnostic)
+           {
+               send_srw_explain_response(diagnostic, num_diagnostic);
+               return;
+           }
+
            if (!m_client)
            {
-               yaz_log(LOG_LOG, "handle_incoming: initRequest");
                m_s2z_init_apdu = zget_APDU(m_s2z_odr_init,
                                            Z_APDU_initRequest);
                
@@ -1617,9 +1798,28 @@ void Yaz_Proxy::handle_incoming_HTTP(Z_HTTP_Request *hreq)
                handle_incoming_Z_PDU(m_s2z_init_apdu);
            }
            else
-               send_srw_explain();
+               send_srw_explain_response(0, 0);
            return;
        }
+       else if (srw_pdu->which == Z_SRW_scan_request)
+        {
+           yaz_add_srw_diagnostic(odr_decode(),
+                                  &diagnostic, &num_diagnostic,
+                                  4, "scan");
+           Z_SRW_PDU *srw_pdu =
+               yaz_srw_get(odr_encode(),
+                           Z_SRW_scan_response);
+           Z_SRW_scanResponse *srw_res = srw_pdu->u.scan_response;
+           
+           srw_res->diagnostics = diagnostic;
+           srw_res->num_diagnostics = num_diagnostic;
+           send_srw_response(srw_pdu);
+           return;
+        }
+       else
+        {
+           send_to_srw_client_error(4, 0);
+        }
     }
     int len = 0;
     Z_GDU *p = z_get_HTTP_Response(odr_encode(), 400);
@@ -1876,7 +2076,6 @@ void Yaz_Proxy::pre_init()
     int keepalive_limit_bw, keepalive_limit_pdu;
     int pre_init;
     const char *cql2rpn = 0;
-    const char *zeerex = 0;
 
     Yaz_ProxyConfig *cfg = check_reconfigure();
 
@@ -2153,7 +2352,7 @@ int Yaz_Proxy::server(const char *addr)
     int r = Yaz_Z_Assoc::server(addr);
     if (!r)
     {
-       yaz_log(LOG_LOG, "%sStarted listener on %s", m_session_str, addr);
+       yaz_log(LOG_LOG, "%sStarted proxy " VERSION " on %s", m_session_str, addr);
        timeout(1);
     }
     return r;