zoom: Use udb and query only one searchable
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
index 436444e..129b027 100644 (file)
@@ -19,12 +19,18 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "config.hpp"
 #include "filter_zoom.hpp"
 #include <yaz/zoom.h>
+#include <yaz/srw.h>
 #include <metaproxy/package.hpp>
 #include <metaproxy/util.hpp>
 #include "torus.hpp"
 
+#include <libxslt/xsltutils.h>
+#include <libxslt/transform.h>
+
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition.hpp>
+#include <yaz/ccl.h>
+#include <yaz/cql.h>
 #include <yaz/oid_db.h>
 #include <yaz/diagbib1.h>
 #include <yaz/log.h>
@@ -36,24 +42,36 @@ namespace yf = mp::filter;
 
 namespace metaproxy_1 {
     namespace filter {
-        struct Zoom::Searchable {
+        struct Zoom::Searchable : boost::noncopyable {
+            std::string authentication;
+            std::string cfAuth;
+            std::string cfProxy;
+            std::string cfSubDb;
             std::string database;
             std::string target;
             std::string query_encoding;
             std::string sru;
+            std::string request_syntax;
+            std::string element_set;
+            std::string record_encoding;
+            std::string transform_xsl_fname;
+            bool use_turbomarc;
             bool piggyback;
+            CCL_bibset ccl_bibset;
             Searchable();
             ~Searchable();
         };
-        class Zoom::Backend {
+        class Zoom::Backend : boost::noncopyable {
             friend class Impl;
             friend class Frontend;
             std::string zurl;
             ZOOM_connection m_connection;
             ZOOM_resultset m_resultset;
             std::string m_frontend_database;
+            SearchablePtr sptr;
+            xsltStylesheetPtr xsp;
         public:
-            Backend();
+            Backend(SearchablePtr sptr);
             ~Backend();
             void connect(std::string zurl, int *error, const char **addinfo);
             void search_pqf(const char *pqf, Odr_int *hits,
@@ -63,7 +81,7 @@ namespace metaproxy_1 {
             void set_option(const char *name, const char *value);
             int get_error(const char **addinfo);
         };
-        class Zoom::Frontend {
+        class Zoom::Frontend : boost::noncopyable {
             friend class Impl;
             Impl *m_p;
             bool m_is_virtual;
@@ -98,14 +116,12 @@ namespace metaproxy_1 {
         private:
             FrontendPtr get_frontend(mp::Package &package);
             void release_frontend(mp::Package &package);
-            void parse_torus(const xmlNode *ptr);
-
-            std::list<Zoom::Searchable>m_searchables;
+            SearchablePtr parse_torus(const xmlNode *ptr);
 
             std::map<mp::Session, FrontendPtr> m_clients;            
             boost::mutex m_mutex;
             boost::condition m_cond_session_ready;
-            mp::Torus torus;
+            std::string torus_url;
         };
     }
 }
@@ -133,14 +149,17 @@ void yf::Zoom::process(mp::Package &package) const
 
 // define Implementation stuff
 
-yf::Zoom::Backend::Backend()
+yf::Zoom::Backend::Backend(SearchablePtr ptr) : sptr(ptr)
 {
     m_connection = ZOOM_connection_create(0);
     m_resultset = 0;
+    xsp = 0;
 }
 
 yf::Zoom::Backend::~Backend()
 {
+    if (xsp)
+        xsltFreeStylesheet(xsp);
     ZOOM_connection_destroy(m_connection);
     ZOOM_resultset_destroy(m_resultset);
 }
@@ -186,10 +205,13 @@ int yf::Zoom::Backend::get_error(const char **addinfo)
 yf::Zoom::Searchable::Searchable()
 {
     piggyback = true;
+    use_turbomarc = true;
+    ccl_bibset = ccl_qual_mk();
 }
 
 yf::Zoom::Searchable::~Searchable()
 {
+    ccl_qual_rm(&ccl_bibset);
 }
 
 yf::Zoom::Frontend::Frontend(Impl *impl) : 
@@ -254,10 +276,11 @@ yf::Zoom::Impl::~Impl()
 { 
 }
 
-void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
+yf::Zoom::SearchablePtr yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
 {
+    SearchablePtr notfound;
     if (!ptr1)
-        return ;
+        return notfound;
     for (ptr1 = ptr1->children; ptr1; ptr1 = ptr1->next)
     {
         if (ptr1->type != XML_ELEMENT_NODE)
@@ -271,59 +294,107 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
                     continue;
                 if (!strcmp((const char *) ptr2->name, "layer"))
                 {
-                    Zoom::Searchable s;
+                    Zoom::SearchablePtr s(new Searchable);
 
                     const xmlNode *ptr3 = ptr2;
                     for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next)
                     {
                         if (ptr3->type != XML_ELEMENT_NODE)
                             continue;
-                        if (!strcmp((const char *) ptr3->name, "id"))
+                        if (!strcmp((const char *) ptr3->name,
+                                    "authentication"))
                         {
-                            s.database = mp::xml::get_text(ptr3);
+                            s->authentication = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                    "cfAuth"))
+                        {
+                            s->cfAuth = mp::xml::get_text(ptr3);
+                        } 
+                        else if (!strcmp((const char *) ptr3->name,
+                                    "cfProxy"))
+                        {
+                            s->cfProxy = mp::xml::get_text(ptr3);
+                        }  
+                        else if (!strcmp((const char *) ptr3->name,
+                                    "cfSubDb"))
+                        {
+                            s->cfSubDb = mp::xml::get_text(ptr3);
+                        }  
+                        else if (!strcmp((const char *) ptr3->name, "id"))
+                        {
+                            s->database = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name, "zurl"))
                         {
-                            s.target = mp::xml::get_text(ptr3);
+                            s->target = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name, "sru"))
                         {
-                            s.sru = mp::xml::get_text(ptr3);
+                            s->sru = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name,
                                          "queryEncoding"))
                         {
-                            s.query_encoding = mp::xml::get_text(ptr3);
+                            s->query_encoding = mp::xml::get_text(ptr3);
                         }
                         else if (!strcmp((const char *) ptr3->name,
                                          "piggyback"))
                         {
-                            s.piggyback = mp::xml::get_bool(ptr3, true);
+                            s->piggyback = mp::xml::get_bool(ptr3, true);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "requestSyntax"))
+                        {
+                            s->request_syntax = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "elementSet"))
+                        {
+                            s->element_set = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "recordEncoding"))
+                        {
+                            s->record_encoding = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "transform"))
+                        {
+                            s->transform_xsl_fname = mp::xml::get_text(ptr3);
+                        }
+                        else if (!strcmp((const char *) ptr3->name,
+                                         "useTurboMarc"))
+                        {
+                            ; // useTurboMarc is ignored
+                        }
+                        else if (!strncmp((const char *) ptr3->name,
+                                          "cclmap_", 7))
+                        {
+                            std::string value = mp::xml::get_text(ptr3);
+                            ccl_qual_fitem(s->ccl_bibset, value.c_str(),
+                                           (const char *) ptr3->name + 7);
                         }
                     }
-                    if (s.database.length() && s.target.length())
+                    if (s->database.length() && s->target.length())
                     {
                         yaz_log(YLOG_LOG, "add db=%s target=%s", 
-                                s.database.c_str(), s.target.c_str());
-                        m_searchables.push_back(s);
+                                s->database.c_str(), s->target.c_str());
                     }
+                    return s;
                 }
             }
         }
     }
+    return notfound;
 }
 
-
 void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
 {
     for (ptr = ptr->children; ptr; ptr = ptr->next)
     {
         if (ptr->type != XML_ELEMENT_NODE)
             continue;
-        if (!strcmp((const char *) ptr->name, "records"))
-        {
-            parse_torus(ptr);
-        }
         else if (!strcmp((const char *) ptr->name, "torus"))
         {
             std::string url;
@@ -337,13 +408,11 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
                         "Bad attribute " + std::string((const char *)
                                                        attr->name));
             }
-            torus.read_searchables(url);
-            xmlDoc *doc = torus.get_doc();
-            if (doc)
-            {
-                xmlNode *ptr = xmlDocGetRootElement(doc);
-                parse_torus(ptr);
-            }
+            torus_url = url;
+        }
+        else if (!strcmp((const char *) ptr->name, "records"))
+        {
+            yaz_log(YLOG_WARN, "records ignored!");
         }
         else
         {
@@ -362,43 +431,103 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     if (m_backend && m_backend->m_frontend_database == database)
         return m_backend;
 
-    std::list<Zoom::Searchable>::iterator map_s =
-        m_p->m_searchables.begin();
-
-    std::string c_db = mp::util::database_name_normalize(database);
-
-    while (map_s != m_p->m_searchables.end())
+    xmlDoc *doc = mp::get_searchable(m_p->torus_url, database);
+    if (!doc)
     {
-        if (c_db.compare(map_s->database) == 0)
-            break;
-        map_s++;
+        *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
+        *addinfo = database.c_str();
+        BackendPtr b;
+        return b;
     }
-    if (map_s == m_p->m_searchables.end())
+    SearchablePtr sptr = m_p->parse_torus(xmlDocGetRootElement(doc));
+    xmlFreeDoc(doc);
+    if (!sptr)
     {
         *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
         *addinfo = database.c_str();
         BackendPtr b;
         return b;
     }
+        
+    xsltStylesheetPtr xsp = 0;
+    if (sptr->transform_xsl_fname.length())
+    {
+        xmlDoc *xsp_doc = xmlParseFile(sptr->transform_xsl_fname.c_str());
+        if (!xsp_doc)
+        {
+            *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+            *addinfo = "xmlParseFile failed";
+            BackendPtr b;
+            return b;
+        }
+        xsp = xsltParseStylesheetDoc(xsp_doc);
+        if (!xsp)
+        {
+            *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
+            *addinfo = "xsltParseStylesheetDoc failed";
+            BackendPtr b;
+            xmlFreeDoc(xsp_doc);
+            return b;
+        }
+    }
 
     m_backend.reset();
 
-    BackendPtr b(new Backend);
+    BackendPtr b(new Backend(sptr));
 
+    std::string cf_parm;
+    b->xsp = xsp;
     b->m_frontend_database = database;
+    std::string authentication = sptr->authentication;
 
-    if (map_s->query_encoding.length())
-        b->set_option("rpnCharset", map_s->query_encoding.c_str());
+    if (sptr->query_encoding.length())
+        b->set_option("rpnCharset", sptr->query_encoding.c_str());
+
+    if (sptr->cfAuth.length())
+    {
+        b->set_option("user", sptr->cfAuth.c_str());
+        if (authentication.length())
+        {
+            size_t found = authentication.find('/');
+            if (found != std::string::npos)
+            {
+                cf_parm += "user=" + mp::util::uri_encode(authentication.substr(0, found))
+                    + "&password=" + mp::util::uri_encode(authentication.substr(found+1));
+            }
+            else
+                cf_parm += "user=" + mp::util::uri_encode(authentication);
+        }
+    }
+    else if (authentication.length())
+        b->set_option("user", authentication.c_str());
+
+    if (sptr->cfProxy.length())
+    {
+        if (cf_parm.length())
+            cf_parm += "&";
+        cf_parm += "proxy=" + mp::util::uri_encode(sptr->cfProxy);
+    }
+    if (sptr->cfSubDb.length())
+    {
+        if (cf_parm.length())
+            cf_parm += "&";
+        cf_parm += "subdatabase=" + mp::util::uri_encode(sptr->cfSubDb);
+    }
 
     std::string url;
-    if (map_s->sru.length())
+    if (sptr->sru.length())
     {
-        url = "http://" + map_s->target;
-        b->set_option("sru", map_s->sru.c_str());
+        url = "http://" + sptr->target;
+        b->set_option("sru", sptr->sru.c_str());
     }
     else
-        url = map_s->target;
-
+    {
+        url = sptr->target;
+    }
+    if (cf_parm.length())
+    {
+        url += "," + cf_parm;
+    }
     b->connect(url, error, addinfo);
     if (*error == 0)
     {
@@ -419,6 +548,7 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
 {
     *number_of_records_returned = 0;
     Z_Records *records = 0;
+    bool enable_pz2_transform = false;
 
     if (start < 0 || number_to_present <= 0)
         return records;
@@ -433,10 +563,33 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
     const char *syntax_name = 0;
 
     if (preferredRecordSyntax)
-        syntax_name =
-            yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
+    {
+        if (!oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml)
+            && element_set_name &&
+            !strcmp(element_set_name, "pz2"))
+        {
+            if (b->sptr->request_syntax.length())
+            {
+                syntax_name = b->sptr->request_syntax.c_str();
+                enable_pz2_transform = true;
+            }
+        }
+        else
+        {
+            syntax_name =
+                yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
+        }
+    }
+
     b->set_option("preferredRecordSyntax", syntax_name);
-        
+
+    if (enable_pz2_transform)
+    {
+        element_set_name = "F";
+        if (b->sptr->element_set.length())
+            element_set_name = b->sptr->element_set.c_str();
+    }
+
     b->set_option("elementSetName", element_set_name);
 
     b->present(start, number_to_present, recs, error, addinfo);
@@ -470,17 +623,73 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
                 npr = zget_surrogateDiagRec(odr, odr_database, sur_error,
                                             addinfo);
             }
+            else if (enable_pz2_transform)
+            {
+                char rec_type_str[100];
+
+                strcpy(rec_type_str, b->sptr->use_turbomarc ?
+                       "txml" : "xml");
+                
+                // prevent buffer overflow ...
+                if (b->sptr->record_encoding.length() > 0 &&
+                    b->sptr->record_encoding.length() < 
+                    (sizeof(rec_type_str)-20))
+                {
+                    strcat(rec_type_str, "; charset=");
+                    strcat(rec_type_str, b->sptr->record_encoding.c_str());
+                }
+                
+                int rec_len;
+                const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str,
+                                                      &rec_len);
+                if (rec_buf && b->xsp)
+                {
+                    xmlDoc *rec_doc = xmlParseMemory(rec_buf, rec_len);
+                    if (rec_doc)
+                    { 
+                        xmlDoc *rec_res;
+                        rec_res = xsltApplyStylesheet(b->xsp, rec_doc, 0);
+
+                        if (rec_res)
+                            xsltSaveResultToString((xmlChar **) &rec_buf, &rec_len,
+                                                   rec_res, b->xsp);
+                    }
+                }
+
+                if (rec_buf)
+                {
+                    npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
+                    npr->databaseName = odr_database;
+                    npr->which = Z_NamePlusRecord_databaseRecord;
+                    npr->u.databaseRecord =
+                        z_ext_record_xml(odr, rec_buf, rec_len);
+                }
+                else
+                {
+                    npr = zget_surrogateDiagRec(
+                        odr, odr_database, 
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
+                        rec_type_str);
+                }
+            }
             else
             {
-                npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
                 Z_External *ext =
                     (Z_External *) ZOOM_record_get(recs[i], "ext", 0);
-                npr->databaseName = odr_database;
                 if (ext)
                 {
+                    npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
+                    npr->databaseName = odr_database;
                     npr->which = Z_NamePlusRecord_databaseRecord;
                     npr->u.databaseRecord = ext;
                 }
+                else
+                {
+                    npr = zget_surrogateDiagRec(
+                        odr, odr_database, 
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
+                        "ZOOM_record, type ext");
+                }
             }
             npl->records[i] = npr;
         }
@@ -524,13 +733,59 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
 
     Odr_int hits = 0;
     Z_Query *query = sr->query;
+    WRBUF ccl_wrbuf = 0;
+    WRBUF pqf_wrbuf = 0;
+
     if (query->which == Z_Query_type_1 || query->which == Z_Query_type_101)
     {
-        WRBUF w = wrbuf_alloc();
-        yaz_rpnquery_to_wrbuf(w, query->u.type_1);
+        // RPN
+        pqf_wrbuf = wrbuf_alloc();
+        yaz_rpnquery_to_wrbuf(pqf_wrbuf, query->u.type_1);
+    }
+    else if (query->which == Z_Query_type_2)
+    {
+        // CCL
+        ccl_wrbuf = wrbuf_alloc();
+        wrbuf_write(ccl_wrbuf, (const char *) query->u.type_2->buf,
+                    query->u.type_2->len);
+    }
+    else if (query->which == Z_Query_type_104 &&
+             query->u.type_104->which == Z_External_CQL)
+    {
+        // CQL
+        const char *cql = query->u.type_104->u.cql;
+        CQL_parser cp = cql_parser_create();
+        int r = cql_parser_string(cp, cql);
+        if (r)
+        {
+            cql_parser_destroy(cp);
+            apdu_res = 
+                odr.create_searchResponse(apdu_req, 
+                                          YAZ_BIB1_MALFORMED_QUERY,
+                                          "CQL syntax error");
+            package.response() = apdu_res;
+            return;
+        }
+        struct cql_node *cn = cql_parser_result(cp);
+        char ccl_buf[1024];
 
-        b->search_pqf(wrbuf_cstr(w), &hits, &error, &addinfo);
-        wrbuf_destroy(w);
+        r = cql_to_ccl_buf(cn, ccl_buf, sizeof(ccl_buf));
+        yaz_log(YLOG_LOG, "cql_to_ccl_buf returned %d", r);
+        if (r == 0)
+        {
+            ccl_wrbuf = wrbuf_alloc();
+            wrbuf_puts(ccl_wrbuf, ccl_buf);
+        }
+        cql_parser_destroy(cp);
+        if (r)
+        {
+            apdu_res = 
+                odr.create_searchResponse(apdu_req, 
+                                          YAZ_BIB1_MALFORMED_QUERY,
+                                          "CQL to CCL conversion error");
+            package.response() = apdu_res;
+            return;
+        }
     }
     else
     {
@@ -539,6 +794,36 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
         package.response() = apdu_res;
         return;
     }
+
+    if (ccl_wrbuf)
+    {
+        // CCL to PQF
+        assert(pqf_wrbuf == 0);
+        int cerror, cpos;
+        struct ccl_rpn_node *cn;
+        cn = ccl_find_str(b->sptr->ccl_bibset, wrbuf_cstr(ccl_wrbuf),
+                          &cerror, &cpos);
+        wrbuf_destroy(ccl_wrbuf);
+        if (!cn)
+        {
+            char *addinfo = odr_strdup(odr, ccl_err_msg(cerror));
+
+            apdu_res = 
+                odr.create_searchResponse(apdu_req, 
+                                          YAZ_BIB1_MALFORMED_QUERY,
+                                          addinfo);
+            package.response() = apdu_res;
+            return;
+        }
+        pqf_wrbuf = wrbuf_alloc();
+        ccl_pquery(pqf_wrbuf, cn);
+        ccl_rpn_delete(cn);
+    }
+    
+    assert(pqf_wrbuf);
+    b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo);
+    
+    wrbuf_destroy(pqf_wrbuf);
     
     const char *element_set_name = 0;
     Odr_int number_to_present = 0;