zoom: element set is omitted if not set in profile
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
index 4529cef..f9c07df 100644 (file)
@@ -19,6 +19,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "config.hpp"
 #include "filter_zoom.hpp"
 #include <yaz/zoom.h>
+#include <yaz/yaz-version.h>
 #include <yaz/srw.h>
 #include <metaproxy/package.hpp>
 #include <metaproxy/util.hpp>
@@ -29,7 +30,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition.hpp>
+#include <yaz/ccl_xml.h>
 #include <yaz/ccl.h>
+#include <yaz/rpn2cql.h>
+#include <yaz/rpn2solr.h>
+#include <yaz/pquery.h>
 #include <yaz/cql.h>
 #include <yaz/oid_db.h>
 #include <yaz/diagbib1.h>
@@ -47,7 +52,7 @@ namespace metaproxy_1 {
             std::string cfAuth;
             std::string cfProxy;
             std::string cfSubDb;
-            std::string database;
+            std::string udb;
             std::string target;
             std::string query_encoding;
             std::string sru;
@@ -58,7 +63,7 @@ namespace metaproxy_1 {
             bool use_turbomarc;
             bool piggyback;
             CCL_bibset ccl_bibset;
-            Searchable();
+            Searchable(CCL_bibset base);
             ~Searchable();
         };
         class Zoom::Backend : boost::noncopyable {
@@ -76,10 +81,13 @@ namespace metaproxy_1 {
             void connect(std::string zurl, int *error, const char **addinfo);
             void search_pqf(const char *pqf, Odr_int *hits,
                             int *error, const char **addinfo);
+            void search_cql(const char *cql, Odr_int *hits,
+                            int *error, const char **addinfo);
             void present(Odr_int start, Odr_int number, ZOOM_record *recs,
                          int *error, const char **addinfo);
             void set_option(const char *name, const char *value);
-            int get_error(const char **addinfo);
+            const char *get_option(const char *name);
+            void get_zoom_error(int *error, const char **addinfo);
         };
         class Zoom::Frontend : boost::noncopyable {
             friend class Impl;
@@ -114,9 +122,10 @@ namespace metaproxy_1 {
             void process(metaproxy_1::Package & package);
             void configure(const xmlNode * ptr, bool test_only);
         private:
+            void configure_local_records(const xmlNode * ptr, bool test_only);
             FrontendPtr get_frontend(mp::Package &package);
             void release_frontend(mp::Package &package);
-            SearchablePtr parse_torus(const xmlNode *ptr);
+            SearchablePtr parse_torus_record(const xmlNode *ptr);
             struct cql_node *convert_cql_fields(struct cql_node *cn, ODR odr);
             std::map<mp::Session, FrontendPtr> m_clients;            
             boost::mutex m_mutex;
@@ -124,6 +133,8 @@ namespace metaproxy_1 {
             std::string torus_url;
             std::map<std::string,std::string> fieldmap;
             std::string xsldir;
+            CCL_bibset bibset;
+            std::map<std::string,SearchablePtr> s_map;
         };
     }
 }
@@ -166,18 +177,51 @@ yf::Zoom::Backend::~Backend()
     ZOOM_resultset_destroy(m_resultset);
 }
 
+// Reads the current ZOOM error state of m_connection into *error / *addinfo, remapping ZOOM-level codes (see below).
+void yf::Zoom::Backend::get_zoom_error(int *error, const char **addinfo)
+{
+    const char *msg = 0; // receives the ZOOM error message text
+    *error = ZOOM_connection_error(m_connection, &msg, addinfo);
+    if (*error)
+    {
+        if (*error >= ZOOM_ERROR_CONNECT) // codes from ZOOM_ERROR_CONNECT upwards are remapped; lower codes pass through unchanged
+        {
+            // turn the ZOOM diagnostic into Bib-1 diagnostic 2 (temporary system error), with addinfo = ZOOM error message
+            *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+            if (addinfo) // addinfo is optional; only written when the caller supplied a slot
+                *addinfo = msg;
+        }
+    }
+}
+
 void yf::Zoom::Backend::connect(std::string zurl,
                                 int *error, const char **addinfo)
 {
     ZOOM_connection_connect(m_connection, zurl.c_str(), 0);
-    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+    get_zoom_error(error, addinfo); // report the connect outcome via *error / *addinfo using the shared error mapping
 }
 
 void yf::Zoom::Backend::search_pqf(const char *pqf, Odr_int *hits,
                                    int *error, const char **addinfo)
 {
     m_resultset = ZOOM_connection_search_pqf(m_connection, pqf);
-    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+    get_zoom_error(error, addinfo); // pick up the error state left by the search call
+    if (*error == 0)
+        *hits = ZOOM_resultset_size(m_resultset); // success: hit count is the result set size
+    else
+        *hits = 0; // failure: report zero hits alongside the error
+}
+
+void yf::Zoom::Backend::search_cql(const char *cql, Odr_int *hits,
+                                   int *error, const char **addinfo)
+{
+    ZOOM_query q = ZOOM_query_create();
+
+    ZOOM_query_cql(q, cql);
+
+    m_resultset = ZOOM_connection_search(m_connection, q);
+    ZOOM_query_destroy(q);
+    get_zoom_error(error, addinfo);
     if (*error == 0)
         *hits = ZOOM_resultset_size(m_resultset);
     else
@@ -189,7 +233,7 @@ void yf::Zoom::Backend::present(Odr_int start, Odr_int number,
                                 int *error, const char **addinfo)
 {
     ZOOM_resultset_records(m_resultset, recs, start, number);
-    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+    get_zoom_error(error, addinfo);
 }
 
 void yf::Zoom::Backend::set_option(const char *name, const char *value)
@@ -199,16 +243,16 @@ void yf::Zoom::Backend::set_option(const char *name, const char *value)
         ZOOM_resultset_option_set(m_resultset, name, value);
 }
 
-int yf::Zoom::Backend::get_error(const char **addinfo)
+const char *yf::Zoom::Backend::get_option(const char *name) // returns the value of connection option `name`
 {
-    return ZOOM_connection_error(m_connection, 0, addinfo);
+    return ZOOM_connection_option_get(m_connection, name); // NOTE(review): presumably null when the option is unset - callers test the result for truth; confirm against the ZOOM docs
 }
 
-yf::Zoom::Searchable::Searchable()
+yf::Zoom::Searchable::Searchable(CCL_bibset base) // base: CCL bibset used to seed this Searchable's ccl_bibset
 {
     piggyback = true;
     use_turbomarc = true;
-    ccl_bibset = ccl_qual_mk();
+    ccl_bibset = ccl_qual_dup(base); // duplicate base (ccl_qual_dup) rather than creating a fresh set with ccl_qual_mk
 }
 
 yf::Zoom::Searchable::~Searchable()
@@ -272,118 +316,148 @@ void yf::Zoom::Impl::release_frontend(mp::Package &package)
 
 yf::Zoom::Impl::Impl()
 {
+    bibset = ccl_qual_mk(); // filter-wide CCL bibset; released with ccl_qual_rm in the destructor
 }
 
 yf::Zoom::Impl::~Impl()
 { 
+    ccl_qual_rm(&bibset); // release the bibset created with ccl_qual_mk in the constructor
 }
 
-yf::Zoom::SearchablePtr yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
+yf::Zoom::SearchablePtr yf::Zoom::Impl::parse_torus_record(const xmlNode *ptr) // builds one Searchable from the child elements of ptr; ptr is dereferenced without a null check
 {
-    SearchablePtr notfound;
-    if (!ptr1)
-        return notfound;
-    for (ptr1 = ptr1->children; ptr1; ptr1 = ptr1->next)
+    Zoom::SearchablePtr s(new Searchable(bibset)); // new Searchable constructed from the filter's bibset member
+    
+    for (ptr = ptr->children; ptr; ptr = ptr->next) // walk the sibling list of child nodes
     {
-        if (ptr1->type != XML_ELEMENT_NODE)
+        if (ptr->type != XML_ELEMENT_NODE) // non-element nodes are skipped
             continue;
-        if (!strcmp((const char *) ptr1->name, "record"))
+        if (!strcmp((const char *) ptr->name, "layer"))
+            ptr = ptr->children; // descend into <layer>. NOTE(review): the loop increment then runs on this pointer, so the first child is skipped and an empty <layer> would dereference null - confirm
+        else if (!strcmp((const char *) ptr->name,
+                         "authentication"))
+        {
+            s->authentication = mp::xml::get_text(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name,
+                         "cfAuth"))
+        {
+            s->cfAuth = mp::xml::get_text(ptr);
+        } 
+        else if (!strcmp((const char *) ptr->name,
+                         "cfProxy"))
+        {
+            s->cfProxy = mp::xml::get_text(ptr);
+        }  
+        else if (!strcmp((const char *) ptr->name,
+                         "cfSubDb"))
         {
-            const xmlNode *ptr2 = ptr1;
-            for (ptr2 = ptr2->children; ptr2; ptr2 = ptr2->next)
+            s->cfSubDb = mp::xml::get_text(ptr);
+        }  
+        else if (!strcmp((const char *) ptr->name, "udb"))
+        {
+            s->udb = mp::xml::get_text(ptr); // udb is the key configure_local_records uses when storing into s_map
+        }
+        else if (!strcmp((const char *) ptr->name, "zurl"))
+        {
+            s->target = mp::xml::get_text(ptr); // <zurl> text is stored in the target member
+        }
+        else if (!strcmp((const char *) ptr->name, "sru"))
+        {
+            s->sru = mp::xml::get_text(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name,
+                         "queryEncoding"))
+        {
+            s->query_encoding = mp::xml::get_text(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name,
+                         "piggyback"))
+        {
+            s->piggyback = mp::xml::get_bool(ptr, true); // NOTE(review): second argument is presumably the default value - confirm
+        }
+        else if (!strcmp((const char *) ptr->name,
+                         "requestSyntax"))
+        {
+            s->request_syntax = mp::xml::get_text(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name,
+                         "elementSet"))
+        {
+            s->element_set = mp::xml::get_text(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name,
+                         "recordEncoding"))
+        {
+            s->record_encoding = mp::xml::get_text(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name,
+                         "transform"))
+        {
+            s->transform_xsl_fname = mp::xml::get_text(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name,
+                         "useTurboMarc"))
+        {
+            ; // useTurboMarc is ignored
+        }
+        else if (!strncmp((const char *) ptr->name,
+                          "cclmap_", 7)) // any element whose name starts with the 7-character prefix "cclmap_"
+        {
+            std::string value = mp::xml::get_text(ptr);
+            ccl_qual_fitem(s->ccl_bibset, value.c_str(),
+                           (const char *) ptr->name + 7); // the name after the "cclmap_" prefix is passed together with the element text
+        }
+    }
+    return s; // never null: s is allocated unconditionally at the top
+}
+
+void yf::Zoom::Impl::configure_local_records(const xmlNode *ptr, bool test_only) // loads <records>/<record> entries into s_map; test_only is not read in this function
+{
+    while (ptr && ptr->type != XML_ELEMENT_NODE) // skip leading non-element nodes
+        ptr = ptr->next;
+    
+    if (ptr) // no element node found: nothing to configure
+    {
+        if (!strcmp((const char *) ptr->name, "records")) // the first element node must be <records>
+        {
+            for (ptr = ptr->children; ptr; ptr = ptr->next)
             {
-                if (ptr2->type != XML_ELEMENT_NODE)
+                if (ptr->type != XML_ELEMENT_NODE) // non-element nodes are skipped
                     continue;
-                if (!strcmp((const char *) ptr2->name, "layer"))
+                if (!strcmp((const char *) ptr->name, "record")) // only <record> elements are accepted inside <records>
                 {
-                    Zoom::SearchablePtr s(new Searchable);
-
-                    const xmlNode *ptr3 = ptr2;
-                    for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next)
+                    SearchablePtr s = parse_torus_record(ptr);
+                    if (s) // NOTE(review): parse_torus_record appears to always return a non-null pointer, so this looks always true - confirm
                     {
-                        if (ptr3->type != XML_ELEMENT_NODE)
-                            continue;
-                        if (!strcmp((const char *) ptr3->name,
-                                    "authentication"))
-                        {
-                            s->authentication = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name,
-                                    "cfAuth"))
-                        {
-                            s->cfAuth = mp::xml::get_text(ptr3);
-                        } 
-                        else if (!strcmp((const char *) ptr3->name,
-                                    "cfProxy"))
-                        {
-                            s->cfProxy = mp::xml::get_text(ptr3);
-                        }  
-                        else if (!strcmp((const char *) ptr3->name,
-                                    "cfSubDb"))
-                        {
-                            s->cfSubDb = mp::xml::get_text(ptr3);
-                        }  
-                        else if (!strcmp((const char *) ptr3->name, "id"))
-                        {
-                            s->database = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name, "zurl"))
-                        {
-                            s->target = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name, "sru"))
-                        {
-                            s->sru = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name,
-                                         "queryEncoding"))
-                        {
-                            s->query_encoding = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name,
-                                         "piggyback"))
-                        {
-                            s->piggyback = mp::xml::get_bool(ptr3, true);
-                        }
-                        else if (!strcmp((const char *) ptr3->name,
-                                         "requestSyntax"))
-                        {
-                            s->request_syntax = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name,
-                                         "elementSet"))
-                        {
-                            s->element_set = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name,
-                                         "recordEncoding"))
-                        {
-                            s->record_encoding = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name,
-                                         "transform"))
+                        std::string udb = s->udb;
+                        if (udb.length())
+                            s_map[s->udb] = s; // register the record under its udb; an existing entry with the same key is overwritten
+                        else
                         {
-                            s->transform_xsl_fname = mp::xml::get_text(ptr3);
-                        }
-                        else if (!strcmp((const char *) ptr3->name,
-                                         "useTurboMarc"))
-                        {
-                            ; // useTurboMarc is ignored
-                        }
-                        else if (!strncmp((const char *) ptr3->name,
-                                          "cclmap_", 7))
-                        {
-                            std::string value = mp::xml::get_text(ptr3);
-                            ccl_qual_fitem(s->ccl_bibset, value.c_str(),
-                                           (const char *) ptr3->name + 7);
+                            throw mp::filter::FilterException // records with an empty udb are rejected
+                                ("No udb for local torus record");
                         }
                     }
-                    return s;
+                }
+                else
+                {
+                    throw mp::filter::FilterException // any other element inside <records> is a configuration error
+                        ("Bad element " 
+                         + std::string((const char *) ptr->name)
+                         + " in zoom filter inside element "
+                         "<torus><records>");
                 }
             }
         }
+        else
+        {
+            throw mp::filter::FilterException // first element is not <records>: configuration error
+                ("Bad element " 
+                 + std::string((const char *) ptr->name)
+                 + " in zoom filter inside element <torus>");
+        }
     }
-    return notfound;
 }
 
 void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
@@ -406,6 +480,12 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
                         "Bad attribute " + std::string((const char *)
                                                        attr->name));
             }
+            configure_local_records(ptr->children, test_only);
+        }
+        else if (!strcmp((const char *) ptr->name, "cclmap"))
+        {
+            const char *addinfo = 0;
+            ccl_xml_config(bibset, ptr, &addinfo);
         }
         else if (!strcmp((const char *) ptr->name, "fieldmap"))
         {
@@ -426,10 +506,6 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
             if (cql_field.length())
                 fieldmap[cql_field] = ccl_field;
         }
-        else if (!strcmp((const char *) ptr->name, "records"))
-        {
-            yaz_log(YLOG_WARN, "records ignored!");
-        }
         else
         {
             throw mp::filter::FilterException
@@ -447,27 +523,51 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     if (m_backend && m_backend->m_frontend_database == database)
         return m_backend;
 
-    bool db_args = false;
+    std::string db_args;
+    std::string cf_parm;
     std::string torus_db;
     size_t db_arg_pos = database.find(',');
     if (db_arg_pos != std::string::npos)
     {
         torus_db = database.substr(0, db_arg_pos);
-        db_args = true;
+        db_args = database.substr(db_arg_pos+1);
     }
     else
         torus_db = database;
  
-    xmlDoc *doc = mp::get_searchable(m_p->torus_url, torus_db);
-    if (!doc)
+    SearchablePtr sptr;
+
+    std::map<std::string,SearchablePtr>::iterator it;
+    it = m_p->s_map.find(torus_db);
+    if (it != m_p->s_map.end())
+        sptr = it->second;
+    else
     {
-        *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
-        *addinfo = database.c_str();
-        BackendPtr b;
-        return b;
+        xmlDoc *doc = mp::get_searchable(m_p->torus_url, torus_db);
+        if (!doc)
+        {
+            *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
+            *addinfo = database.c_str();
+            BackendPtr b;
+            return b;
+        }
+        const xmlNode *ptr = xmlDocGetRootElement(doc);
+        if (ptr)
+        {   // presumably ptr is a records element node
+            // parse first record in document
+            for (ptr = ptr->children; ptr; ptr = ptr->next)
+            {
+                if (ptr->type == XML_ELEMENT_NODE
+                    && !strcmp((const char *) ptr->name, "record"))
+                {
+                    sptr = m_p->parse_torus_record(ptr);
+                    break;
+                }
+            }
+        }
+        xmlFreeDoc(doc);
     }
-    SearchablePtr sptr = m_p->parse_torus(xmlDocGetRootElement(doc));
-    xmlFreeDoc(doc);
+
     if (!sptr)
     {
         *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
@@ -508,10 +608,11 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
 
     BackendPtr b(new Backend(sptr));
 
-    std::string cf_parm;
     b->xsp = xsp;
     b->m_frontend_database = database;
     std::string authentication = sptr->authentication;
+        
+    b->set_option("timeout", "40");
 
     if (sptr->query_encoding.length())
         b->set_option("rpnCharset", sptr->query_encoding.c_str());
@@ -557,10 +658,11 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     {
         url = sptr->target;
     }
-    if (cf_parm.length() && !db_args)
-    {
+    if (db_args.length())
+        url += "," + db_args;
+    else if (cf_parm.length())
         url += "," + cf_parm;
-    }
+    yaz_log(YLOG_LOG, "url=%s", url.c_str());
     b->connect(url, error, addinfo);
     if (*error == 0)
     {
@@ -602,10 +704,8 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
             !strcmp(element_set_name, "pz2"))
         {
             if (b->sptr->request_syntax.length())
-            {
                 syntax_name = b->sptr->request_syntax.c_str();
-                enable_pz2_transform = true;
-            }
+            enable_pz2_transform = true;
         }
         else
         {
@@ -618,7 +718,7 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
 
     if (enable_pz2_transform)
     {
-        element_set_name = "F";
+        element_set_name = 0;
         if (b->sptr->element_set.length())
             element_set_name = b->sptr->element_set.c_str();
     }
@@ -662,7 +762,6 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
 
                 strcpy(rec_type_str, b->sptr->use_turbomarc ?
                        "txml" : "xml");
-                
                 // prevent buffer overflow ...
                 if (b->sptr->record_encoding.length() > 0 &&
                     b->sptr->record_encoding.length() < 
@@ -911,9 +1010,53 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
     }
     
     assert(pqf_wrbuf);
-    b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo);
+    if (b->get_option("sru"))
+    {
+        int status = 0;
+        Z_RPNQuery *zquery;
+        zquery = p_query_rpn(odr, wrbuf_cstr(pqf_wrbuf));
+        WRBUF wrb = wrbuf_alloc();
+            
+        if (!strcmp(b->get_option("sru"), "solr"))
+        {
+            solr_transform_t cqlt = solr_transform_create();
+            
+            status = solr_transform_rpn2solr_wrbuf(cqlt, wrb, zquery);
+            
+            solr_transform_close(cqlt);
+        }
+        else
+        {
+            cql_transform_t cqlt = cql_transform_create();
+            
+            status = cql_transform_rpn2cql_wrbuf(cqlt, wrb, zquery);
+            
+            cql_transform_close(cqlt);
+        }
+        if (status == 0)
+        {
+            yaz_log(YLOG_LOG, "search CQL: %s", wrbuf_cstr(wrb));
+            b->search_cql(wrbuf_cstr(wrb), &hits, &error, &addinfo);
+        }
+        
+        wrbuf_destroy(wrb);
+        wrbuf_destroy(pqf_wrbuf);
+        if (status)
+        {
+            apdu_res = 
+                odr.create_searchResponse(apdu_req, YAZ_BIB1_MALFORMED_QUERY,
+                                          "can not convert from RPN to CQL/SOLR");
+            package.response() = apdu_res;
+            return;
+        }
+    }
+    else
+    {
+        yaz_log(YLOG_LOG, "search PQF: %s", wrbuf_cstr(pqf_wrbuf));
+        b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo);
+        wrbuf_destroy(pqf_wrbuf);
+    }
     
-    wrbuf_destroy(pqf_wrbuf);
     
     const char *element_set_name = 0;
     Odr_int number_to_present = 0;