Allow local ccl maps to be given (as base)
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
index 68252b2..a90bf97 100644 (file)
@@ -19,6 +19,8 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include "config.hpp"
 #include "filter_zoom.hpp"
 #include <yaz/zoom.h>
+#include <yaz/yaz-version.h>
+#include <yaz/srw.h>
 #include <metaproxy/package.hpp>
 #include <metaproxy/util.hpp>
 #include "torus.hpp"
@@ -28,7 +30,10 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition.hpp>
+#include <yaz/ccl_xml.h>
 #include <yaz/ccl.h>
+#include <yaz/rpn2cql.h>
+#include <yaz/pquery.h>
 #include <yaz/cql.h>
 #include <yaz/oid_db.h>
 #include <yaz/diagbib1.h>
@@ -43,6 +48,9 @@ namespace metaproxy_1 {
     namespace filter {
         struct Zoom::Searchable : boost::noncopyable {
             std::string authentication;
+            std::string cfAuth;
+            std::string cfProxy;
+            std::string cfSubDb;
             std::string database;
             std::string target;
             std::string query_encoding;
@@ -54,7 +62,7 @@ namespace metaproxy_1 {
             bool use_turbomarc;
             bool piggyback;
             CCL_bibset ccl_bibset;
-            Searchable();
+            Searchable(CCL_bibset base);
             ~Searchable();
         };
         class Zoom::Backend : boost::noncopyable {
@@ -72,10 +80,13 @@ namespace metaproxy_1 {
             void connect(std::string zurl, int *error, const char **addinfo);
             void search_pqf(const char *pqf, Odr_int *hits,
                             int *error, const char **addinfo);
+            void search_cql(const char *cql, Odr_int *hits,
+                            int *error, const char **addinfo);
             void present(Odr_int start, Odr_int number, ZOOM_record *recs,
                          int *error, const char **addinfo);
             void set_option(const char *name, const char *value);
-            int get_error(const char **addinfo);
+            const char *get_option(const char *name);
+            void get_zoom_error(int *error, const char **addinfo);
         };
         class Zoom::Frontend : boost::noncopyable {
             friend class Impl;
@@ -112,14 +123,15 @@ namespace metaproxy_1 {
         private:
             FrontendPtr get_frontend(mp::Package &package);
             void release_frontend(mp::Package &package);
-            void parse_torus(const xmlNode *ptr);
-
-            std::list<Zoom::SearchablePtr>m_searchables;
-
+            SearchablePtr parse_torus(const xmlNode *ptr);
+            struct cql_node *convert_cql_fields(struct cql_node *cn, ODR odr);
             std::map<mp::Session, FrontendPtr> m_clients;            
             boost::mutex m_mutex;
             boost::condition m_cond_session_ready;
-            mp::Torus torus;
+            std::string torus_url;
+            std::map<std::string,std::string> fieldmap;
+            std::string xsldir;
+            CCL_bibset bibset;
         };
     }
 }
@@ -162,18 +174,51 @@ yf::Zoom::Backend::~Backend()
     ZOOM_resultset_destroy(m_resultset);
 }
 
+
+void yf::Zoom::Backend::get_zoom_error(int *error, const char **addinfo)
+{ // Read the current error state of m_connection into *error / *addinfo.
+    const char *msg = 0; // receives the ZOOM error message text
+    *error = ZOOM_connection_error(m_connection, &msg, addinfo);
+    if (*error) // non-zero means an error is pending; zero is left untouched
+    {
+        if (*error >= ZOOM_ERROR_CONNECT) // presumably ZOOM's own error range rather than a Bib-1 diagnostic - TODO confirm
+        {
+            // report it as Bib-1 "temporary system error", with addinfo = ZOOM error message
+            *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+            if (addinfo) // addinfo is optional here; note that error itself is dereferenced unconditionally
+                *addinfo = msg;
+        }
+    }
+}
+
 void yf::Zoom::Backend::connect(std::string zurl,
                                 int *error, const char **addinfo)
 {
     ZOOM_connection_connect(m_connection, zurl.c_str(), 0);
-    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+    get_zoom_error(error, addinfo); // fetch the outcome of the connect attempt into *error / *addinfo
 }
 
 void yf::Zoom::Backend::search_pqf(const char *pqf, Odr_int *hits,
                                    int *error, const char **addinfo)
 {
     m_resultset = ZOOM_connection_search_pqf(m_connection, pqf);
-    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+    get_zoom_error(error, addinfo); // fetch the search outcome into *error / *addinfo
+    if (*error == 0)
+        *hits = ZOOM_resultset_size(m_resultset); // no error: report the result set size
+    else
+        *hits = 0; // error: report zero hits
+}
+
+void yf::Zoom::Backend::search_cql(const char *cql, Odr_int *hits,
+                                   int *error, const char **addinfo)
+{
+    ZOOM_query q = ZOOM_query_create();
+
+    ZOOM_query_cql(q, cql);
+
+    m_resultset = ZOOM_connection_search(m_connection, q);
+    ZOOM_query_destroy(q);
+    get_zoom_error(error, addinfo);
     if (*error == 0)
         *hits = ZOOM_resultset_size(m_resultset);
     else
@@ -185,7 +230,7 @@ void yf::Zoom::Backend::present(Odr_int start, Odr_int number,
                                 int *error, const char **addinfo)
 {
     ZOOM_resultset_records(m_resultset, recs, start, number);
-    *error = ZOOM_connection_error(m_connection, 0, addinfo);
+    get_zoom_error(error, addinfo);
 }
 
 void yf::Zoom::Backend::set_option(const char *name, const char *value)
@@ -195,16 +240,16 @@ void yf::Zoom::Backend::set_option(const char *name, const char *value)
         ZOOM_resultset_option_set(m_resultset, name, value);
 }
 
-int yf::Zoom::Backend::get_error(const char **addinfo)
+const char *yf::Zoom::Backend::get_option(const char *name)
 {
-    return ZOOM_connection_error(m_connection, 0, addinfo);
+    return ZOOM_connection_option_get(m_connection, name); // option value as held by the ZOOM connection; handle_search tests the result for non-null
 }
 
-yf::Zoom::Searchable::Searchable()
+yf::Zoom::Searchable::Searchable(CCL_bibset base)
 {
     piggyback = true;
     use_turbomarc = true;
-    ccl_bibset = ccl_qual_mk();
+    ccl_bibset = ccl_qual_dup(base); // start from a duplicate of the given base bibset instead of an empty one
 }
 
 yf::Zoom::Searchable::~Searchable()
@@ -268,16 +313,19 @@ void yf::Zoom::Impl::release_frontend(mp::Package &package)
 
 yf::Zoom::Impl::Impl()
 {
+    bibset = ccl_qual_mk(); // base CCL bibset: filled from <cclmap> in configure, passed to each new Searchable
 }
 
 yf::Zoom::Impl::~Impl()
 { 
+    ccl_qual_rm(&bibset); // release the bibset created in the constructor
 }
 
-void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
+yf::Zoom::SearchablePtr yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
 {
+    SearchablePtr notfound;
     if (!ptr1)
-        return ;
+        return notfound;
     for (ptr1 = ptr1->children; ptr1; ptr1 = ptr1->next)
     {
         if (ptr1->type != XML_ELEMENT_NODE)
@@ -291,7 +339,7 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
                     continue;
                 if (!strcmp((const char *) ptr2->name, "layer"))
                 {
-                    Zoom::SearchablePtr s(new Searchable);
+                    Zoom::SearchablePtr s(new Searchable(bibset));
 
                     const xmlNode *ptr3 = ptr2;
                     for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next)
@@ -303,6 +351,21 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
                         {
                             s->authentication = mp::xml::get_text(ptr3);
                         }
+                        else if (!strcmp((const char *) ptr3->name,
+                                    "cfAuth"))
+                        {
+                            s->cfAuth = mp::xml::get_text(ptr3);
+                        } 
+                        else if (!strcmp((const char *) ptr3->name,
+                                    "cfProxy"))
+                        {
+                            s->cfProxy = mp::xml::get_text(ptr3);
+                        }  
+                        else if (!strcmp((const char *) ptr3->name,
+                                    "cfSubDb"))
+                        {
+                            s->cfSubDb = mp::xml::get_text(ptr3);
+                        }  
                         else if (!strcmp((const char *) ptr3->name, "id"))
                         {
                             s->database = mp::xml::get_text(ptr3);
@@ -358,17 +421,12 @@ void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1)
                                            (const char *) ptr3->name + 7);
                         }
                     }
-                    if (s->database.length() && s->target.length())
-                    {
-                        yaz_log(YLOG_LOG, "add db=%s target=%s turbomarc=%s", 
-                                s->database.c_str(), s->target.c_str(),
-                                s->use_turbomarc ? "1" : "0");
-                        m_searchables.push_back(s);
-                    }
+                    return s;
                 }
             }
         }
     }
+    return notfound;
 }
 
 void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
@@ -377,30 +435,44 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
     {
         if (ptr->type != XML_ELEMENT_NODE)
             continue;
-        if (!strcmp((const char *) ptr->name, "records"))
-        {
-            parse_torus(ptr);
-        }
         else if (!strcmp((const char *) ptr->name, "torus"))
         {
-            std::string url;
             const struct _xmlAttr *attr;
             for (attr = ptr->properties; attr; attr = attr->next)
             {
                 if (!strcmp((const char *) attr->name, "url"))
-                    url = mp::xml::get_text(attr->children);
+                    torus_url = mp::xml::get_text(attr->children);
+                else if (!strcmp((const char *) attr->name, "xsldir"))
+                    xsldir = mp::xml::get_text(attr->children);
                 else
                     throw mp::filter::FilterException(
                         "Bad attribute " + std::string((const char *)
                                                        attr->name));
             }
-            torus.read_searchables(url);
-            xmlDoc *doc = torus.get_doc();
-            if (doc)
+        }
+        else if (!strcmp((const char *) ptr->name, "cclmap"))
+        {
+            const char *addinfo = 0;
+            ccl_xml_config(bibset, ptr, &addinfo);
+        }
+        else if (!strcmp((const char *) ptr->name, "fieldmap"))
+        {
+            const struct _xmlAttr *attr;
+            std::string ccl_field;
+            std::string cql_field;
+            for (attr = ptr->properties; attr; attr = attr->next)
             {
-                xmlNode *ptr = xmlDocGetRootElement(doc);
-                parse_torus(ptr);
+                if (!strcmp((const char *) attr->name, "ccl"))
+                    ccl_field = mp::xml::get_text(attr->children);
+                else if (!strcmp((const char *) attr->name, "cql"))
+                    cql_field = mp::xml::get_text(attr->children);
+                else
+                    throw mp::filter::FilterException(
+                        "Bad attribute " + std::string((const char *)
+                                                       attr->name));
             }
+            if (cql_field.length())
+                fieldmap[cql_field] = ccl_field;
         }
         else
         {
@@ -419,29 +491,46 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     if (m_backend && m_backend->m_frontend_database == database)
         return m_backend;
 
-    std::list<Zoom::SearchablePtr>::iterator map_s =
-        m_p->m_searchables.begin();
-
-    std::string c_db = mp::util::database_name_normalize(database);
-
-    while (map_s != m_p->m_searchables.end())
+    std::string db_args;
+    std::string cf_parm;
+    std::string torus_db;
+    size_t db_arg_pos = database.find(',');
+    if (db_arg_pos != std::string::npos)
     {
-        if (c_db.compare((*map_s)->database) == 0)
-            break;
-        map_s++;
+        torus_db = database.substr(0, db_arg_pos);
+        db_args = database.substr(db_arg_pos+1);
     }
-    if (map_s == m_p->m_searchables.end())
+    else
+        torus_db = database;
+    xmlDoc *doc = mp::get_searchable(m_p->torus_url, torus_db);
+    if (!doc)
     {
         *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
         *addinfo = database.c_str();
         BackendPtr b;
         return b;
     }
-
+    SearchablePtr sptr = m_p->parse_torus(xmlDocGetRootElement(doc));
+    xmlFreeDoc(doc);
+    if (!sptr)
+    {
+        *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
+        *addinfo = database.c_str();
+        BackendPtr b;
+        return b;
+    }
+        
     xsltStylesheetPtr xsp = 0;
-    if ((*map_s)->transform_xsl_fname.length())
+    if (sptr->transform_xsl_fname.length())
     {
-        xmlDoc *xsp_doc = xmlParseFile((*map_s)->transform_xsl_fname.c_str());
+        std::string fname;
+
+        if (m_p->xsldir.length()) 
+            fname = m_p->xsldir + "/" + sptr->transform_xsl_fname;
+        else
+            fname = sptr->transform_xsl_fname;
+        xmlDoc *xsp_doc = xmlParseFile(fname.c_str());
         if (!xsp_doc)
         {
             *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
@@ -460,20 +549,49 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
         }
     }
 
-    SearchablePtr sptr = *map_s;
-
     m_backend.reset();
 
     BackendPtr b(new Backend(sptr));
 
     b->xsp = xsp;
     b->m_frontend_database = database;
+    std::string authentication = sptr->authentication;
+        
+    b->set_option("timeout", "40");
 
     if (sptr->query_encoding.length())
         b->set_option("rpnCharset", sptr->query_encoding.c_str());
 
-    if (sptr->authentication.length())
-        b->set_option("user", sptr->authentication.c_str());
+    if (sptr->cfAuth.length())
+    {
+        b->set_option("user", sptr->cfAuth.c_str());
+        if (authentication.length())
+        {
+            size_t found = authentication.find('/');
+            if (found != std::string::npos)
+            {
+                cf_parm += "user=" + mp::util::uri_encode(authentication.substr(0, found))
+                    + "&password=" + mp::util::uri_encode(authentication.substr(found+1));
+            }
+            else
+                cf_parm += "user=" + mp::util::uri_encode(authentication);
+        }
+    }
+    else if (authentication.length())
+        b->set_option("user", authentication.c_str());
+
+    if (sptr->cfProxy.length())
+    {
+        if (cf_parm.length())
+            cf_parm += "&";
+        cf_parm += "proxy=" + mp::util::uri_encode(sptr->cfProxy);
+    }
+    if (sptr->cfSubDb.length())
+    {
+        if (cf_parm.length())
+            cf_parm += "&";
+        cf_parm += "subdatabase=" + mp::util::uri_encode(sptr->cfSubDb);
+    }
 
     std::string url;
     if (sptr->sru.length())
@@ -482,8 +600,14 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
         b->set_option("sru", sptr->sru.c_str());
     }
     else
+    {
         url = sptr->target;
-
+    }
+    if (db_args.length())
+        url += "," + db_args;
+    else if (cf_parm.length())
+        url += "," + cf_parm;
+    yaz_log(YLOG_LOG, "url=%s", url.c_str());
     b->connect(url, error, addinfo);
     if (*error == 0)
     {
@@ -521,7 +645,8 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
     if (preferredRecordSyntax)
     {
         if (!oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml)
-            && !strcmp(element_set_name, "pz2"))
+            && element_set_name &&
+            !strcmp(element_set_name, "pz2"))
         {
             if (b->sptr->request_syntax.length())
             {
@@ -655,6 +780,38 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
     return records;
 }
     
+struct cql_node *yf::Zoom::Impl::convert_cql_fields(struct cql_node *cn,
+                                                    ODR odr)
+{ // Rewrite the index names of a CQL tree in place using fieldmap (CQL field -> CCL field, filled by configure).
+    struct cql_node *r = 0; // result: 0 = nothing to complain about, else the offending node
+    if (!cn)
+        return 0; // empty subtree: nothing to convert
+    switch (cn->which)
+    {
+    case CQL_NODE_ST: // search-term node carrying u.st.index
+        if (cn->u.st.index) // terms without an index are left as they are
+        {
+            std::map<std::string,std::string>::const_iterator it;
+            it = fieldmap.find(cn->u.st.index);
+            if (it == fieldmap.end())
+                return cn; // no fieldmap entry: hand the node back so the caller can report its index
+            if (it->second.length())
+                cn->u.st.index = odr_strdup(odr, it->second.c_str()); // replace with the mapped name (copy made with odr_strdup)
+            else
+                cn->u.st.index = 0; // mapped to an empty name: drop the index
+        }
+        break;
+    case CQL_NODE_BOOL: // boolean node: left operand first, right operand only if left reported nothing
+        r = convert_cql_fields(cn->u.boolean.left, odr);
+        if (!r)
+            r = convert_cql_fields(cn->u.boolean.right, odr);
+        break;
+    case CQL_NODE_SORT: // sort node: convert the search it wraps
+        r = convert_cql_fields(cn->u.sort.search, odr);
+        break;
+    }
+    return r; // 0 on success, otherwise the first node whose index had no fieldmap entry
+}
 
 void yf::Zoom::Frontend::handle_search(mp::Package &package)
 {
@@ -722,10 +879,24 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
             return;
         }
         struct cql_node *cn = cql_parser_result(cp);
+        struct cql_node *cn_error = m_p->convert_cql_fields(cn, odr);
+        if (cn_error)
+        {
+            // hopefully we are getting a ptr to a index+relation+term node
+            addinfo = 0;
+            if (cn_error->which == CQL_NODE_ST)
+                addinfo = cn_error->u.st.index;
+
+            apdu_res = 
+                odr.create_searchResponse(apdu_req, 
+                                          YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
+                                          addinfo);
+            package.response() = apdu_res;
+            return;
+        }
         char ccl_buf[1024];
 
         r = cql_to_ccl_buf(cn, ccl_buf, sizeof(ccl_buf));
-        yaz_log(YLOG_LOG, "cql_to_ccl_buf returned %d", r);
         if (r == 0)
         {
             ccl_wrbuf = wrbuf_alloc();
@@ -756,17 +927,28 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
         assert(pqf_wrbuf == 0);
         int cerror, cpos;
         struct ccl_rpn_node *cn;
+        yaz_log(YLOG_LOG, "CCL: %s", wrbuf_cstr(ccl_wrbuf));
         cn = ccl_find_str(b->sptr->ccl_bibset, wrbuf_cstr(ccl_wrbuf),
                           &cerror, &cpos);
         wrbuf_destroy(ccl_wrbuf);
         if (!cn)
         {
             char *addinfo = odr_strdup(odr, ccl_err_msg(cerror));
+            int z3950_diag = YAZ_BIB1_MALFORMED_QUERY;
 
+            switch (cerror)
+            {
+            case CCL_ERR_UNKNOWN_QUAL:
+                z3950_diag = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE;
+                break;
+            case CCL_ERR_TRUNC_NOT_LEFT: 
+            case CCL_ERR_TRUNC_NOT_RIGHT:
+            case CCL_ERR_TRUNC_NOT_BOTH:
+                z3950_diag = YAZ_BIB1_UNSUPP_TRUNCATION_ATTRIBUTE;
+                break;
+            }
             apdu_res = 
-                odr.create_searchResponse(apdu_req, 
-                                          YAZ_BIB1_MALFORMED_QUERY,
-                                          addinfo);
+                odr.create_searchResponse(apdu_req, z3950_diag, addinfo);
             package.response() = apdu_res;
             return;
         }
@@ -776,9 +958,42 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
     }
     
     assert(pqf_wrbuf);
-    b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo);
+    if (b->get_option("sru"))
+    {
+        cql_transform_t cqlt = cql_transform_create();
+        Z_RPNQuery *zquery;
+        WRBUF wrb = wrbuf_alloc();
+        int status;
+        
+        zquery = p_query_rpn(odr, wrbuf_cstr(pqf_wrbuf));
+        status = cql_transform_rpn2cql_wrbuf(cqlt, wrb, zquery);
+        
+        cql_transform_close(cqlt);
+
+        if (status == 0)
+        {
+            yaz_log(YLOG_LOG, "search CQL: %s", wrbuf_cstr(wrb));
+            b->search_cql(wrbuf_cstr(wrb), &hits, &error, &addinfo);
+        }
+
+        wrbuf_destroy(wrb);
+        wrbuf_destroy(pqf_wrbuf);
+        if (status)
+        {
+            apdu_res = 
+                odr.create_searchResponse(apdu_req, YAZ_BIB1_MALFORMED_QUERY,
+                                          "can not convert from RPN to CQL");
+            package.response() = apdu_res;
+            return;
+        }
+    }
+    else
+    {
+        yaz_log(YLOG_LOG, "search PQF: %s", wrbuf_cstr(pqf_wrbuf));
+        b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo);
+        wrbuf_destroy(pqf_wrbuf);
+    }
     
-    wrbuf_destroy(pqf_wrbuf);
     
     const char *element_set_name = 0;
     Odr_int number_to_present = 0;