Beginnings of url recipe handling
[metaproxy-moved-to-github.git] / src / filter_zoom.cpp
index f9c07df..39b5624 100644 (file)
@@ -23,6 +23,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/srw.h>
 #include <metaproxy/package.hpp>
 #include <metaproxy/util.hpp>
+#include <metaproxy/xmlutil.hpp>
 #include "torus.hpp"
 
 #include <libxslt/xsltutils.h>
@@ -60,6 +61,7 @@ namespace metaproxy_1 {
             std::string element_set;
             std::string record_encoding;
             std::string transform_xsl_fname;
+            std::string urlRecipe;
             bool use_turbomarc;
             bool piggyback;
             CCL_bibset ccl_bibset;
@@ -101,7 +103,8 @@ namespace metaproxy_1 {
             void handle_present(mp::Package &package);
             BackendPtr get_backend_from_databases(std::string &database,
                                                   int *error,
-                                                  const char **addinfo);
+                                                  char **addinfo,
+                                                  ODR odr);
             Z_Records *get_records(Odr_int start,
                                    Odr_int number_to_present,
                                    int *error,
@@ -134,6 +137,8 @@ namespace metaproxy_1 {
             std::map<std::string,std::string> fieldmap;
             std::string xsldir;
             CCL_bibset bibset;
+            std::string element_transform;
+            std::string element_raw;
             std::map<std::string,SearchablePtr> s_map;
         };
     }
@@ -314,7 +319,7 @@ void yf::Zoom::Impl::release_frontend(mp::Package &package)
     }
 }
 
-yf::Zoom::Impl::Impl()
+yf::Zoom::Impl::Impl() : element_transform("pz2") , element_raw("raw")
 {
     bibset = ccl_qual_mk();
 }
@@ -397,6 +402,11 @@ yf::Zoom::SearchablePtr yf::Zoom::Impl::parse_torus_record(const xmlNode *ptr)
             s->transform_xsl_fname = mp::xml::get_text(ptr);
         }
         else if (!strcmp((const char *) ptr->name,
+                         "urlRecipe"))
+        {
+            s->urlRecipe = mp::xml::get_text(ptr);
+        }
+        else if (!strcmp((const char *) ptr->name,
                          "useTurboMarc"))
         {
             ; // useTurboMarc is ignored
@@ -475,6 +485,10 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
                     torus_url = mp::xml::get_text(attr->children);
                 else if (!strcmp((const char *) attr->name, "xsldir"))
                     xsldir = mp::xml::get_text(attr->children);
+                else if (!strcmp((const char *) attr->name, "element_transform"))
+                    element_transform = mp::xml::get_text(attr->children);
+                else if (!strcmp((const char *) attr->name, "element_raw"))
+                    element_raw = mp::xml::get_text(attr->children);
                 else
                     throw mp::filter::FilterException(
                         "Bad attribute " + std::string((const char *)
@@ -517,20 +531,19 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only)
 }
 
 yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
-    std::string &database, int *error, const char **addinfo)
+    std::string &database, int *error, char **addinfo, ODR odr)
 {
     std::list<BackendPtr>::const_iterator map_it;
     if (m_backend && m_backend->m_frontend_database == database)
         return m_backend;
 
     std::string db_args;
-    std::string cf_parm;
     std::string torus_db;
     size_t db_arg_pos = database.find(',');
     if (db_arg_pos != std::string::npos)
     {
         torus_db = database.substr(0, db_arg_pos);
-        db_args = database.substr(db_arg_pos+1);
+        db_args = database.substr(db_arg_pos + 1);
     }
     else
         torus_db = database;
@@ -547,7 +560,7 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
         if (!doc)
         {
             *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
-            *addinfo = database.c_str();
+            *addinfo = odr_strdup(odr, database.c_str());
             BackendPtr b;
             return b;
         }
@@ -571,7 +584,7 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     if (!sptr)
     {
         *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
-        *addinfo = database.c_str();
+        *addinfo = odr_strdup(odr, database.c_str());
         BackendPtr b;
         return b;
     }
@@ -589,7 +602,8 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
         if (!xsp_doc)
         {
             *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
-            *addinfo = "xmlParseFile failed";
+            *addinfo = (char *) odr_malloc(odr, 40 + strlen(fname.c_str()));
+            sprintf(*addinfo, "xmlParseFile failed. File %s", fname.c_str());
             BackendPtr b;
             return b;
         }
@@ -597,7 +611,7 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
         if (!xsp)
         {
             *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST;
-            *addinfo = "xsltParseStylesheetDoc failed";
+            *addinfo = odr_strdup(odr, "xsltParseStylesheetDoc failed");
             BackendPtr b;
             xmlFreeDoc(xsp_doc);
             return b;
@@ -619,33 +633,83 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
 
     if (sptr->cfAuth.length())
     {
+        // A CF target
         b->set_option("user", sptr->cfAuth.c_str());
-        if (authentication.length())
+        if (authentication.length() && db_args.length() == 0)
         {
+            // no database (auth) args specified already.. and the
+            // Torus authentication has it.. Generate the args that CF
+            // understands..
             size_t found = authentication.find('/');
             if (found != std::string::npos)
             {
-                cf_parm += "user=" + mp::util::uri_encode(authentication.substr(0, found))
+                db_args += "user=" + mp::util::uri_encode(authentication.substr(0, found))
                     + "&password=" + mp::util::uri_encode(authentication.substr(found+1));
             }
             else
-                cf_parm += "user=" + mp::util::uri_encode(authentication);
+                db_args += "user=" + mp::util::uri_encode(authentication);
+        }
+    }
+    else
+    {
+        // A non-CF target
+        if (db_args.length())
+        {
+            // user has specified backend authentication
+            const char *param_user = 0;
+            const char *param_password = 0;
+            char **names;
+            char **values;
+            int i;
+            int no_parms = yaz_uri_to_array(db_args.c_str(),
+                                            odr, &names, &values);
+            for (i = 0; i < no_parms; i++)
+            {
+                const char *name = names[i];
+                const char *value = values[i];
+                if (!strcmp(name, "user"))
+                    param_user = value;
+                else if (!strcmp(name, "password"))
+                    param_password = value;
+                else
+                {
+                    BackendPtr notfound;
+                    char *msg = (char*) odr_malloc(odr, strlen(name) + 30);
+                    *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+                    sprintf(msg, "Bad database argument: %s", name);
+                    *addinfo = msg;
+                    return notfound;
+                }
+            }
+            if (param_user && param_password)
+            {
+                char *auth = (char*) odr_malloc(
+                    odr, strlen(param_user) + strlen(param_password) + 2);
+                strcpy(auth, param_user);
+                strcat(auth, "/");
+                strcat(auth, param_password);
+                b->set_option("user", auth);
+            }
+            db_args.clear(); // no arguments to be passed (non-CF)
+        }
+        else
+        {
+            // use authentication from Torus, if given
+            if (authentication.length())
+                b->set_option("user", authentication.c_str());
         }
     }
-    else if (authentication.length())
-        b->set_option("user", authentication.c_str());
-
     if (sptr->cfProxy.length())
     {
-        if (cf_parm.length())
-            cf_parm += "&";
-        cf_parm += "proxy=" + mp::util::uri_encode(sptr->cfProxy);
+        if (db_args.length())
+            db_args += "&";
+        db_args += "proxy=" + mp::util::uri_encode(sptr->cfProxy);
     }
     if (sptr->cfSubDb.length())
     {
-        if (cf_parm.length())
-            cf_parm += "&";
-        cf_parm += "subdatabase=" + mp::util::uri_encode(sptr->cfSubDb);
+        if (db_args.length())
+            db_args += "&";
+        db_args += "subdatabase=" + mp::util::uri_encode(sptr->cfSubDb);
     }
 
     std::string url;
@@ -660,10 +724,11 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases(
     }
     if (db_args.length())
         url += "," + db_args;
-    else if (cf_parm.length())
-        url += "," + cf_parm;
     yaz_log(YLOG_LOG, "url=%s", url.c_str());
-    b->connect(url, error, addinfo);
+    const char *addinfo_c = 0;
+    b->connect(url, error, &addinfo_c);
+    if (addinfo_c)
+        *addinfo = odr_strdup(odr, addinfo_c);
     if (*error == 0)
     {
         m_backend = b;
@@ -683,7 +748,9 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
 {
     *number_of_records_returned = 0;
     Z_Records *records = 0;
-    bool enable_pz2_transform = false;
+    bool enable_pz2_retrieval = false; // whether target profile is used
+    bool enable_pz2_transform = false; // whether XSLT is used as well
+    bool assume_marc8_charset = false;
 
     if (start < 0 || number_to_present <= 0)
         return records;
@@ -696,27 +763,41 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
 
     char oid_name_str[OID_STR_MAX];
     const char *syntax_name = 0;
-
-    if (preferredRecordSyntax)
+    
+    if (preferredRecordSyntax &&
+        !oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml)
+        && element_set_name)
     {
-        if (!oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml)
-            && element_set_name &&
-            !strcmp(element_set_name, "pz2"))
+        if (!strcmp(element_set_name, m_p->element_transform.c_str()))
         {
-            if (b->sptr->request_syntax.length())
-                syntax_name = b->sptr->request_syntax.c_str();
+            enable_pz2_retrieval = true;
             enable_pz2_transform = true;
         }
-        else
+        else if (!strcmp(element_set_name, m_p->element_raw.c_str()))
+        {
+            enable_pz2_retrieval = true;
+        }
+    }
+    
+    if (enable_pz2_retrieval)
+    {
+        if (b->sptr->request_syntax.length())
         {
-            syntax_name =
-                yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
+            syntax_name = b->sptr->request_syntax.c_str();
+            const Odr_oid *syntax_oid = 
+                yaz_string_to_oid(yaz_oid_std(), CLASS_RECSYN, syntax_name);
+            if (!oid_oidcmp(syntax_oid, yaz_oid_recsyn_usmarc)
+                || !oid_oidcmp(syntax_oid, yaz_oid_recsyn_opac))
+                assume_marc8_charset = true;
         }
     }
+    else if (preferredRecordSyntax)
+        syntax_name =
+            yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str);
 
     b->set_option("preferredRecordSyntax", syntax_name);
 
-    if (enable_pz2_transform)
+    if (enable_pz2_retrieval)
     {
         element_set_name = 0;
         if (b->sptr->element_set.length())
@@ -756,25 +837,27 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
                 npr = zget_surrogateDiagRec(odr, odr_database, sur_error,
                                             addinfo);
             }
-            else if (enable_pz2_transform)
+            else if (enable_pz2_retrieval)
             {
                 char rec_type_str[100];
+                const char *record_encoding = 0;
+
+                if (b->sptr->record_encoding.length())
+                    record_encoding = b->sptr->record_encoding.c_str();
+                else if (assume_marc8_charset)
+                    record_encoding = "marc8";
 
-                strcpy(rec_type_str, b->sptr->use_turbomarc ?
-                       "txml" : "xml");
-                // prevent buffer overflow ...
-                if (b->sptr->record_encoding.length() > 0 &&
-                    b->sptr->record_encoding.length() < 
-                    (sizeof(rec_type_str)-20))
+                strcpy(rec_type_str, b->sptr->use_turbomarc ? "txml" : "xml");
+                if (record_encoding)
                 {
                     strcat(rec_type_str, "; charset=");
-                    strcat(rec_type_str, b->sptr->record_encoding.c_str());
+                    strcat(rec_type_str, record_encoding);
                 }
                 
                 int rec_len;
                 const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str,
                                                       &rec_len);
-                if (rec_buf && b->xsp)
+                if (rec_buf && b->xsp && enable_pz2_transform)
                 {
                     xmlDoc *rec_doc = xmlParseMemory(rec_buf, rec_len);
                     if (rec_doc)
@@ -790,6 +873,12 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
 
                 if (rec_buf)
                 {
+                    xmlDoc *doc = xmlParseMemory(rec_buf, rec_len);
+                    mp::xml::url_recipe_handle(doc, b->sptr->urlRecipe);
+                    xmlFreeDoc(doc);
+                }
+                if (rec_buf)
+                {
                     npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr));
                     npr->databaseName = odr_database;
                     npr->which = Z_NamePlusRecord_databaseRecord;
@@ -881,18 +970,19 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
     }
 
     int error = 0;
-    const char *addinfo = 0;
+    char *addinfo_s = 0;
     std::string db(sr->databaseNames[0]);
-    BackendPtr b = get_backend_from_databases(db, &error, &addinfo);
+    BackendPtr b = get_backend_from_databases(db, &error, &addinfo_s, odr);
     if (error)
     {
         apdu_res = 
             odr.create_searchResponse(
-                apdu_req, error, addinfo);
+                apdu_req, error, addinfo_s);
         package.response() = apdu_res;
         return;
     }
 
+    const char *addinfo_c = 0;
     b->set_option("setname", "default");
 
     Odr_int hits = 0;
@@ -935,14 +1025,14 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
         if (cn_error)
         {
             // hopefully we are getting a ptr to a index+relation+term node
-            addinfo = 0;
+            addinfo_c = 0;
             if (cn_error->which == CQL_NODE_ST)
-                addinfo = cn_error->u.st.index;
+                addinfo_c = cn_error->u.st.index;
 
             apdu_res = 
                 odr.create_searchResponse(apdu_req, 
                                           YAZ_BIB1_UNSUPP_USE_ATTRIBUTE,
-                                          addinfo);
+                                          addinfo_c);
             package.response() = apdu_res;
             return;
         }
@@ -1036,7 +1126,7 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
         if (status == 0)
         {
             yaz_log(YLOG_LOG, "search CQL: %s", wrbuf_cstr(wrb));
-            b->search_cql(wrbuf_cstr(wrb), &hits, &error, &addinfo);
+            b->search_cql(wrbuf_cstr(wrb), &hits, &error, &addinfo_c);
         }
         
         wrbuf_destroy(wrb);
@@ -1053,7 +1143,7 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
     else
     {
         yaz_log(YLOG_LOG, "search PQF: %s", wrbuf_cstr(pqf_wrbuf));
-        b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo);
+        b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo_c);
         wrbuf_destroy(pqf_wrbuf);
     }
     
@@ -1065,10 +1155,10 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package)
     
     Odr_int number_of_records_returned = 0;
     Z_Records *records = get_records(
-        0, number_to_present, &error, &addinfo,
+        0, number_to_present, &error, &addinfo_c,
         &number_of_records_returned, odr, b, sr->preferredRecordSyntax,
         element_set_name);
-    apdu_res = odr.create_searchResponse(apdu_req, error, addinfo);
+    apdu_res = odr.create_searchResponse(apdu_req, error, addinfo_c);
     if (records)
     {
         apdu_res->u.searchResponse->records = records;