X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ffilter_zoom.cpp;h=97a1fded1a622e37fdfc879fffd3faa10575c79b;hb=0f9a9f5d11f0f8089048cddc7d2df36c7f83345d;hp=a42d9f54f57b5dda46d07ed2bf86b7fda2627b55;hpb=0f92fb88a4e9fec525392fb665043c91b9268f24;p=metaproxy-moved-to-github.git diff --git a/src/filter_zoom.cpp b/src/filter_zoom.cpp index a42d9f5..97a1fde 100644 --- a/src/filter_zoom.cpp +++ b/src/filter_zoom.cpp @@ -1,5 +1,5 @@ /* This file is part of Metaproxy. - Copyright (C) 2005-2011 Index Data + Copyright (C) 2005-2012 Index Data Metaproxy is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -17,10 +17,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ #include "config.hpp" + +#include +#include #include "filter_zoom.hpp" -#include #include #include +#include #include "torus.hpp" #include @@ -28,53 +31,83 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include + +#include +#include +#include +#include #include +#include +#include +#include #include #include #include #include #include #include +#include +#include +#include namespace mp = metaproxy_1; namespace yf = mp::filter; namespace metaproxy_1 { namespace filter { - struct Zoom::Searchable : boost::noncopyable { - std::string database; + class Zoom::Searchable : boost::noncopyable { + public: + std::string authentication; + std::string cfAuth; + std::string cfProxy; + std::string cfSubDB; + std::string udb; std::string target; std::string query_encoding; std::string sru; + std::string sru_version; std::string request_syntax; std::string element_set; std::string record_encoding; std::string transform_xsl_fname; + std::string transform_xsl_content; + std::string urlRecipe; + std::string contentConnector; + std::string sortStrategy; bool use_turbomarc; bool piggyback; CCL_bibset ccl_bibset; - Searchable(); + std::map sortmap; + Searchable(CCL_bibset base); ~Searchable(); }; class Zoom::Backend : boost::noncopyable { friend class Impl; friend class Frontend; std::string zurl; + WRBUF m_apdu_wrbuf; ZOOM_connection m_connection; ZOOM_resultset m_resultset; std::string m_frontend_database; SearchablePtr sptr; xsltStylesheetPtr xsp; + std::string content_session_id; + bool enable_cproxy; + bool enable_explain; + xmlDoc *explain_doc; public: - Backend(SearchablePtr sptr); + Backend(); ~Backend(); - void connect(std::string zurl, int *error, const char **addinfo); - void search_pqf(const char *pqf, Odr_int *hits, - int *error, const char **addinfo); + void connect(std::string zurl, int *error, char **addinfo, + ODR odr); + void search(ZOOM_query q, Odr_int *hits, + int *error, char **addinfo, ODR odr); void present(Odr_int start, Odr_int number, ZOOM_record *recs, - int *error, const char **addinfo); + int *error, char **addinfo, ODR odr); void set_option(const char *name, const char *value); - int get_error(const char **addinfo); + void set_option(const char *name, std::string value); + const char *get_option(const char *name); + void get_zoom_error(int *error, char **addinfo, ODR odr); }; class Zoom::Frontend : boost::noncopyable { friend class Impl; @@ -85,18 +118,59 @@ namespace metaproxy_1 { BackendPtr m_backend; void handle_package(mp::Package &package); void handle_search(mp::Package &package); + + BackendPtr explain_search(mp::Package &package, + std::string &database, + int *error, + char **addinfo, + mp::odr &odr, + std::string &torus_db, + std::string &realm); void handle_present(mp::Package &package); - BackendPtr get_backend_from_databases(std::string &database, + BackendPtr get_backend_from_databases(mp::Package &package, + std::string &database, int *error, - const char **addinfo); - Z_Records *get_records(Odr_int start, + char **addinfo, + mp::odr &odr, + int *proxy_step); + + bool create_content_session(mp::Package &package, + BackendPtr b, + int *error, + char **addinfo, + ODR odr, + std::string authentication, + std::string proxy, + std::string realm); + + void prepare_elements(BackendPtr b, + Odr_oid *preferredRecordSyntax, + const char *element_set_name, + bool &enable_pz2_retrieval, + bool &enable_pz2_transform, + bool &assume_marc8_charset); + + Z_Records *get_records(Package &package, + Odr_int start, Odr_int number_to_present, int *error, - const char **addinfo, + char **addinfo, Odr_int *number_of_records_returned, ODR odr, BackendPtr b, Odr_oid *preferredRecordSyntax, const char *element_set_name); + Z_Records *get_explain_records(Package &package, + Odr_int start, + Odr_int number_to_present, + int *error, + char **addinfo, + Odr_int *number_of_records_returned, + ODR odr, BackendPtr b, + Odr_oid *preferredRecordSyntax, + const char *element_set_name); + + void log_diagnostic(mp::Package &package, + int error, const char *addinfo); public: Frontend(Impl *impl); ~Frontend(); @@ -107,22 +181,59 @@ namespace metaproxy_1 { Impl(); ~Impl(); void process(metaproxy_1::Package & package); - void configure(const xmlNode * ptr, bool test_only); + void configure(const xmlNode * ptr, bool test_only, + const char *path); private: + void configure_local_records(const xmlNode * ptr, bool test_only); FrontendPtr get_frontend(mp::Package &package); void release_frontend(mp::Package &package); - void parse_torus(const xmlNode *ptr); - - std::listm_searchables; - + SearchablePtr parse_torus_record(const xmlNode *ptr); + struct cql_node *convert_cql_fields(struct cql_node *cn, ODR odr); std::map m_clients; boost::mutex m_mutex; boost::condition m_cond_session_ready; - mp::Torus torus; + std::string torus_searchable_url; + std::string torus_content_url; + std::string default_realm; + std::map fieldmap; + std::string xsldir; + std::string file_path; + std::string content_proxy_server; + std::string content_tmp_file; + bool apdu_log; + CCL_bibset bibset; + std::string element_transform; + std::string element_raw; + std::string proxy; + xsltStylesheetPtr explain_xsp; + std::map s_map; }; } } + +static xmlNode *xml_node_search(xmlNode *ptr, int *num, int m) +{ + while (ptr) + { + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "recordData")) + { + (*num)++; + if (m == *num) + return ptr; + } + else // else: we don't want to find nested nodes + { + xmlNode *ret_node = xml_node_search(ptr->children, num, m); + if (ret_node) + return ret_node; + } + ptr = ptr->next; + } + return 0; +} + // define Pimpl wrapper forwarding to Impl yf::Zoom::Zoom() : m_p(new Impl) @@ -133,9 +244,10 @@ yf::Zoom::~Zoom() { // must have a destructor because of boost::scoped_ptr } -void yf::Zoom::configure(const xmlNode *xmlnode, bool test_only) +void yf::Zoom::configure(const xmlNode *xmlnode, bool test_only, + const char *path) { - m_p->configure(xmlnode, test_only); + m_p->configure(xmlnode, test_only, path); } void yf::Zoom::process(mp::Package &package) const @@ -146,33 +258,77 @@ void yf::Zoom::process(mp::Package &package) const // define Implementation stuff -yf::Zoom::Backend::Backend(SearchablePtr ptr) : sptr(ptr) +yf::Zoom::Backend::Backend() { + m_apdu_wrbuf = wrbuf_alloc(); m_connection = ZOOM_connection_create(0); + ZOOM_connection_save_apdu_wrbuf(m_connection, m_apdu_wrbuf); m_resultset = 0; xsp = 0; + enable_cproxy = true; + enable_explain = false; + explain_doc = 0; } yf::Zoom::Backend::~Backend() { if (xsp) xsltFreeStylesheet(xsp); + if (explain_doc) + xmlFreeDoc(explain_doc); ZOOM_connection_destroy(m_connection); ZOOM_resultset_destroy(m_resultset); + wrbuf_destroy(m_apdu_wrbuf); +} + + +void yf::Zoom::Backend::get_zoom_error(int *error, char **addinfo, + ODR odr) +{ + const char *msg = 0; + const char *zoom_addinfo = 0; + const char *dset = 0; + int error0 = ZOOM_connection_error_x(m_connection, &msg, + &zoom_addinfo, &dset); + if (error0) + { + if (!dset) + dset = "Unknown"; + + if (!strcmp(dset, "info:srw/diagnostic/1")) + *error = yaz_diag_srw_to_bib1(error0); + else if (!strcmp(dset, "Bib-1")) + *error = error0; + else + *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; + + *addinfo = (char *) odr_malloc( + odr, 30 + strlen(dset) + strlen(msg) + + (zoom_addinfo ? strlen(zoom_addinfo) : 0)); + **addinfo = '\0'; + if (zoom_addinfo && *zoom_addinfo) + { + strcpy(*addinfo, zoom_addinfo); + strcat(*addinfo, " "); + } + sprintf(*addinfo + strlen(*addinfo), "(%s %d %s)", dset, error0, msg); + } } void yf::Zoom::Backend::connect(std::string zurl, - int *error, const char **addinfo) + int *error, char **addinfo, + ODR odr) { ZOOM_connection_connect(m_connection, zurl.c_str(), 0); - *error = ZOOM_connection_error(m_connection, 0, addinfo); + get_zoom_error(error, addinfo, odr); } -void yf::Zoom::Backend::search_pqf(const char *pqf, Odr_int *hits, - int *error, const char **addinfo) +void yf::Zoom::Backend::search(ZOOM_query q, Odr_int *hits, + int *error, char **addinfo, ODR odr) { - m_resultset = ZOOM_connection_search_pqf(m_connection, pqf); - *error = ZOOM_connection_error(m_connection, 0, addinfo); + ZOOM_resultset_destroy(m_resultset); + m_resultset = ZOOM_connection_search(m_connection, q); + get_zoom_error(error, addinfo, odr); if (*error == 0) *hits = ZOOM_resultset_size(m_resultset); else @@ -181,10 +337,10 @@ void yf::Zoom::Backend::search_pqf(const char *pqf, Odr_int *hits, void yf::Zoom::Backend::present(Odr_int start, Odr_int number, ZOOM_record *recs, - int *error, const char **addinfo) + int *error, char **addinfo, ODR odr) { ZOOM_resultset_records(m_resultset, recs, start, number); - *error = ZOOM_connection_error(m_connection, 0, addinfo); + get_zoom_error(error, addinfo, odr); } void yf::Zoom::Backend::set_option(const char *name, const char *value) @@ -194,16 +350,23 @@ void yf::Zoom::Backend::set_option(const char *name, const char *value) ZOOM_resultset_option_set(m_resultset, name, value); } -int yf::Zoom::Backend::get_error(const char **addinfo) +void yf::Zoom::Backend::set_option(const char *name, std::string value) +{ + set_option(name, value.c_str()); +} + +const char *yf::Zoom::Backend::get_option(const char *name) { - return ZOOM_connection_error(m_connection, 0, addinfo); + return ZOOM_connection_option_get(m_connection, name); } -yf::Zoom::Searchable::Searchable() +yf::Zoom::Searchable::Searchable(CCL_bibset base) { piggyback = true; use_turbomarc = true; - ccl_bibset = ccl_qual_mk(); + sortStrategy = "embed"; + urlRecipe = "${md-electronic-url}"; + ccl_bibset = ccl_qual_dup(base); } yf::Zoom::Searchable::~Searchable() @@ -265,135 +428,288 @@ void yf::Zoom::Impl::release_frontend(mp::Package &package) } } -yf::Zoom::Impl::Impl() +yf::Zoom::Impl::Impl() : + apdu_log(false), element_transform("pz2") , element_raw("raw") { + bibset = ccl_qual_mk(); + + explain_xsp = 0; + srand((unsigned int) time(0)); } yf::Zoom::Impl::~Impl() -{ +{ + if (explain_xsp) + xsltFreeStylesheet(explain_xsp); + ccl_qual_rm(&bibset); } -void yf::Zoom::Impl::parse_torus(const xmlNode *ptr1) +yf::Zoom::SearchablePtr yf::Zoom::Impl::parse_torus_record(const xmlNode *ptr) { - if (!ptr1) - return ; - for (ptr1 = ptr1->children; ptr1; ptr1 = ptr1->next) + Zoom::SearchablePtr s(new Searchable(bibset)); + + for (ptr = ptr->children; ptr; ptr = ptr->next) { - if (ptr1->type != XML_ELEMENT_NODE) + if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr1->name, "record")) + if (!strcmp((const char *) ptr->name, "layer")) + ptr = ptr->children; + else if (!strcmp((const char *) ptr->name, + "authentication")) + { + s->authentication = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "cfAuth")) + { + s->cfAuth = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "cfProxy")) + { + s->cfProxy = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "cfSubDB")) + { + s->cfSubDB = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "contentConnector")) + { + s->contentConnector = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, "udb")) + { + s->udb = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, "zurl")) + { + s->target = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, "sru")) + { + s->sru = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, "SRUVersion") || + !strcmp((const char *) ptr->name, "sruVersion")) + { + s->sru_version = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "queryEncoding")) + { + s->query_encoding = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "piggyback")) + { + s->piggyback = mp::xml::get_bool(ptr, true); + } + else if (!strcmp((const char *) ptr->name, + "requestSyntax")) + { + s->request_syntax = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "elementSet")) + { + s->element_set = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "recordEncoding")) { - const xmlNode *ptr2 = ptr1; - for (ptr2 = ptr2->children; ptr2; ptr2 = ptr2->next) + s->record_encoding = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "transform")) + { + s->transform_xsl_fname = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "literalTransform")) + { + s->transform_xsl_content = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "urlRecipe")) + { + s->urlRecipe = mp::xml::get_text(ptr); + } + else if (!strcmp((const char *) ptr->name, + "useTurboMarc")) + { + ; // useTurboMarc is ignored + } + else if (!strncmp((const char *) ptr->name, + "cclmap_", 7)) + { + std::string value = mp::xml::get_text(ptr); + if (value.length() > 0) + { + ccl_qual_fitem(s->ccl_bibset, value.c_str(), + (const char *) ptr->name + 7); + } + } + else if (!strncmp((const char *) ptr->name, + "sortmap_", 8)) + { + std::string value = mp::xml::get_text(ptr); + s->sortmap[(const char *) ptr->name + 8] = value; + } + else if (!strcmp((const char *) ptr->name, + "sortStrategy")) + { + s->sortStrategy = mp::xml::get_text(ptr); + } + } + return s; +} + +void yf::Zoom::Impl::configure_local_records(const xmlNode *ptr, bool test_only) +{ + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + + if (ptr) + { + if (!strcmp((const char *) ptr->name, "records")) + { + for (ptr = ptr->children; ptr; ptr = ptr->next) { - if (ptr2->type != XML_ELEMENT_NODE) + if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr2->name, "layer")) + if (!strcmp((const char *) ptr->name, "record")) { - Zoom::SearchablePtr s(new Searchable); - - const xmlNode *ptr3 = ptr2; - for (ptr3 = ptr3->children; ptr3; ptr3 = ptr3->next) + SearchablePtr s = parse_torus_record(ptr); + if (s) { - if (ptr3->type != XML_ELEMENT_NODE) - continue; - if (!strcmp((const char *) ptr3->name, "id")) - { - s->database = mp::xml::get_text(ptr3); - } - else if (!strcmp((const char *) ptr3->name, "zurl")) - { - s->target = mp::xml::get_text(ptr3); - } - else if (!strcmp((const char *) ptr3->name, "sru")) - { - s->sru = mp::xml::get_text(ptr3); - } - else if (!strcmp((const char *) ptr3->name, - "queryEncoding")) + std::string udb = s->udb; + if (udb.length()) + s_map[s->udb] = s; + else { - s->query_encoding = mp::xml::get_text(ptr3); + throw mp::filter::FilterException + ("No udb for local torus record"); } - else if (!strcmp((const char *) ptr3->name, - "piggyback")) - { - s->piggyback = mp::xml::get_bool(ptr3, true); - } - else if (!strcmp((const char *) ptr3->name, - "requestSyntax")) - { - s->request_syntax = mp::xml::get_text(ptr3); - } - else if (!strcmp((const char *) ptr3->name, - "elementSet")) - { - s->element_set = mp::xml::get_text(ptr3); - } - else if (!strcmp((const char *) ptr3->name, - "recordEncoding")) - { - s->record_encoding = mp::xml::get_text(ptr3); - } - else if (!strcmp((const char *) ptr3->name, - "transform")) - { - s->transform_xsl_fname = mp::xml::get_text(ptr3); - } - else if (!strcmp((const char *) ptr3->name, - "useTurboMarc")) - { - ; // useTurboMarc is ignored - } - else if (!strncmp((const char *) ptr3->name, - "cclmap_", 7)) - { - std::string value = mp::xml::get_text(ptr3); - ccl_qual_fitem(s->ccl_bibset, value.c_str(), - (const char *) ptr3->name + 7); - } - } - if (s->database.length() && s->target.length()) - { - yaz_log(YLOG_LOG, "add db=%s target=%s turbomarc=%s", - s->database.c_str(), s->target.c_str(), - s->use_turbomarc ? "1" : "0"); - m_searchables.push_back(s); } } + else + { + throw mp::filter::FilterException + ("Bad element " + + std::string((const char *) ptr->name) + + " in zoom filter inside element " + ""); + } } } + else + { + throw mp::filter::FilterException + ("Bad element " + + std::string((const char *) ptr->name) + + " in zoom filter inside element "); + } } } -void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only) +void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only, + const char *path) { + std::string explain_xslt_fname; + + content_tmp_file = "/tmp/cf.XXXXXX.p"; + if (path && *path) + { + file_path = path; + } for (ptr = ptr->children; ptr; ptr = ptr->next) { if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr->name, "records")) - { - parse_torus(ptr); - } else if (!strcmp((const char *) ptr->name, "torus")) { - std::string url; const struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) { if (!strcmp((const char *) attr->name, "url")) - url = mp::xml::get_text(attr->children); + torus_searchable_url = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "content_url")) + torus_content_url = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "realm")) + default_realm = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "xsldir")) + xsldir = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "element_transform")) + element_transform = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "element_raw")) + element_raw = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "proxy")) + proxy = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "explain_xsl")) + explain_xslt_fname = mp::xml::get_text(attr->children); + else + throw mp::filter::FilterException( + "Bad attribute " + std::string((const char *) + attr->name)); + } + // If content_url is not given, use value of searchable, to + // ensure backwards compatibility + if (!torus_content_url.length()) + torus_content_url = torus_searchable_url; + configure_local_records(ptr->children, test_only); + } + else if (!strcmp((const char *) ptr->name, "cclmap")) + { + const char *addinfo = 0; + ccl_xml_config(bibset, ptr, &addinfo); + } + else if (!strcmp((const char *) ptr->name, "fieldmap")) + { + const struct _xmlAttr *attr; + std::string ccl_field; + std::string cql_field; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "ccl")) + ccl_field = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "cql")) + cql_field = mp::xml::get_text(attr->children); + else + throw mp::filter::FilterException( + "Bad attribute " + std::string((const char *) + attr->name)); + } + if (cql_field.length()) + fieldmap[cql_field] = ccl_field; + } + else if (!strcmp((const char *) ptr->name, "contentProxy")) + { + const struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "server")) + content_proxy_server = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "tmp_file")) + content_tmp_file = mp::xml::get_text(attr->children); else throw mp::filter::FilterException( "Bad attribute " + std::string((const char *) attr->name)); } - torus.read_searchables(url); - xmlDoc *doc = torus.get_doc(); - if (doc) + } + else if (!strcmp((const char *) ptr->name, "log")) + { + const struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) { - xmlNode *ptr = xmlDocGetRootElement(doc); - parse_torus(ptr); + if (!strcmp((const char *) attr->name, "apdu")) + apdu_log = mp::xml::get_bool(attr->children, false); + else + throw mp::filter::FilterException( + "Bad attribute " + std::string((const char *) + attr->name)); } } else @@ -404,42 +720,281 @@ void yf::Zoom::Impl::configure(const xmlNode *ptr, bool test_only) + " in zoom filter"); } } + + if (explain_xslt_fname.length()) + { + const char *path = 0; + + if (xsldir.length()) + path = xsldir.c_str(); + else + path = file_path.c_str(); + + char fullpath[1024]; + char *cp = yaz_filepath_resolve(explain_xslt_fname.c_str(), + path, 0, fullpath); + if (!cp) + { + throw mp::filter::FilterException + ("Cannot read XSLT " + explain_xslt_fname); + } + + xmlDoc *xsp_doc = xmlParseFile(cp); + if (!xsp_doc) + { + throw mp::filter::FilterException + ("Cannot parse XSLT " + explain_xslt_fname); + } + + explain_xsp = xsltParseStylesheetDoc(xsp_doc); + if (!explain_xsp) + { + xmlFreeDoc(xsp_doc); + throw mp::filter::FilterException + ("Cannot parse XSLT " + explain_xslt_fname); + + } + } +} + +bool yf::Zoom::Frontend::create_content_session(mp::Package &package, + BackendPtr b, + int *error, char **addinfo, + ODR odr, + std::string authentication, + std::string proxy, + std::string realm) +{ + if (b->sptr->contentConnector.length()) + { + char *fname = (char *) xmalloc(m_p->content_tmp_file.length() + 8); + strcpy(fname, m_p->content_tmp_file.c_str()); + char *xx = strstr(fname, "XXXXXX"); + if (!xx) + { + xx = fname + strlen(fname); + strcat(fname, "XXXXXX"); + } + char tmp_char = xx[6]; + sprintf(xx, "%06d", ((unsigned) rand()) % 1000000); + xx[6] = tmp_char; + + FILE *file = fopen(fname, "w"); + if (!file) + { + package.log("zoom", YLOG_WARN|YLOG_ERRNO, "create %s", fname); + *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; + *addinfo = (char *) odr_malloc(odr, 40 + strlen(fname)); + sprintf(*addinfo, "Could not create %s", fname); + xfree(fname); + return false; + } + b->content_session_id.assign(xx, 6); + WRBUF w = wrbuf_alloc(); + wrbuf_puts(w, "#content_proxy\n"); + wrbuf_printf(w, "connector: %s\n", b->sptr->contentConnector.c_str()); + if (authentication.length()) + wrbuf_printf(w, "auth: %s\n", authentication.c_str()); + if (proxy.length()) + wrbuf_printf(w, "proxy: %s\n", proxy.c_str()); + if (realm.length()) + wrbuf_printf(w, "realm: %s\n", realm.c_str()); + + fwrite(wrbuf_buf(w), 1, wrbuf_len(w), file); + fclose(file); + package.log("zoom", YLOG_LOG, "content file: %s", fname); + xfree(fname); + wrbuf_destroy(w); + } + return true; } yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases( - std::string &database, int *error, const char **addinfo) + mp::Package &package, + std::string &database, int *error, char **addinfo, mp::odr &odr, + int *proxy_step) { std::list::const_iterator map_it; - if (m_backend && m_backend->m_frontend_database == database) + if (m_backend && !m_backend->enable_explain && + m_backend->m_frontend_database == database) return m_backend; - std::list::iterator map_s = - m_p->m_searchables.begin(); + std::string input_args; + std::string torus_db; + size_t db_arg_pos = database.find(','); + if (db_arg_pos != std::string::npos) + { + torus_db = database.substr(0, db_arg_pos); + input_args = database.substr(db_arg_pos + 1); + } + else + torus_db = database; + + std::string authentication; + std::string content_authentication; + std::string proxy; + std::string content_proxy; + std::string realm = m_p->default_realm; + + const char *param_user = 0; + const char *param_password = 0; + const char *param_content_user = 0; + const char *param_content_password = 0; + const char *param_nocproxy = 0; + int no_parms = 0; + + char **names; + char **values; + int no_out_args = 0; + if (input_args.length()) + no_parms = yaz_uri_to_array(input_args.c_str(), + odr, &names, &values); + // adding 10 because we'll be adding other URL args + const char **out_names = (const char **) + odr_malloc(odr, (10 + no_parms) * sizeof(*out_names)); + const char **out_values = (const char **) + odr_malloc(odr, (10 + no_parms) * sizeof(*out_values)); + + // may be changed if it's a content connection + std::string torus_url = m_p->torus_searchable_url; + int i; + for (i = 0; i < no_parms; i++) + { + const char *name = names[i]; + const char *value = values[i]; + assert(name); + assert(value); + if (!strcmp(name, "user")) + param_user = value; + else if (!strcmp(name, "password")) + param_password = value; + else if (!strcmp(name, "content-user")) + param_content_user = value; + else if (!strcmp(name, "content-password")) + param_content_password = value; + else if (!strcmp(name, "content-proxy")) + content_proxy = value; + else if (!strcmp(name, "nocproxy")) + param_nocproxy = value; + else if (!strcmp(name, "proxy")) + { + char **dstr; + int dnum = 0; + nmem_strsplit(((ODR) odr)->mem, ",", value, &dstr, &dnum); + if (*proxy_step >= dnum) + *proxy_step = 0; + else + { + proxy = dstr[*proxy_step]; + + (*proxy_step)++; + if (*proxy_step == dnum) + *proxy_step = 0; + } + } + else if (!strcmp(name, "cproxysession")) + { + out_names[no_out_args] = name; + out_values[no_out_args++] = value; + torus_url = m_p->torus_content_url; + } + else if (!strcmp(name, "realm")) + realm = value; + else if (name[0] == 'x' && name[1] == '-') + { + out_names[no_out_args] = name; + out_values[no_out_args++] = value; + } + else + { + BackendPtr notfound; + char *msg = (char*) odr_malloc(odr, strlen(name) + 30); + *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; + sprintf(msg, "Bad database argument: %s", name); + *addinfo = msg; + return notfound; + } + } + if (param_user) + { + authentication = std::string(param_user); + if (param_password) + authentication += "/" + std::string(param_password); + } + if (param_content_user) + { + content_authentication = std::string(param_content_user); + if (param_content_password) + content_authentication += "/" + std::string(param_content_password); + } - std::string c_db = mp::util::database_name_normalize(database); + if (torus_db.compare("IR-Explain---1") == 0) + return explain_search(package, database, error, addinfo, odr, torus_db, + realm); + + SearchablePtr sptr; - while (map_s != m_p->m_searchables.end()) + std::map::iterator it; + it = m_p->s_map.find(torus_db); + if (it != m_p->s_map.end()) + sptr = it->second; + else if (torus_url.length() > 0) { - if (c_db.compare((*map_s)->database) == 0) - break; - map_s++; + std::string torus_query = "udb==" + torus_db; + xmlDoc *doc = mp::get_searchable(package,torus_url, torus_db, + torus_query, + realm, m_p->proxy); + if (!doc) + { + *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST; + *addinfo = odr_strdup(odr, database.c_str()); + BackendPtr b; + return b; + } + const xmlNode *ptr = xmlDocGetRootElement(doc); + if (ptr) + { // presumably ptr is a records element node + // parse first record in document + for (ptr = ptr->children; ptr; ptr = ptr->next) + { + if (ptr->type == XML_ELEMENT_NODE + && !strcmp((const char *) ptr->name, "record")) + { + if (sptr) + { + *error = YAZ_BIB1_UNSPECIFIED_ERROR; + *addinfo = (char*) odr_malloc(odr, 40 + database.length()), + sprintf(*addinfo, "multiple records for udb=%s", + database.c_str()); + xmlFreeDoc(doc); + BackendPtr b; + return b; + } + sptr = m_p->parse_torus_record(ptr); + } + } + } + xmlFreeDoc(doc); } - if (map_s == m_p->m_searchables.end()) + + if (!sptr) { *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST; - *addinfo = database.c_str(); + *addinfo = odr_strdup(odr, database.c_str()); BackendPtr b; return b; } - + xsltStylesheetPtr xsp = 0; - if ((*map_s)->transform_xsl_fname.length()) + if (sptr->transform_xsl_content.length()) { - xmlDoc *xsp_doc = xmlParseFile((*map_s)->transform_xsl_fname.c_str()); + xmlDoc *xsp_doc = xmlParseMemory(sptr->transform_xsl_content.c_str(), + sptr->transform_xsl_content.length()); if (!xsp_doc) { *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; - *addinfo = "xmlParseFile failed"; + *addinfo = (char *) odr_malloc(odr, 40); + sprintf(*addinfo, "xmlParseMemory failed"); BackendPtr b; return b; } @@ -447,100 +1002,337 @@ yf::Zoom::BackendPtr yf::Zoom::Frontend::get_backend_from_databases( if (!xsp) { *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST; - *addinfo = "xsltParseStylesheetDoc failed"; + *addinfo = odr_strdup(odr, "xsltParseStylesheetDoc failed"); BackendPtr b; xmlFreeDoc(xsp_doc); return b; } } + else if (sptr->transform_xsl_fname.length()) + { + const char *path = 0; - SearchablePtr sptr = *map_s; + if (m_p->xsldir.length()) + path = m_p->xsldir.c_str(); + else + path = m_p->file_path.c_str(); + std::string fname; + + char fullpath[1024]; + char *cp = yaz_filepath_resolve(sptr->transform_xsl_fname.c_str(), + path, 0, fullpath); + if (cp) + fname.assign(cp); + else + { + *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; + *addinfo = (char *) + odr_malloc(odr, 40 + sptr->transform_xsl_fname.length()); + sprintf(*addinfo, "File could not be read: %s", + sptr->transform_xsl_fname.c_str()); + BackendPtr b; + return b; + } + xmlDoc *xsp_doc = xmlParseFile(fname.c_str()); + if (!xsp_doc) + { + *error = YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; + *addinfo = (char *) odr_malloc(odr, 40 + fname.length()); + sprintf(*addinfo, "xmlParseFile failed. File: %s", fname.c_str()); + BackendPtr b; + return b; + } + xsp = xsltParseStylesheetDoc(xsp_doc); + if (!xsp) + { + *error = YAZ_BIB1_DATABASE_DOES_NOT_EXIST; + *addinfo = odr_strdup(odr, "xsltParseStylesheetDoc failed"); + BackendPtr b; + xmlFreeDoc(xsp_doc); + return b; + } + } m_backend.reset(); - BackendPtr b(new Backend(sptr)); + BackendPtr b(new Backend); + b->sptr = sptr; b->xsp = xsp; b->m_frontend_database = database; + b->enable_cproxy = param_nocproxy ? false : true; if (sptr->query_encoding.length()) - b->set_option("rpnCharset", sptr->query_encoding.c_str()); + b->set_option("rpnCharset", sptr->query_encoding); - std::string url; - if (sptr->sru.length()) - { - url = "http://" + sptr->target; - b->set_option("sru", sptr->sru.c_str()); - } - else - url = sptr->target; + b->set_option("timeout", "40"); + + if (m_p->apdu_log) + b->set_option("apdulog", "1"); - b->connect(url, error, addinfo); - if (*error == 0) + if (sptr->piggyback && sptr->sru.length()) + b->set_option("count", "1"); /* some SRU servers INSIST on getting + maximumRecords > 0 */ + b->set_option("piggyback", sptr->piggyback ? "1" : "0"); + + if (authentication.length() == 0) + authentication = sptr->authentication; + + if (proxy.length() == 0) + proxy = sptr->cfProxy; + + if (sptr->cfAuth.length()) { - m_backend = b; + // A CF target + b->set_option("user", sptr->cfAuth); + if (authentication.length()) + { + size_t found = authentication.find('/'); + if (found != std::string::npos) + { + out_names[no_out_args] = "user"; + out_values[no_out_args++] = + odr_strdup(odr, authentication.substr(0, found).c_str()); + + out_names[no_out_args] = "password"; + out_values[no_out_args++] = + odr_strdup(odr, authentication.substr(found+1).c_str()); + } + else + { + out_names[no_out_args] = "user"; + out_values[no_out_args++] = + odr_strdup(odr, authentication.c_str()); + } + } + if (proxy.length()) + { + out_names[no_out_args] = "proxy"; + out_values[no_out_args++] = odr_strdup(odr, proxy.c_str()); + } + if (sptr->cfSubDB.length()) + { + out_names[no_out_args] = "subdatabase"; + out_values[no_out_args++] = odr_strdup(odr, sptr->cfSubDB.c_str()); + } + if (param_nocproxy) + { + out_names[no_out_args] = "nocproxy"; + out_values[no_out_args++] = odr_strdup(odr, param_nocproxy); + } + } + else + { + size_t found = authentication.find('/'); + + if (sptr->sru.length() && found != std::string::npos) + { + b->set_option("user", authentication.substr(0, found)); + b->set_option("password", authentication.substr(found+1)); + } + else + b->set_option("user", authentication); + + if (proxy.length()) + b->set_option("proxy", proxy); } + if (proxy.length()) + package.log("zoom", YLOG_LOG, "proxy: %s", proxy.c_str()); + + std::string url; + if (sptr->sru.length()) + { + url = "http://" + sptr->target; + b->set_option("sru", sptr->sru); + + if (sptr->sru_version.length()) + b->set_option("sru_version", sptr->sru_version); + } + else + { + url = sptr->target; + } + if (no_out_args) + { + char *x_args = 0; + out_names[no_out_args] = 0; // terminate list + + yaz_array_to_uri(&x_args, odr, (char **) out_names, + (char **) out_values); + url += "," + std::string(x_args); + } + package.log("zoom", YLOG_LOG, "url: %s", url.c_str()); + b->connect(url, error, addinfo, odr); + if (*error == 0 && b->enable_cproxy) + create_content_session(package, b, error, addinfo, odr, + content_authentication.length() ? + content_authentication : authentication, + content_proxy.length() ? content_proxy : proxy, + realm); + if (*error == 0) + m_backend = b; return b; } -Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, - Odr_int number_to_present, - int *error, - const char **addinfo, - Odr_int *number_of_records_returned, - ODR odr, - BackendPtr b, - Odr_oid *preferredRecordSyntax, - const char *element_set_name) -{ - *number_of_records_returned = 0; - Z_Records *records = 0; - bool enable_pz2_transform = false; - - if (start < 0 || number_to_present <= 0) - return records; - - if (number_to_present > 10000) - number_to_present = 10000; - - ZOOM_record *recs = (ZOOM_record *) - odr_malloc(odr, number_to_present * sizeof(*recs)); +void yf::Zoom::Frontend::prepare_elements(BackendPtr b, + Odr_oid *preferredRecordSyntax, + const char *element_set_name, + bool &enable_pz2_retrieval, + bool &enable_pz2_transform, + bool &assume_marc8_charset) +{ char oid_name_str[OID_STR_MAX]; const char *syntax_name = 0; - - if (preferredRecordSyntax) + + if (preferredRecordSyntax && + !oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml) + && element_set_name) { - if (!oid_oidcmp(preferredRecordSyntax, yaz_oid_recsyn_xml) - && !strcmp(element_set_name, "pz2")) + if (!strcmp(element_set_name, m_p->element_transform.c_str())) { - if (b->sptr->request_syntax.length()) - { - syntax_name = b->sptr->request_syntax.c_str(); - enable_pz2_transform = true; - } + enable_pz2_retrieval = true; + enable_pz2_transform = true; } - else + else if (!strcmp(element_set_name, m_p->element_raw.c_str())) { - syntax_name = - yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str); + enable_pz2_retrieval = true; } } + + if (enable_pz2_retrieval) + { + std::string configured_request_syntax = b->sptr->request_syntax; + if (configured_request_syntax.length()) + { + syntax_name = configured_request_syntax.c_str(); + const Odr_oid *syntax_oid = + yaz_string_to_oid(yaz_oid_std(), CLASS_RECSYN, syntax_name); + if (!oid_oidcmp(syntax_oid, yaz_oid_recsyn_usmarc) + || !oid_oidcmp(syntax_oid, yaz_oid_recsyn_opac)) + assume_marc8_charset = true; + } + } + else if (preferredRecordSyntax) + syntax_name = + yaz_oid_to_string_buf(preferredRecordSyntax, 0, oid_name_str); + + if (b->sptr->sru.length()) + syntax_name = "XML"; b->set_option("preferredRecordSyntax", syntax_name); - if (enable_pz2_transform) + if (enable_pz2_retrieval) { - element_set_name = "F"; + element_set_name = 0; if (b->sptr->element_set.length()) element_set_name = b->sptr->element_set.c_str(); } b->set_option("elementSetName", element_set_name); + if (b->sptr->sru.length() && element_set_name) + b->set_option("schema", element_set_name); +} + +Z_Records *yf::Zoom::Frontend::get_explain_records( + Package &package, + Odr_int start, + Odr_int number_to_present, + int *error, + char **addinfo, + Odr_int *number_of_records_returned, + ODR odr, + BackendPtr b, + Odr_oid *preferredRecordSyntax, + const char *element_set_name) +{ + Odr_int i; + Z_Records *records = 0; + + if (!b->explain_doc) + { + return records; + } + if (number_to_present > 10000) + number_to_present = 10000; + + xmlNode *ptr = xmlDocGetRootElement(b->explain_doc); + + Z_NamePlusRecordList *npl = (Z_NamePlusRecordList *) + odr_malloc(odr, sizeof(*npl)); + npl->records = (Z_NamePlusRecord **) + odr_malloc(odr, number_to_present * sizeof(*npl->records)); + + for (i = 0; i < number_to_present; i++) + { + int num = 0; + xmlNode *res = xml_node_search(ptr, &num, start + i + 1); + if (!res) + break; + xmlBufferPtr xml_buf = xmlBufferCreate(); + xmlNode *tmp_node = xmlCopyNode(res->children, 1); + xmlNodeDump(xml_buf, tmp_node->doc, tmp_node, 0, 0); + + Z_NamePlusRecord *npr = + (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr)); + npr->databaseName = odr_strdup(odr, b->m_frontend_database.c_str()); + npr->which = Z_NamePlusRecord_databaseRecord; + npr->u.databaseRecord = + z_ext_record_xml(odr, + (const char *) xml_buf->content, xml_buf->use); + npl->records[i] = npr; + xmlFreeNode(tmp_node); + xmlBufferFree(xml_buf); + } + records = (Z_Records*) odr_malloc(odr, sizeof(*records)); + records->which = Z_Records_DBOSD; + records->u.databaseOrSurDiagnostics = npl; + + npl->num_records = i; + *number_of_records_returned = i; + return records; +} + + +Z_Records *yf::Zoom::Frontend::get_records(Package &package, + Odr_int start, + Odr_int number_to_present, + int *error, + char **addinfo, + Odr_int *number_of_records_returned, + ODR odr, + BackendPtr b, + Odr_oid *preferredRecordSyntax, + const char *element_set_name) +{ + *number_of_records_returned = 0; + Z_Records *records = 0; + bool enable_pz2_retrieval = false; // whether target profile is used + bool enable_pz2_transform = false; // whether XSLT is used as well + bool assume_marc8_charset = false; + + prepare_elements(b, preferredRecordSyntax, + element_set_name, + enable_pz2_retrieval, + enable_pz2_transform, + assume_marc8_charset); - b->present(start, number_to_present, recs, error, addinfo); + package.log("zoom", YLOG_LOG, "pz2_retrieval: %s . pz2_transform: %s", + enable_pz2_retrieval ? "yes" : "no", + enable_pz2_transform ? "yes" : "no"); - Odr_int i = 0; + if (start < 0 || number_to_present <=0) + return records; + + if (number_to_present > 10000) + number_to_present = 10000; + + ZOOM_record *recs = (ZOOM_record *) + odr_malloc(odr, (size_t) number_to_present * sizeof(*recs)); + + b->present(start, number_to_present, recs, error, addinfo, odr); + + int i = 0; if (!*error) { for (i = 0; i < number_to_present; i++) @@ -561,62 +1353,153 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, { Z_NamePlusRecord *npr = 0; const char *addinfo; + int sur_error = ZOOM_record_error(recs[i], 0 /* msg */, &addinfo, 0 /* diagset */); if (sur_error) { + log_diagnostic(package, sur_error, addinfo); npr = zget_surrogateDiagRec(odr, odr_database, sur_error, addinfo); } - else if (enable_pz2_transform) + else if (enable_pz2_retrieval) { char rec_type_str[100]; + const char *record_encoding = 0; - strcpy(rec_type_str, b->sptr->use_turbomarc ? - "txml" : "xml"); - - // prevent buffer overflow ... - if (b->sptr->record_encoding.length() > 0 && - b->sptr->record_encoding.length() < - (sizeof(rec_type_str)-20)) + if (b->sptr->record_encoding.length()) + record_encoding = b->sptr->record_encoding.c_str(); + else if (assume_marc8_charset) + record_encoding = "marc8"; + + strcpy(rec_type_str, b->sptr->use_turbomarc ? "txml" : "xml"); + if (record_encoding) { strcat(rec_type_str, "; charset="); - strcat(rec_type_str, b->sptr->record_encoding.c_str()); + strcat(rec_type_str, record_encoding); } - + + package.log("zoom", YLOG_LOG, "Getting record of type %s", + rec_type_str); int rec_len; + xmlChar *xmlrec_buf = 0; const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str, &rec_len); - if (rec_buf && b->xsp) + if (!rec_buf && !npr) + { + std::string addinfo("ZOOM_record_get failed for type "); + + int error = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; + addinfo += rec_type_str; + log_diagnostic(package, error, addinfo.c_str()); + npr = zget_surrogateDiagRec(odr, odr_database, + error, addinfo.c_str()); + } + else + { + package.log_write(rec_buf, rec_len); + package.log_write("\r\n", 2); + } + + if (rec_buf && b->xsp && enable_pz2_transform) { xmlDoc *rec_doc = xmlParseMemory(rec_buf, rec_len); - if (rec_doc) + if (!rec_doc) + { + const char *addinfo = "xml parse failed for record"; + int error = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; + log_diagnostic(package, error, addinfo); + npr = zget_surrogateDiagRec( + odr, odr_database, error, addinfo); + } + else { - xmlDoc *rec_res; - rec_res = xsltApplyStylesheet(b->xsp, rec_doc, 0); + xmlDoc *rec_res = + xsltApplyStylesheet(b->xsp, rec_doc, 0); if (rec_res) - xsltSaveResultToString((xmlChar **) &rec_buf, &rec_len, + { + xsltSaveResultToString(&xmlrec_buf, &rec_len, rec_res, b->xsp); + rec_buf = (const char *) xmlrec_buf; + package.log("zoom", YLOG_LOG, "xslt successful"); + package.log_write(rec_buf, rec_len); + + xmlFreeDoc(rec_res); + } + if (!rec_buf) + { + std::string addinfo; + int error = + YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; + + addinfo = "xslt apply failed for " + + b->sptr->transform_xsl_fname; + log_diagnostic(package, error, addinfo.c_str()); + npr = zget_surrogateDiagRec( + odr, odr_database, error, addinfo.c_str()); + } + xmlFreeDoc(rec_doc); } } - if (rec_buf) + if (rec_buf && b->enable_cproxy) { - npr = (Z_NamePlusRecord *) odr_malloc(odr, sizeof(*npr)); - npr->databaseName = odr_database; - npr->which = Z_NamePlusRecord_databaseRecord; - npr->u.databaseRecord = - z_ext_record_xml(odr, rec_buf, rec_len); + xmlDoc *doc = xmlParseMemory(rec_buf, rec_len); + std::string res = + mp::xml::url_recipe_handle(doc, b->sptr->urlRecipe); + if (res.length() && b->content_session_id.length()) + { + size_t off = res.find_first_of("://"); + if (off != std::string::npos) + { + char tmp[1024]; + sprintf(tmp, "%s.%s/", + b->content_session_id.c_str(), + m_p->content_proxy_server.c_str()); + res.insert(off + 3, tmp); + } + } + if (res.length()) + { + xmlNode *ptr = xmlDocGetRootElement(doc); + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + xmlNode *c = + xmlNewChild(ptr, 0, BAD_CAST "metadata", 0); + xmlNewProp(c, BAD_CAST "type", BAD_CAST + "generated-url"); + xmlNode * t = xmlNewText(BAD_CAST res.c_str()); + xmlAddChild(c, t); + + if (xmlrec_buf) + xmlFree(xmlrec_buf); + + xmlDocDumpMemory(doc, &xmlrec_buf, &rec_len); + rec_buf = (const char *) xmlrec_buf; + } + xmlFreeDoc(doc); } - else + if (!npr) { - npr = zget_surrogateDiagRec( - odr, odr_database, - YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS, - rec_type_str); + if (!rec_buf) + npr = zget_surrogateDiagRec( + odr, odr_database, + YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS, + rec_type_str); + else + { + npr = (Z_NamePlusRecord *) + odr_malloc(odr, sizeof(*npr)); + npr->databaseName = odr_database; + npr->which = Z_NamePlusRecord_databaseRecord; + npr->u.databaseRecord = + z_ext_record_xml(odr, rec_buf, rec_len); + } } + if (xmlrec_buf) + xmlFree(xmlrec_buf); } else { @@ -645,7 +1528,124 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start, } return records; } - + +struct cql_node *yf::Zoom::Impl::convert_cql_fields(struct cql_node *cn, + ODR odr) +{ + struct cql_node *r = 0; + if (!cn) + return 0; + switch (cn->which) + { + case CQL_NODE_ST: + if (cn->u.st.index) + { + std::map::const_iterator it; + it = fieldmap.find(cn->u.st.index); + if (it == fieldmap.end()) + return cn; + if (it->second.length()) + cn->u.st.index = odr_strdup(odr, it->second.c_str()); + else + cn->u.st.index = 0; + } + break; + case CQL_NODE_BOOL: + r = convert_cql_fields(cn->u.boolean.left, odr); + if (!r) + r = convert_cql_fields(cn->u.boolean.right, odr); + break; + case CQL_NODE_SORT: + r = convert_cql_fields(cn->u.sort.search, odr); + break; + } + return r; +} + +void yf::Zoom::Frontend::log_diagnostic(mp::Package &package, + int error, const char *addinfo) +{ + const char *err_msg = yaz_diag_bib1_str(error); + if (addinfo) + package.log("zoom", YLOG_WARN, "Diagnostic %d %s: %s", + error, err_msg, addinfo); + else + package.log("zoom", YLOG_WARN, "Diagnostic %d %s:", + error, err_msg); +} + +yf::Zoom::BackendPtr yf::Zoom::Frontend::explain_search(mp::Package &package, + std::string &database, + int *error, + char **addinfo, + mp::odr &odr, + std::string &torus_db, + std::string &realm) +{ + m_backend.reset(); + + BackendPtr b(new Backend); + + b->m_frontend_database = database; + b->enable_explain = true; + + Z_GDU *gdu = package.request().get(); + Z_APDU *apdu_req = gdu->u.z3950; + Z_SearchRequest *sr = apdu_req->u.searchRequest; + Z_Query *query = sr->query; + + if (!m_p->explain_xsp) + { + *error = YAZ_BIB1_UNSPECIFIED_ERROR; + *addinfo = + odr_strdup(odr, "IR-Explain---1 unsupported. torus explain_xsl not defined"); + return m_backend; + } + else if (query->which == Z_Query_type_104 && + query->u.type_104->which == Z_External_CQL) + { + std::string torus_url = m_p->torus_searchable_url; + std::string torus_query(query->u.type_104->u.cql); + xmlDoc *doc = mp::get_searchable(package, torus_url, "", + torus_query, + realm, m_p->proxy); + if (m_p->explain_xsp) + { + xmlDoc *rec_res = xsltApplyStylesheet(m_p->explain_xsp, doc, 0); + + xmlFreeDoc(doc); + doc = rec_res; + } + if (!doc) + { + *error = YAZ_BIB1_UNSPECIFIED_ERROR; + *addinfo = odr_strdup(odr, "IR-Explain--1 problem. " + "Could not obtain Torus records for Explain"); + } + else + { + xmlNode *ptr = xmlDocGetRootElement(doc); + int hits = 0; + + xml_node_search(ptr, &hits, 0); + + Z_APDU *apdu_res = odr.create_searchResponse(apdu_req, 0, 0); + apdu_res->u.searchResponse->resultCount = odr_intdup(odr, hits); + package.response() = apdu_res; + m_backend = b; + } + if (b->explain_doc) + xmlFreeDoc(b->explain_doc); + b->explain_doc = doc; + return m_backend; + } + else + { + *error = YAZ_BIB1_QUERY_TYPE_UNSUPP; + *addinfo = odr_strdup(odr, "IR-Explain---1 only supports CQL"); + return m_backend; + } +} void yf::Zoom::Frontend::handle_search(mp::Package &package) { @@ -656,31 +1656,53 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package) Z_SearchRequest *sr = apdu_req->u.searchRequest; if (sr->num_databaseNames != 1) { - apdu_res = odr.create_searchResponse( - apdu_req, YAZ_BIB1_TOO_MANY_DATABASES_SPECIFIED, 0); + int error = YAZ_BIB1_TOO_MANY_DATABASES_SPECIFIED; + log_diagnostic(package, error, 0); + apdu_res = odr.create_searchResponse(apdu_req, error, 0); package.response() = apdu_res; return; } + int proxy_step = 0; + +next_proxy: int error = 0; - const char *addinfo = 0; + char *addinfo = 0; std::string db(sr->databaseNames[0]); - BackendPtr b = get_backend_from_databases(db, &error, &addinfo); + + BackendPtr b = get_backend_from_databases(package, db, &error, + &addinfo, odr, &proxy_step); + if (error && proxy_step) + { + package.log("zoom", YLOG_WARN, + "create backend failed: trying next proxy"); + goto next_proxy; + } if (error) { - apdu_res = - odr.create_searchResponse( - apdu_req, error, addinfo); + log_diagnostic(package, error, addinfo); + apdu_res = odr.create_searchResponse(apdu_req, error, addinfo); package.response() = apdu_res; return; } + if (!b || b->enable_explain) + return; b->set_option("setname", "default"); + bool enable_pz2_retrieval = false; + bool enable_pz2_transform = false; + bool assume_marc8_charset = false; + prepare_elements(b, sr->preferredRecordSyntax, 0 /*element_set_name */, + enable_pz2_retrieval, + enable_pz2_transform, + assume_marc8_charset); + Odr_int hits = 0; Z_Query *query = sr->query; WRBUF ccl_wrbuf = 0; WRBUF pqf_wrbuf = 0; + std::string sortkeys; if (query->which == Z_Query_type_1 || query->which == Z_Query_type_101) { @@ -702,41 +1724,111 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package) const char *cql = query->u.type_104->u.cql; CQL_parser cp = cql_parser_create(); int r = cql_parser_string(cp, cql); + package.log("zoom", YLOG_LOG, "CQL: %s", cql); if (r) { cql_parser_destroy(cp); + error = YAZ_BIB1_MALFORMED_QUERY; + const char *addinfo = "CQL syntax error"; + log_diagnostic(package, error, addinfo); apdu_res = - odr.create_searchResponse(apdu_req, - YAZ_BIB1_MALFORMED_QUERY, - "CQL syntax error"); + odr.create_searchResponse(apdu_req, error, addinfo); package.response() = apdu_res; return; } struct cql_node *cn = cql_parser_result(cp); + struct cql_node *cn_error = m_p->convert_cql_fields(cn, odr); + if (cn_error) + { + // hopefully we are getting a ptr to a index+relation+term node + error = YAZ_BIB1_UNSUPP_USE_ATTRIBUTE; + addinfo = 0; + if (cn_error->which == CQL_NODE_ST) + addinfo = cn_error->u.st.index; + + log_diagnostic(package, error, addinfo); + apdu_res = odr.create_searchResponse(apdu_req, error, addinfo); + package.response() = apdu_res; + cql_parser_destroy(cp); + return; + } char ccl_buf[1024]; - r = cql_to_ccl_buf(cn, ccl_buf, sizeof(ccl_buf)); - yaz_log(YLOG_LOG, "cql_to_ccl_buf returned %d", r); - if (r == 0) + if (r) { - ccl_wrbuf = wrbuf_alloc(); - wrbuf_puts(ccl_wrbuf, ccl_buf); + error = YAZ_BIB1_MALFORMED_QUERY; + const char *addinfo = "CQL to CCL conversion error"; + + log_diagnostic(package, error, addinfo); + apdu_res = odr.create_searchResponse(apdu_req, error, addinfo); + package.response() = apdu_res; + cql_parser_destroy(cp); + return; } - cql_parser_destroy(cp); - if (r) + + WRBUF sru_sortkeys_wrbuf = wrbuf_alloc(); + if (cql_sortby_to_sortkeys(cn, wrbuf_vp_puts, sru_sortkeys_wrbuf)) { - apdu_res = - odr.create_searchResponse(apdu_req, - YAZ_BIB1_MALFORMED_QUERY, - "CQL to CCL conversion error"); + error = YAZ_BIB1_ILLEGAL_SORT_RELATION; + const char *addinfo = "CQL to CCL sortby conversion"; + + log_diagnostic(package, error, addinfo); + apdu_res = odr.create_searchResponse(apdu_req, error, addinfo); package.response() = apdu_res; + wrbuf_destroy(sru_sortkeys_wrbuf); + cql_parser_destroy(cp); return; } + WRBUF sort_spec_wrbuf = wrbuf_alloc(); + yaz_srw_sortkeys_to_sort_spec(wrbuf_cstr(sru_sortkeys_wrbuf), + sort_spec_wrbuf); + wrbuf_destroy(sru_sortkeys_wrbuf); + + ccl_wrbuf = wrbuf_alloc(); + wrbuf_puts(ccl_wrbuf, ccl_buf); + + yaz_tok_cfg_t tc = yaz_tok_cfg_create(); + yaz_tok_parse_t tp = + yaz_tok_parse_buf(tc, wrbuf_cstr(sort_spec_wrbuf)); + yaz_tok_cfg_destroy(tc); + + /* go through sortspec and map fields */ + int token = yaz_tok_move(tp); + while (token != YAZ_TOK_EOF) + { + if (token == YAZ_TOK_STRING) + { + const char *field = yaz_tok_parse_string(tp); + std::map::iterator it; + it = b->sptr->sortmap.find(field); + if (it != b->sptr->sortmap.end()) + sortkeys += it->second; + else + sortkeys += field; + } + sortkeys += " "; + token = yaz_tok_move(tp); + if (token == YAZ_TOK_STRING) + { + sortkeys += yaz_tok_parse_string(tp); + } + if (token != YAZ_TOK_EOF) + { + sortkeys += " "; + token = yaz_tok_move(tp); + } + } + yaz_tok_parse_destroy(tp); + wrbuf_destroy(sort_spec_wrbuf); + + cql_parser_destroy(cp); } else { - apdu_res = - odr.create_searchResponse(apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0); + error = YAZ_BIB1_QUERY_TYPE_UNSUPP; + const char *addinfo = 0; + log_diagnostic(package, error, addinfo); + apdu_res = odr.create_searchResponse(apdu_req, error, addinfo); package.response() = apdu_res; return; } @@ -747,30 +1839,106 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package) assert(pqf_wrbuf == 0); int cerror, cpos; struct ccl_rpn_node *cn; + package.log("zoom", YLOG_LOG, "CCL: %s", wrbuf_cstr(ccl_wrbuf)); cn = ccl_find_str(b->sptr->ccl_bibset, wrbuf_cstr(ccl_wrbuf), &cerror, &cpos); wrbuf_destroy(ccl_wrbuf); if (!cn) { char *addinfo = odr_strdup(odr, ccl_err_msg(cerror)); + error = YAZ_BIB1_MALFORMED_QUERY; - apdu_res = - odr.create_searchResponse(apdu_req, - YAZ_BIB1_MALFORMED_QUERY, - addinfo); + switch (cerror) + { + case CCL_ERR_UNKNOWN_QUAL: + case CCL_ERR_TRUNC_NOT_LEFT: + case CCL_ERR_TRUNC_NOT_RIGHT: + case CCL_ERR_TRUNC_NOT_BOTH: +#ifdef CCL_ERR_TRUNC_NOT_EMBED + case CCL_ERR_TRUNC_NOT_EMBED: +#endif +#ifdef CCL_ERR_TRUNC_NOT_SINGLE + case CCL_ERR_TRUNC_NOT_SINGLE: +#endif + error = YAZ_BIB1_UNSUPP_SEARCH; + break; + } + log_diagnostic(package, error, addinfo); + apdu_res = odr.create_searchResponse(apdu_req, error, addinfo); package.response() = apdu_res; return; } pqf_wrbuf = wrbuf_alloc(); ccl_pquery(pqf_wrbuf, cn); + package.log("zoom", YLOG_LOG, "RPN: %s", wrbuf_cstr(pqf_wrbuf)); ccl_rpn_delete(cn); } assert(pqf_wrbuf); - b->search_pqf(wrbuf_cstr(pqf_wrbuf), &hits, &error, &addinfo); - - wrbuf_destroy(pqf_wrbuf); - + + ZOOM_query q = ZOOM_query_create(); + ZOOM_query_sortby2(q, b->sptr->sortStrategy.c_str(), sortkeys.c_str()); + + if (b->get_option("sru")) + { + int status = 0; + Z_RPNQuery *zquery; + zquery = p_query_rpn(odr, wrbuf_cstr(pqf_wrbuf)); + WRBUF wrb = wrbuf_alloc(); + + if (!strcmp(b->get_option("sru"), "solr")) + { + solr_transform_t cqlt = solr_transform_create(); + + status = solr_transform_rpn2solr_wrbuf(cqlt, wrb, zquery); + + solr_transform_close(cqlt); + } + else + { + cql_transform_t cqlt = cql_transform_create(); + + status = cql_transform_rpn2cql_wrbuf(cqlt, wrb, zquery); + + cql_transform_close(cqlt); + } + if (status == 0) + { + ZOOM_query_cql(q, wrbuf_cstr(wrb)); + package.log("zoom", YLOG_LOG, "CQL: %s", wrbuf_cstr(wrb)); + b->search(q, &hits, &error, &addinfo, odr); + } + ZOOM_query_destroy(q); + + wrbuf_destroy(wrb); + wrbuf_destroy(pqf_wrbuf); + if (status) + { + error = YAZ_BIB1_MALFORMED_QUERY; + const char *addinfo = "can not convert from RPN to CQL/SOLR"; + log_diagnostic(package, error, addinfo); + apdu_res = odr.create_searchResponse(apdu_req, error, addinfo); + package.response() = apdu_res; + return; + } + } + else + { + ZOOM_query_prefix(q, wrbuf_cstr(pqf_wrbuf)); + package.log("zoom", YLOG_LOG, "search PQF: %s", wrbuf_cstr(pqf_wrbuf)); + b->search(q, &hits, &error, &addinfo, odr); + ZOOM_query_destroy(q); + wrbuf_destroy(pqf_wrbuf); + } + + if (error && proxy_step) + { + // reset below prevent reuse in get_backend_from_databases + m_backend.reset(); + package.log("zoom", YLOG_WARN, "search failed: trying next proxy"); + goto next_proxy; + } + const char *element_set_name = 0; Odr_int number_to_present = 0; if (!error) @@ -778,9 +1946,12 @@ void yf::Zoom::Frontend::handle_search(mp::Package &package) Odr_int number_of_records_returned = 0; Z_Records *records = get_records( + package, 0, number_to_present, &error, &addinfo, &number_of_records_returned, odr, b, sr->preferredRecordSyntax, element_set_name); + if (error) + log_diagnostic(package, error, addinfo); apdu_res = odr.create_searchResponse(apdu_req, error, addinfo); if (records) { @@ -819,20 +1990,43 @@ void yf::Zoom::Frontend::handle_present(mp::Package &package) element_set_name = comp->u.simple->u.generic; Odr_int number_of_records_returned = 0; int error = 0; - const char *addinfo = 0; - Z_Records *records = get_records( - *pr->resultSetStartPoint - 1, *pr->numberOfRecordsRequested, - &error, &addinfo, &number_of_records_returned, odr, m_backend, - pr->preferredRecordSyntax, element_set_name); + char *addinfo = 0; - apdu_res = odr.create_presentResponse(apdu_req, error, addinfo); - if (records) + if (m_backend->enable_explain) { - apdu_res->u.presentResponse->records = records; - apdu_res->u.presentResponse->numberOfRecordsReturned = - odr_intdup(odr, number_of_records_returned); + Z_Records *records = + get_explain_records( + package, + *pr->resultSetStartPoint - 1, *pr->numberOfRecordsRequested, + &error, &addinfo, &number_of_records_returned, odr, m_backend, + pr->preferredRecordSyntax, element_set_name); + + apdu_res = odr.create_presentResponse(apdu_req, error, addinfo); + if (records) + { + apdu_res->u.presentResponse->records = records; + apdu_res->u.presentResponse->numberOfRecordsReturned = + odr_intdup(odr, number_of_records_returned); + } + package.response() = apdu_res; + } + else + { + Z_Records *records = + get_records(package, + *pr->resultSetStartPoint - 1, *pr->numberOfRecordsRequested, + &error, &addinfo, &number_of_records_returned, odr, m_backend, + pr->preferredRecordSyntax, element_set_name); + + apdu_res = odr.create_presentResponse(apdu_req, error, addinfo); + if (records) + { + apdu_res->u.presentResponse->records = records; + apdu_res->u.presentResponse->numberOfRecordsReturned = + odr_intdup(odr, number_of_records_returned); + } + package.response() = apdu_res; } - package.response() = apdu_res; } void yf::Zoom::Frontend::handle_package(mp::Package &package) @@ -843,6 +2037,9 @@ void yf::Zoom::Frontend::handle_package(mp::Package &package) else if (gdu->which == Z_GDU_Z3950) { Z_APDU *apdu_req = gdu->u.z3950; + + if (m_backend) + wrbuf_rewind(m_backend->m_apdu_wrbuf); if (apdu_req->which == Z_APDU_initRequest) { mp::odr odr; @@ -868,6 +2065,11 @@ void yf::Zoom::Frontend::handle_package(mp::Package &package) "zoom filter cannot handle this APDU"); package.session().close(); } + if (m_backend) + { + WRBUF w = m_backend->m_apdu_wrbuf; + package.log_write(wrbuf_buf(w), wrbuf_len(w)); + } } else {