X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ffilter_query_rewrite.cpp;h=4828781d41329755165253fd9015be1dfb230d73;hb=17981d61e25178362e3041a3e0090f0852514fe1;hp=cffa7e386e2f24879ccf3d422ae3b73895fdc90f;hpb=743341841a21a695d08c99b27a30329119030db5;p=metaproxy-moved-to-github.git diff --git a/src/filter_query_rewrite.cpp b/src/filter_query_rewrite.cpp index cffa7e3..4828781 100644 --- a/src/filter_query_rewrite.cpp +++ b/src/filter_query_rewrite.cpp @@ -1,36 +1,68 @@ -/* $Id: filter_query_rewrite.cpp,v 1.2 2006-01-20 22:38:12 marc Exp $ - Copyright (c) 2005, Index Data. +/* This file is part of Metaproxy. + Copyright (C) Index Data -%LICENSE% - */ +Metaproxy is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. +Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. -#include "config.hpp" -#include "filter.hpp" -#include "package.hpp" +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ -//#include -#include +#include "config.hpp" +#include +#include -#include "util.hpp" +#include #include "filter_query_rewrite.hpp" +#include #include +#include +#include +#include +#include + +#include +#include -namespace yf = yp2::filter; +namespace mp = metaproxy_1; +namespace yf = mp::filter; -namespace yp2 { +namespace metaproxy_1 { namespace filter { class QueryRewrite::Rep { - //friend class QueryRewrite; public: - void process(yp2::Package &package) const; + Rep(); + ~Rep(); + void process(mp::Package &package) const; + void configure(const xmlNode * ptr, bool test_only, + const char *path); private: - void rewriteRegex(Z_Query *query) const; - }; + xsltStylesheetPtr m_stylesheet; + std::string charset_from; + std::string charset_to; + }; } } +yf::QueryRewrite::Rep::Rep() : m_stylesheet(0), charset_from("UTF-8") +{ +} + +yf::QueryRewrite::Rep::~Rep() +{ + if (m_stylesheet) + xsltFreeStylesheet(m_stylesheet); +} + yf::QueryRewrite::QueryRewrite() : m_p(new Rep) { } @@ -39,135 +71,182 @@ yf::QueryRewrite::~QueryRewrite() { // must have a destructor because of boost::scoped_ptr } -void yf::QueryRewrite::process(yp2::Package &package) const +void mp::filter::QueryRewrite::configure(const xmlNode *ptr, bool test_only, + const char *path) +{ + m_p->configure(ptr, test_only, path); +} + +void yf::QueryRewrite::process(mp::Package &package) const { m_p->process(package); } -void yf::QueryRewrite::Rep::process(yp2::Package &package) const +void yf::QueryRewrite::Rep::process(mp::Package &package) const { - if (package.session().is_closed()) - { - std::cout << "Got Close.\n"; - } - Z_GDU *gdu = package.request().get(); - - if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == - Z_APDU_initRequest) - { - std::cout << "Got Z3950 Init PDU\n"; - //Z_InitRequest *req = gdu->u.z3950->u.initRequest; - //package.request() = gdu; - } - else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == - Z_APDU_searchRequest) - { - std::cout << "Got Z3950 Search PDU\n"; - Z_SearchRequest *req = gdu->u.z3950->u.searchRequest; - - // applying regex query rewriting - rewriteRegex(req->query); - - // fold new query structure into gdu package .. - // yp2::util::pqf(odr, gdu->u.z3950, query_out); - // question: which odr structure to use in this call ?? - // memory alignment has to be correct, this is a little tricky ... - // I'd rather like to alter the gdu and pack it back using: - package.request() = gdu; - } - else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == - Z_APDU_scanRequest) + + if (gdu && gdu->which == Z_GDU_Z3950) { - std::cout << "Got Z3950 Scan PDU\n"; - //Z_ScanRequest *req = gdu->u.z3950->u.scanRequest; - //package.request() = gdu; - } + Z_APDU *apdu_req = gdu->u.z3950; + if (apdu_req->which == Z_APDU_searchRequest) + { + int error_code = 0; + const char *addinfo = 0; + mp::odr odr; + Z_SearchRequest *req = apdu_req->u.searchRequest; + + if (m_stylesheet) + { + xmlDocPtr doc_input = 0; + yaz_query2xml(req->query, &doc_input); + + if (doc_input) + { + xmlDocPtr doc_res = xsltApplyStylesheet(m_stylesheet, + doc_input, 0); + if (!doc_res) + { + error_code = YAZ_BIB1_MALFORMED_QUERY; + addinfo = "XSLT transform failed for query"; + } + else + { + const xmlNode *root_element = xmlDocGetRootElement(doc_res); + yaz_xml2query(root_element, &req->query, odr, + &error_code, &addinfo); + xmlFreeDoc(doc_res); + } + xmlFreeDoc(doc_input); + } + } + if (!error_code && charset_to.length() && charset_from.length() && + (req->query->which == Z_Query_type_1 + || req->query->which == Z_Query_type_101)) + { + yaz_iconv_t cd = yaz_iconv_open(charset_to.c_str(), + charset_from.c_str()); + if (cd) + { + int r = yaz_query_charset_convert_rpnquery_check( + req->query->u.type_1, odr, cd); + yaz_iconv_close(cd); + if (r) + { /* query could not be char converted */ + error_code = YAZ_BIB1_MALFORMED_QUERY; + addinfo = "could not convert query to target charset"; + } + } + } + if (error_code) + { + Z_APDU *f_apdu = + odr.create_searchResponse(apdu_req, error_code, addinfo); + package.response() = f_apdu; + return; + } + package.request() = gdu; + } + } package.move(); } - -void yf::QueryRewrite::Rep::rewriteRegex(Z_Query *query) const +void mp::filter::QueryRewrite::Rep::configure(const xmlNode *ptr, + bool test_only, const char *path) { - std::string query_in = yp2::util::zQueryToString(query); - std::cout << "QUERY IN '" << query_in << "'\n"; - - std::string query_out; - - boost::regex rgx; - try{ - // make regular expression replacement here - std::string expression("@attr 1=4"); - std::string format("@attr 1=4 @attr 4=3"); - //std::string expression("the"); - //std::string format("else"); - //std::string expression("(<)|(>)|\\r"); - //std::string format("(?1<)(?2>)"); - - std::cout << "EXPRESSION '" << expression << "'\n"; - std::cout << "FORMAT '" << format << "'\n"; - - rgx.assign(expression.c_str()); - - bool match(false); - bool search(false); - - // other flags - // see http://www.boost.org/libs/regex/doc/match_flag_type.html - //boost::match_flag_type flags = boost::match_default; - // boost::format_default - // boost::format_perl - // boost::format_literal - // boost::format_all - // boost::format_no_copy - // boost::format_first_only - - boost::match_flag_type flags - = boost::match_default | boost::format_all; - - match = regex_match(query_in, rgx, flags); - search = regex_search(query_in, rgx, flags); - query_out = boost::regex_replace(query_in, rgx, format, flags); - std::cout << "MATCH '" << match << "'\n"; - std::cout << "SEARCH '" << search << "'\n"; - std::cout << "QUERY OUT '" << query_out << "'\n"; - - } - catch(boost::regex_error &e) + for (ptr = ptr->children; ptr; ptr = ptr->next) { - std::cout << "REGEX Error code=" << e.code() - << " position=" << e.position() << "\n"; + if (ptr->type != XML_ELEMENT_NODE) + continue; + + if (mp::xml::is_element_mp(ptr, "xslt")) + { + if (m_stylesheet) + { + throw mp::filter::FilterException + ("Only one xslt element allowed in query_rewrite filter"); + } + + std::string fname; + + for (struct _xmlAttr *attr = ptr->properties; + attr; attr = attr->next) + { + mp::xml::check_attribute(attr, "", "stylesheet"); + fname = mp::xml::get_text(attr); + } + + if (0 == fname.size()) + throw mp::filter::FilterException + ("Attribute needs XSLT stylesheet path content" + + " in query_rewrite filter"); + + char fullpath[1024]; + char *cp = yaz_filepath_resolve(fname.c_str(), path, 0, fullpath); + if (!cp) + { + throw mp::filter::FilterException("Cannot read XSLT " + fname); + } + + m_stylesheet = xsltParseStylesheetFile(BAD_CAST cp); + if (!m_stylesheet) + { + throw mp::filter::FilterException + ("Failed to read XSLT stylesheet '" + + fname + + "' in query_rewrite filter"); + } + } + else if (mp::xml::is_element_mp(ptr, "charset")) + { + for (struct _xmlAttr *attr = ptr->properties; + attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "from")) + { + charset_from = mp::xml::get_text(attr); + } + else if (!strcmp((const char *) attr->name, "to")) + { + charset_to = mp::xml::get_text(attr); + } + else + throw mp::filter::FilterException + ("Invalid attribute inside charset inside " + "query_rewrite filter"); + } + } + else + { + throw mp::filter::FilterException + ("Bad element " + + std::string((const char *) ptr->name) + + " in query_rewrite filter"); + } } - - //std::cout << "QUERY OUT '" << query_out << "'\n"; - // still need to fold this new rpn query string into Z_Query structure... } - - -static yp2::filter::Base* filter_creator() +static mp::filter::Base* filter_creator() { - return new yp2::filter::QueryRewrite; + return new mp::filter::QueryRewrite; } extern "C" { - struct yp2_filter_struct yp2_filter_query_rewrite = { + struct metaproxy_1_filter_struct metaproxy_1_filter_query_rewrite = { 0, - "query-rewrite", + "query_rewrite", filter_creator }; } -extern "C" { - extern struct yp2_filter_struct yp2_filter_query_rewrite; -} - - /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil - * c-file-style: "stroustrup" * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +