X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=src%2Ffilter_query_rewrite.cpp;h=4828781d41329755165253fd9015be1dfb230d73;hb=586d78659d671683f33ec55f4a7d32b28e345ccd;hp=159fbb99426ecfda23fe48eb1f53b91fc568cfec;hpb=70d6c3992f09dded5f10476ab43590dd6b80979f;p=metaproxy-moved-to-github.git diff --git a/src/filter_query_rewrite.cpp b/src/filter_query_rewrite.cpp index 159fbb9..4828781 100644 --- a/src/filter_query_rewrite.cpp +++ b/src/filter_query_rewrite.cpp @@ -1,38 +1,67 @@ -/* $Id: filter_query_rewrite.cpp,v 1.3 2006-01-22 00:05:51 marc Exp $ - Copyright (c) 2005, Index Data. +/* This file is part of Metaproxy. + Copyright (C) Index Data -%LICENSE% - */ +Metaproxy is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. +Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. -#include "config.hpp" -#include "filter.hpp" -#include "package.hpp" +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ -//#include -#include +#include "config.hpp" +#include +#include -#include "util.hpp" +#include #include "filter_query_rewrite.hpp" +#include #include +#include +#include +#include +#include -namespace yf = yp2::filter; +#include +#include -namespace yp2 { +namespace mp = metaproxy_1; +namespace yf = mp::filter; + +namespace metaproxy_1 { namespace filter { class QueryRewrite::Rep { - //friend class QueryRewrite; public: - void process(yp2::Package &package) const; - void configure(const xmlNode * ptr); + Rep(); + ~Rep(); + void process(mp::Package &package) const; + void configure(const xmlNode * ptr, bool test_only, + const char *path); private: - void rewriteRegex(Z_Query *query) const; - }; + xsltStylesheetPtr m_stylesheet; + std::string charset_from; + std::string charset_to; + }; } } -// Class QueryRewrite frowarding to class QueryRewrite::Rep +yf::QueryRewrite::Rep::Rep() : m_stylesheet(0), charset_from("UTF-8") +{ +} + +yf::QueryRewrite::Rep::~Rep() +{ + if (m_stylesheet) + xsltFreeStylesheet(m_stylesheet); +} yf::QueryRewrite::QueryRewrite() : m_p(new Rep) { @@ -42,200 +71,182 @@ yf::QueryRewrite::~QueryRewrite() { // must have a destructor because of boost::scoped_ptr } -void yf::QueryRewrite::process(yp2::Package &package) const +void mp::filter::QueryRewrite::configure(const xmlNode *ptr, bool test_only, + const char *path) { - m_p->process(package); + m_p->configure(ptr, test_only, path); } -void yp2::filter::QueryRewrite::configure(const xmlNode *ptr) +void yf::QueryRewrite::process(mp::Package &package) const { - m_p->configure(ptr); + m_p->process(package); } - -// Class QueryRewrite::Rep implementation - -void yf::QueryRewrite::Rep::process(yp2::Package &package) const +void yf::QueryRewrite::Rep::process(mp::Package &package) const { - if (package.session().is_closed()) - { - //std::cout << "Got Close.\n"; - } - Z_GDU *gdu = package.request().get(); - - if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == - Z_APDU_initRequest) - { - //std::cout << "Got Z3950 Init PDU\n"; - //Z_InitRequest *req = gdu->u.z3950->u.initRequest; - //package.request() = gdu; - } - else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == - Z_APDU_searchRequest) - { - //std::cout << "Got Z3950 Search PDU\n"; - Z_SearchRequest *req = gdu->u.z3950->u.searchRequest; - - // applying regex query rewriting - rewriteRegex(req->query); - - // fold new query structure into gdu package .. - // yp2::util::pqf(odr, gdu->u.z3950, query_out); - // question: which odr structure to use in this call ?? - // memory alignment has to be correct, this is a little tricky ... - // I'd rather like to alter the gdu and pack it back using: - package.request() = gdu; - } - else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which == - Z_APDU_scanRequest) - { - std::cout << "Got Z3950 Scan PDU\n"; - //Z_ScanRequest *req = gdu->u.z3950->u.scanRequest; - //package.request() = gdu; - } - package.move(); -} - -void yf::QueryRewrite::Rep::rewriteRegex(Z_Query *query) const -{ - std::string query_in = yp2::util::zQueryToString(query); - //std::cout << "QUERY IN '" << query_in << "'\n"; - - std::string query_out; - - boost::regex rgx; - try{ - // make regular expression replacement here - std::string expression("@attr 1=4"); - std::string format("@attr 1=4 @attr 4=3"); - //std::string expression("the"); - //std::string format("else"); - //std::string expression("(<)|(>)|\\r"); - //std::string format("(?1<)(?2>)"); - - //std::cout << "EXPRESSION '" << expression << "'\n"; - //std::cout << "FORMAT '" << format << "'\n"; - - rgx.assign(expression.c_str()); - - bool match(false); - bool search(false); - - // other flags - // see http://www.boost.org/libs/regex/doc/match_flag_type.html - //boost::match_flag_type flags = boost::match_default; - // boost::format_default - // boost::format_perl - // boost::format_literal - // boost::format_all - // boost::format_no_copy - // boost::format_first_only - - boost::match_flag_type flags - = boost::match_default | boost::format_all; - - match = regex_match(query_in, rgx, flags); - search = regex_search(query_in, rgx, flags); - query_out = boost::regex_replace(query_in, rgx, format, flags); - //std::cout << "MATCH '" << match << "'\n"; - //std::cout << "SEARCH '" << search << "'\n"; - //std::cout << "QUERY OUT '" << query_out << "'\n"; - - } - catch(boost::regex_error &e) + if (gdu && gdu->which == Z_GDU_Z3950) { - std::cout << "REGEX Error code=" << e.code() - << " position=" << e.position() << "\n"; + Z_APDU *apdu_req = gdu->u.z3950; + if (apdu_req->which == Z_APDU_searchRequest) + { + int error_code = 0; + const char *addinfo = 0; + mp::odr odr; + Z_SearchRequest *req = apdu_req->u.searchRequest; + + if (m_stylesheet) + { + xmlDocPtr doc_input = 0; + yaz_query2xml(req->query, &doc_input); + + if (doc_input) + { + xmlDocPtr doc_res = xsltApplyStylesheet(m_stylesheet, + doc_input, 0); + if (!doc_res) + { + error_code = YAZ_BIB1_MALFORMED_QUERY; + addinfo = "XSLT transform failed for query"; + } + else + { + const xmlNode *root_element = xmlDocGetRootElement(doc_res); + yaz_xml2query(root_element, &req->query, odr, + &error_code, &addinfo); + xmlFreeDoc(doc_res); + } + xmlFreeDoc(doc_input); + } + } + if (!error_code && charset_to.length() && charset_from.length() && + (req->query->which == Z_Query_type_1 + || req->query->which == Z_Query_type_101)) + { + yaz_iconv_t cd = yaz_iconv_open(charset_to.c_str(), + charset_from.c_str()); + if (cd) + { + int r = yaz_query_charset_convert_rpnquery_check( + req->query->u.type_1, odr, cd); + yaz_iconv_close(cd); + if (r) + { /* query could not be char converted */ + error_code = YAZ_BIB1_MALFORMED_QUERY; + addinfo = "could not convert query to target charset"; + } + } + } + if (error_code) + { + Z_APDU *f_apdu = + odr.create_searchResponse(apdu_req, error_code, addinfo); + package.response() = f_apdu; + return; + } + package.request() = gdu; + } } - - //std::cout << "QUERY OUT '" << query_out << "'\n"; - // still need to fold this new rpn query string into Z_Query structure... + package.move(); } - - -void yp2::filter::QueryRewrite::Rep::configure(const xmlNode *filter) +void mp::filter::QueryRewrite::Rep::configure(const xmlNode *ptr, + bool test_only, const char *path) { - - //std::cout << "XML node '" << filter->name << "'\n"; - yp2::xml::check_element_yp2(filter, "filter"); - - const xmlNode* regex - = yp2::xml::jump_to_children(filter, XML_ELEMENT_NODE); - - while (regex){ - //std::cout << "XML node '" << regex->name << "'\n"; - yp2::xml::check_element_yp2(regex, "regex"); - - // parsing action -// const xmlNode* action -// = yp2::xml::jump_to_children(regex, XML_ATTRIBUTE_NODE); -// if (action){ -// std::cout << "XML node '" << action->name << "' '"; -// std::cout << yp2::xml::get_text(action) << "'\n"; -// //yp2::xml::check_element_yp2(expression, "expression"); -// } - - // parsing regex expression - std::string expr; - const xmlNode* expression - = yp2::xml::jump_to_children(regex, XML_ELEMENT_NODE); - if (expression){ - yp2::xml::check_element_yp2(expression, "expression"); - expr = yp2::xml::get_text(expression); - //std::cout << "XML node '" << expression->name << "' '"; - //std::cout << yp2::xml::get_text(expression) << "'\n"; + for (ptr = ptr->children; ptr; ptr = ptr->next) + { + if (ptr->type != XML_ELEMENT_NODE) + continue; + + if (mp::xml::is_element_mp(ptr, "xslt")) + { + if (m_stylesheet) + { + throw mp::filter::FilterException + ("Only one xslt element allowed in query_rewrite filter"); + } + + std::string fname; + + for (struct _xmlAttr *attr = ptr->properties; + attr; attr = attr->next) + { + mp::xml::check_attribute(attr, "", "stylesheet"); + fname = mp::xml::get_text(attr); + } + + if (0 == fname.size()) + throw mp::filter::FilterException + ("Attribute needs XSLT stylesheet path content" + + " in query_rewrite filter"); + + char fullpath[1024]; + char *cp = yaz_filepath_resolve(fname.c_str(), path, 0, fullpath); + if (!cp) + { + throw mp::filter::FilterException("Cannot read XSLT " + fname); + } + + m_stylesheet = xsltParseStylesheetFile(BAD_CAST cp); + if (!m_stylesheet) + { + throw mp::filter::FilterException + ("Failed to read XSLT stylesheet '" + + fname + + "' in query_rewrite filter"); + } } - - // parsing regex format - std::string form; - const xmlNode* format - = yp2::xml::jump_to_next(expression, XML_ELEMENT_NODE); - if (format){ - yp2::xml::check_element_yp2(format, "format"); - form = yp2::xml::get_text(format); - //std::cout << "XML node '" << format->name << "' '"; - //std::cout << yp2::xml::get_text(format) << "'\n"; + else if (mp::xml::is_element_mp(ptr, "charset")) + { + for (struct _xmlAttr *attr = ptr->properties; + attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "from")) + { + charset_from = mp::xml::get_text(attr); + } + else if (!strcmp((const char *) attr->name, "to")) + { + charset_to = mp::xml::get_text(attr); + } + else + throw mp::filter::FilterException + ("Invalid attribute inside charset inside " + "query_rewrite filter"); + } } - - // adding configuration - if (expr.size() && form.size()){ - //std::cout << "adding regular expression\n"; + else + { + throw mp::filter::FilterException + ("Bad element " + + std::string((const char *) ptr->name) + + " in query_rewrite filter"); } - - // moving forward to next regex - regex = yp2::xml::jump_to_next(regex, XML_ELEMENT_NODE); } - - // done parsing XML config - } -static yp2::filter::Base* filter_creator() +static mp::filter::Base* filter_creator() { - return new yp2::filter::QueryRewrite; + return new mp::filter::QueryRewrite; } extern "C" { - struct yp2_filter_struct yp2_filter_query_rewrite = { + struct metaproxy_1_filter_struct metaproxy_1_filter_query_rewrite = { 0, - "query-rewrite", + "query_rewrite", filter_creator }; } -extern "C" { - extern struct yp2_filter_struct yp2_filter_query_rewrite; -} - - /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil - * c-file-style: "stroustrup" * End: * vim: shiftwidth=4 tabstop=8 expandtab */ +