Fix Metaproxy stops logging after check config failed MP-590
[metaproxy-moved-to-github.git] / src / filter_query_rewrite.cpp
index 159fbb9..4828781 100644 (file)
@@ -1,38 +1,67 @@
-/* $Id: filter_query_rewrite.cpp,v 1.3 2006-01-22 00:05:51 marc Exp $
-   Copyright (c) 2005, Index Data.
+/* This file is part of Metaproxy.
+   Copyright (C) Index Data
 
-%LICENSE%
- */
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
 
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
 
-#include "config.hpp"
-#include "filter.hpp"
-#include "package.hpp"
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
 
-//#include <boost/thread/mutex.hpp>
-#include <boost/regex.hpp>
+#include "config.hpp"
+#include <metaproxy/filter.hpp>
+#include <metaproxy/package.hpp>
 
-#include "util.hpp"
+#include <metaproxy/util.hpp>
 #include "filter_query_rewrite.hpp"
 
+#include <yaz/log.h>
 #include <yaz/zgdu.h>
+#include <yaz/xmlquery.h>
+#include <yaz/diagbib1.h>
+#include <yaz/query-charset.h>
+#include <yaz/tpath.h>
 
-namespace yf = yp2::filter;
+#include <libxslt/xsltutils.h>
+#include <libxslt/transform.h>
 
-namespace yp2 {
+namespace mp = metaproxy_1;
+namespace yf = mp::filter;
+
+namespace metaproxy_1 {  // holds QueryRewrite::Rep, the implementation object QueryRewrite creates as m_p
     namespace filter {
         class QueryRewrite::Rep {
-            //friend class QueryRewrite;
         public:
-            void process(yp2::Package &package) const;
-            void configure(const xmlNode * ptr);
+            Rep();  // starts with no stylesheet and charset_from set to "UTF-8"
+            ~Rep();  // frees m_stylesheet when one was loaded
+            void process(mp::Package &package) const;  // rewrites the query of Z39.50 search requests
+            void configure(const xmlNode * ptr, bool test_only,  // reads <xslt> and <charset> child elements
+                           const char *path);
         private:
-            void rewriteRegex(Z_Query *query) const;
-        };
+            xsltStylesheetPtr m_stylesheet;  // parsed XSLT applied to queries; 0 when none is configured
+            std::string charset_from;  // source charset for query conversion
+            std::string charset_to;  // target charset; conversion is skipped while this is empty
+       };
     }
 }
 
-// Class QueryRewrite frowarding to class QueryRewrite::Rep
+yf::QueryRewrite::Rep::Rep() : m_stylesheet(0), charset_from("UTF-8")  // charset_to is default-constructed (empty)
+{
+}
+
+yf::QueryRewrite::Rep::~Rep()  // releases the XSLT stylesheet owned by this object
+{
+    if (m_stylesheet)  // non-zero only after configure() parsed an <xslt> stylesheet
+        xsltFreeStylesheet(m_stylesheet);
+}
 
 yf::QueryRewrite::QueryRewrite() : m_p(new Rep)
 {
@@ -42,200 +71,182 @@ yf::QueryRewrite::~QueryRewrite()
 {  // must have a destructor because of boost::scoped_ptr
 }
 
-void yf::QueryRewrite::process(yp2::Package &package) const
+void mp::filter::QueryRewrite::configure(const xmlNode *ptr, bool test_only,  // public entry point for configuration
+                                         const char *path)
 {
-    m_p->process(package);
+    m_p->configure(ptr, test_only, path);  // all work is done by the Rep object held in m_p
 }
 
-void yp2::filter::QueryRewrite::configure(const xmlNode *ptr)
+void yf::QueryRewrite::process(mp::Package &package) const  // public entry point for package handling
 {
-    m_p->configure(ptr);
+    m_p->process(package);  // all work is done by the Rep object held in m_p
 }
 
-
-// Class QueryRewrite::Rep implementation
-
-void yf::QueryRewrite::Rep::process(yp2::Package &package) const
+void yf::QueryRewrite::Rep::process(mp::Package &package) const  // rewrites Z39.50 search queries (XSLT, then charset); other packages are only moved on
 {
-    if (package.session().is_closed())
-    {
-        //std::cout << "Got Close.\n";
-    }
-    
     Z_GDU *gdu = package.request().get();
-    
-    if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which ==
-        Z_APDU_initRequest)
-    {
-        //std::cout << "Got Z3950 Init PDU\n";         
-        //Z_InitRequest *req = gdu->u.z3950->u.initRequest;
-        //package.request() = gdu;
-    } 
-    else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which ==
-             Z_APDU_searchRequest)
-    {
-        //std::cout << "Got Z3950 Search PDU\n";   
-        Z_SearchRequest *req = gdu->u.z3950->u.searchRequest;
-
-        // applying regex query rewriting
-        rewriteRegex(req->query);
-            
-        // fold new query structure into gdu package ..       
-        // yp2::util::pqf(odr, gdu->u.z3950, query_out);
-        // question: which odr structure to use in this call ??
-        // memory alignment has to be correct, this is a little tricky ...
-        // I'd rather like to alter the gdu and pack it back using:
-        package.request() = gdu;
-    } 
-    else if (gdu && gdu->which == Z_GDU_Z3950 && gdu->u.z3950->which ==
-             Z_APDU_scanRequest)
-    {
-        std::cout << "Got Z3950 Scan PDU\n";   
-        //Z_ScanRequest *req = gdu->u.z3950->u.scanRequest;
-        //package.request() = gdu;
-    } 
-    package.move();
-}
-
 
-void yf::QueryRewrite::Rep::rewriteRegex(Z_Query *query) const
-{
-    std::string query_in = yp2::util::zQueryToString(query);
-    //std::cout << "QUERY IN  '" << query_in << "'\n";
-
-    std::string query_out;
-    
-    boost::regex rgx;
-    try{
-        // make regular expression replacement here 
-        std::string expression("@attr 1=4");
-        std::string format("@attr 1=4 @attr 4=3");
-        //std::string expression("the");
-        //std::string format("else");
-        //std::string expression("(<)|(>)|\\r");
-        //std::string format("(?1&lt;)(?2&gt;)");
-
-        //std::cout << "EXPRESSION  '" << expression << "'\n";
-        //std::cout << "FORMAT      '" << format << "'\n";
-
-        rgx.assign(expression.c_str());
-
-        bool match(false);
-        bool search(false);
-
-        // other flags
-        // see http://www.boost.org/libs/regex/doc/match_flag_type.html
-        //boost::match_flag_type flags = boost::match_default;
-        // boost::format_default
-        // boost::format_perl
-        // boost::format_literal
-        // boost::format_all
-        // boost::format_no_copy
-        // boost::format_first_only
-
-        boost::match_flag_type flags 
-            = boost::match_default | boost::format_all;
-
-        match = regex_match(query_in, rgx, flags);
-        search = regex_search(query_in, rgx, flags);
-        query_out = boost::regex_replace(query_in, rgx, format, flags);
-        //std::cout << "MATCH  '" << match <<  "'\n";
-        //std::cout << "SEARCH '" << search <<  "'\n";
-        //std::cout << "QUERY OUT '" << query_out << "'\n";
-
-    }
-    catch(boost::regex_error &e)
+    if (gdu && gdu->which == Z_GDU_Z3950)  // anything that is not a Z39.50 GDU falls through to package.move()
     {
-        std::cout << "REGEX Error code=" << e.code() 
-                  << " position=" << e.position() << "\n";
+        Z_APDU *apdu_req = gdu->u.z3950;
+        if (apdu_req->which == Z_APDU_searchRequest)  // only search requests are rewritten
+        {
+            int error_code = 0;  // stays 0 unless one of the rewrite steps fails
+            const char *addinfo = 0;  // additional text sent with the error response
+            mp::odr odr;  // passed to the YAZ calls below and used to build the error response
+            Z_SearchRequest *req = apdu_req->u.searchRequest;
+
+            if (m_stylesheet)  // step 1: XSLT rewrite, only when a stylesheet was configured
+            {
+                xmlDocPtr doc_input = 0;
+                yaz_query2xml(req->query, &doc_input);  // query -> XML document
+
+                if (doc_input)  // NOTE(review): if no document was produced the XSLT step is skipped without an error
+                {
+                    xmlDocPtr doc_res = xsltApplyStylesheet(m_stylesheet,
+                                                            doc_input, 0);
+                    if (!doc_res)  // transform produced no result document
+                    {
+                        error_code = YAZ_BIB1_MALFORMED_QUERY;
+                        addinfo = "XSLT transform failed for query";
+                    }
+                    else
+                    {
+                        const xmlNode *root_element = xmlDocGetRootElement(doc_res);
+                        yaz_xml2query(root_element, &req->query, odr,  // XML result -> req->query; receives error_code/addinfo as outputs
+                                      &error_code, &addinfo);
+                        xmlFreeDoc(doc_res);
+                    }
+                    xmlFreeDoc(doc_input);
+                }
+            }
+            if (!error_code && charset_to.length() && charset_from.length() &&  // step 2: charset conversion, needs both charsets set
+                (req->query->which == Z_Query_type_1
+                 || req->query->which == Z_Query_type_101))  // applied to type-1 / type-101 queries only
+            {
+                yaz_iconv_t cd = yaz_iconv_open(charset_to.c_str(),
+                                                charset_from.c_str());
+                if (cd)  // NOTE(review): when the converter cannot be opened the query is passed on unconverted, without an error
+                {
+                    int r = yaz_query_charset_convert_rpnquery_check(
+                        req->query->u.type_1, odr, cd);
+                    yaz_iconv_close(cd);
+                    if (r)
+                    {  /* query could not be char converted */
+                        error_code = YAZ_BIB1_MALFORMED_QUERY;
+                        addinfo = "could not convert query to target charset";
+                    }
+                }
+            }
+            if (error_code)  // a step failed: set a search response carrying the diagnostic as the package response
+            {
+                Z_APDU *f_apdu =
+                    odr.create_searchResponse(apdu_req, error_code, addinfo);
+                package.response() = f_apdu;
+                return;  // package.move() is deliberately not called on this path
+            }
+            package.request() = gdu;  // store the (possibly rewritten) request back into the package
+        }
     }
-    
-    //std::cout << "QUERY OUT '" << query_out << "'\n";
-    // still need to fold this new rpn query string into Z_Query structure...
+    package.move();  // reached for every package except the error path above
 }
 
-
-
-void yp2::filter::QueryRewrite::Rep::configure(const xmlNode *filter)
+void mp::filter::QueryRewrite::Rep::configure(const xmlNode *ptr,  // reads <xslt> and <charset> children of ptr; throws FilterException on bad config
+                                              bool test_only, const char *path)  // NOTE(review): test_only is not used in this function
 {
-
-    //std::cout << "XML node '" << filter->name << "'\n";
-    yp2::xml::check_element_yp2(filter, "filter");
-
-    const xmlNode* regex 
-        = yp2::xml::jump_to_children(filter, XML_ELEMENT_NODE);
-    
-    while (regex){
-        //std::cout << "XML node '" << regex->name << "'\n";
-        yp2::xml::check_element_yp2(regex, "regex");
-
-        // parsing action
-//         const xmlNode* action 
-//             = yp2::xml::jump_to_children(regex, XML_ATTRIBUTE_NODE);
-//         if (action){
-//             std::cout << "XML node '" << action->name << "' '";
-//             std::cout << yp2::xml::get_text(action) << "'\n";
-//             //yp2::xml::check_element_yp2(expression, "expression");
-//         }
-
-        // parsing regex expression
-        std::string expr;
-        const xmlNode* expression 
-            = yp2::xml::jump_to_children(regex, XML_ELEMENT_NODE);
-        if (expression){
-            yp2::xml::check_element_yp2(expression, "expression");
-            expr = yp2::xml::get_text(expression);
-            //std::cout << "XML node '" << expression->name << "' '";
-            //std::cout << yp2::xml::get_text(expression) << "'\n";
+    for (ptr = ptr->children; ptr; ptr = ptr->next)  // walk the direct children of the filter element
+    {
+        if (ptr->type != XML_ELEMENT_NODE)  // ignore non-element nodes
+            continue;
+
+        if (mp::xml::is_element_mp(ptr, "xslt"))
+        {
+            if (m_stylesheet)  // a stylesheet was already loaded by an earlier <xslt> element
+            {
+                throw mp::filter::FilterException
+                    ("Only one xslt element allowed in query_rewrite filter");
+            }
+
+            std::string fname;  // value of the stylesheet attribute
+
+            for (struct _xmlAttr *attr = ptr->properties;
+                 attr; attr = attr->next)
+            {
+                mp::xml::check_attribute(attr, "", "stylesheet");  // presumably rejects any attribute other than "stylesheet" - confirm in mp::xml
+                fname = mp::xml::get_text(attr);
+            }
+
+            if (0 == fname.size())  // attribute missing or empty
+                throw mp::filter::FilterException
+                    ("Attribute <xslt stylesheet=\""
+                     + fname  // NOTE(review): fname is always empty on this path
+                     + "\"> needs XSLT stylesheet path content"
+                     + " in query_rewrite filter");
+
+            char fullpath[1024];  // output buffer handed to yaz_filepath_resolve
+            char *cp = yaz_filepath_resolve(fname.c_str(), path, 0, fullpath);  // presumably resolves fname against path - confirm in yaz/tpath.h
+            if (!cp)
+            {
+                throw mp::filter::FilterException("Cannot read XSLT " + fname);
+            }
+
+            m_stylesheet = xsltParseStylesheetFile(BAD_CAST cp);  // kept until the destructor frees it
+            if (!m_stylesheet)
+            {
+                throw mp::filter::FilterException
+                    ("Failed to read XSLT stylesheet '"
+                     + fname
+                     + "' in query_rewrite filter");
+            }
         }
-        
-        // parsing regex format
-        std::string form;
-        const xmlNode* format
-            =  yp2::xml::jump_to_next(expression, XML_ELEMENT_NODE);
-        if (format){
-            yp2::xml::check_element_yp2(format, "format");
-            form = yp2::xml::get_text(format);
-            //std::cout << "XML node '" << format->name << "' '";
-            //std::cout << yp2::xml::get_text(format) << "'\n";
+        else if (mp::xml::is_element_mp(ptr, "charset"))
+        {
+            for (struct _xmlAttr *attr = ptr->properties;  // only "from" and "to" attributes are accepted
+                 attr; attr = attr->next)
+            {
+                if (!strcmp((const char *) attr->name, "from"))
+                {
+                    charset_from = mp::xml::get_text(attr);  // replaces the "UTF-8" default set by the constructor
+                }
+                else if (!strcmp((const char *) attr->name, "to"))
+                {
+                    charset_to = mp::xml::get_text(attr);  // a non-empty value enables conversion in process()
+                }
+                else
+                    throw mp::filter::FilterException
+                        ("Invalid attribute inside charset inside "
+                         "query_rewrite filter");
+            }
         }
-
-        // adding configuration
-        if (expr.size() && form.size()){
-            //std::cout << "adding regular expression\n";
+        else  // any other child element is a configuration error
+        {
+            throw mp::filter::FilterException
+                ("Bad element "
+                 + std::string((const char *) ptr->name)
+                 + " in query_rewrite filter");
         }
-
-        // moving forward to next regex
-        regex = yp2::xml::jump_to_next(regex, XML_ELEMENT_NODE);
     }
-    
-    // done parsing XML config
-    
 }
 
-static yp2::filter::Base* filter_creator()
+static mp::filter::Base* filter_creator()  // factory stored in metaproxy_1_filter_query_rewrite
 {
-    return new yp2::filter::QueryRewrite;
+    return new mp::filter::QueryRewrite;  // returned through the mp::filter::Base pointer type
 }
 
 extern "C" {
-    struct yp2_filter_struct yp2_filter_query_rewrite = {
+    struct metaproxy_1_filter_struct metaproxy_1_filter_query_rewrite = {  // C-linkage record tying the filter name to its factory
         0,
-        "query-rewrite",
+        "query_rewrite",  // presumably the filter type name used in configuration - confirm against metaproxy_1_filter_struct
         filter_creator
     };
 }
 
-extern "C" {
-    extern struct yp2_filter_struct yp2_filter_query_rewrite;
-}
-
-
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
- * c-file-style: "stroustrup"
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab
  */
+