query_rewrite: charset conversion of query terms
[metaproxy-moved-to-github.git] / src / filter_query_rewrite.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) 2005-2012 Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include "config.hpp"
20 #include <metaproxy/filter.hpp>
21 #include <metaproxy/package.hpp>
22
23 #include <metaproxy/util.hpp>
24 #include "filter_query_rewrite.hpp"
25
26 #include <yaz/log.h>
27 #include <yaz/zgdu.h>
28 #include <yaz/xmlquery.h>
29 #include <yaz/diagbib1.h>
30 #include <yaz/query-charset.h>
31
32 #include <libxslt/xsltutils.h>
33 #include <libxslt/transform.h>
34
35 namespace mp = metaproxy_1;
36 namespace yf = mp::filter;
37
38 namespace metaproxy_1 {
39     namespace filter {
40         class QueryRewrite::Rep {
41         public:
42             Rep();
43             ~Rep();
44             void process(mp::Package &package) const;
45             void configure(const xmlNode * ptr);
46         private:
47             xsltStylesheetPtr m_stylesheet;
48             std::string charset_from;
49             std::string charset_to;
50        };
51     }
52 }
53
54 yf::QueryRewrite::Rep::Rep() : m_stylesheet(0), charset_from("UTF-8")
55 {
56 }
57
58 yf::QueryRewrite::Rep::~Rep()
59 {
60     if (m_stylesheet)
61         xsltFreeStylesheet(m_stylesheet);
62 }
63
64 yf::QueryRewrite::QueryRewrite() : m_p(new Rep)
65 {
66 }
67
68 yf::QueryRewrite::~QueryRewrite()
69 {  // must have a destructor because of boost::scoped_ptr
70 }
71
72 void yf::QueryRewrite::process(mp::Package &package) const
73 {
74     m_p->process(package);
75 }
76
77 void mp::filter::QueryRewrite::configure(const xmlNode *ptr, bool test_only,
78                                          const char *path)
79 {
80     m_p->configure(ptr);
81 }
82
83 void yf::QueryRewrite::Rep::process(mp::Package &package) const
84 {
85     Z_GDU *gdu = package.request().get();
86     
87     if (gdu && gdu->which == Z_GDU_Z3950)
88     {
89         Z_APDU *apdu_req = gdu->u.z3950;
90         if (apdu_req->which == Z_APDU_searchRequest)
91         {
92             int error_code = 0;
93             const char *addinfo = 0;
94             mp::odr odr;
95             Z_SearchRequest *req = apdu_req->u.searchRequest;
96             
97             if (m_stylesheet)
98             {
99                 xmlDocPtr doc_input = 0;
100                 yaz_query2xml(req->query, &doc_input);
101                 
102                 if (!doc_input)
103                 {
104                     error_code = YAZ_BIB1_MALFORMED_QUERY;
105                     addinfo = "converion from Query to XML failed";
106                 }
107                 else
108                 {
109                     xmlDocPtr doc_res = xsltApplyStylesheet(m_stylesheet,
110                                                             doc_input, 0);
111                     if (!doc_res)
112                     {
113                         error_code = YAZ_BIB1_MALFORMED_QUERY;
114                         addinfo = "XSLT transform failed for query";
115                     }
116                     else
117                     {
118                         const xmlNode *root_element = xmlDocGetRootElement(doc_res);
119                         yaz_xml2query(root_element, &req->query, odr,
120                                       &error_code, &addinfo);
121                         xmlFreeDoc(doc_res);
122                     }
123                     xmlFreeDoc(doc_input);
124                 }
125             }
126             if (charset_to.length() && charset_from.length() &&
127                 (req->query->which == Z_Query_type_1
128                  || req->query->which == Z_Query_type_101))
129             {
130                 yaz_iconv_t cd = yaz_iconv_open(charset_to.c_str(),
131                                                 charset_from.c_str());
132                 if (cd)
133                 {
134                     int r = yaz_query_charset_convert_rpnquery_check(
135                         req->query->u.type_1, odr, cd);
136                     yaz_iconv_close(cd);
137                     if (r)
138                     {  /* query could not be char converted */
139                         error_code = YAZ_BIB1_MALFORMED_QUERY;
140                         addinfo = "could not convert query to target charset";
141                     }
142                 }
143             }
144             if (error_code)
145             {
146                 Z_APDU *f_apdu = 
147                     odr.create_searchResponse(apdu_req, error_code, addinfo);
148                 package.response() = f_apdu;
149                 return;
150             }
151             package.request() = gdu;
152         } 
153     }
154     package.move();
155 }
156
157 void mp::filter::QueryRewrite::Rep::configure(const xmlNode *ptr)
158 {
159     for (ptr = ptr->children; ptr; ptr = ptr->next)
160     {
161         if (ptr->type != XML_ELEMENT_NODE)
162             continue;
163
164         if (mp::xml::is_element_mp(ptr, "xslt"))
165         {
166             if (m_stylesheet)
167             {
168                 throw mp::filter::FilterException
169                     ("Only one xslt element allowed in query_rewrite filter");
170             }
171
172             std::string fname;// = mp::xml::get_text(ptr);
173
174             for (struct _xmlAttr *attr = ptr->properties; 
175                  attr; attr = attr->next)
176             {
177                 mp::xml::check_attribute(attr, "", "stylesheet");
178                 fname = mp::xml::get_text(attr);            
179             }
180
181             if (0 == fname.size())
182                 throw mp::filter::FilterException
183                     ("Attribute <xslt stylesheet=\"" 
184                      + fname
185                      + "\"> needs XSLT stylesheet path content"
186                      + " in query_rewrite filter");
187             
188             m_stylesheet = xsltParseStylesheetFile(BAD_CAST fname.c_str());
189             if (!m_stylesheet)
190             {
191                 throw mp::filter::FilterException
192                     ("Failed to read XSLT stylesheet '" 
193                      + fname
194                      + "' in query_rewrite filter");
195             }
196         }
197         else if (mp::xml::is_element_mp(ptr, "charset"))
198         {
199             for (struct _xmlAttr *attr = ptr->properties; 
200                  attr; attr = attr->next)
201             {
202                 if (!strcmp((const char *) attr->name, "from"))
203                 {
204                     charset_from = mp::xml::get_text(attr);
205                 }
206                 else if (!strcmp((const char *) attr->name, "to"))
207                 {
208                     charset_to = mp::xml::get_text(attr);
209                 }
210                 else
211                     throw mp::filter::FilterException
212                         ("Invalid attribute inside charset inside "
213                          "query_rewrite filter");
214             }
215         }
216         else
217         {
218             throw mp::filter::FilterException
219                 ("Bad element " 
220                  + std::string((const char *) ptr->name)
221                  + " in query_rewrite filter");
222         }
223     }
224 }
225
226 static mp::filter::Base* filter_creator()
227 {
228     return new mp::filter::QueryRewrite;
229 }
230
231 extern "C" {
232     struct metaproxy_1_filter_struct metaproxy_1_filter_query_rewrite = {
233         0,
234         "query_rewrite",
235         filter_creator
236     };
237 }
238
239 /*
240  * Local variables:
241  * c-basic-offset: 4
242  * c-file-style: "Stroustrup"
243  * indent-tabs-mode: nil
244  * End:
245  * vim: shiftwidth=4 tabstop=8 expandtab
246  */
247