Almost fully operational xquery MP module
[mp-xquery-moved-to-github.git] / src / metaproxy_filter_xquery.cpp
1 /* This file is part of mp-xquery
2    Copyright (C) Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
#include <metaproxy/package.hpp>
#include <metaproxy/util.hpp>
#include <yaz/log.h>
#include <yaz/oid_db.h>

#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#include <fstream>
#include <iostream>
#include <map>
#include <memory>
#include <sstream>
#include <string>

#include <zorba/zorba.h>
#include <zorba/store_manager.h>
#include <zorba/serializer.h>
#include <zorba/singleton_item_sequence.h>
#include <zorba/zorba_exception.h>
34
35
36 namespace mp = metaproxy_1;
37 namespace yf = mp::filter;
38 namespace mp_util = metaproxy_1::util;
39 using namespace mp;
40 using namespace zorba;
41
42 namespace metaproxy_1 {
43     namespace filter {
44         class XQuery : public Base {
45         public:
46             ~XQuery();
47             XQuery();
48             void process(metaproxy_1::Package & package) const;
49             void configure(const xmlNode * ptr, bool test_only,
50                            const char *path);
51             void start() const;
52             void stop(int signo) const;
53         private:
54             bool convert_one_record(const char *input_buf,
55                                     size_t input_len,
56                                     std::string &result) const;
57             std::map<std::string, std::string> zorba_variables;
58             std::string zorba_filename;
59             std::string zorba_script;
60             std::string zorba_record_variable;
61             Zorba *lZorba;
62             XQuery_t lQuery;
63         };
64     }
65 }
66
67 yf::XQuery::XQuery()
68 {
69     lZorba = 0;
70 }
71
72 yf::XQuery::~XQuery()
73 {
74     if (lZorba)
75         lZorba->shutdown();
76 }
77
78 void yf::XQuery::start() const
79 {
80 }
81
82 void yf::XQuery::stop(int signo) const
83 {
84 }
85
86 bool yf::XQuery::convert_one_record(const char *input_buf,
87                                     size_t input_len,
88                                     std::string &result) const
89 {
90     XQuery_t tQuery = lQuery->clone();
91
92     zorba::DynamicContext* lDynamicContext = tQuery->getDynamicContext();
93
94     zorba::Item lItem;
95     std::map<std::string, std::string>::const_iterator it;
96     for (it = zorba_variables.begin(); it != zorba_variables.end(); it++)
97     {
98         lItem = lZorba->getItemFactory()->createString(it->second);
99         lDynamicContext->setVariable(it->first, lItem);
100     }
101     // TODO: Make tmp file here
102     lItem = lZorba->getItemFactory()->createString(
103         "/home/adam/proj/yaz/test/marc7.xml");
104     lDynamicContext->setVariable(zorba_record_variable, lItem);
105
106     std::stringstream ss;
107     tQuery->execute(ss);
108     result = ss.str();
109     return true;
110 }
111
112 void yf::XQuery::process(Package &package) const
113 {
114     Z_GDU *gdu_req = package.request().get();
115     Z_PresentRequest *pr_req = 0;
116     Z_SearchRequest *sr_req = 0;
117
118     const char *input_schema = 0;
119     Odr_oid *input_syntax = 0;
120
121     if (gdu_req && gdu_req->which == Z_GDU_Z3950 &&
122         gdu_req->u.z3950->which == Z_APDU_presentRequest)
123     {
124         pr_req = gdu_req->u.z3950->u.presentRequest;
125
126         input_schema =
127             mp_util::record_composition_to_esn(pr_req->recordComposition);
128         input_syntax = pr_req->preferredRecordSyntax;
129     }
130     else if (gdu_req && gdu_req->which == Z_GDU_Z3950 &&
131              gdu_req->u.z3950->which == Z_APDU_searchRequest)
132     {
133         sr_req = gdu_req->u.z3950->u.searchRequest;
134
135         input_syntax = sr_req->preferredRecordSyntax;
136
137         // we don't know how many hits we're going to get and therefore
138         // the effective element set name.. Therefore we can only allow
139         // two cases.. Both equal or absent.. If not, we'll just have to
140         // disable the piggyback!
141         if (sr_req->smallSetElementSetNames
142             &&
143             sr_req->mediumSetElementSetNames
144             &&
145             sr_req->smallSetElementSetNames->which == Z_ElementSetNames_generic
146             &&
147             sr_req->mediumSetElementSetNames->which == Z_ElementSetNames_generic
148             &&
149             !strcmp(sr_req->smallSetElementSetNames->u.generic,
150                     sr_req->mediumSetElementSetNames->u.generic))
151         {
152             input_schema = sr_req->smallSetElementSetNames->u.generic;
153         }
154         else if (!sr_req->smallSetElementSetNames &&
155                  !sr_req->mediumSetElementSetNames)
156             ; // input_schema is 0 already
157         else
158         {
159             // disable piggyback (perhaps it was disabled already)
160             *sr_req->smallSetUpperBound = 0;
161             *sr_req->largeSetLowerBound = 0;
162             *sr_req->mediumSetPresentNumber = 0;
163             package.move();
164             return;
165         }
166         // we can handle it in record_transform.
167     }
168     else
169     {
170         package.move();
171         return;
172     }
173
174     mp::odr odr_en(ODR_ENCODE);
175
176     const char *backend_schema = 0;
177     const Odr_oid *backend_syntax = 0;
178
179     if (input_schema && !strcmp(input_schema, "bibframe") &&
180         (!input_syntax || !oid_oidcmp(input_syntax, yaz_oid_recsyn_xml)))
181     {
182         backend_schema = "marcxml";
183         backend_syntax = yaz_oid_recsyn_xml;
184     }
185     else
186     {
187         package.move();
188         return;
189     }
190
191     if (sr_req)
192     {
193         if (backend_syntax)
194             sr_req->preferredRecordSyntax = odr_oiddup(odr_en, backend_syntax);
195         else
196             sr_req->preferredRecordSyntax = 0;
197         if (backend_schema)
198         {
199             sr_req->smallSetElementSetNames
200                 = (Z_ElementSetNames *)
201                 odr_malloc(odr_en, sizeof(Z_ElementSetNames));
202             sr_req->smallSetElementSetNames->which = Z_ElementSetNames_generic;
203             sr_req->smallSetElementSetNames->u.generic
204                 = odr_strdup(odr_en, backend_schema);
205             sr_req->mediumSetElementSetNames = sr_req->smallSetElementSetNames;
206         }
207         else
208         {
209             sr_req->smallSetElementSetNames = 0;
210             sr_req->mediumSetElementSetNames = 0;
211         }
212     }
213     else if (pr_req)
214     {
215         if (backend_syntax)
216             pr_req->preferredRecordSyntax = odr_oiddup(odr_en, backend_syntax);
217         else
218             pr_req->preferredRecordSyntax = 0;
219
220         if (backend_schema)
221         {
222             pr_req->recordComposition
223                 = (Z_RecordComposition *)
224                 odr_malloc(odr_en, sizeof(Z_RecordComposition));
225             pr_req->recordComposition->which
226                 = Z_RecordComp_simple;
227             pr_req->recordComposition->u.simple
228                 = (Z_ElementSetNames *)
229                 odr_malloc(odr_en, sizeof(Z_ElementSetNames));
230             pr_req->recordComposition->u.simple->which = Z_ElementSetNames_generic;
231             pr_req->recordComposition->u.simple->u.generic
232                 = odr_strdup(odr_en, backend_schema);
233         }
234         else
235             pr_req->recordComposition = 0;
236     }
237     package.move();
238
239     Z_GDU *gdu_res = package.response().get();
240
241     // see if we have a records list to patch!
242     Z_NamePlusRecordList *records = 0;
243     if (gdu_res && gdu_res->which == Z_GDU_Z3950 &&
244         gdu_res->u.z3950->which == Z_APDU_presentResponse)
245     {
246         Z_PresentResponse * pr_res = gdu_res->u.z3950->u.presentResponse;
247
248         if (pr_res
249             && pr_res->numberOfRecordsReturned
250             && *(pr_res->numberOfRecordsReturned) > 0
251             && pr_res->records
252             && pr_res->records->which == Z_Records_DBOSD)
253         {
254             records = pr_res->records->u.databaseOrSurDiagnostics;
255         }
256     }
257     if (gdu_res && gdu_res->which == Z_GDU_Z3950 &&
258         gdu_res->u.z3950->which == Z_APDU_searchResponse)
259     {
260         Z_SearchResponse *sr_res = gdu_res->u.z3950->u.searchResponse;
261
262         if (sr_res
263             && sr_res->numberOfRecordsReturned
264             && *(sr_res->numberOfRecordsReturned) > 0
265             && sr_res->records
266             && sr_res->records->which == Z_Records_DBOSD)
267         {
268             records = sr_res->records->u.databaseOrSurDiagnostics;
269         }
270     }
271     if (records)
272     {
273         int i;
274         for (i = 0; i < records->num_records; i++)
275         {
276             Z_NamePlusRecord *npr = records->records[i];
277             if (npr->which == Z_NamePlusRecord_databaseRecord)
278             {
279                 const char *details = 0;
280                 Z_External *r = npr->u.databaseRecord;
281                 int ret_trans = -1;
282                 if (r->which == Z_External_octet &&
283                     !oid_oidcmp(r->direct_reference, yaz_oid_recsyn_xml))
284                 {
285                     std::string result;
286                     if (convert_one_record(
287                         r->u.octet_aligned->buf, r->u.octet_aligned->len,
288                         result))
289                     {
290                         npr->u.databaseRecord =
291                             z_ext_record_oid(odr_en, yaz_oid_recsyn_xml,
292                                              result.c_str(),
293                                              result.length());
294                     }
295                 }
296             }
297         }
298         package.response() = gdu_res;
299     }
300 }
301
302 void yf::XQuery::configure(const xmlNode * ptr, bool test_only,
303                            const char *path)
304 {
305     for (ptr = ptr->children; ptr; ptr = ptr->next)
306     {
307         if (ptr->type != XML_ELEMENT_NODE)
308             continue;
309         if (!strcmp((const char *) ptr->name, "setVariable"))
310         {
311             std::string name;
312             std::string value;
313             struct _xmlAttr *attr;
314             for (attr = ptr->properties; attr; attr = attr->next)
315                 if (!strcmp((const char *) attr->name, "name"))
316                     name = mp::xml::get_text(attr->children);
317                 else if (!strcmp((const char *) attr->name, "value"))
318                     value = mp::xml::get_text(attr->children);
319                 else
320                     throw mp::filter::FilterException(
321                         "Bad attribute " + std::string((const char *)
322                                                        attr->name));
323             if (name.length() > 0)
324                 zorba_variables[name] = value;
325         }
326         else if (!strcmp((const char *) ptr->name, "filename"))
327         {
328             std::string value;
329             struct _xmlAttr *attr;
330             for (attr = ptr->properties; attr; attr = attr->next)
331                 if (!strcmp((const char *) attr->name, "value"))
332                     value = mp::xml::get_text(attr->children);
333                 else
334                     throw mp::filter::FilterException(
335                         "Bad attribute " + std::string((const char *)
336                                                        attr->name));
337             zorba_filename = value;
338         }
339         else if (!strcmp((const char *) ptr->name, "script"))
340         {
341             std::string value;
342             struct _xmlAttr *attr;
343             for (attr = ptr->properties; attr; attr = attr->next)
344                 if (!strcmp((const char *) attr->name, "value"))
345                     value = mp::xml::get_text(attr->children);
346                 else
347                     throw mp::filter::FilterException(
348                         "Bad attribute " + std::string((const char *)
349                                                        attr->name));
350             zorba_script = value;
351         }
352         else if (!strcmp((const char *) ptr->name, "record"))
353         {
354             std::string value;
355             struct _xmlAttr *attr;
356             for (attr = ptr->properties; attr; attr = attr->next)
357                 if (!strcmp((const char *) attr->name, "value"))
358                     value = mp::xml::get_text(attr->children);
359                 else
360                     throw mp::filter::FilterException(
361                         "Bad attribute " + std::string((const char *)
362                                                        attr->name));
363             zorba_record_variable = value;
364         }
365         else
366         {
367             throw mp::filter::FilterException("Bad element "
368                                                + std::string((const char *)
369                                                              ptr->name));
370         }
371     }
372     if (zorba_script.length() == 0)
373         throw mp::filter::FilterException("Missing element script");
374     if (zorba_record_variable.length() == 0)
375         throw mp::filter::FilterException("Missing element record");
376     if (zorba_filename.length() == 0)
377         throw mp::filter::FilterException("Missing element filename");
378     if (!test_only)
379     {
380         void* lStore = StoreManager::getStore();
381         lZorba = Zorba::getInstance(lStore);
382
383         lQuery = lZorba->createQuery();
384
385         lQuery->setFileName(zorba_filename);
386
387         std::unique_ptr<std::istream> qfile;
388         qfile.reset(new std::ifstream(zorba_script.c_str()));
389
390         Zorba_CompilerHints lHints;
391         lQuery->compile(*qfile, lHints);
392     }
393 }
394
395 static yf::Base* filter_creator()
396 {
397     return new mp::filter::XQuery;
398 }
399
400 extern "C" {
401     struct metaproxy_1_filter_struct metaproxy_1_filter_xquery = {
402         0,
403         "xquery",
404         filter_creator
405     };
406 }
407
408
409 /*
410  * Local variables:
411  * c-basic-offset: 4
412  * c-file-style: "Stroustrup"
413  * indent-tabs-mode: nil
414  * End:
415  * vim: shiftwidth=4 tabstop=8 expandtab
416  */
417