b74885dbff2b63c81a653c890881a019651453b6
[mp-xquery-moved-to-github.git] / src / metaproxy_filter_xquery.cpp
1 /* This file is part of mp-xquery
2    Copyright (C) Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include <metaproxy/package.hpp>
20 #include <metaproxy/util.hpp>
21 #include <yaz/log.h>
22 #include <yaz/oid_db.h>
23 #include <yaz/diagbib1.h>
24 #include <map>
25 #include <string>
26 #include <iostream>
27 #include <fstream>
28 #include <sstream>
29
30 #include <zorba/zorba.h>
31 #include <zorba/store_manager.h>
32 #include <zorba/serializer.h>
33 #include <zorba/singleton_item_sequence.h>
34 #include <zorba/zorba_exception.h>
35
36
37 namespace mp = metaproxy_1;
38 namespace yf = mp::filter;
39 namespace mp_util = metaproxy_1::util;
40 using namespace mp;
41 using namespace zorba;
42
43 namespace metaproxy_1 {
44     namespace filter {
45         class XQuery : public Base {
46         public:
47             ~XQuery();
48             XQuery();
49             void process(metaproxy_1::Package & package) const;
50             void configure(const xmlNode * ptr, bool test_only,
51                            const char *path);
52             void start() const;
53             void stop(int signo) const;
54         private:
55             bool convert_one_record(const char *input_buf,
56                                     size_t input_len,
57                                     std::string &result) const;
58             std::map<std::string, std::string> zorba_variables;
59             std::string zorba_filename;
60             std::string zorba_script;
61             std::string zorba_record_variable;
62             Zorba *lZorba;
63             XQuery_t lQuery;
64         };
65     }
66 }
67
68 yf::XQuery::XQuery()
69 {
70     lZorba = 0;
71 }
72
73 yf::XQuery::~XQuery()
74 {
75     if (lZorba)
76         lZorba->shutdown();
77 }
78
79 void yf::XQuery::start() const
80 {
81 }
82
83 void yf::XQuery::stop(int signo) const
84 {
85 }
86
87 bool yf::XQuery::convert_one_record(const char *input_buf,
88                                     size_t input_len,
89                                     std::string &result) const
90 {
91     XQuery_t tQuery = lQuery->clone();
92
93     zorba::DynamicContext* lDynamicContext = tQuery->getDynamicContext();
94
95     zorba::Item lItem;
96     std::map<std::string, std::string>::const_iterator it;
97     for (it = zorba_variables.begin(); it != zorba_variables.end(); it++)
98     {
99         lItem = lZorba->getItemFactory()->createString(it->second);
100         lDynamicContext->setVariable(it->first, lItem);
101     }
102     std::string rec_content = "raw:" + std::string(input_buf, input_len);
103     lItem = lZorba->getItemFactory()->createString(rec_content);
104     lDynamicContext->setVariable(zorba_record_variable, lItem);
105
106     try {
107         std::stringstream ss;
108         tQuery->execute(ss);
109         result = ss.str();
110         return true;
111     } catch ( ZorbaException &e) {
112         result = e.what();
113         yaz_log(YLOG_WARN, "XQuery execute: %s", result.c_str());
114         return false;
115     }
116 }
117
118 void yf::XQuery::process(Package &package) const
119 {
120     Z_GDU *gdu_req = package.request().get();
121     Z_PresentRequest *pr_req = 0;
122     Z_SearchRequest *sr_req = 0;
123
124     const char *input_schema = 0;
125     Odr_oid *input_syntax = 0;
126
127     if (gdu_req && gdu_req->which == Z_GDU_Z3950 &&
128         gdu_req->u.z3950->which == Z_APDU_presentRequest)
129     {
130         pr_req = gdu_req->u.z3950->u.presentRequest;
131
132         input_schema =
133             mp_util::record_composition_to_esn(pr_req->recordComposition);
134         input_syntax = pr_req->preferredRecordSyntax;
135     }
136     else if (gdu_req && gdu_req->which == Z_GDU_Z3950 &&
137              gdu_req->u.z3950->which == Z_APDU_searchRequest)
138     {
139         sr_req = gdu_req->u.z3950->u.searchRequest;
140
141         input_syntax = sr_req->preferredRecordSyntax;
142
143         // we don't know how many hits we're going to get and therefore
144         // the effective element set name.. Therefore we can only allow
145         // two cases.. Both equal or absent.. If not, we'll just have to
146         // disable the piggyback!
147         if (sr_req->smallSetElementSetNames
148             &&
149             sr_req->mediumSetElementSetNames
150             &&
151             sr_req->smallSetElementSetNames->which == Z_ElementSetNames_generic
152             &&
153             sr_req->mediumSetElementSetNames->which == Z_ElementSetNames_generic
154             &&
155             !strcmp(sr_req->smallSetElementSetNames->u.generic,
156                     sr_req->mediumSetElementSetNames->u.generic))
157         {
158             input_schema = sr_req->smallSetElementSetNames->u.generic;
159         }
160         else if (!sr_req->smallSetElementSetNames &&
161                  !sr_req->mediumSetElementSetNames)
162             ; // input_schema is 0 already
163         else
164         {
165             // disable piggyback (perhaps it was disabled already)
166             *sr_req->smallSetUpperBound = 0;
167             *sr_req->largeSetLowerBound = 0;
168             *sr_req->mediumSetPresentNumber = 0;
169             package.move();
170             return;
171         }
172         // we can handle it in record_transform.
173     }
174     else
175     {
176         package.move();
177         return;
178     }
179
180     mp::odr odr_en(ODR_ENCODE);
181
182     const char *backend_schema = 0;
183     const Odr_oid *backend_syntax = 0;
184
185     if (input_schema && !strcmp(input_schema, "bibframe") &&
186         (!input_syntax || !oid_oidcmp(input_syntax, yaz_oid_recsyn_xml)))
187     {
188         backend_schema = "marcxml";
189         backend_syntax = yaz_oid_recsyn_xml;
190     }
191     else
192     {
193         package.move();
194         return;
195     }
196
197     if (sr_req)
198     {
199         if (backend_syntax)
200             sr_req->preferredRecordSyntax = odr_oiddup(odr_en, backend_syntax);
201         else
202             sr_req->preferredRecordSyntax = 0;
203         if (backend_schema)
204         {
205             sr_req->smallSetElementSetNames
206                 = (Z_ElementSetNames *)
207                 odr_malloc(odr_en, sizeof(Z_ElementSetNames));
208             sr_req->smallSetElementSetNames->which = Z_ElementSetNames_generic;
209             sr_req->smallSetElementSetNames->u.generic
210                 = odr_strdup(odr_en, backend_schema);
211             sr_req->mediumSetElementSetNames = sr_req->smallSetElementSetNames;
212         }
213         else
214         {
215             sr_req->smallSetElementSetNames = 0;
216             sr_req->mediumSetElementSetNames = 0;
217         }
218     }
219     else if (pr_req)
220     {
221         if (backend_syntax)
222             pr_req->preferredRecordSyntax = odr_oiddup(odr_en, backend_syntax);
223         else
224             pr_req->preferredRecordSyntax = 0;
225
226         if (backend_schema)
227         {
228             pr_req->recordComposition
229                 = (Z_RecordComposition *)
230                 odr_malloc(odr_en, sizeof(Z_RecordComposition));
231             pr_req->recordComposition->which
232                 = Z_RecordComp_simple;
233             pr_req->recordComposition->u.simple
234                 = (Z_ElementSetNames *)
235                 odr_malloc(odr_en, sizeof(Z_ElementSetNames));
236             pr_req->recordComposition->u.simple->which = Z_ElementSetNames_generic;
237             pr_req->recordComposition->u.simple->u.generic
238                 = odr_strdup(odr_en, backend_schema);
239         }
240         else
241             pr_req->recordComposition = 0;
242     }
243     package.move();
244
245     Z_GDU *gdu_res = package.response().get();
246
247     // see if we have a records list to patch!
248     Z_NamePlusRecordList *records = 0;
249     if (gdu_res && gdu_res->which == Z_GDU_Z3950 &&
250         gdu_res->u.z3950->which == Z_APDU_presentResponse)
251     {
252         Z_PresentResponse * pr_res = gdu_res->u.z3950->u.presentResponse;
253
254         if (pr_res
255             && pr_res->numberOfRecordsReturned
256             && *(pr_res->numberOfRecordsReturned) > 0
257             && pr_res->records
258             && pr_res->records->which == Z_Records_DBOSD)
259         {
260             records = pr_res->records->u.databaseOrSurDiagnostics;
261         }
262     }
263     if (gdu_res && gdu_res->which == Z_GDU_Z3950 &&
264         gdu_res->u.z3950->which == Z_APDU_searchResponse)
265     {
266         Z_SearchResponse *sr_res = gdu_res->u.z3950->u.searchResponse;
267
268         if (sr_res
269             && sr_res->numberOfRecordsReturned
270             && *(sr_res->numberOfRecordsReturned) > 0
271             && sr_res->records
272             && sr_res->records->which == Z_Records_DBOSD)
273         {
274             records = sr_res->records->u.databaseOrSurDiagnostics;
275         }
276     }
277     if (records)
278     {
279         int i;
280         for (i = 0; i < records->num_records; i++)
281         {
282             Z_NamePlusRecord **npr = &records->records[i];
283             if ((*npr)->which == Z_NamePlusRecord_databaseRecord)
284             {
285                 const char *details = 0;
286                 Z_External *r = (*npr)->u.databaseRecord;
287                 int ret_trans = -1;
288                 if (r->which == Z_External_octet &&
289                     !oid_oidcmp(r->direct_reference, yaz_oid_recsyn_xml))
290                 {
291                     std::string result;
292                     if (convert_one_record(
293                         r->u.octet_aligned->buf, r->u.octet_aligned->len,
294                         result))
295                     {
296                         (*npr)->u.databaseRecord =
297                             z_ext_record_oid(odr_en, yaz_oid_recsyn_xml,
298                                              result.c_str(),
299                                              result.length());
300                     }
301                     else
302                     {
303                         *npr = zget_surrogateDiagRec(
304                             odr_en, (*npr)->databaseName,
305                             YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
306                             result.c_str());
307                     }
308                 }
309             }
310         }
311         package.response() = gdu_res;
312     }
313 }
314
315 void yf::XQuery::configure(const xmlNode * ptr, bool test_only,
316                            const char *path)
317 {
318     for (ptr = ptr->children; ptr; ptr = ptr->next)
319     {
320         if (ptr->type != XML_ELEMENT_NODE)
321             continue;
322         if (!strcmp((const char *) ptr->name, "setVariable"))
323         {
324             std::string name;
325             std::string value;
326             struct _xmlAttr *attr;
327             for (attr = ptr->properties; attr; attr = attr->next)
328                 if (!strcmp((const char *) attr->name, "name"))
329                     name = mp::xml::get_text(attr->children);
330                 else if (!strcmp((const char *) attr->name, "value"))
331                     value = mp::xml::get_text(attr->children);
332                 else
333                     throw mp::filter::FilterException(
334                         "Bad attribute " + std::string((const char *)
335                                                        attr->name));
336             if (name.length() > 0)
337                 zorba_variables[name] = value;
338         }
339         else if (!strcmp((const char *) ptr->name, "script"))
340         {
341             std::string value;
342             struct _xmlAttr *attr;
343             for (attr = ptr->properties; attr; attr = attr->next)
344                 if (!strcmp((const char *) attr->name, "value"))
345                     value = mp::xml::get_text(attr->children);
346                 else
347                     throw mp::filter::FilterException(
348                         "Bad attribute " + std::string((const char *)
349                                                        attr->name));
350             zorba_script = value;
351         }
352         else if (!strcmp((const char *) ptr->name, "record"))
353         {
354             std::string value;
355             struct _xmlAttr *attr;
356             for (attr = ptr->properties; attr; attr = attr->next)
357                 if (!strcmp((const char *) attr->name, "value"))
358                     value = mp::xml::get_text(attr->children);
359                 else
360                     throw mp::filter::FilterException(
361                         "Bad attribute " + std::string((const char *)
362                                                        attr->name));
363             zorba_record_variable = value;
364         }
365         else
366         {
367             throw mp::filter::FilterException("Bad element "
368                                                + std::string((const char *)
369                                                              ptr->name));
370         }
371     }
372     if (zorba_script.length() == 0)
373         throw mp::filter::FilterException("Missing element script");
374     if (zorba_record_variable.length() == 0)
375         throw mp::filter::FilterException("Missing element record");
376     if (!test_only)
377     {
378         void* lStore = StoreManager::getStore();
379         lZorba = Zorba::getInstance(lStore);
380
381         lQuery = lZorba->createQuery();
382
383         try {
384             size_t t = zorba_script.find_last_of('/');
385             if (t != std::string::npos)
386                 lQuery->setFileName(zorba_script.substr(0, t + 1));
387             std::unique_ptr<std::istream> qfile(
388                 new std::ifstream(zorba_script.c_str()));
389             Zorba_CompilerHints lHints;
390             lQuery->compile(*qfile, lHints);
391         } catch ( ZorbaException &e) {
392             std::string msg = "XQuery compile: ";
393             msg += e.what();
394             throw mp::filter::FilterException(msg);
395         }
396     }
397 }
398
399 static yf::Base* filter_creator()
400 {
401     return new mp::filter::XQuery;
402 }
403
404 extern "C" {
405     struct metaproxy_1_filter_struct metaproxy_1_filter_xquery = {
406         0,
407         "xquery",
408         filter_creator
409     };
410 }
411
412
413 /*
414  * Local variables:
415  * c-basic-offset: 4
416  * c-file-style: "Stroustrup"
417  * indent-tabs-mode: nil
418  * End:
419  * vim: shiftwidth=4 tabstop=8 expandtab
420  */
421