8aa9bab6bc72b9dae3bd0c3f9f9385704691dee1
[mp-sparql-moved-to-github.git] / src / filter_sparql.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include <metaproxy/package.hpp>
20 #include <metaproxy/util.hpp>
21 #include <yaz/log.h>
22 #include <yaz/srw.h>
23 #include <yaz/diagbib1.h>
24 #include <yaz/match_glob.h>
25 #include <boost/scoped_ptr.hpp>
26 #include <boost/thread/mutex.hpp>
27 #include <boost/thread/condition.hpp>
28 #include "sparql.h"
29
30 #include <yaz/zgdu.h>
31
// Short aliases used throughout this file: `mp` for the Metaproxy root
// namespace and `yf` for its nested filter namespace.
namespace mp = metaproxy_1;
namespace yf = mp::filter;
34
35 namespace metaproxy_1 {
36     namespace filter {
37         class SPARQL : public Base {
38             class Session;
39             class Rep;
40             class Conf;
41             class FrontendSet;
42
43             typedef boost::shared_ptr<Session> SessionPtr;
44             typedef boost::shared_ptr<Conf> ConfPtr;
45
46             typedef boost::shared_ptr<FrontendSet> FrontendSetPtr;
47             typedef std::map<std::string,FrontendSetPtr> FrontendSets;
48         public:
49             SPARQL();
50             ~SPARQL();
51             void process(metaproxy_1::Package & package) const;
52             void configure(const xmlNode * ptr, bool test_only,
53                            const char *path);
54             SessionPtr get_session(Package &package, Z_APDU **apdu) const;
55             void release_session(Package &package) const;
56             boost::scoped_ptr<Rep> m_p;
57             std::list<ConfPtr> db_conf;
58         };
59         class SPARQL::Conf {
60         public:
61             std::string db;
62             std::string uri;
63             yaz_sparql_t s;
64             ~Conf();
65         };
66         class SPARQL::Rep {
67             friend class SPARQL;
68             boost::condition m_cond_session_ready;
69             boost::mutex m_mutex;
70             std::map<mp::Session,SessionPtr> m_clients;
71         };
72         class SPARQL::FrontendSet {
73         public:
74             FrontendSet();
75             ~FrontendSet();
76         private:
77             friend class Session;
78             Odr_int hits;
79             std::string db;
80             xmlDoc *doc;
81         };
82         class SPARQL::Session {
83         public:
84             Session(const SPARQL *);
85             ~Session();
86             void handle_z(Package &package, Z_APDU *apdu);
87             Z_APDU *run_sparql(mp::Package &package,
88                                Z_APDU *apdu_req,
89                                mp::odr &odr,
90                                const char *sparql_query,
91                                const char *uri);
92             Z_Records *fetch(
93                 FrontendSetPtr fset,
94                 ODR odr, Odr_oid *preferredRecordSyntax,
95                 Z_ElementSetNames *esn,
96                 int start, int number, int &error_code, std::string &addinfo,
97                 int *number_returned, int *next_position);
98             bool m_in_use;
99         private:
100             bool m_support_named_result_sets;
101             FrontendSets m_frontend_sets;
102             const SPARQL *m_sparql;
103         };
104     }
105 }
106
107 yf::SPARQL::FrontendSet::~FrontendSet()
108 {
109     if (doc)
110         xmlFreeDoc(doc);
111 }
112
113 yf::SPARQL::FrontendSet::FrontendSet()
114 {
115     doc = 0;
116 }
117
118 yf::SPARQL::SPARQL() : m_p(new Rep)
119 {
120 }
121
122 yf::SPARQL::~SPARQL()
123 {
124 }
125
126 void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only,
127                            const char *path)
128 {
129     const xmlNode *ptr = xmlnode->children;
130
131     for (; ptr; ptr = ptr->next)
132     {
133         if (ptr->type != XML_ELEMENT_NODE)
134             continue;
135         if (!strcmp((const char *) ptr->name, "db"))
136         {
137             yaz_sparql_t s = yaz_sparql_create();
138             ConfPtr conf(new Conf);
139             conf->s = s;
140
141             const struct _xmlAttr *attr;
142             for (attr = ptr->properties; attr; attr = attr->next)
143             {
144                 if (!strcmp((const char *) attr->name, "path"))
145                     conf->db = mp::xml::get_text(attr->children);
146                 else if (!strcmp((const char *) attr->name, "uri"))
147                     conf->uri = mp::xml::get_text(attr->children);
148                 else
149                     throw mp::filter::FilterException(
150                         "Bad attribute " + std::string((const char *)
151                                                        attr->name));
152             }
153             xmlNode *p = ptr->children;
154             for (; p; p = p->next)
155             {
156                 if (p->type != XML_ELEMENT_NODE)
157                     continue;
158                 std::string name = (const char *) p->name;
159                 const struct _xmlAttr *attr;
160                 for (attr = p->properties; attr; attr = attr->next)
161                 {
162                     if (!strcmp((const char *) attr->name, "type"))
163                     {
164                         name.append(".");
165                         name.append(mp::xml::get_text(attr->children));
166                     }
167                     else
168                         throw mp::filter::FilterException(
169                             "Bad attribute " + std::string((const char *)
170                                                            attr->name));
171                 }
172                 std::string value = mp::xml::get_text(p);
173                 if (yaz_sparql_add_pattern(s, name.c_str(), value.c_str()))
174                 {
175                     throw mp::filter::FilterException(
176                         "Bad SPARQL config " + name);
177                 }
178             }
179             if (!conf->uri.length())
180             {
181                 throw mp::filter::FilterException("Missing uri");
182             }
183             if (!conf->db.length())
184             {
185                 throw mp::filter::FilterException("Missing path");
186             }
187             db_conf.push_back(conf);
188         }
189         else
190         {
191             throw mp::filter::FilterException
192                 ("Bad element "
193                  + std::string((const char *) ptr->name)
194                  + " in sparql filter");
195         }
196     }
197 }
198
199 yf::SPARQL::Conf::~Conf()
200 {
201     yaz_sparql_destroy(s);
202 }
203
204 yf::SPARQL::Session::Session(const SPARQL *sparql) :
205     m_in_use(true),
206     m_support_named_result_sets(false),
207     m_sparql(sparql)
208 {
209 }
210
211 yf::SPARQL::Session::~Session()
212 {
213 }
214
215 yf::SPARQL::SessionPtr yf::SPARQL::get_session(Package & package,
216                                                Z_APDU **apdu) const
217 {
218     SessionPtr ptr0;
219
220     Z_GDU *gdu = package.request().get();
221
222     boost::mutex::scoped_lock lock(m_p->m_mutex);
223
224     std::map<mp::Session,SPARQL::SessionPtr>::iterator it;
225
226     if (gdu && gdu->which == Z_GDU_Z3950)
227         *apdu = gdu->u.z3950;
228     else
229         *apdu = 0;
230
231     while (true)
232     {
233         it = m_p->m_clients.find(package.session());
234         if (it == m_p->m_clients.end())
235             break;
236         if (!it->second->m_in_use)
237         {
238             it->second->m_in_use = true;
239             return it->second;
240         }
241         m_p->m_cond_session_ready.wait(lock);
242     }
243     if (!*apdu)
244         return ptr0;
245
246     // new Z39.50 session ..
247     SessionPtr p(new Session(this));
248     m_p->m_clients[package.session()] = p;
249     return p;
250 }
251
252 void yf::SPARQL::release_session(Package &package) const
253 {
254     boost::mutex::scoped_lock lock(m_p->m_mutex);
255     std::map<mp::Session,SessionPtr>::iterator it;
256
257     it = m_p->m_clients.find(package.session());
258     if (it != m_p->m_clients.end())
259     {
260         it->second->m_in_use = false;
261
262         if (package.session().is_closed())
263             m_p->m_clients.erase(it);
264         m_p->m_cond_session_ready.notify_all();
265     }
266 }
267
268 static xmlNode *get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos)
269 {
270     xmlNode *ptr = xmlDocGetRootElement(doc);
271     Odr_int cur = 0;
272
273     if (ptr->type == XML_ELEMENT_NODE &&
274         !strcmp((const char *) ptr->name, "RDF"))
275     {
276         ptr = ptr->children;
277
278         while (ptr && ptr->type != XML_ELEMENT_NODE)
279             ptr = ptr->next;
280         if (ptr && ptr->type == XML_ELEMENT_NODE &&
281             !strcmp((const char *) ptr->name, "Description"))
282         {
283             xmlNode *p = ptr->children;
284
285             while (p && p->type != XML_ELEMENT_NODE)
286                 p = p->next;
287             if (p && p->type == XML_ELEMENT_NODE &&
288                 !strcmp((const char *) p->name, "type"))
289             { /* SELECT RESULT */
290                 for (ptr = ptr->children; ptr; ptr = ptr->next)
291                     if (ptr->type == XML_ELEMENT_NODE &&
292                         !strcmp((const char *) ptr->name, "solution"))
293                     {
294                         if (cur++ == pos)
295                             break;
296                     }
297             }
298             else
299             {   /* CONSTRUCT result */
300                 for (; ptr; ptr = ptr->next)
301                     if (ptr->type == XML_ELEMENT_NODE &&
302                         !strcmp((const char *) ptr->name, "Description"))
303                     {
304                         if (cur++ == pos)
305                             break;
306                     }
307             }
308         }
309     }
310     else
311     {
312         for (; ptr; ptr = ptr->next)
313             if (ptr->type == XML_ELEMENT_NODE &&
314                 !strcmp((const char *) ptr->name, "sparql"))
315                 break;
316         if (ptr)
317         {
318             for (ptr = ptr->children; ptr; ptr = ptr->next)
319                 if (ptr->type == XML_ELEMENT_NODE &&
320                     !strcmp((const char *) ptr->name, "results"))
321                     break;
322         }
323         if (ptr)
324         {
325             for (ptr = ptr->children; ptr; ptr = ptr->next)
326                 if (ptr->type == XML_ELEMENT_NODE &&
327                     !strcmp((const char *) ptr->name, "result"))
328                 {
329                     if (cur++ == pos)
330                         break;
331                 }
332         }
333     }
334     if (sz)
335         *sz = cur;
336     return ptr;
337 }
338
339 Z_Records *yf::SPARQL::Session::fetch(
340     FrontendSetPtr fset,
341     ODR odr, Odr_oid *preferredRecordSyntax,
342     Z_ElementSetNames *esn,
343     int start, int number, int &error_code, std::string &addinfo,
344     int *number_returned, int *next_position)
345 {
346     Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records));
347     rec->which = Z_Records_DBOSD;
348     rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *)
349         odr_malloc(odr, sizeof(Z_NamePlusRecordList));
350     rec->u.databaseOrSurDiagnostics->records = (Z_NamePlusRecord **)
351         odr_malloc(odr, sizeof(Z_NamePlusRecord *) * number);
352     int i;
353     for (i = 0; i < number; i++)
354     {
355         rec->u.databaseOrSurDiagnostics->records[i] = (Z_NamePlusRecord *)
356             odr_malloc(odr, sizeof(Z_NamePlusRecord));
357         Z_NamePlusRecord *npr = rec->u.databaseOrSurDiagnostics->records[i];
358         npr->databaseName = odr_strdup(odr, fset->db.c_str());
359         npr->which = Z_NamePlusRecord_databaseRecord;
360
361         xmlNode *node = get_result(fset->doc, 0, start - 1 + i);
362         if (!node)
363             break;
364         assert(node->type == XML_ELEMENT_NODE);
365         xmlNode *tmp = xmlCopyNode(node, 1);
366         xmlBufferPtr buf = xmlBufferCreate();
367         xmlNodeDump(buf, tmp->doc, tmp, 0, 0);
368         npr->u.databaseRecord =
369             z_ext_record_xml(odr, (const char *) buf->content, buf->use);
370         xmlFreeNode(tmp);
371         xmlBufferFree(buf);
372     }
373     rec->u.databaseOrSurDiagnostics->num_records = i;
374     *number_returned = i;
375     if (start + number > fset->hits)
376         *next_position = 0;
377     else
378         *next_position = start + number;
379     return rec;
380 }
381
382 Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
383                                         Z_APDU *apdu_req,
384                                         mp::odr &odr,
385                                         const char *sparql_query,
386                                         const char *uri)
387 {
388     Z_SearchRequest *req = apdu_req->u.searchRequest;
389     Package http_package(package.session(), package.origin());
390
391     http_package.copy_filter(package);
392     Z_GDU *gdu = z_get_HTTP_Request_uri(odr, uri, 0, 1);
393
394     z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
395                       "Content-Type", "application/x-www-form-urlencoded");
396     z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
397                       "Accept", "application/rdf+xml");
398     const char *names[2];
399     names[0] = "query";
400     names[1] = 0;
401     const char *values[1];
402     values[0] = sparql_query;
403     char *path = 0;
404     yaz_array_to_uri(&path, odr, (char **) names, (char **) values);
405
406     gdu->u.HTTP_Request->content_buf = path;
407     gdu->u.HTTP_Request->content_len = strlen(path);
408
409     yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query);
410
411     http_package.request() = gdu;
412     http_package.move();
413
414     Z_GDU *gdu_resp = http_package.response().get();
415     Z_APDU *apdu_res = 0;
416     if (!gdu_resp || gdu_resp->which != Z_GDU_HTTP_Response)
417     {
418         yaz_log(YLOG_LOG, "sparql: no HTTP response");
419         apdu_res = odr.create_searchResponse(apdu_req,
420                                              YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
421                                              "no HTTP response from backend");
422     }
423     else if (gdu_resp->u.HTTP_Response->code != 200)
424     {
425         mp::wrbuf w;
426
427         wrbuf_printf(w, "sparql: HTTP error %d from backend",
428                      gdu_resp->u.HTTP_Response->code);
429         apdu_res = odr.create_searchResponse(apdu_req,
430                                              YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
431                                              w.c_str());
432     }
433     else
434     {
435         Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
436         FrontendSetPtr fset(new FrontendSet);
437
438         fset->doc = xmlParseMemory(resp->content_buf, resp->content_len);
439         fset->db = req->databaseNames[0];
440         if (!fset->doc)
441             apdu_res = odr.create_searchResponse(apdu_req,
442                                              YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
443                                              "invalid XML from backendbackend");
444         else
445         {
446             Z_Records *records = 0;
447             int number_returned = 0;
448             int next_position = 0;
449             int error_code = 0;
450             std::string addinfo;
451
452             get_result(fset->doc, &fset->hits, -1);
453             m_frontend_sets[req->resultSetName] = fset;
454
455             Odr_int number = 0;
456             const char *element_set_name = 0;
457             mp::util::piggyback_sr(req, fset->hits, number, &element_set_name);
458             if (number)
459             {
460                 Z_ElementSetNames *esn;
461
462                 if (number > *req->smallSetUpperBound)
463                     esn = req->mediumSetElementSetNames;
464                 else
465                     esn = req->smallSetElementSetNames;
466                 records = fetch(fset,
467                                 odr, req->preferredRecordSyntax, esn,
468                                 1, number,
469                                 error_code, addinfo,
470                                 &number_returned,
471                                 &next_position);
472             }
473             if (error_code)
474             {
475                 apdu_res =
476                     odr.create_searchResponse(
477                         apdu_req, error_code, addinfo.c_str());
478             }
479             else
480             {
481                 apdu_res =
482                     odr.create_searchResponse(apdu_req, 0, 0);
483                 Z_SearchResponse *resp = apdu_res->u.searchResponse;
484                 *resp->resultCount = fset->hits;
485                 *resp->numberOfRecordsReturned = number_returned;
486                 *resp->nextResultSetPosition = next_position;
487                 resp->records = records;
488             }
489         }
490     }
491     return apdu_res;
492 }
493
494 void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
495 {
496     mp::odr odr;
497     Z_APDU *apdu_res = 0;
498     if (apdu_req->which == Z_APDU_initRequest)
499     {
500         apdu_res = odr.create_initResponse(apdu_req, 0, 0);
501         Z_InitRequest *req = apdu_req->u.initRequest;
502         Z_InitResponse *resp = apdu_res->u.initResponse;
503
504         resp->implementationName = odr_strdup(odr, "sparql");
505         if (ODR_MASK_GET(req->options, Z_Options_namedResultSets))
506             m_support_named_result_sets = true;
507         int i;
508         static const int masks[] = {
509             Z_Options_search, Z_Options_present,
510             Z_Options_namedResultSets, -1
511         };
512         for (i = 0; masks[i] != -1; i++)
513             if (ODR_MASK_GET(req->options, masks[i]))
514                 ODR_MASK_SET(resp->options, masks[i]);
515         static const int versions[] = {
516             Z_ProtocolVersion_1,
517             Z_ProtocolVersion_2,
518             Z_ProtocolVersion_3,
519             -1
520         };
521         for (i = 0; versions[i] != -1; i++)
522             if (ODR_MASK_GET(req->protocolVersion, versions[i]))
523                 ODR_MASK_SET(resp->protocolVersion, versions[i]);
524             else
525                 break;
526         *resp->preferredMessageSize = *req->preferredMessageSize;
527         *resp->maximumRecordSize = *req->maximumRecordSize;
528     }
529     else if (apdu_req->which == Z_APDU_close)
530     {
531         apdu_res = odr.create_close(apdu_req,
532                                     Z_Close_finished, 0);
533         package.session().close();
534     }
535     else if (apdu_req->which == Z_APDU_searchRequest)
536     {
537         Z_SearchRequest *req = apdu_req->u.searchRequest;
538
539         FrontendSets::iterator fset_it =
540             m_frontend_sets.find(req->resultSetName);
541         if (fset_it != m_frontend_sets.end())
542         {
543             // result set already exist
544             // if replace indicator is off: we return diagnostic if
545             // result set already exist.
546             if (*req->replaceIndicator == 0)
547             {
548                 Z_APDU *apdu =
549                     odr.create_searchResponse(
550                         apdu_req,
551                         YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF,
552                         0);
553                 package.response() = apdu_res;
554             }
555             m_frontend_sets.erase(fset_it);
556         }
557         if (req->query->which != Z_Query_type_1)
558         {
559             apdu_res = odr.create_searchResponse(
560                 apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
561         }
562         else if (req->num_databaseNames != 1)
563         {
564             apdu_res = odr.create_searchResponse(
565                 apdu_req,
566                 YAZ_BIB1_ACCESS_TO_SPECIFIED_DATABASE_DENIED, 0);
567         }
568         else
569         {
570             std::string db = req->databaseNames[0];
571             std::list<ConfPtr>::const_iterator it;
572
573             it = m_sparql->db_conf.begin();
574             for (; it != m_sparql->db_conf.end(); it++)
575                 if (yaz_match_glob((*it)->db.c_str(), db.c_str()))
576                     break;
577             if (it == m_sparql->db_conf.end())
578             {
579                 apdu_res = odr.create_searchResponse(
580                     apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str());
581             }
582             else
583             {
584                 WRBUF addinfo_wr = wrbuf_alloc();
585                 WRBUF sparql_wr = wrbuf_alloc();
586                 int error =
587                     yaz_sparql_from_rpn_wrbuf((*it)->s,
588                                               addinfo_wr, sparql_wr,
589                                               req->query->u.type_1);
590                 if (error)
591                 {
592                     apdu_res = odr.create_searchResponse(
593                         apdu_req, error,
594                         wrbuf_len(addinfo_wr) ?
595                         wrbuf_cstr(addinfo_wr) : 0);
596                 }
597                 else
598                 {
599                     apdu_res = run_sparql(package, apdu_req, odr,
600                                           wrbuf_cstr(sparql_wr),
601                                           (*it)->uri.c_str());
602                 }
603                 wrbuf_destroy(addinfo_wr);
604                 wrbuf_destroy(sparql_wr);
605             }
606         }
607     }
608     else if (apdu_req->which == Z_APDU_presentRequest)
609     {
610         Z_PresentRequest *req = apdu_req->u.presentRequest;
611         FrontendSets::iterator fset_it =
612             m_frontend_sets.find(req->resultSetId);
613         if (fset_it == m_frontend_sets.end())
614         {
615             apdu_res =
616                 odr.create_presentResponse(
617                     apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
618                     req->resultSetId);
619             package.response() = apdu_res;
620             return;
621         }
622         int number_returned = 0;
623         int next_position = 0;
624         int error_code = 0;
625         std::string addinfo;
626         Z_ElementSetNames *esn = 0;
627         if (req->recordComposition)
628         {
629             if (req->recordComposition->which == Z_RecordComp_simple)
630                 esn = req->recordComposition->u.simple;
631             else
632             {
633                 apdu_res =
634                     odr.create_presentResponse(
635                         apdu_req,
636                         YAZ_BIB1_ONLY_A_SINGLE_ELEMENT_SET_NAME_SUPPORTED,
637                         0);
638                 package.response() = apdu_res;
639                 return;
640             }
641         }
642         Z_Records *records = fetch(
643             fset_it->second,
644             odr, req->preferredRecordSyntax, esn,
645             *req->resultSetStartPoint, *req->numberOfRecordsRequested,
646             error_code, addinfo,
647             &number_returned,
648             &next_position);
649         if (error_code)
650         {
651             apdu_res =
652                 odr.create_presentResponse(apdu_req, error_code,
653                                            addinfo.c_str());
654         }
655         else
656         {
657             apdu_res =
658                 odr.create_presentResponse(apdu_req, 0, 0);
659             Z_PresentResponse *resp = apdu_res->u.presentResponse;
660             resp->records = records;
661             *resp->numberOfRecordsReturned = number_returned;
662             *resp->nextResultSetPosition = next_position;
663         }
664     }
665     else
666     {
667         apdu_res = odr.create_close(apdu_req,
668                                     Z_Close_protocolError,
669                                     "sparql: unhandled APDU");
670         package.session().close();
671     }
672
673     assert(apdu_res);
674     package.response() = apdu_res;
675 }
676
677 void yf::SPARQL::process(mp::Package &package) const
678 {
679     Z_APDU *apdu;
680     SessionPtr p = get_session(package, &apdu);
681     if (p && apdu)
682     {
683         p->handle_z(package, apdu);
684     }
685     else
686         package.move();
687     release_session(package);
688 }
689
690 static mp::filter::Base* filter_creator()
691 {
692     return new mp::filter::SPARQL;
693 }
694
// Filter descriptor exported with C linkage so that its symbol name is not
// mangled. It carries the filter name "sparql" and the factory function.
// NOTE(review): the leading 0 is presumably a version/ABI field -- confirm
// against the definition of metaproxy_1_filter_struct.
extern "C" {
    struct metaproxy_1_filter_struct metaproxy_1_filter_sparql = {
        0,
        "sparql",
        filter_creator
    };
}
702
703
704 /*
705  * Local variables:
706  * c-basic-offset: 4
707  * c-file-style: "Stroustrup"
708  * indent-tabs-mode: nil
709  * End:
710  * vim: shiftwidth=4 tabstop=8 expandtab
711  */
712