Hit count
[mp-sparql-moved-to-github.git] / src / filter_sparql.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include <metaproxy/package.hpp>
20 #include <metaproxy/util.hpp>
21 #include <yaz/log.h>
22 #include <yaz/srw.h>
23 #include <yaz/diagbib1.h>
24 #include <yaz/match_glob.h>
25 #include <boost/scoped_ptr.hpp>
26 #include <boost/thread/mutex.hpp>
27 #include <boost/thread/condition.hpp>
28 #include "sparql.h"
29
30 #include <yaz/zgdu.h>
31
32 namespace mp = metaproxy_1;
33 namespace yf = mp::filter;
34
35 namespace metaproxy_1 {
36     namespace filter {
37         class SPARQL : public Base {
38             class Session;
39             class Rep;
40             class Conf;
41             class FrontendSet;
42
43             typedef boost::shared_ptr<Session> SessionPtr;
44             typedef boost::shared_ptr<Conf> ConfPtr;
45
46             typedef boost::shared_ptr<FrontendSet> FrontendSetPtr;
47             typedef std::map<std::string,FrontendSetPtr> FrontendSets;
48         public:
49             SPARQL();
50             ~SPARQL();
51             void process(metaproxy_1::Package & package) const;
52             void configure(const xmlNode * ptr, bool test_only,
53                            const char *path);
54             SessionPtr get_session(Package &package, Z_APDU **apdu) const;
55             void release_session(Package &package) const;
56             boost::scoped_ptr<Rep> m_p;
57             std::list<ConfPtr> db_conf;
58         };
59         class SPARQL::Conf {
60         public:
61             std::string db;
62             std::string uri;
63             yaz_sparql_t s;
64             ~Conf();
65         };
66         class SPARQL::Rep {
67             friend class SPARQL;
68             boost::condition m_cond_session_ready;
69             boost::mutex m_mutex;
70             std::map<mp::Session,SessionPtr> m_clients;
71         };
72         class SPARQL::FrontendSet {
73         public:
74             FrontendSet();
75             ~FrontendSet();
76         private:
77             friend class Session;
78             Odr_int hits;
79             xmlDoc *doc;
80         };
81         class SPARQL::Session {
82         public:
83             Session(const SPARQL *);
84             ~Session();
85             void handle_z(Package &package, Z_APDU *apdu);
86             Z_APDU *run_sparql(mp::Package &package,
87                                Z_APDU *apdu_req,
88                                mp::odr &odr,
89                                const char *sparql_query,
90                                const char *uri);
91             bool m_in_use;
92         private:
93             bool m_support_named_result_sets;
94             FrontendSets m_frontend_sets;
95             const SPARQL *m_sparql;
96         };
97     }
98 }
99
100 yf::SPARQL::FrontendSet::~FrontendSet()
101 {
102     if (doc)
103         xmlFreeDoc(doc);
104 }
105
106 yf::SPARQL::FrontendSet::FrontendSet()
107 {
108     doc = 0;
109 }
110
111 yf::SPARQL::SPARQL() : m_p(new Rep)
112 {
113 }
114
115 yf::SPARQL::~SPARQL()
116 {
117 }
118
119 void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only,
120                            const char *path)
121 {
122     const xmlNode *ptr = xmlnode->children;
123
124     for (; ptr; ptr = ptr->next)
125     {
126         if (ptr->type != XML_ELEMENT_NODE)
127             continue;
128         if (!strcmp((const char *) ptr->name, "db"))
129         {
130             yaz_sparql_t s = yaz_sparql_create();
131             ConfPtr conf(new Conf);
132             conf->s = s;
133
134             const struct _xmlAttr *attr;
135             for (attr = ptr->properties; attr; attr = attr->next)
136             {
137                 if (!strcmp((const char *) attr->name, "path"))
138                     conf->db = mp::xml::get_text(attr->children);
139                 else if (!strcmp((const char *) attr->name, "uri"))
140                     conf->uri = mp::xml::get_text(attr->children);
141                 else
142                     throw mp::filter::FilterException(
143                         "Bad attribute " + std::string((const char *)
144                                                        attr->name));
145             }
146             xmlNode *p = ptr->children;
147             for (; p; p = p->next)
148             {
149                 if (p->type != XML_ELEMENT_NODE)
150                     continue;
151                 std::string name = (const char *) p->name;
152                 const struct _xmlAttr *attr;
153                 for (attr = p->properties; attr; attr = attr->next)
154                 {
155                     if (!strcmp((const char *) attr->name, "type"))
156                     {
157                         name.append(".");
158                         name.append(mp::xml::get_text(attr->children));
159                     }
160                     else
161                         throw mp::filter::FilterException(
162                             "Bad attribute " + std::string((const char *)
163                                                            attr->name));
164                 }
165                 std::string value = mp::xml::get_text(p);
166                 if (yaz_sparql_add_pattern(s, name.c_str(), value.c_str()))
167                 {
168                     throw mp::filter::FilterException(
169                         "Bad SPARQL config " + name);
170                 }
171             }
172             if (!conf->uri.length())
173             {
174                 throw mp::filter::FilterException("Missing uri");
175             }
176             if (!conf->db.length())
177             {
178                 throw mp::filter::FilterException("Missing path");
179             }
180             db_conf.push_back(conf);
181         }
182         else
183         {
184             throw mp::filter::FilterException
185                 ("Bad element "
186                  + std::string((const char *) ptr->name)
187                  + " in sparql filter");
188         }
189     }
190 }
191
192 yf::SPARQL::Conf::~Conf()
193 {
194     yaz_sparql_destroy(s);
195 }
196
197 yf::SPARQL::Session::Session(const SPARQL *sparql) :
198     m_in_use(true),
199     m_support_named_result_sets(false),
200     m_sparql(sparql)
201 {
202 }
203
204 yf::SPARQL::Session::~Session()
205 {
206 }
207
208 yf::SPARQL::SessionPtr yf::SPARQL::get_session(Package & package,
209                                                Z_APDU **apdu) const
210 {
211     SessionPtr ptr0;
212
213     Z_GDU *gdu = package.request().get();
214
215     boost::mutex::scoped_lock lock(m_p->m_mutex);
216
217     std::map<mp::Session,SPARQL::SessionPtr>::iterator it;
218
219     if (gdu && gdu->which == Z_GDU_Z3950)
220         *apdu = gdu->u.z3950;
221     else
222         *apdu = 0;
223
224     while (true)
225     {
226         it = m_p->m_clients.find(package.session());
227         if (it == m_p->m_clients.end())
228             break;
229         if (!it->second->m_in_use)
230         {
231             it->second->m_in_use = true;
232             return it->second;
233         }
234         m_p->m_cond_session_ready.wait(lock);
235     }
236     if (!*apdu)
237         return ptr0;
238
239     // new Z39.50 session ..
240     SessionPtr p(new Session(this));
241     m_p->m_clients[package.session()] = p;
242     return p;
243 }
244
245 void yf::SPARQL::release_session(Package &package) const
246 {
247     boost::mutex::scoped_lock lock(m_p->m_mutex);
248     std::map<mp::Session,SessionPtr>::iterator it;
249
250     it = m_p->m_clients.find(package.session());
251     if (it != m_p->m_clients.end())
252     {
253         it->second->m_in_use = false;
254
255         if (package.session().is_closed())
256             m_p->m_clients.erase(it);
257         m_p->m_cond_session_ready.notify_all();
258     }
259 }
260
261 static const xmlNode *get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos)
262 {
263     const xmlNode *ptr = xmlDocGetRootElement(doc);
264     Odr_int cur = 0;
265     for (; ptr; ptr = ptr->next)
266         if (ptr->type == XML_ELEMENT_NODE &&
267             !strcmp((const char *) ptr->name, "sparql"))
268             break;
269     if (ptr)
270     {
271         for (ptr = ptr->children; ptr; ptr = ptr->next)
272             if (ptr->type == XML_ELEMENT_NODE &&
273                 !strcmp((const char *) ptr->name, "results"))
274                 break;
275     }
276     if (ptr)
277     {
278         for (ptr = ptr->children; ptr; ptr = ptr->next)
279             if (ptr->type == XML_ELEMENT_NODE &&
280                 !strcmp((const char *) ptr->name, "result"))
281             {
282                 if (cur++ == pos)
283                     break;
284             }
285     }
286     if (sz)
287         *sz = cur;
288     return ptr;
289 }
290
291 Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
292                                         Z_APDU *apdu_req,
293                                         mp::odr &odr,
294                                         const char *sparql_query,
295                                         const char *uri)
296 {
297     Package http_package(package.session(), package.origin());
298
299     http_package.copy_filter(package);
300     Z_GDU *gdu = z_get_HTTP_Request_uri(odr, uri, 0, 1);
301
302     z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
303                       "Content-Type", "application/x-www-form-urlencoded");
304     const char *names[2];
305     names[0] = "query";
306     names[1] = 0;
307     const char *values[1];
308     values[0] = sparql_query;
309     char *path = 0;
310     yaz_array_to_uri(&path, odr, (char **) names, (char **) values);
311
312     gdu->u.HTTP_Request->content_buf = path;
313     gdu->u.HTTP_Request->content_len = strlen(path);
314
315     yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query);
316
317     http_package.request() = gdu;
318     http_package.move();
319
320     Z_GDU *gdu_resp = http_package.response().get();
321     Z_APDU *apdu_res = 0;
322     if (gdu_resp && gdu_resp->which == Z_GDU_HTTP_Response)
323     {
324         Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
325         FrontendSetPtr fset(new FrontendSet);
326
327         fset->doc = xmlParseMemory(resp->content_buf, resp->content_len);
328         if (!fset->doc)
329             apdu_res = odr.create_searchResponse(apdu_req,
330                                              YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
331                                              "invalid XML from backendbackend");
332         else
333         {
334             apdu_res = odr.create_searchResponse(apdu_req, 0, 0);
335             get_result(fset->doc, apdu_res->u.searchResponse->resultCount,
336                        -1);
337             m_frontend_sets[apdu_req->u.searchRequest->resultSetName] = fset;
338         }
339     }
340     else
341     {
342         yaz_log(YLOG_LOG, "sparql: no HTTP response");
343         apdu_res = odr.create_searchResponse(apdu_req,
344                                              YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
345                                              "no HTTP response from backend");
346     }
347     return apdu_res;
348 }
349
350 void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
351 {
352     mp::odr odr;
353     Z_APDU *apdu_res = 0;
354     if (apdu_req->which == Z_APDU_initRequest)
355     {
356         apdu_res = odr.create_initResponse(apdu_req, 0, 0);
357         Z_InitRequest *req = apdu_req->u.initRequest;
358         Z_InitResponse *resp = apdu_res->u.initResponse;
359
360         resp->implementationName = odr_strdup(odr, "sparql");
361         if (ODR_MASK_GET(req->options, Z_Options_namedResultSets))
362             m_support_named_result_sets = true;
363         int i;
364         static const int masks[] = {
365             Z_Options_search, Z_Options_present,
366             Z_Options_namedResultSets, -1
367         };
368         for (i = 0; masks[i] != -1; i++)
369             if (ODR_MASK_GET(req->options, masks[i]))
370                 ODR_MASK_SET(resp->options, masks[i]);
371         static const int versions[] = {
372             Z_ProtocolVersion_1,
373             Z_ProtocolVersion_2,
374             Z_ProtocolVersion_3,
375             -1
376         };
377         for (i = 0; versions[i] != -1; i++)
378             if (ODR_MASK_GET(req->protocolVersion, versions[i]))
379                 ODR_MASK_SET(resp->protocolVersion, versions[i]);
380             else
381                 break;
382         *resp->preferredMessageSize = *req->preferredMessageSize;
383         *resp->maximumRecordSize = *req->maximumRecordSize;
384     }
385     else if (apdu_req->which == Z_APDU_close)
386     {
387         apdu_res = odr.create_close(apdu_req,
388                                     Z_Close_finished, 0);
389         package.session().close();
390     }
391     else if (apdu_req->which == Z_APDU_searchRequest)
392     {
393         Z_SearchRequest *req = apdu_req->u.searchRequest;
394
395         FrontendSets::iterator fset_it =
396             m_frontend_sets.find(req->resultSetName);
397         if (fset_it != m_frontend_sets.end())
398         {
399             // result set already exist
400             // if replace indicator is off: we return diagnostic if
401             // result set already exist.
402             if (*req->replaceIndicator == 0)
403             {
404                 Z_APDU *apdu =
405                     odr.create_searchResponse(
406                         apdu_req,
407                         YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF,
408                         0);
409                 package.response() = apdu_res;
410             }
411             m_frontend_sets.erase(fset_it);
412         }
413         if (req->query->which != Z_Query_type_1)
414         {
415             apdu_res = odr.create_searchResponse(
416                 apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
417         }
418         else if (req->num_databaseNames != 1)
419         {
420             apdu_res = odr.create_searchResponse(
421                 apdu_req,
422                 YAZ_BIB1_ACCESS_TO_SPECIFIED_DATABASE_DENIED, 0);
423         }
424         else
425         {
426             std::string db = req->databaseNames[0];
427             std::list<ConfPtr>::const_iterator it;
428
429             it = m_sparql->db_conf.begin();
430             for (; it != m_sparql->db_conf.end(); it++)
431                 if (yaz_match_glob((*it)->db.c_str(), db.c_str()))
432                     break;
433             if (it == m_sparql->db_conf.end())
434             {
435                 apdu_res = odr.create_searchResponse(
436                     apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str());
437             }
438             else
439             {
440                 WRBUF addinfo_wr = wrbuf_alloc();
441                 WRBUF sparql_wr = wrbuf_alloc();
442                 int error =
443                     yaz_sparql_from_rpn_wrbuf((*it)->s,
444                                               addinfo_wr, sparql_wr,
445                                               req->query->u.type_1);
446                 if (error)
447                 {
448                     apdu_res = odr.create_searchResponse(
449                         apdu_req, error,
450                         wrbuf_len(addinfo_wr) ?
451                         wrbuf_cstr(addinfo_wr) : 0);
452                 }
453                 else
454                 {
455                     apdu_res = run_sparql(package, apdu_req, odr,
456                                           wrbuf_cstr(sparql_wr),
457                                           (*it)->uri.c_str());
458                 }
459                 wrbuf_destroy(addinfo_wr);
460                 wrbuf_destroy(sparql_wr);
461             }
462         }
463     }
464     else
465     {
466         apdu_res = odr.create_close(apdu_req,
467                                     Z_Close_protocolError,
468                                     "sparql: unhandled APDU");
469         package.session().close();
470     }
471
472     assert(apdu_res);
473     package.response() = apdu_res;
474 }
475
476 void yf::SPARQL::process(mp::Package &package) const
477 {
478     Z_APDU *apdu;
479     SessionPtr p = get_session(package, &apdu);
480     if (p && apdu)
481     {
482         p->handle_z(package, apdu);
483     }
484     else
485         package.move();
486     release_session(package);
487 }
488
489 static mp::filter::Base* filter_creator()
490 {
491     return new mp::filter::SPARQL;
492 }
493
494 extern "C" {
495     struct metaproxy_1_filter_struct metaproxy_1_filter_sparql = {
496         0,
497         "sparql",
498         filter_creator
499     };
500 }
501
502
503 /*
504  * Local variables:
505  * c-basic-offset: 4
506  * c-file-style: "Stroustrup"
507  * indent-tabs-mode: nil
508  * End:
509  * vim: shiftwidth=4 tabstop=8 expandtab
510  */
511