Result set handling
[mp-sparql-moved-to-github.git] / src / filter_sparql.cpp
1 /* This file is part of Metaproxy.
2    Copyright (C) Index Data
3
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17 */
18
19 #include <metaproxy/package.hpp>
20 #include <metaproxy/util.hpp>
21 #include <yaz/log.h>
22 #include <yaz/srw.h>
23 #include <yaz/diagbib1.h>
24 #include <yaz/match_glob.h>
25 #include <boost/scoped_ptr.hpp>
26 #include <boost/thread/mutex.hpp>
27 #include <boost/thread/condition.hpp>
28 #include "sparql.h"
29
30 #include <yaz/zgdu.h>
31
32 namespace mp = metaproxy_1;
33 namespace yf = mp::filter;
34
35 namespace metaproxy_1 {
36     namespace filter {
37         class SPARQL : public Base {
38             class Session;
39             class Rep;
40             class Conf;
41             class FrontendSet;
42
43             typedef boost::shared_ptr<Session> SessionPtr;
44             typedef boost::shared_ptr<Conf> ConfPtr;
45
46             typedef boost::shared_ptr<FrontendSet> FrontendSetPtr;
47             typedef std::map<std::string,FrontendSetPtr> FrontendSets;
48         public:
49             SPARQL();
50             ~SPARQL();
51             void process(metaproxy_1::Package & package) const;
52             void configure(const xmlNode * ptr, bool test_only,
53                            const char *path);
54             SessionPtr get_session(Package &package, Z_APDU **apdu) const;
55             void release_session(Package &package) const;
56             boost::scoped_ptr<Rep> m_p;
57             std::list<ConfPtr> db_conf;
58         };
59         class SPARQL::Conf {
60         public:
61             std::string db;
62             std::string uri;
63             yaz_sparql_t s;
64             ~Conf();
65         };
66         class SPARQL::Rep {
67             friend class SPARQL;
68             boost::condition m_cond_session_ready;
69             boost::mutex m_mutex;
70             std::map<mp::Session,SessionPtr> m_clients;
71         };
72         class SPARQL::FrontendSet {
73         public:
74             FrontendSet();
75             ~FrontendSet();
76         private:
77             friend class Session;
78             Odr_int hits;
79             xmlDoc *doc;
80         };
81         class SPARQL::Session {
82         public:
83             Session(const SPARQL *);
84             ~Session();
85             void handle_z(Package &package, Z_APDU *apdu);
86             Z_APDU *run_sparql(mp::Package &package,
87                                Z_APDU *apdu_req,
88                                mp::odr &odr,
89                                const char *sparql_query,
90                                const char *uri);
91             bool m_in_use;
92         private:
93             bool m_support_named_result_sets;
94             FrontendSets m_frontend_sets;
95             const SPARQL *m_sparql;
96         };
97     }
98 }
99
100 yf::SPARQL::FrontendSet::~FrontendSet()
101 {
102     if (doc)
103         xmlFreeDoc(doc);
104 }
105
106 yf::SPARQL::FrontendSet::FrontendSet()
107 {
108     doc = 0;
109 }
110
111 yf::SPARQL::SPARQL() : m_p(new Rep)
112 {
113 }
114
115 yf::SPARQL::~SPARQL()
116 {
117 }
118
119 void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only,
120                            const char *path)
121 {
122     const xmlNode *ptr = xmlnode->children;
123
124     for (; ptr; ptr = ptr->next)
125     {
126         if (ptr->type != XML_ELEMENT_NODE)
127             continue;
128         if (!strcmp((const char *) ptr->name, "db"))
129         {
130             yaz_sparql_t s = yaz_sparql_create();
131             ConfPtr conf(new Conf);
132             conf->s = s;
133
134             const struct _xmlAttr *attr;
135             for (attr = ptr->properties; attr; attr = attr->next)
136             {
137                 if (!strcmp((const char *) attr->name, "path"))
138                     conf->db = mp::xml::get_text(attr->children);
139                 else if (!strcmp((const char *) attr->name, "uri"))
140                     conf->uri = mp::xml::get_text(attr->children);
141                 else
142                     throw mp::filter::FilterException(
143                         "Bad attribute " + std::string((const char *)
144                                                        attr->name));
145             }
146             xmlNode *p = ptr->children;
147             for (; p; p = p->next)
148             {
149                 if (p->type != XML_ELEMENT_NODE)
150                     continue;
151                 std::string name = (const char *) p->name;
152                 const struct _xmlAttr *attr;
153                 for (attr = p->properties; attr; attr = attr->next)
154                 {
155                     if (!strcmp((const char *) attr->name, "type"))
156                     {
157                         name.append(".");
158                         name.append(mp::xml::get_text(attr->children));
159                     }
160                     else
161                         throw mp::filter::FilterException(
162                             "Bad attribute " + std::string((const char *)
163                                                            attr->name));
164                 }
165                 std::string value = mp::xml::get_text(p);
166                 if (yaz_sparql_add_pattern(s, name.c_str(), value.c_str()))
167                 {
168                     throw mp::filter::FilterException(
169                         "Bad SPARQL config " + name);
170                 }
171             }
172             if (!conf->uri.length())
173             {
174                 throw mp::filter::FilterException("Missing uri");
175             }
176             if (!conf->db.length())
177             {
178                 throw mp::filter::FilterException("Missing path");
179             }
180             db_conf.push_back(conf);
181         }
182         else
183         {
184             throw mp::filter::FilterException
185                 ("Bad element "
186                  + std::string((const char *) ptr->name)
187                  + " in sparql filter");
188         }
189     }
190 }
191
192 yf::SPARQL::Conf::~Conf()
193 {
194     yaz_sparql_destroy(s);
195 }
196
197 yf::SPARQL::Session::Session(const SPARQL *sparql) :
198     m_in_use(true),
199     m_support_named_result_sets(false),
200     m_sparql(sparql)
201 {
202 }
203
204 yf::SPARQL::Session::~Session()
205 {
206 }
207
208 yf::SPARQL::SessionPtr yf::SPARQL::get_session(Package & package,
209                                                Z_APDU **apdu) const
210 {
211     SessionPtr ptr0;
212
213     Z_GDU *gdu = package.request().get();
214
215     boost::mutex::scoped_lock lock(m_p->m_mutex);
216
217     std::map<mp::Session,SPARQL::SessionPtr>::iterator it;
218
219     if (gdu && gdu->which == Z_GDU_Z3950)
220         *apdu = gdu->u.z3950;
221     else
222         *apdu = 0;
223
224     while (true)
225     {
226         it = m_p->m_clients.find(package.session());
227         if (it == m_p->m_clients.end())
228             break;
229         if (!it->second->m_in_use)
230         {
231             it->second->m_in_use = true;
232             return it->second;
233         }
234         m_p->m_cond_session_ready.wait(lock);
235     }
236     if (!*apdu)
237         return ptr0;
238
239     // new Z39.50 session ..
240     SessionPtr p(new Session(this));
241     m_p->m_clients[package.session()] = p;
242     return p;
243 }
244
245 void yf::SPARQL::release_session(Package &package) const
246 {
247     boost::mutex::scoped_lock lock(m_p->m_mutex);
248     std::map<mp::Session,SessionPtr>::iterator it;
249
250     it = m_p->m_clients.find(package.session());
251     if (it != m_p->m_clients.end())
252     {
253         it->second->m_in_use = false;
254
255         if (package.session().is_closed())
256             m_p->m_clients.erase(it);
257         m_p->m_cond_session_ready.notify_all();
258     }
259 }
260
261 Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
262                                         Z_APDU *apdu_req,
263                                         mp::odr &odr,
264                                         const char *sparql_query,
265                                         const char *uri)
266 {
267     Package http_package(package.session(), package.origin());
268
269     http_package.copy_filter(package);
270     Z_GDU *gdu = z_get_HTTP_Request_uri(odr, uri, 0, 1);
271
272     z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
273                       "Content-Type", "application/x-www-form-urlencoded");
274     const char *names[2];
275     names[0] = "query";
276     names[1] = 0;
277     const char *values[1];
278     values[0] = sparql_query;
279     char *path = 0;
280     yaz_array_to_uri(&path, odr, (char **) names, (char **) values);
281
282     gdu->u.HTTP_Request->content_buf = path;
283     gdu->u.HTTP_Request->content_len = strlen(path);
284
285     yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query);
286
287     http_package.request() = gdu;
288     http_package.move();
289
290     Z_GDU *gdu_resp = http_package.response().get();
291     Z_APDU *apdu_res = 0;
292     if (gdu_resp && gdu_resp->which == Z_GDU_HTTP_Response)
293     {
294         Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
295         FrontendSetPtr fset(new FrontendSet);
296
297         fset->doc = xmlParseMemory(resp->content_buf, resp->content_len);
298         if (!fset->doc)
299             apdu_res = odr.create_searchResponse(apdu_req,
300                                              YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
301                                              "invalid XML from backendbackend");
302         else
303         {
304             apdu_res = odr.create_searchResponse(apdu_req, 0, 0);
305
306             m_frontend_sets[apdu_req->u.searchRequest->resultSetName] = fset;
307         }
308     }
309     else
310     {
311         yaz_log(YLOG_LOG, "sparql: no HTTP response");
312         apdu_res = odr.create_searchResponse(apdu_req,
313                                              YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
314                                              "no HTTP response from backend");
315     }
316     return apdu_res;
317 }
318
319 void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
320 {
321     mp::odr odr;
322     Z_APDU *apdu_res = 0;
323     if (apdu_req->which == Z_APDU_initRequest)
324     {
325         apdu_res = odr.create_initResponse(apdu_req, 0, 0);
326         Z_InitRequest *req = apdu_req->u.initRequest;
327         Z_InitResponse *resp = apdu_res->u.initResponse;
328
329         resp->implementationName = odr_strdup(odr, "sparql");
330         if (ODR_MASK_GET(req->options, Z_Options_namedResultSets))
331             m_support_named_result_sets = true;
332         int i;
333         static const int masks[] = {
334             Z_Options_search, Z_Options_present,
335             Z_Options_namedResultSets, -1
336         };
337         for (i = 0; masks[i] != -1; i++)
338             if (ODR_MASK_GET(req->options, masks[i]))
339                 ODR_MASK_SET(resp->options, masks[i]);
340         static const int versions[] = {
341             Z_ProtocolVersion_1,
342             Z_ProtocolVersion_2,
343             Z_ProtocolVersion_3,
344             -1
345         };
346         for (i = 0; versions[i] != -1; i++)
347             if (ODR_MASK_GET(req->protocolVersion, versions[i]))
348                 ODR_MASK_SET(resp->protocolVersion, versions[i]);
349             else
350                 break;
351         *resp->preferredMessageSize = *req->preferredMessageSize;
352         *resp->maximumRecordSize = *req->maximumRecordSize;
353     }
354     else if (apdu_req->which == Z_APDU_close)
355     {
356         apdu_res = odr.create_close(apdu_req,
357                                     Z_Close_finished, 0);
358         package.session().close();
359     }
360     else if (apdu_req->which == Z_APDU_searchRequest)
361     {
362         Z_SearchRequest *req = apdu_req->u.searchRequest;
363
364         FrontendSets::iterator fset_it =
365             m_frontend_sets.find(req->resultSetName);
366         if (fset_it != m_frontend_sets.end())
367         {
368             // result set already exist
369             // if replace indicator is off: we return diagnostic if
370             // result set already exist.
371             if (*req->replaceIndicator == 0)
372             {
373                 Z_APDU *apdu =
374                     odr.create_searchResponse(
375                         apdu_req,
376                         YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF,
377                         0);
378                 package.response() = apdu_res;
379             }
380             m_frontend_sets.erase(fset_it);
381         }
382         if (req->query->which != Z_Query_type_1)
383         {
384             apdu_res = odr.create_searchResponse(
385                 apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
386         }
387         else if (req->num_databaseNames != 1)
388         {
389             apdu_res = odr.create_searchResponse(
390                 apdu_req,
391                 YAZ_BIB1_ACCESS_TO_SPECIFIED_DATABASE_DENIED, 0);
392         }
393         else
394         {
395             std::string db = req->databaseNames[0];
396             std::list<ConfPtr>::const_iterator it;
397
398             it = m_sparql->db_conf.begin();
399             for (; it != m_sparql->db_conf.end(); it++)
400                 if (yaz_match_glob((*it)->db.c_str(), db.c_str()))
401                     break;
402             if (it == m_sparql->db_conf.end())
403             {
404                 apdu_res = odr.create_searchResponse(
405                     apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str());
406             }
407             else
408             {
409                 WRBUF addinfo_wr = wrbuf_alloc();
410                 WRBUF sparql_wr = wrbuf_alloc();
411                 int error =
412                     yaz_sparql_from_rpn_wrbuf((*it)->s,
413                                               addinfo_wr, sparql_wr,
414                                               req->query->u.type_1);
415                 if (error)
416                 {
417                     apdu_res = odr.create_searchResponse(
418                         apdu_req, error,
419                         wrbuf_len(addinfo_wr) ?
420                         wrbuf_cstr(addinfo_wr) : 0);
421                 }
422                 else
423                 {
424                     apdu_res = run_sparql(package, apdu_req, odr,
425                                           wrbuf_cstr(sparql_wr),
426                                           (*it)->uri.c_str());
427                 }
428                 wrbuf_destroy(addinfo_wr);
429                 wrbuf_destroy(sparql_wr);
430             }
431         }
432     }
433     else
434     {
435         apdu_res = odr.create_close(apdu_req,
436                                     Z_Close_protocolError,
437                                     "sparql: unhandled APDU");
438         package.session().close();
439     }
440
441     assert(apdu_res);
442     package.response() = apdu_res;
443 }
444
445 void yf::SPARQL::process(mp::Package &package) const
446 {
447     Z_APDU *apdu;
448     SessionPtr p = get_session(package, &apdu);
449     if (p && apdu)
450     {
451         p->handle_z(package, apdu);
452     }
453     else
454         package.move();
455     release_session(package);
456 }
457
458 static mp::filter::Base* filter_creator()
459 {
460     return new mp::filter::SPARQL;
461 }
462
463 extern "C" {
464     struct metaproxy_1_filter_struct metaproxy_1_filter_sparql = {
465         0,
466         "sparql",
467         filter_creator
468     };
469 }
470
471
472 /*
473  * Local variables:
474  * c-basic-offset: 4
475  * c-file-style: "Stroustrup"
476  * indent-tabs-mode: nil
477  * End:
478  * vim: shiftwidth=4 tabstop=8 expandtab
479  */
480