1 /* This file is part of Metaproxy.
2 Copyright (C) Index Data
4 Metaproxy is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
9 Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
19 #include <metaproxy/package.hpp>
20 #include <metaproxy/util.hpp>
23 #include <yaz/diagbib1.h>
24 #include <yaz/match_glob.h>
25 #include <boost/scoped_ptr.hpp>
26 #include <boost/thread/mutex.hpp>
27 #include <boost/thread/condition.hpp>
28 #include <boost/algorithm/string.hpp>
// Short aliases used throughout this file: `mp` names the metaproxy_1
// namespace and `yf` names its nested `filter` namespace.
33 namespace mp = metaproxy_1;
34 namespace yf = mp::filter;
36 namespace metaproxy_1 {
// The sparql filter class, derived from Base.
// NOTE(review): parts of this declaration (forward declarations of the
// nested types, access specifiers, constructor/destructor) are on lines
// that are not visible in this excerpt.
38 class SPARQL : public Base {
// Shared-pointer aliases for the nested helper types.
45 typedef boost::shared_ptr<Session> SessionPtr;
46 typedef boost::shared_ptr<Conf> ConfPtr;
48 typedef boost::shared_ptr<FrontendSet> FrontendSetPtr;
// Front-end result sets keyed by a string; the search/present code in this
// file uses the Z39.50 result set name as the key.
49 typedef std::map<std::string,FrontendSetPtr> FrontendSets;
// process() handles one package, configure() reads the filter's XML
// configuration, and get_session()/release_session() look up and hand
// back the per-connection Session around a call to handle_z().
53 void process(metaproxy_1::Package & package) const;
54 void configure(const xmlNode * ptr, bool test_only,
56 SessionPtr get_session(Package &package, Z_APDU **apdu) const;
57 void release_session(Package &package) const;
// Private implementation object; created with `new Rep` in the constructor.
58 boost::scoped_ptr<Rep> m_p;
// Database configurations; configure() appends one entry per accepted db.
59 std::list<ConfPtr> db_conf;
// NOTE(review): these two members are used through m_p elsewhere in this
// file, so they presumably belong to SPARQL::Rep, whose class header is not
// visible in this excerpt.
// Condition variable that get_session() waits on and release_session()
// signals with notify_all().
71 boost::condition m_cond_session_ready;
// Session objects keyed by the package's mp::Session.
73 std::map<mp::Session,SessionPtr> m_clients;
// One stored backend result.  Only the friend declaration is visible in
// this excerpt; other code in this file reads the `doc` and `conf` members
// of Result objects.
75 class SPARQL::Result {
80 friend class FrontendSet;
// A front-end result set.  Besides the list below, other code in this file
// uses `hits` and `db` members that are declared on lines not visible here.
85 class SPARQL::FrontendSet {
// The Result objects that belong to this result set.
90 std::list<Result> results;
// Per-connection state and Z39.50 request handling for the sparql filter.
// NOTE(review): several declaration lines are missing from this excerpt;
// in particular the parameter lines starting with "ODR odr" below look like
// the tail of the fetch() declaration, whose first lines are not visible.
92 class SPARQL::Session {
// Constructed with a pointer to the owning filter.
94 Session(const SPARQL *);
// Handles one incoming Z39.50 APDU and sets the package response.
96 void handle_z(Package &package, Z_APDU *apdu);
// Runs a SPARQL query for a search request and builds the search response.
97 Z_APDU *search(mp::Package &package,
100 const char *sparql_query,
101 ConfPtr conf, FrontendSetPtr fset);
// Sends a SPARQL query to the configured HTTP backend.
102 int invoke_sparql(mp::Package &package,
103 const char *sparql_query,
110 ODR odr, Odr_oid *preferredRecordSyntax,
111 Z_ElementSetNames *esn,
112 int start, int number, int &error_code, std::string &addinfo,
113 int *number_returned, int *next_position);
// Initialised to false; set to true in handle_z() when the init request
// has the namedResultSets option bit set.
116 bool m_support_named_result_sets;
// Result sets created by search requests, keyed by result set name.
117 FrontendSets m_frontend_sets;
// The owning filter; used to reach its db_conf list.
118 const SPARQL *m_sparql;
// Destructor of SPARQL::Result.
// NOTE(review): the body is not visible in this excerpt.
123 yf::SPARQL::Result::~Result()
// Default constructor of SPARQL::Result.
// NOTE(review): the body is not visible in this excerpt.
129 yf::SPARQL::Result::Result()
// Constructs the filter and allocates its private Rep object into m_p.
134 yf::SPARQL::SPARQL() : m_p(new Rep)
// Destructor of the filter.
// NOTE(review): the body is not visible in this excerpt.
138 yf::SPARQL::~SPARQL()
// Parse the XML configuration of the sparql filter.
//
// Walks the child nodes of `xmlnode`.  Two element names are handled:
//   "defaults" - its "uri" attribute is read into `uri`;
//   "db"       - describes one database: attributes "path", "uri",
//                "schema" and "include" are read, and each child element
//                is registered as a pattern with yaz_sparql_add_pattern().
// A db entry that ends up with a non-empty uri and path is appended to
// db_conf.  Configuration problems are reported by throwing
// mp::filter::FilterException.
// NOTE(review): a number of lines of this function (braces, else/continue
// statements, parts of the messages) are not visible in this excerpt, so
// the comments below describe only what the visible statements do.
142 void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only,
145 const xmlNode *ptr = xmlnode->children;
// Visit every child node; the node type is tested so that only element
// nodes are interpreted.
148 for (; ptr; ptr = ptr->next)
150 if (ptr->type != XML_ELEMENT_NODE)
// <defaults>: scan its attributes.
152 if (!strcmp((const char *) ptr->name, "defaults"))
154 const struct _xmlAttr *attr;
155 for (attr = ptr->properties; attr; attr = attr->next)
157 if (!strcmp((const char *) attr->name, "uri"))
158 uri = mp::xml::get_text(attr->children);
160 throw mp::filter::FilterException(
161 "Bad attribute " + std::string((const char *)
// <db>: create a SPARQL pattern handle and a Conf object for this database.
165 else if (!strcmp((const char *) ptr->name, "db"))
167 yaz_sparql_t s = yaz_sparql_create();
168 ConfPtr conf(new Conf);
172 const struct _xmlAttr *attr;
173 for (attr = ptr->properties; attr; attr = attr->next)
// "path" is stored as the database name (conf->db).
175 if (!strcmp((const char *) attr->name, "path"))
176 conf->db = mp::xml::get_text(attr->children);
177 else if (!strcmp((const char *) attr->name, "uri"))
178 conf->uri = mp::xml::get_text(attr->children);
179 else if (!strcmp((const char *) attr->name, "schema"))
180 conf->schema = mp::xml::get_text(attr->children);
// "include": a list of previously configured db names, separated by
// spaces or tabs, whose patterns are pulled into this db.
181 else if (!strcmp((const char *) attr->name, "include"))
183 std::vector<std::string> dbs;
184 std::string db = mp::xml::get_text(attr->children);
185 boost::split(dbs, db, boost::is_any_of(" \t"));
187 for (i = 0; i < dbs.size(); i++)
// Splitting can yield empty tokens; their length is tested here.
189 if (dbs[i].length() == 0)
// Search the already-configured databases for one with this name.
191 std::list<ConfPtr>::const_iterator it = db_conf.begin();
193 if (it == db_conf.end())
195 throw mp::filter::FilterException(
196 "include db not found: " + dbs[i]);
198 else if (dbs[i].compare((*it)->db) == 0)
200 yaz_sparql_include(s, (*it)->s);
208 throw mp::filter::FilterException(
209 "Bad attribute " + std::string((const char *)
// Child elements of <db>: the element name, with the text of a "type"
// attribute appended, is the pattern name; the element text is its value.
212 xmlNode *p = ptr->children;
213 for (; p; p = p->next)
215 if (p->type != XML_ELEMENT_NODE)
217 std::string name = (const char *) p->name;
218 const struct _xmlAttr *attr;
219 for (attr = p->properties; attr; attr = attr->next)
221 if (!strcmp((const char *) attr->name, "type"))
224 name.append(mp::xml::get_text(attr->children));
227 throw mp::filter::FilterException(
228 "Bad attribute " + std::string((const char *)
231 std::string value = mp::xml::get_text(p);
// A non-zero return from yaz_sparql_add_pattern() is treated as an error.
232 if (yaz_sparql_add_pattern(s, name.c_str(), value.c_str()))
234 throw mp::filter::FilterException(
235 "Bad SPARQL config " + name);
// Both a uri and a path are mandatory for a db entry.
238 if (!conf->uri.length())
240 throw mp::filter::FilterException("Missing uri");
242 if (!conf->db.length())
244 throw mp::filter::FilterException("Missing path");
246 db_conf.push_back(conf);
// Exception raised with a message that names the element and ends in
// " in sparql filter" (the start of the message is not visible here).
250 throw mp::filter::FilterException
252 + std::string((const char *) ptr->name)
253 + " in sparql filter");
// Releases the yaz_sparql_t handle `s` held by this configuration entry.
258 yf::SPARQL::Conf::~Conf()
260 yaz_sparql_destroy(s);
// Constructs a session for the given filter.  Named result set support
// starts out disabled.
// NOTE(review): the remaining member initialisers and the body are not
// visible in this excerpt.
263 yf::SPARQL::Session::Session(const SPARQL *sparql) :
265 m_support_named_result_sets(false),
// Destructor of SPARQL::Session.
// NOTE(review): the body is not visible in this excerpt.
270 yf::SPARQL::Session::~Session()
// Look up, or create, the Session that belongs to the package's session.
//
// When the request is a Z39.50 GDU its APDU is stored in *apdu.  The
// client map is searched while holding m_p->m_mutex; an entry whose
// m_in_use flag is clear gets the flag set, and the code waits on
// m_cond_session_ready (which release_session() signals).  A new Session
// is created and registered in the client map when needed.
// NOTE(review): loop structure, return statements and the handling of
// non-Z39.50 packages are on lines not visible in this excerpt.
274 yf::SPARQL::SessionPtr yf::SPARQL::get_session(Package & package,
279 Z_GDU *gdu = package.request().get();
// Guards m_clients for the rest of the function.
281 boost::mutex::scoped_lock lock(m_p->m_mutex);
283 std::map<mp::Session,SPARQL::SessionPtr>::iterator it;
285 if (gdu && gdu->which == Z_GDU_Z3950)
286 *apdu = gdu->u.z3950;
292 it = m_p->m_clients.find(package.session());
293 if (it == m_p->m_clients.end())
// Claim the session if it is not in use.
295 if (!it->second->m_in_use)
297 it->second->m_in_use = true;
// Wait for release_session() to signal the condition.
300 m_p->m_cond_session_ready.wait(lock);
305 // new Z39.50 session ..
306 SessionPtr p(new Session(this));
307 m_p->m_clients[package.session()] = p;
// Hand back the Session claimed by get_session().
//
// Under m_p->m_mutex: clears the session's m_in_use flag, erases the entry
// from the client map when the package's session has been closed, and
// notifies all waiters on m_cond_session_ready.
311 void yf::SPARQL::release_session(Package &package) const
313 boost::mutex::scoped_lock lock(m_p->m_mutex);
314 std::map<mp::Session,SessionPtr>::iterator it;
316 it = m_p->m_clients.find(package.session());
317 if (it != m_p->m_clients.end())
319 it->second->m_in_use = false;
// A closed session will not be used again, so its state is dropped.
321 if (package.session().is_closed())
322 m_p->m_clients.erase(it);
323 m_p->m_cond_session_ready.notify_all();
// Walk a backend result document and locate individual result records.
//
// Three document layouts are distinguished by element names:
//   - root "RDF" whose first "Description" element has a first child
//     element "type": the "solution" elements under it are the records
//     (marked SELECT RESULT below);
//   - root "RDF" otherwise: the "Description" elements are the records
//     (marked CONSTRUCT result below);
//   - otherwise a "sparql" element containing "results" / "result".
// Matching nodes are copied with xmlCopyNode() into a new document that is
// returned through `ndoc`.
// Callers in this file use it in two ways: get_result(doc, &hits, -1, 0)
// and get_result(doc, 0, position, &ndoc).
// NOTE(review): the counting and position-matching statements, the guards
// on `sz`/`ndoc` and the return statements are on lines not visible in this
// excerpt - presumably `sz` receives the record count and `pos` selects
// the record to copy; confirm against the full source.
327 static bool get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos, xmlDoc **ndoc)
329 xmlNode *ptr = xmlDocGetRootElement(doc);
334 *ndoc = xmlNewDoc(BAD_CAST "1.0");
// RDF/XML layout.
336 if (ptr->type == XML_ELEMENT_NODE &&
337 !strcmp((const char *) ptr->name, "RDF"))
// Copy the root element itself; per the libxml2 documentation the value 2
// copies properties and namespaces, whereas 1 makes a recursive copy.
341 q0 = xmlCopyNode(ptr, 2);
342 xmlDocSetRootElement(*ndoc, q0);
// Advance to the first element node.
346 while (ptr && ptr->type != XML_ELEMENT_NODE)
348 if (ptr && ptr->type == XML_ELEMENT_NODE &&
349 !strcmp((const char *) ptr->name, "Description"))
351 xmlNode *p = ptr->children;
// Advance to the first element child of the Description.
353 while (p && p->type != XML_ELEMENT_NODE)
355 if (p && p->type == XML_ELEMENT_NODE &&
356 !strcmp((const char *) p->name, "type"))
357 { /* SELECT RESULT */
358 for (ptr = ptr->children; ptr; ptr = ptr->next)
359 if (ptr->type == XML_ELEMENT_NODE &&
360 !strcmp((const char *) ptr->name, "solution"))
// Recursive copy of the record.
366 xmlNode *q1 = xmlCopyNode(ptr, 1);
374 { /* CONSTRUCT result */
375 for (; ptr; ptr = ptr->next)
376 if (ptr->type == XML_ELEMENT_NODE &&
377 !strcmp((const char *) ptr->name, "Description"))
383 xmlNode *q1 = xmlCopyNode(ptr, 1);
// SPARQL results XML layout: sparql / results / result.
394 for (; ptr; ptr = ptr->next)
395 if (ptr->type == XML_ELEMENT_NODE &&
396 !strcmp((const char *) ptr->name, "sparql"))
402 q0 = xmlCopyNode(ptr, 2);
403 xmlDocSetRootElement(*ndoc, q0);
405 for (ptr = ptr->children; ptr; ptr = ptr->next)
406 if (ptr->type == XML_ELEMENT_NODE &&
407 !strcmp((const char *) ptr->name, "results"))
// The "results" wrapper is copied with extended = 0.
415 q1 = xmlCopyNode(ptr, 0);
418 for (ptr = ptr->children; ptr; ptr = ptr->next)
419 if (ptr->type == XML_ELEMENT_NODE &&
420 !strcmp((const char *) ptr->name, "result"))
426 xmlNode *q2 = xmlCopyNode(ptr, 1);
// Build the Z39.50 records for a range of a front-end result set.
//
// A Result of fset->results is chosen from the requested element set name
// (`esn`); when none is chosen a non-surrogate diagnostic "specified
// element set name not valid" is placed in the returned Z_Records.
// Otherwise a record list with room for `number` entries is allocated and
// for each index i the record at position start - 1 + i is extracted with
// get_result().  The code contains two ways of producing the record:
//   - a lookup path, where the text of a "uri" or "bnode" element in the
//     extracted record is turned into a second SPARQL query with
//     yaz_sparql_from_uri_wrbuf() and sent through invoke_sparql(); the
//     response text becomes the record;
//   - a direct path, where the extracted XML is dumped and used as is.
// *number_returned and *next_position are filled in before returning;
// error_code and addinfo are reference parameters that callers in this
// file inspect after the call.
// All Z39.50 structures are allocated with odr_malloc()/odr_strdup() on
// `odr`.
// NOTE(review): many lines (braces, else branches, breaks, cleanup of the
// XML documents and buffers) are not visible in this excerpt.
439 Z_Records *yf::SPARQL::Session::fetch(
442 ODR odr, Odr_oid *preferredRecordSyntax,
443 Z_ElementSetNames *esn,
444 int start, int number, int &error_code, std::string &addinfo,
445 int *number_returned, int *next_position)
447 Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records));
448 std::list<Result>::iterator it = fset->results.begin();
449 const char *schema = 0;
450 bool uri_lookup = false;
451 bool fetch_logged = false;
// Only a generic element set name is used as the schema.
452 if (esn && esn->which == Z_ElementSetNames_generic)
453 schema = esn->u.generic;
// Choose the Result to serve records from.
455 for (; it != fset->results.end(); it++)
457 if (yaz_sparql_lookup_schema(it->conf->s, schema))
// Matches when no schema was requested or it equals the configured one.
462 if (!schema || !strcmp(esn->u.generic, it->conf->schema.c_str()))
// No Result was chosen: report the element set name as invalid.
465 if (it == fset->results.end())
467 rec->which = Z_Records_NSD;
468 rec->u.nonSurrogateDiagnostic =
469 zget_DefaultDiagFormat(
471 YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_,
// Allocate the record list with one slot per requested record.
475 rec->which = Z_Records_DBOSD;
476 rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *)
477 odr_malloc(odr, sizeof(Z_NamePlusRecordList));
478 rec->u.databaseOrSurDiagnostics->records = (Z_NamePlusRecord **)
479 odr_malloc(odr, sizeof(Z_NamePlusRecord *) * number);
481 for (i = 0; i < number; i++)
483 rec->u.databaseOrSurDiagnostics->records[i] = (Z_NamePlusRecord *)
484 odr_malloc(odr, sizeof(Z_NamePlusRecord));
485 Z_NamePlusRecord *npr = rec->u.databaseOrSurDiagnostics->records[i];
486 npr->databaseName = odr_strdup(odr, fset->db.c_str());
487 npr->which = Z_NamePlusRecord_databaseRecord;
// `start` is 1-based, get_result() is given a 0-based position.
490 if (!get_result(it->doc, 0, start - 1 + i, &ndoc))
496 xmlNode *ndoc_root = xmlDocGetRootElement(ndoc);
// Look for the element that carries the record's URI.
505 xmlNode *n = ndoc_root;
508 if (n->type == XML_ELEMENT_NODE)
510 //if (!strcmp((const char *) n->name, "uri"))
511 if (!strcmp((const char *) n->name, "uri") ||
512 !strcmp((const char *) n->name, "bnode") )
514 uri = mp::xml::get_text(n->children);
// Diagnostic: system error in presenting records.
524 rec->which = Z_Records_NSD;
525 rec->u.nonSurrogateDiagnostic =
526 zget_DefaultDiagFormat(
528 YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS, 0);
// Build the per-record query from the URI.  Note that this local `addinfo`
// shadows the reference parameter of the same name.
534 mp::wrbuf addinfo, query, w;
535 int error = yaz_sparql_from_uri_wrbuf(it->conf->s,
537 uri.c_str(), schema);
541 { // Log the fetch query only once
542 package.log("sparql", YLOG_LOG,
543 "fetch query: for %s \n%s",
544 uri.c_str(), query.c_str() );
549 package.log("sparql", YLOG_LOG,
550 "fetch uri:%s", uri.c_str() );
552 //yaz_log(YLOG_LOG, "query=%s", query.c_str());
553 error = invoke_sparql(package, query.c_str(),
// Diagnostic built with the additional info text, if there is any.
558 rec->which = Z_Records_NSD;
559 rec->u.nonSurrogateDiagnostic =
560 zget_DefaultDiagFormat(
563 addinfo.len() ? addinfo.c_str() : 0);
// The backend response text becomes the record.
567 npr->u.databaseRecord =
568 z_ext_record_xml(odr, w.c_str(), w.len());
// Direct path: serialise the extracted XML and use it as the record.
573 xmlBufferPtr buf = xmlBufferCreate();
574 xmlNodeDump(buf, ndoc, ndoc_root, 0, 0);
575 yaz_log(YLOG_LOG, "record %s %.*s", uri_lookup ? "uri" : "normal",
576 (int) buf->use, (const char *) buf->content);
577 npr->u.databaseRecord =
578 z_ext_record_xml(odr, (const char *) buf->content, buf->use);
// i is the number of records actually produced.
583 rec->u.databaseOrSurDiagnostics->num_records = i;
584 *number_returned = i;
// Tested to detect a range that reaches past the end of the result set.
585 if (start + number > fset->hits)
588 *next_position = start + number;
// Send a SPARQL query to the backend over HTTP and collect the response.
//
// A separate Package is created for the HTTP exchange.  The request is
// built for conf->uri with a form-urlencoded body produced by
// yaz_array_to_uri(), in which `sparql_query` is the value; it asks for
// SPARQL results XML or RDF/XML through the Accept header.
// Result handling, as far as visible:
//   - no response, or a response that is not an HTTP response: a message
//     is written to `w` and YAZ_BIB1_TEMPORARY_SYSTEM_ERROR is returned;
//   - HTTP status other than 200: same error code, with the status in `w`;
//   - otherwise the response content is written to `w`.
// NOTE(review): the remaining parameters, the statement that dispatches
// http_package, the parameter name placed in names[] and the final return
// are on lines not visible in this excerpt.
592 int yf::SPARQL::Session::invoke_sparql(mp::Package &package,
593 const char *sparql_query,
597 Package http_package(package.session(), package.origin());
600 http_package.copy_filter(package);
601 Z_GDU *gdu = z_get_HTTP_Request_uri(odr, conf->uri.c_str(), 0, 1);
603 z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
604 "Content-Type", "application/x-www-form-urlencoded");
605 z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
606 "Accept", "application/sparql-results+xml,"
607 "application/rdf+xml");
// Name/value arrays handed to yaz_array_to_uri() to build the body.
608 const char *names[2];
611 const char *values[1];
612 values[0] = sparql_query;
614 yaz_array_to_uri(&path, odr, (char **) names, (char **) values);
// The encoded string is used as the request content.
616 gdu->u.HTTP_Request->content_buf = path;
617 gdu->u.HTTP_Request->content_len = strlen(path);
619 yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query);
621 http_package.request() = gdu;
624 Z_GDU *gdu_resp = http_package.response().get();
626 if (!gdu_resp || gdu_resp->which != Z_GDU_HTTP_Response)
628 wrbuf_puts(w, "no HTTP response from backend");
629 return YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
631 else if (gdu_resp->u.HTTP_Response->code != 200)
633 wrbuf_printf(w, "sparql: HTTP error %d from backend",
634 gdu_resp->u.HTTP_Response->code);
635 return YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
// Success: hand the raw response body back through `w`.
637 Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
638 wrbuf_write(w, resp->content_buf, resp->content_len);
// Execute a SPARQL query for a Z39.50 search request and build the search
// response APDU.
//
// The query is logged and sent with invoke_sparql().  An error from the
// backend call, or a response that cannot be parsed as XML, is reported
// through a search response carrying a diagnostic.  Otherwise the parsed
// document is stored as a Result of `fset`, the hit count is obtained with
// get_result(doc, &fset->hits, -1, 0), and the set is registered in
// m_frontend_sets under the request's result set name.  The number of
// records to piggyback is computed with mp::util::piggyback_sr(); records
// are obtained with fetch(), and the response's resultCount,
// numberOfRecordsReturned, nextResultSetPosition and records are filled in.
// NOTE(review): several lines (remaining parameters, braces, else
// branches, construction of `result`, declarations of `w`, `number`,
// `error_code` and `addinfo`, the return) are not visible in this excerpt.
642 Z_APDU *yf::SPARQL::Session::search(mp::Package &package,
645 const char *sparql_query,
646 ConfPtr conf, FrontendSetPtr fset)
648 Z_SearchRequest *req = apdu_req->u.searchRequest;
649 Z_APDU *apdu_res = 0;
652 package.log("sparql", YLOG_LOG,
653 "search query:\n%s", sparql_query );
655 int error = invoke_sparql(package, sparql_query, conf, w);
// Backend call failed: the error code goes into the search response.
658 apdu_res = odr.create_searchResponse(apdu_req, error,
664 xmlDocPtr doc = xmlParseMemory(w.c_str(), w.len());
// Response could not be parsed as XML.  The message used to read
// "backendbackend"; the duplicated word has been removed.
667 apdu_res = odr.create_searchResponse(
669 YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
670 "invalid XML from backend");
675 Z_Records *records = 0;
676 int number_returned = 0;
677 int next_position = 0;
// Keep the parsed document as part of the front-end result set.
683 fset->results.push_back(result);
684 yaz_log(YLOG_LOG, "saving sparql result xmldoc=%p", doc);
// Position -1 with no output document: used to obtain the hit count.
686 get_result(result.doc, &fset->hits, -1, 0);
687 m_frontend_sets[req->resultSetName] = fset;
// Determine how many records to return with the search response.
692 const char *element_set_name = 0;
693 mp::util::piggyback_sr(req, fset->hits, number, &element_set_name);
// Choose between the medium-set and small-set element set names.
696 Z_ElementSetNames *esn;
698 if (number > *req->smallSetUpperBound)
699 esn = req->mediumSetElementSetNames;
701 esn = req->smallSetElementSetNames;
702 records = fetch(package, fset,
703 odr, req->preferredRecordSyntax, esn,
// Response carrying the error reported by fetch().
712 odr.create_searchResponse(
713 apdu_req, error_code, addinfo.c_str());
// Successful response: fill in counts and the fetched records.
718 odr.create_searchResponse(apdu_req, 0, 0);
719 Z_SearchResponse *resp = apdu_res->u.searchResponse;
720 *resp->resultCount = fset->hits;
721 *resp->numberOfRecordsReturned = number_returned;
722 *resp->nextResultSetPosition = next_position;
723 resp->records = records;
// Handle one Z39.50 APDU and store the answer in package.response().
//
// Request kinds handled:
//   - init:    builds an init response named "sparql", echoing the
//              requested options/versions that appear in the local tables;
//   - close:   answers with a close APDU and closes the session;
//   - search:  validates the request, converts the RPN query to SPARQL
//              for the matching database configuration and calls search();
//   - present: looks up the named result set and calls fetch();
//   - others:  answered with a protocol-error close; the session is closed.
// NOTE(review): many lines (braces, returns, else branches, declarations
// of `odr`, `i`, `error_code`, `addinfo`, `sparql_wr`) are not visible in
// this excerpt.
730 void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
733 Z_APDU *apdu_res = 0;
// ---- Init ----
734 if (apdu_req->which == Z_APDU_initRequest)
736 apdu_res = odr.create_initResponse(apdu_req, 0, 0);
737 Z_InitRequest *req = apdu_req->u.initRequest;
738 Z_InitResponse *resp = apdu_res->u.initResponse;
740 resp->implementationName = odr_strdup(odr, "sparql");
// Remember whether the client asked for named result sets.
741 if (ODR_MASK_GET(req->options, Z_Options_namedResultSets))
742 m_support_named_result_sets = true;
// Options that are echoed back when the client requested them.
744 static const int masks[] = {
745 Z_Options_search, Z_Options_present,
746 Z_Options_namedResultSets, -1
748 for (i = 0; masks[i] != -1; i++)
749 if (ODR_MASK_GET(req->options, masks[i]))
750 ODR_MASK_SET(resp->options, masks[i]);
// Protocol versions echoed back when requested (the table entries are not
// visible in this excerpt).
751 static const int versions[] = {
757 for (i = 0; versions[i] != -1; i++)
758 if (ODR_MASK_GET(req->protocolVersion, versions[i]))
759 ODR_MASK_SET(resp->protocolVersion, versions[i]);
// Message size limits are copied from the request.
762 *resp->preferredMessageSize = *req->preferredMessageSize;
763 *resp->maximumRecordSize = *req->maximumRecordSize;
// ---- Close ----
765 else if (apdu_req->which == Z_APDU_close)
767 apdu_res = odr.create_close(apdu_req,
768 Z_Close_finished, 0);
769 package.session().close();
// ---- Search ----
771 else if (apdu_req->which == Z_APDU_searchRequest)
773 Z_SearchRequest *req = apdu_req->u.searchRequest;
775 FrontendSets::iterator fset_it =
776 m_frontend_sets.find(req->resultSetName);
777 if (fset_it != m_frontend_sets.end())
779 // result set already exist
780 // if replace indicator is off: we return diagnostic if
781 // result set already exist.
782 if (*req->replaceIndicator == 0)
785 odr.create_searchResponse(
787 YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF,
789 package.response() = apdu;
// The existing set is removed from the map.
791 m_frontend_sets.erase(fset_it);
// Only type-1 (RPN) queries are accepted.
793 if (req->query->which != Z_Query_type_1)
795 apdu_res = odr.create_searchResponse(
796 apdu_req, YAZ_BIB1_QUERY_TYPE_UNSUPP, 0);
// Exactly one database name must be given.
798 else if (req->num_databaseNames != 1)
800 apdu_res = odr.create_searchResponse(
802 YAZ_BIB1_ACCESS_TO_SPECIFIED_DATABASE_DENIED, 0);
806 std::string db = req->databaseNames[0];
807 std::list<ConfPtr>::const_iterator it;
808 FrontendSetPtr fset(new FrontendSet);
810 m_frontend_sets.erase(req->resultSetName);
// Find a configuration with a non-empty schema for which yaz_match_glob()
// succeeds on the configured path and the requested database name.
812 it = m_sparql->db_conf.begin();
813 for (; it != m_sparql->db_conf.end(); it++)
814 if ((*it)->schema.length() > 0
815 && yaz_match_glob((*it)->db.c_str(), db.c_str()))
817 mp::wrbuf addinfo_wr;
// Convert the RPN query to SPARQL using this configuration's patterns.
820 yaz_sparql_from_rpn_wrbuf((*it)->s,
821 addinfo_wr, sparql_wr,
822 req->query->u.type_1);
// Search response built with the additional info text, if there is any.
825 apdu_res = odr.create_searchResponse(
827 addinfo_wr.len() ? addinfo_wr.c_str() : 0);
831 Z_APDU *apdu_1 = search(package, apdu_req, odr,
832 sparql_wr.c_str(), *it,
// Diagnostic naming the requested database as non-existent.
840 apdu_res = odr.create_searchResponse(
841 apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str());
// ---- Present ----
845 else if (apdu_req->which == Z_APDU_presentRequest)
847 Z_PresentRequest *req = apdu_req->u.presentRequest;
848 FrontendSets::iterator fset_it =
849 m_frontend_sets.find(req->resultSetId);
// Unknown result set id.
850 if (fset_it == m_frontend_sets.end())
853 odr.create_presentResponse(
854 apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
856 package.response() = apdu_res;
859 int number_returned = 0;
860 int next_position = 0;
// Only the simple form of record composition is accepted.
863 Z_ElementSetNames *esn = 0;
864 if (req->recordComposition)
866 if (req->recordComposition->which == Z_RecordComp_simple)
867 esn = req->recordComposition->u.simple;
871 odr.create_presentResponse(
873 YAZ_BIB1_ONLY_A_SINGLE_ELEMENT_SET_NAME_SUPPORTED,
875 package.response() = apdu_res;
879 Z_Records *records = fetch(
882 odr, req->preferredRecordSyntax, esn,
883 *req->resultSetStartPoint, *req->numberOfRecordsRequested,
// Present response carrying the error reported by fetch().
890 odr.create_presentResponse(apdu_req, error_code,
// Successful present response.
896 odr.create_presentResponse(apdu_req, 0, 0);
897 Z_PresentResponse *resp = apdu_res->u.presentResponse;
898 resp->records = records;
899 *resp->numberOfRecordsReturned = number_returned;
900 *resp->nextResultSetPosition = next_position;
// ---- Anything else: protocol error ----
905 apdu_res = odr.create_close(apdu_req,
906 Z_Close_protocolError,
907 "sparql: unhandled APDU");
908 package.session().close();
912 package.response() = apdu_res;
// Process one package: obtain the session and the request APDU with
// get_session(), let the session handle the APDU, then release the session.
// NOTE(review): the conditions around these calls, and any handling of
// packages that carry no Z39.50 APDU, are on lines not visible in this
// excerpt.
915 void yf::SPARQL::process(mp::Package &package) const
918 SessionPtr p = get_session(package, &apdu);
921 p->handle_z(package, apdu);
925 release_session(package);
// Factory function: returns a newly allocated SPARQL filter as a Base
// pointer.
928 static mp::filter::Base* filter_creator()
930 return new mp::filter::SPARQL;
// Filter description record for the sparql filter.
// NOTE(review): the initializer fields are not visible in this excerpt;
// presumably they reference filter_creator - confirm against the full
// source.
934 struct metaproxy_1_filter_struct metaproxy_1_filter_sparql = {
945 * c-file-style: "Stroustrup"
946 * indent-tabs-mode: nil
948 * vim: shiftwidth=4 tabstop=8 expandtab