X-Git-Url: http://git.indexdata.com/?p=mp-sparql-moved-to-github.git;a=blobdiff_plain;f=src%2Ffilter_sparql.cpp;h=34759e5feff3a531233cc3b2e41ae45b21c92fa6;hp=97af63c0c1da6112a648c86877c80a7854068420;hb=a74de0cefd7999972739abb1e8a10b00d746b8ee;hpb=17c8ff11004548c1f0b9332af3ac1ba2fad066fa diff --git a/src/filter_sparql.cpp b/src/filter_sparql.cpp index 97af63c..34759e5 100644 --- a/src/filter_sparql.cpp +++ b/src/filter_sparql.cpp @@ -38,6 +38,7 @@ namespace metaproxy_1 { class Session; class Rep; class Conf; + class Result; class FrontendSet; typedef boost::shared_ptr SessionPtr; @@ -60,6 +61,7 @@ namespace metaproxy_1 { public: std::string db; std::string uri; + std::string schema; yaz_sparql_t s; ~Conf(); }; @@ -69,25 +71,45 @@ namespace metaproxy_1 { boost::mutex m_mutex; std::map m_clients; }; - class SPARQL::FrontendSet { + class SPARQL::Result { public: - FrontendSet(); - ~FrontendSet(); + Result(); + ~Result(); private: + friend class FrontendSet; friend class Session; - Odr_int hits; + ConfPtr conf; xmlDoc *doc; }; + class SPARQL::FrontendSet { + private: + friend class Session; + Odr_int hits; + std::string db; + std::list results; + }; class SPARQL::Session { public: Session(const SPARQL *); ~Session(); void handle_z(Package &package, Z_APDU *apdu); - Z_APDU *run_sparql(mp::Package &package, - Z_APDU *apdu_req, - mp::odr &odr, - const char *sparql_query, - const char *uri); + Z_APDU *search(mp::Package &package, + Z_APDU *apdu_req, + mp::odr &odr, + const char *sparql_query, + ConfPtr conf, FrontendSetPtr fset); + int invoke_sparql(mp::Package &package, + const char *sparql_query, + ConfPtr conf, + WRBUF w); + + Z_Records *fetch( + Package &package, + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position); bool m_in_use; private: bool m_support_named_result_sets; @@ -97,13 +119,13 @@ namespace metaproxy_1 { } } -yf::SPARQL::FrontendSet::~FrontendSet() +yf::SPARQL::Result::~Result() { if (doc) xmlFreeDoc(doc); } -yf::SPARQL::FrontendSet::FrontendSet() +yf::SPARQL::Result::Result() { doc = 0; } @@ -120,16 +142,31 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only, const char *path) { const xmlNode *ptr = xmlnode->children; + std::string uri; for (; ptr; ptr = ptr->next) { if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr->name, "db")) + if (!strcmp((const char *) ptr->name, "defaults")) + { + const struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "uri")) + uri = mp::xml::get_text(attr->children); + else + throw mp::filter::FilterException( + "Bad attribute " + std::string((const char *) + attr->name)); + } + } + else if (!strcmp((const char *) ptr->name, "db")) { yaz_sparql_t s = yaz_sparql_create(); ConfPtr conf(new Conf); conf->s = s; + conf->uri = uri; const struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) @@ -138,6 +175,8 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only, conf->db = mp::xml::get_text(attr->children); else if (!strcmp((const char *) attr->name, "uri")) conf->uri = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "schema")) + conf->schema = mp::xml::get_text(attr->children); else throw mp::filter::FilterException( "Bad attribute " + std::string((const char *) @@ -258,19 +297,272 @@ void yf::SPARQL::release_session(Package &package) const } } -Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package, - Z_APDU *apdu_req, - mp::odr &odr, - const char *sparql_query, - const char *uri) +static bool get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos, xmlDoc **ndoc) +{ + xmlNode *ptr = xmlDocGetRootElement(doc); + xmlNode *q0; + Odr_int cur = 0; + + if (ndoc) + *ndoc = xmlNewDoc(BAD_CAST "1.0"); + + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "RDF")) + { + if (ndoc) + { + q0 = xmlCopyNode(ptr, 2); + xmlDocSetRootElement(*ndoc, q0); + } + ptr = ptr->children; + + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + if (ptr && ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "Description")) + { + xmlNode *p = ptr->children; + + while (p && p->type != XML_ELEMENT_NODE) + p = p->next; + if (p && p->type == XML_ELEMENT_NODE && + !strcmp((const char *) p->name, "type")) + { /* SELECT RESULT */ + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "solution")) + { + if (cur++ == pos) + { + if (ndoc) + { + xmlNode *q1 = xmlCopyNode(ptr, 1); + xmlAddChild(q0, q1); + } + break; + } + } + } + else + { /* CONSTRUCT result */ + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "Description")) + { + if (cur++ == pos) + { + if (ndoc) + { + xmlNode *q1 = xmlCopyNode(ptr, 1); + xmlAddChild(q0, q1); + } + return true; + } + } + } + } + } + else + { + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "sparql")) + break; + if (ptr) + { + if (ndoc) + { + q0 = xmlCopyNode(ptr, 2); + xmlDocSetRootElement(*ndoc, q0); + } + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "results")) + break; + } + if (ptr) + { + xmlNode *q1 = 0; + if (ndoc) + { + q1 = xmlCopyNode(ptr, 0); + xmlAddChild(q0, q1); + } + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "result")) + { + if (cur++ == pos) + { + if (ndoc) + { + xmlNode *q2 = xmlCopyNode(ptr, 1); + xmlAddChild(q1, q2); + } + return true; + } + } + } + } + if (sz) + *sz = cur; + return false; +} + +Z_Records *yf::SPARQL::Session::fetch( + Package &package, + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position) +{ + Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records)); + std::list::iterator it = fset->results.begin(); + const char *schema = 0; + bool uri_lookup = false; + if (esn && esn->which == Z_ElementSetNames_generic) + schema = esn->u.generic; + + for (; it != fset->results.end(); it++) + { + if (yaz_sparql_lookup_schema(it->conf->s, schema)) + { + uri_lookup = true; + break; + } + if (!schema || !strcmp(esn->u.generic, it->conf->schema.c_str())) + break; + } + if (it == fset->results.end()) + { + rec->which = Z_Records_NSD; + rec->u.nonSurrogateDiagnostic = + zget_DefaultDiagFormat( + odr, + YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_, + schema); + return rec; + } + rec->which = Z_Records_DBOSD; + rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *) + odr_malloc(odr, sizeof(Z_NamePlusRecordList)); + rec->u.databaseOrSurDiagnostics->records = (Z_NamePlusRecord **) + odr_malloc(odr, sizeof(Z_NamePlusRecord *) * number); + int i; + for (i = 0; i < number; i++) + { + rec->u.databaseOrSurDiagnostics->records[i] = (Z_NamePlusRecord *) + odr_malloc(odr, sizeof(Z_NamePlusRecord)); + Z_NamePlusRecord *npr = rec->u.databaseOrSurDiagnostics->records[i]; + npr->databaseName = odr_strdup(odr, fset->db.c_str()); + npr->which = Z_NamePlusRecord_databaseRecord; + xmlDoc *ndoc = 0; + + if (!get_result(it->doc, 0, start - 1 + i, &ndoc)) + { + if (ndoc) + xmlFreeDoc(ndoc); + break; + } + xmlNode *ndoc_root = xmlDocGetRootElement(ndoc); + if (!ndoc_root) + { + xmlFreeDoc(ndoc); + break; + } + if (uri_lookup) + { + std::string uri; + xmlNode *n = ndoc_root; + while (n) + { + if (n->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) n->name, "uri")) + { + uri = mp::xml::get_text(n->children); + + } + n = n->children; + } + else + n = n->next; + } + if (!uri.length()) + { + rec->which = Z_Records_NSD; + rec->u.nonSurrogateDiagnostic = + zget_DefaultDiagFormat( + odr, + YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS, 0); + xmlFreeDoc(ndoc); + return rec; + } + else + { + mp::wrbuf addinfo, query, w; + int error = yaz_sparql_from_uri_wrbuf(it->conf->s, + addinfo, query, + uri.c_str(), schema); + if (!error) + { + yaz_log(YLOG_LOG, "query=%s", query.c_str()); + error = invoke_sparql(package, query.c_str(), + it->conf, w); + } + if (error) + { + rec->which = Z_Records_NSD; + rec->u.nonSurrogateDiagnostic = + zget_DefaultDiagFormat( + odr, + error, + addinfo.len() ? addinfo.c_str() : 0); + xmlFreeDoc(ndoc); + return rec; + } + npr->u.databaseRecord = + z_ext_record_xml(odr, w.c_str(), w.len()); + } + } + else + { + xmlBufferPtr buf = xmlBufferCreate(); + xmlNodeDump(buf, ndoc, ndoc_root, 0, 0); + yaz_log(YLOG_LOG, "record %s %.*s", uri_lookup ? "uri" : "normal", + (int) buf->use, (const char *) buf->content); + npr->u.databaseRecord = + z_ext_record_xml(odr, (const char *) buf->content, buf->use); + xmlBufferFree(buf); + } + xmlFreeDoc(ndoc); + } + rec->u.databaseOrSurDiagnostics->num_records = i; + *number_returned = i; + if (start + number > fset->hits) + *next_position = 0; + else + *next_position = start + number; + return rec; +} + +int yf::SPARQL::Session::invoke_sparql(mp::Package &package, + const char *sparql_query, + ConfPtr conf, + WRBUF w) { Package http_package(package.session(), package.origin()); + mp::odr odr; http_package.copy_filter(package); - Z_GDU *gdu = z_get_HTTP_Request_uri(odr, uri, 0, 1); + Z_GDU *gdu = z_get_HTTP_Request_uri(odr, conf->uri.c_str(), 0, 1); z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers, "Content-Type", "application/x-www-form-urlencoded"); + z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers, + "Accept", "application/sparql-results+xml," + "application/rdf+xml"); const char *names[2]; names[0] = "query"; names[1] = 0; @@ -288,31 +580,105 @@ Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package, http_package.move(); Z_GDU *gdu_resp = http_package.response().get(); + + if (!gdu_resp || gdu_resp->which != Z_GDU_HTTP_Response) + { + wrbuf_puts(w, "no HTTP response from backend"); + return YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; + } + else if (gdu_resp->u.HTTP_Response->code != 200) + { + wrbuf_printf(w, "sparql: HTTP error %d from backend", + gdu_resp->u.HTTP_Response->code); + return YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; + } + Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response; + wrbuf_write(w, resp->content_buf, resp->content_len); + return 0; +} + +Z_APDU *yf::SPARQL::Session::search(mp::Package &package, + Z_APDU *apdu_req, + mp::odr &odr, + const char *sparql_query, + ConfPtr conf, FrontendSetPtr fset) +{ + Z_SearchRequest *req = apdu_req->u.searchRequest; Z_APDU *apdu_res = 0; - if (gdu_resp && gdu_resp->which == Z_GDU_HTTP_Response) + mp::wrbuf w; + + int error = invoke_sparql(package, sparql_query, conf, w); + if (error) { - Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response; - FrontendSetPtr fset(new FrontendSet); - - fset->doc = xmlParseMemory(resp->content_buf, resp->content_len); - if (!fset->doc) - apdu_res = odr.create_searchResponse(apdu_req, - YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, - "invalid XML from backendbackend"); + apdu_res = odr.create_searchResponse(apdu_req, error, + w.len() ? + w.c_str() : 0); + } + else + { + xmlDocPtr doc = xmlParseMemory(w.c_str(), w.len()); + if (!doc) + { + apdu_res = odr.create_searchResponse( + apdu_req, + YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, + "invalid XML from backendbackend"); + } else { - apdu_res = odr.create_searchResponse(apdu_req, 0, 0); + Result result; + Z_Records *records = 0; + int number_returned = 0; + int next_position = 0; + int error_code = 0; + std::string addinfo; + + result.doc = doc; + result.conf = conf; + fset->results.push_back(result); + yaz_log(YLOG_LOG, "saving sparql result xmldoc=%p", doc); + + get_result(result.doc, &fset->hits, -1, 0); + m_frontend_sets[req->resultSetName] = fset; + + result.doc = 0; + + Odr_int number = 0; + const char *element_set_name = 0; + mp::util::piggyback_sr(req, fset->hits, number, &element_set_name); + if (number) + { + Z_ElementSetNames *esn; - m_frontend_sets[apdu_req->u.searchRequest->resultSetName] = fset; + if (number > *req->smallSetUpperBound) + esn = req->mediumSetElementSetNames; + else + esn = req->smallSetElementSetNames; + records = fetch(package, fset, + odr, req->preferredRecordSyntax, esn, + 1, number, + error_code, addinfo, + &number_returned, + &next_position); + } + if (error_code) + { + apdu_res = + odr.create_searchResponse( + apdu_req, error_code, addinfo.c_str()); + } + else + { + apdu_res = + odr.create_searchResponse(apdu_req, 0, 0); + Z_SearchResponse *resp = apdu_res->u.searchResponse; + *resp->resultCount = fset->hits; + *resp->numberOfRecordsReturned = number_returned; + *resp->nextResultSetPosition = next_position; + resp->records = records; + } } } - else - { - yaz_log(YLOG_LOG, "sparql: no HTTP response"); - apdu_res = odr.create_searchResponse(apdu_req, - YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, - "no HTTP response from backend"); - } return apdu_res; } @@ -375,7 +741,7 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) apdu_req, YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF, 0); - package.response() = apdu_res; + package.response() = apdu; } m_frontend_sets.erase(fset_it); } @@ -394,41 +760,99 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) { std::string db = req->databaseNames[0]; std::list::const_iterator it; + FrontendSetPtr fset(new FrontendSet); + m_frontend_sets.erase(req->resultSetName); + fset->db = db; it = m_sparql->db_conf.begin(); for (; it != m_sparql->db_conf.end(); it++) if (yaz_match_glob((*it)->db.c_str(), db.c_str())) - break; - if (it == m_sparql->db_conf.end()) + { + mp::wrbuf addinfo_wr; + mp::wrbuf sparql_wr; + int error = + yaz_sparql_from_rpn_wrbuf((*it)->s, + addinfo_wr, sparql_wr, + req->query->u.type_1); + if (error) + { + apdu_res = odr.create_searchResponse( + apdu_req, error, + addinfo_wr.len() ? addinfo_wr.c_str() : 0); + } + else + { + Z_APDU *apdu_1 = search(package, apdu_req, odr, + sparql_wr.c_str(), *it, + fset); + if (!apdu_res) + apdu_res = apdu_1; + } + } + if (apdu_res == 0) { apdu_res = odr.create_searchResponse( apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str()); } + } + } + else if (apdu_req->which == Z_APDU_presentRequest) + { + Z_PresentRequest *req = apdu_req->u.presentRequest; + FrontendSets::iterator fset_it = + m_frontend_sets.find(req->resultSetId); + if (fset_it == m_frontend_sets.end()) + { + apdu_res = + odr.create_presentResponse( + apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, + req->resultSetId); + package.response() = apdu_res; + return; + } + int number_returned = 0; + int next_position = 0; + int error_code = 0; + std::string addinfo; + Z_ElementSetNames *esn = 0; + if (req->recordComposition) + { + if (req->recordComposition->which == Z_RecordComp_simple) + esn = req->recordComposition->u.simple; else { - WRBUF addinfo_wr = wrbuf_alloc(); - WRBUF sparql_wr = wrbuf_alloc(); - int error = - yaz_sparql_from_rpn_wrbuf((*it)->s, - addinfo_wr, sparql_wr, - req->query->u.type_1); - if (error) - { - apdu_res = odr.create_searchResponse( - apdu_req, error, - wrbuf_len(addinfo_wr) ? - wrbuf_cstr(addinfo_wr) : 0); - } - else - { - apdu_res = run_sparql(package, apdu_req, odr, - wrbuf_cstr(sparql_wr), - (*it)->uri.c_str()); - } - wrbuf_destroy(addinfo_wr); - wrbuf_destroy(sparql_wr); + apdu_res = + odr.create_presentResponse( + apdu_req, + YAZ_BIB1_ONLY_A_SINGLE_ELEMENT_SET_NAME_SUPPORTED, + 0); + package.response() = apdu_res; + return; } } + Z_Records *records = fetch( + package, + fset_it->second, + odr, req->preferredRecordSyntax, esn, + *req->resultSetStartPoint, *req->numberOfRecordsRequested, + error_code, addinfo, + &number_returned, + &next_position); + if (error_code) + { + apdu_res = + odr.create_presentResponse(apdu_req, error_code, + addinfo.c_str()); + } + else + { + apdu_res = + odr.create_presentResponse(apdu_req, 0, 0); + Z_PresentResponse *resp = apdu_res->u.presentResponse; + resp->records = records; + *resp->numberOfRecordsReturned = number_returned; + *resp->nextResultSetPosition = next_position; + } } else {