X-Git-Url: http://git.indexdata.com/?p=mp-sparql-moved-to-github.git;a=blobdiff_plain;f=src%2Ffilter_sparql.cpp;h=235989b3c461f95bb5a460b62686c83017e2cab8;hp=8900033778df30e489075fa0e6a8cd1267225fdb;hb=e5b5ae2b4836f76d84aae25c0edf4312b761dae4;hpb=90d0517a535806d6b076f7c9812f8292a037cd06 diff --git a/src/filter_sparql.cpp b/src/filter_sparql.cpp index 8900033..235989b 100644 --- a/src/filter_sparql.cpp +++ b/src/filter_sparql.cpp @@ -38,9 +38,13 @@ namespace metaproxy_1 { class Session; class Rep; class Conf; + class FrontendSet; typedef boost::shared_ptr SessionPtr; typedef boost::shared_ptr ConfPtr; + + typedef boost::shared_ptr FrontendSetPtr; + typedef std::map FrontendSets; public: SPARQL(); ~SPARQL(); @@ -56,6 +60,7 @@ namespace metaproxy_1 { public: std::string db; std::string uri; + std::string schema; yaz_sparql_t s; ~Conf(); }; @@ -65,6 +70,17 @@ namespace metaproxy_1 { boost::mutex m_mutex; std::map m_clients; }; + class SPARQL::FrontendSet { + public: + FrontendSet(); + ~FrontendSet(); + private: + friend class Session; + Odr_int hits; + std::string db; + ConfPtr conf; + xmlDoc *doc; + }; class SPARQL::Session { public: Session(const SPARQL *); @@ -74,15 +90,33 @@ namespace metaproxy_1 { Z_APDU *apdu_req, mp::odr &odr, const char *sparql_query, - const char *uri); + ConfPtr conf); + Z_Records *fetch( + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position); bool m_in_use; private: bool m_support_named_result_sets; + FrontendSets m_frontend_sets; const SPARQL *m_sparql; }; } } +yf::SPARQL::FrontendSet::~FrontendSet() +{ + if (doc) + xmlFreeDoc(doc); +} + +yf::SPARQL::FrontendSet::FrontendSet() +{ + doc = 0; +} + yf::SPARQL::SPARQL() : m_p(new Rep) { } @@ -113,6 +147,8 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only, conf->db = mp::xml::get_text(attr->children); else if (!strcmp((const char *) attr->name, "uri")) conf->uri = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "schema")) + conf->schema = mp::xml::get_text(attr->children); else throw mp::filter::FilterException( "Bad attribute " + std::string((const char *) @@ -233,19 +269,151 @@ void yf::SPARQL::release_session(Package &package) const } } +static xmlNode *get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos) +{ + xmlNode *ptr = xmlDocGetRootElement(doc); + Odr_int cur = 0; + + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "RDF")) + { + ptr = ptr->children; + + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + if (ptr && ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "Description")) + { + xmlNode *p = ptr->children; + + while (p && p->type != XML_ELEMENT_NODE) + p = p->next; + if (p && p->type == XML_ELEMENT_NODE && + !strcmp((const char *) p->name, "type")) + { /* SELECT RESULT */ + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "solution")) + { + if (cur++ == pos) + break; + } + } + else + { /* CONSTRUCT result */ + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "Description")) + { + if (cur++ == pos) + break; + } + } + } + } + else + { + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "sparql")) + break; + if (ptr) + { + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "results")) + break; + } + if (ptr) + { + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "result")) + { + if (cur++ == pos) + break; + } + } + } + if (sz) + *sz = cur; + return ptr; +} + +Z_Records *yf::SPARQL::Session::fetch( + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position) +{ + Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records)); + if (esn && esn->which == Z_ElementSetNames_generic && + fset->conf->schema.length()) + { + if (strcmp(esn->u.generic, fset->conf->schema.c_str())) + { + rec->which = Z_Records_NSD; + rec->u.nonSurrogateDiagnostic = + zget_DefaultDiagFormat( + odr, + YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_, + esn->u.generic); + return rec; + } + } + rec->which = Z_Records_DBOSD; + rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *) + odr_malloc(odr, sizeof(Z_NamePlusRecordList)); + rec->u.databaseOrSurDiagnostics->records = (Z_NamePlusRecord **) + odr_malloc(odr, sizeof(Z_NamePlusRecord *) * number); + int i; + for (i = 0; i < number; i++) + { + rec->u.databaseOrSurDiagnostics->records[i] = (Z_NamePlusRecord *) + odr_malloc(odr, sizeof(Z_NamePlusRecord)); + Z_NamePlusRecord *npr = rec->u.databaseOrSurDiagnostics->records[i]; + npr->databaseName = odr_strdup(odr, fset->db.c_str()); + npr->which = Z_NamePlusRecord_databaseRecord; + + xmlNode *node = get_result(fset->doc, 0, start - 1 + i); + if (!node) + break; + assert(node->type == XML_ELEMENT_NODE); + xmlNode *tmp = xmlCopyNode(node, 1); + xmlBufferPtr buf = xmlBufferCreate(); + xmlNodeDump(buf, tmp->doc, tmp, 0, 0); + npr->u.databaseRecord = + z_ext_record_xml(odr, (const char *) buf->content, buf->use); + xmlFreeNode(tmp); + xmlBufferFree(buf); + } + rec->u.databaseOrSurDiagnostics->num_records = i; + *number_returned = i; + if (start + number > fset->hits) + *next_position = 0; + else + *next_position = start + number; + return rec; +} + Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package, Z_APDU *apdu_req, mp::odr &odr, const char *sparql_query, - const char *uri) + ConfPtr conf) { + Z_SearchRequest *req = apdu_req->u.searchRequest; Package http_package(package.session(), package.origin()); http_package.copy_filter(package); - Z_GDU *gdu = z_get_HTTP_Request_uri(odr, uri, 0, 1); + Z_GDU *gdu = z_get_HTTP_Request_uri(odr, conf->uri.c_str(), 0, 1); z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers, "Content-Type", "application/x-www-form-urlencoded"); + z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers, + "Accept", "application/sparql-results+xml," + "application/rdf+xml"); const char *names[2]; names[0] = "query"; names[1] = 0; @@ -257,22 +425,89 @@ Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package, gdu->u.HTTP_Request->content_buf = path; gdu->u.HTTP_Request->content_len = strlen(path); - yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query); http_package.request() = gdu; http_package.move(); Z_GDU *gdu_resp = http_package.response().get(); - if (gdu_resp && gdu_resp->which == Z_GDU_HTTP_Response) + Z_APDU *apdu_res = 0; + if (!gdu_resp || gdu_resp->which != Z_GDU_HTTP_Response) { - Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response; + yaz_log(YLOG_LOG, "sparql: no HTTP response"); + apdu_res = odr.create_searchResponse(apdu_req, + YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, + "no HTTP response from backend"); + } + else if (gdu_resp->u.HTTP_Response->code != 200) + { + mp::wrbuf w; + + wrbuf_printf(w, "sparql: HTTP error %d from backend", + gdu_resp->u.HTTP_Response->code); + apdu_res = odr.create_searchResponse(apdu_req, + YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, + w.c_str()); } else { - yaz_log(YLOG_LOG, "sparql: no HTTP response"); + Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response; + FrontendSetPtr fset(new FrontendSet); + + fset->doc = xmlParseMemory(resp->content_buf, resp->content_len); + fset->db = req->databaseNames[0]; + fset->conf = conf; + if (!fset->doc) + apdu_res = odr.create_searchResponse(apdu_req, + YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, + "invalid XML from backendbackend"); + else + { + Z_Records *records = 0; + int number_returned = 0; + int next_position = 0; + int error_code = 0; + std::string addinfo; + + get_result(fset->doc, &fset->hits, -1); + m_frontend_sets[req->resultSetName] = fset; + + Odr_int number = 0; + const char *element_set_name = 0; + mp::util::piggyback_sr(req, fset->hits, number, &element_set_name); + if (number) + { + Z_ElementSetNames *esn; + + if (number > *req->smallSetUpperBound) + esn = req->mediumSetElementSetNames; + else + esn = req->smallSetElementSetNames; + records = fetch(fset, + odr, req->preferredRecordSyntax, esn, + 1, number, + error_code, addinfo, + &number_returned, + &next_position); + } + if (error_code) + { + apdu_res = + odr.create_searchResponse( + apdu_req, error_code, addinfo.c_str()); + } + else + { + apdu_res = + odr.create_searchResponse(apdu_req, 0, 0); + Z_SearchResponse *resp = apdu_res->u.searchResponse; + *resp->resultCount = fset->hits; + *resp->numberOfRecordsReturned = number_returned; + *resp->nextResultSetPosition = next_position; + resp->records = records; + } + } } - Z_APDU *apdu_res = odr.create_searchResponse(apdu_req, 0, 0); return apdu_res; } @@ -321,6 +556,24 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) { Z_SearchRequest *req = apdu_req->u.searchRequest; + FrontendSets::iterator fset_it = + m_frontend_sets.find(req->resultSetName); + if (fset_it != m_frontend_sets.end()) + { + // result set already exist + // if replace indicator is off: we return diagnostic if + // result set already exist. + if (*req->replaceIndicator == 0) + { + Z_APDU *apdu = + odr.create_searchResponse( + apdu_req, + YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF, + 0); + package.response() = apdu_res; + } + m_frontend_sets.erase(fset_it); + } if (req->query->which != Z_Query_type_1) { apdu_res = odr.create_searchResponse( @@ -364,14 +617,70 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) else { apdu_res = run_sparql(package, apdu_req, odr, - wrbuf_cstr(sparql_wr), - (*it)->uri.c_str()); + wrbuf_cstr(sparql_wr), *it); } wrbuf_destroy(addinfo_wr); wrbuf_destroy(sparql_wr); } } } + else if (apdu_req->which == Z_APDU_presentRequest) + { + Z_PresentRequest *req = apdu_req->u.presentRequest; + FrontendSets::iterator fset_it = + m_frontend_sets.find(req->resultSetId); + if (fset_it == m_frontend_sets.end()) + { + apdu_res = + odr.create_presentResponse( + apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, + req->resultSetId); + package.response() = apdu_res; + return; + } + int number_returned = 0; + int next_position = 0; + int error_code = 0; + std::string addinfo; + Z_ElementSetNames *esn = 0; + if (req->recordComposition) + { + if (req->recordComposition->which == Z_RecordComp_simple) + esn = req->recordComposition->u.simple; + else + { + apdu_res = + odr.create_presentResponse( + apdu_req, + YAZ_BIB1_ONLY_A_SINGLE_ELEMENT_SET_NAME_SUPPORTED, + 0); + package.response() = apdu_res; + return; + } + } + Z_Records *records = fetch( + fset_it->second, + odr, req->preferredRecordSyntax, esn, + *req->resultSetStartPoint, *req->numberOfRecordsRequested, + error_code, addinfo, + &number_returned, + &next_position); + if (error_code) + { + apdu_res = + odr.create_presentResponse(apdu_req, error_code, + addinfo.c_str()); + } + else + { + apdu_res = + odr.create_presentResponse(apdu_req, 0, 0); + Z_PresentResponse *resp = apdu_res->u.presentResponse; + resp->records = records; + *resp->numberOfRecordsReturned = number_returned; + *resp->nextResultSetPosition = next_position; + } + } else { apdu_res = odr.create_close(apdu_req,