X-Git-Url: http://git.indexdata.com/?p=mp-sparql-moved-to-github.git;a=blobdiff_plain;f=src%2Ffilter_sparql.cpp;h=3a824f01ab5d60023eae9ad51a047179964a5b06;hp=201b938a2f4bda54b83bca808e4f949e11eadcaa;hb=5b4b5eec4062062050112a58a151768d7ab7e579;hpb=13764e0b41ded87eda4bb6d1777f2277fa11f4dc diff --git a/src/filter_sparql.cpp b/src/filter_sparql.cpp index 201b938..3a824f0 100644 --- a/src/filter_sparql.cpp +++ b/src/filter_sparql.cpp @@ -60,6 +60,7 @@ namespace metaproxy_1 { public: std::string db; std::string uri; + std::string schema; yaz_sparql_t s; ~Conf(); }; @@ -76,6 +77,8 @@ namespace metaproxy_1 { private: friend class Session; Odr_int hits; + std::string db; + ConfPtr conf; xmlDoc *doc; }; class SPARQL::Session { @@ -87,7 +90,13 @@ namespace metaproxy_1 { Z_APDU *apdu_req, mp::odr &odr, const char *sparql_query, - const char *uri); + ConfPtr conf); + Z_Records *fetch( + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position); bool m_in_use; private: bool m_support_named_result_sets; @@ -120,16 +129,31 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only, const char *path) { const xmlNode *ptr = xmlnode->children; + std::string uri; for (; ptr; ptr = ptr->next) { if (ptr->type != XML_ELEMENT_NODE) continue; - if (!strcmp((const char *) ptr->name, "db")) + if (!strcmp((const char *) ptr->name, "defaults")) + { + const struct _xmlAttr *attr; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!strcmp((const char *) attr->name, "uri")) + uri = mp::xml::get_text(attr->children); + else + throw mp::filter::FilterException( + "Bad attribute " + std::string((const char *) + attr->name)); + } + } + else if (!strcmp((const char *) ptr->name, "db")) { yaz_sparql_t s = yaz_sparql_create(); ConfPtr conf(new Conf); conf->s = s; + conf->uri = uri; const struct _xmlAttr *attr; for (attr = ptr->properties; attr; attr = attr->next) @@ -138,6 +162,8 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only, conf->db = mp::xml::get_text(attr->children); else if (!strcmp((const char *) attr->name, "uri")) conf->uri = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "schema")) + conf->schema = mp::xml::get_text(attr->children); else throw mp::filter::FilterException( "Bad attribute " + std::string((const char *) @@ -258,49 +284,201 @@ void yf::SPARQL::release_session(Package &package) const } } -static const xmlNode *get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos) +static bool get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos, + xmlDoc **ndoc) { - const xmlNode *ptr = xmlDocGetRootElement(doc); + xmlNode *ptr = xmlDocGetRootElement(doc); + xmlNode *q0; Odr_int cur = 0; - for (; ptr; ptr = ptr->next) - if (ptr->type == XML_ELEMENT_NODE && - !strcmp((const char *) ptr->name, "sparql")) - break; - if (ptr) + + if (ndoc) + *ndoc = xmlNewDoc(BAD_CAST "1.0"); + + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "RDF")) { - for (ptr = ptr->children; ptr; ptr = ptr->next) - if (ptr->type == XML_ELEMENT_NODE && - !strcmp((const char *) ptr->name, "results")) - break; + if (ndoc) + { + q0 = xmlCopyNode(ptr, 2); + xmlDocSetRootElement(*ndoc, q0); + } + ptr = ptr->children; + + while (ptr && ptr->type != XML_ELEMENT_NODE) + ptr = ptr->next; + if (ptr && ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "Description")) + { + xmlNode *p = ptr->children; + + while (p && p->type != XML_ELEMENT_NODE) + p = p->next; + if (p && p->type == XML_ELEMENT_NODE && + !strcmp((const char *) p->name, "type")) + { /* SELECT RESULT */ + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "solution")) + { + if (cur++ == pos) + { + if (ndoc) + { + xmlNode *q1 = xmlCopyNode(ptr, 1); + xmlAddChild(q0, q1); + } + break; + } + } + } + else + { /* CONSTRUCT result */ + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "Description")) + { + if (cur++ == pos) + { + if (ndoc) + { + xmlNode *q1 = xmlCopyNode(ptr, 1); + xmlAddChild(q0, q1); + } + return true; + } + } + } + } } - if (ptr) + else { - for (ptr = ptr->children; ptr; ptr = ptr->next) + for (; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE && - !strcmp((const char *) ptr->name, "result")) + !strcmp((const char *) ptr->name, "sparql")) + break; + if (ptr) + { + if (ndoc) { - if (cur++ == pos) + q0 = xmlCopyNode(ptr, 2); + xmlDocSetRootElement(*ndoc, q0); + } + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "results")) break; + } + if (ptr) + { + xmlNode *q1 = 0; + if (ndoc) + { + q1 = xmlCopyNode(ptr, 0); + xmlAddChild(q0, q1); } + for (ptr = ptr->children; ptr; ptr = ptr->next) + if (ptr->type == XML_ELEMENT_NODE && + !strcmp((const char *) ptr->name, "result")) + { + if (cur++ == pos) + { + if (ndoc) + { + xmlNode *q2 = xmlCopyNode(ptr, 1); + xmlAddChild(q1, q2); + } + return true; + } + } + } } if (sz) *sz = cur; - return ptr; + return false; +} + +Z_Records *yf::SPARQL::Session::fetch( + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position) +{ + Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records)); + if (esn && esn->which == Z_ElementSetNames_generic && + fset->conf->schema.length()) + { + if (strcmp(esn->u.generic, fset->conf->schema.c_str())) + { + rec->which = Z_Records_NSD; + rec->u.nonSurrogateDiagnostic = + zget_DefaultDiagFormat( + odr, + YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_, + esn->u.generic); + return rec; + } + } + rec->which = Z_Records_DBOSD; + rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *) + odr_malloc(odr, sizeof(Z_NamePlusRecordList)); + rec->u.databaseOrSurDiagnostics->records = (Z_NamePlusRecord **) + odr_malloc(odr, sizeof(Z_NamePlusRecord *) * number); + int i; + for (i = 0; i < number; i++) + { + rec->u.databaseOrSurDiagnostics->records[i] = (Z_NamePlusRecord *) + odr_malloc(odr, sizeof(Z_NamePlusRecord)); + Z_NamePlusRecord *npr = rec->u.databaseOrSurDiagnostics->records[i]; + npr->databaseName = odr_strdup(odr, fset->db.c_str()); + npr->which = Z_NamePlusRecord_databaseRecord; + xmlDoc *ndoc = 0; + + if (!get_result(fset->doc, 0, start - 1 + i, &ndoc)) + { + if (ndoc) + xmlFreeDoc(ndoc); + break; + } + xmlNode *ndoc_root = xmlDocGetRootElement(ndoc); + if (!ndoc_root) + { + xmlFreeDoc(ndoc); + break; + } + xmlBufferPtr buf = xmlBufferCreate(); + xmlNodeDump(buf, ndoc, ndoc_root, 0, 0); + npr->u.databaseRecord = + z_ext_record_xml(odr, (const char *) buf->content, buf->use); + xmlFreeDoc(ndoc); + xmlBufferFree(buf); + } + rec->u.databaseOrSurDiagnostics->num_records = i; + *number_returned = i; + if (start + number > fset->hits) + *next_position = 0; + else + *next_position = start + number; + return rec; } Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package, Z_APDU *apdu_req, mp::odr &odr, const char *sparql_query, - const char *uri) + ConfPtr conf) { + Z_SearchRequest *req = apdu_req->u.searchRequest; Package http_package(package.session(), package.origin()); http_package.copy_filter(package); - Z_GDU *gdu = z_get_HTTP_Request_uri(odr, uri, 0, 1); + Z_GDU *gdu = z_get_HTTP_Request_uri(odr, conf->uri.c_str(), 0, 1); z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers, "Content-Type", "application/x-www-form-urlencoded"); + z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers, + "Accept", "application/sparql-results+xml," + "application/rdf+xml"); const char *names[2]; names[0] = "query"; names[1] = 0; @@ -319,31 +497,82 @@ Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package, Z_GDU *gdu_resp = http_package.response().get(); Z_APDU *apdu_res = 0; - if (gdu_resp && gdu_resp->which == Z_GDU_HTTP_Response) + if (!gdu_resp || gdu_resp->which != Z_GDU_HTTP_Response) + { + yaz_log(YLOG_LOG, "sparql: no HTTP response"); + apdu_res = odr.create_searchResponse(apdu_req, + YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, + "no HTTP response from backend"); + } + else if (gdu_resp->u.HTTP_Response->code != 200) + { + mp::wrbuf w; + + wrbuf_printf(w, "sparql: HTTP error %d from backend", + gdu_resp->u.HTTP_Response->code); + apdu_res = odr.create_searchResponse(apdu_req, + YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, + w.c_str()); + } + else { Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response; FrontendSetPtr fset(new FrontendSet); fset->doc = xmlParseMemory(resp->content_buf, resp->content_len); + fset->db = req->databaseNames[0]; + fset->conf = conf; if (!fset->doc) apdu_res = odr.create_searchResponse(apdu_req, YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, "invalid XML from backendbackend"); else { - apdu_res = odr.create_searchResponse(apdu_req, 0, 0); - get_result(fset->doc, apdu_res->u.searchResponse->resultCount, - -1); - m_frontend_sets[apdu_req->u.searchRequest->resultSetName] = fset; + Z_Records *records = 0; + int number_returned = 0; + int next_position = 0; + int error_code = 0; + std::string addinfo; + + get_result(fset->doc, &fset->hits, -1, 0); + m_frontend_sets[req->resultSetName] = fset; + + Odr_int number = 0; + const char *element_set_name = 0; + mp::util::piggyback_sr(req, fset->hits, number, &element_set_name); + if (number) + { + Z_ElementSetNames *esn; + + if (number > *req->smallSetUpperBound) + esn = req->mediumSetElementSetNames; + else + esn = req->smallSetElementSetNames; + records = fetch(fset, + odr, req->preferredRecordSyntax, esn, + 1, number, + error_code, addinfo, + &number_returned, + &next_position); + } + if (error_code) + { + apdu_res = + odr.create_searchResponse( + apdu_req, error_code, addinfo.c_str()); + } + else + { + apdu_res = + odr.create_searchResponse(apdu_req, 0, 0); + Z_SearchResponse *resp = apdu_res->u.searchResponse; + *resp->resultCount = fset->hits; + *resp->numberOfRecordsReturned = number_returned; + *resp->nextResultSetPosition = next_position; + resp->records = records; + } } } - else - { - yaz_log(YLOG_LOG, "sparql: no HTTP response"); - apdu_res = odr.create_searchResponse(apdu_req, - YAZ_BIB1_TEMPORARY_SYSTEM_ERROR, - "no HTTP response from backend"); - } return apdu_res; } @@ -453,14 +682,70 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) else { apdu_res = run_sparql(package, apdu_req, odr, - wrbuf_cstr(sparql_wr), - (*it)->uri.c_str()); + wrbuf_cstr(sparql_wr), *it); } wrbuf_destroy(addinfo_wr); wrbuf_destroy(sparql_wr); } } } + else if (apdu_req->which == Z_APDU_presentRequest) + { + Z_PresentRequest *req = apdu_req->u.presentRequest; + FrontendSets::iterator fset_it = + m_frontend_sets.find(req->resultSetId); + if (fset_it == m_frontend_sets.end()) + { + apdu_res = + odr.create_presentResponse( + apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, + req->resultSetId); + package.response() = apdu_res; + return; + } + int number_returned = 0; + int next_position = 0; + int error_code = 0; + std::string addinfo; + Z_ElementSetNames *esn = 0; + if (req->recordComposition) + { + if (req->recordComposition->which == Z_RecordComp_simple) + esn = req->recordComposition->u.simple; + else + { + apdu_res = + odr.create_presentResponse( + apdu_req, + YAZ_BIB1_ONLY_A_SINGLE_ELEMENT_SET_NAME_SUPPORTED, + 0); + package.response() = apdu_res; + return; + } + } + Z_Records *records = fetch( + fset_it->second, + odr, req->preferredRecordSyntax, esn, + *req->resultSetStartPoint, *req->numberOfRecordsRequested, + error_code, addinfo, + &number_returned, + &next_position); + if (error_code) + { + apdu_res = + odr.create_presentResponse(apdu_req, error_code, + addinfo.c_str()); + } + else + { + apdu_res = + odr.create_presentResponse(apdu_req, 0, 0); + Z_PresentResponse *resp = apdu_res->u.presentResponse; + resp->records = records; + *resp->numberOfRecordsReturned = number_returned; + *resp->nextResultSetPosition = next_position; + } + } else { apdu_res = odr.create_close(apdu_req,