X-Git-Url: http://git.indexdata.com/?p=mp-sparql-moved-to-github.git;a=blobdiff_plain;f=src%2Ffilter_sparql.cpp;h=a38567d0f83b5eeab93dce04abd3bfe409a44e2d;hp=34759e5feff3a531233cc3b2e41ae45b21c92fa6;hb=e625c126cea3e3e11aaa8d40b03237d9b22b6525;hpb=a74de0cefd7999972739abb1e8a10b00d746b8ee diff --git a/src/filter_sparql.cpp b/src/filter_sparql.cpp index 34759e5..a38567d 100644 --- a/src/filter_sparql.cpp +++ b/src/filter_sparql.cpp @@ -22,9 +22,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include +#include #include #include #include +#include #include "sparql.h" #include @@ -87,6 +89,7 @@ namespace metaproxy_1 { Odr_int hits; std::string db; std::list results; + std::vector explaindblist; }; class SPARQL::Session { public: @@ -97,12 +100,17 @@ namespace metaproxy_1 { Z_APDU *apdu_req, mp::odr &odr, const char *sparql_query, - ConfPtr conf, FrontendSetPtr fset); + ConfPtr conf, + FrontendSetPtr fset); + Z_APDU *explain_search(mp::Package &package, + Z_APDU *apdu_req, + mp::odr &odr, + const char *sparql_query, + FrontendSetPtr fset); int invoke_sparql(mp::Package &package, const char *sparql_query, ConfPtr conf, WRBUF w); - Z_Records *fetch( Package &package, FrontendSetPtr fset, @@ -110,6 +118,13 @@ namespace metaproxy_1 { Z_ElementSetNames *esn, int start, int number, int &error_code, std::string &addinfo, int *number_returned, int *next_position); + Z_Records *explain_fetch( + Package &package, + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position); bool m_in_use; private: bool m_support_named_result_sets; @@ -177,6 +192,32 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only, conf->uri = mp::xml::get_text(attr->children); else if (!strcmp((const char *) attr->name, "schema")) conf->schema = mp::xml::get_text(attr->children); + else if (!strcmp((const char *) attr->name, "include")) + { + std::vector dbs; + std::string db = mp::xml::get_text(attr->children); + boost::split(dbs, db, boost::is_any_of(" \t")); + size_t i; + for (i = 0; i < dbs.size(); i++) + { + if (dbs[i].length() == 0) + continue; + std::list::const_iterator it = db_conf.begin(); + while (1) + if (it == db_conf.end()) + { + throw mp::filter::FilterException( + "include db not found: " + dbs[i]); + } + else if (dbs[i].compare((*it)->db) == 0) + { + yaz_sparql_include(s, (*it)->s); + break; + } + else + it++; + } + } else throw mp::filter::FilterException( "Bad attribute " + std::string((const char *) @@ -421,6 +462,7 @@ Z_Records *yf::SPARQL::Session::fetch( std::list::iterator it = fset->results.begin(); const char *schema = 0; bool uri_lookup = false; + bool fetch_logged = false; if (esn && esn->which == Z_ElementSetNames_generic) schema = esn->u.generic; @@ -479,7 +521,8 @@ Z_Records *yf::SPARQL::Session::fetch( { if (n->type == XML_ELEMENT_NODE) { - if (!strcmp((const char *) n->name, "uri")) + if (!strcmp((const char *) n->name, "uri") || + !strcmp((const char *) n->name, "bnode") ) { uri = mp::xml::get_text(n->children); @@ -507,7 +550,18 @@ Z_Records *yf::SPARQL::Session::fetch( uri.c_str(), schema); if (!error) { - yaz_log(YLOG_LOG, "query=%s", query.c_str()); + if (!fetch_logged) + { // Log the fetch query only once + package.log("sparql", YLOG_LOG, + "fetch query: for %s \n%s", + uri.c_str(), query.c_str() ); + fetch_logged = true; + } + else + { + package.log("sparql", YLOG_LOG, + "fetch uri:%s", uri.c_str() ); + } error = invoke_sparql(package, query.c_str(), it->conf, w); } @@ -574,7 +628,7 @@ int yf::SPARQL::Session::invoke_sparql(mp::Package &package, gdu->u.HTTP_Request->content_buf = path; gdu->u.HTTP_Request->content_len = strlen(path); - yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query); + yaz_log(YLOG_DEBUG, "sparql: HTTP request\n%s", sparql_query); http_package.request() = gdu; http_package.move(); @@ -588,8 +642,14 @@ int yf::SPARQL::Session::invoke_sparql(mp::Package &package, } else if (gdu_resp->u.HTTP_Response->code != 200) { + Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response; wrbuf_printf(w, "sparql: HTTP error %d from backend", - gdu_resp->u.HTTP_Response->code); + resp->code); + package.log("sparql", YLOG_LOG, + "HTTP error %d from backend ", + resp->code ); + package.log("sparql", YLOG_LOG, + "%.*s" , resp->content_len, resp->content_buf ); return YAZ_BIB1_TEMPORARY_SYSTEM_ERROR; } Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response; @@ -597,6 +657,134 @@ int yf::SPARQL::Session::invoke_sparql(mp::Package &package, return 0; } +Z_Records *yf::SPARQL::Session::explain_fetch( + Package &package, + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position) +{ + Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records)); + rec->which = Z_Records_DBOSD; + rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *) + odr_malloc(odr, sizeof(Z_NamePlusRecordList)); + rec->u.databaseOrSurDiagnostics->records = (Z_NamePlusRecord **) + odr_malloc(odr, sizeof(Z_NamePlusRecord *) * number); + int i; + for (i = 0; i < number; i++) + { + unsigned int idx = start + i - 1; + if ( idx >= fset->explaindblist.size() ) + break; + ConfPtr cp = fset->explaindblist[idx]; + mp::wrbuf w; + wrbuf_puts(w,"\n"); + wrbuf_puts(w," \n"); + wrbuf_puts(w," "); + wrbuf_xmlputs(w, cp->db.c_str()); + wrbuf_puts(w,"\n"); + wrbuf_puts(w," \n"); + yaz_sparql_explain_indexes( cp->s, w, 2); + wrbuf_puts(w,"\n"); + + rec->u.databaseOrSurDiagnostics->records[i] = (Z_NamePlusRecord *) + odr_malloc(odr, sizeof(Z_NamePlusRecord)); + Z_NamePlusRecord *npr = rec->u.databaseOrSurDiagnostics->records[i]; + npr->databaseName = odr_strdup(odr, fset->db.c_str()); + npr->which = Z_NamePlusRecord_databaseRecord; + npr->u.databaseRecord = + z_ext_record_xml(odr, w.buf(), w.len() ); + } + rec->u.databaseOrSurDiagnostics->num_records = i; + *number_returned = i; + if (start + number > (int)fset->explaindblist.size()) + *next_position = 0; + else + *next_position = start + number; + return rec; +} + + + +Z_APDU *yf::SPARQL::Session::explain_search(mp::Package &package, + Z_APDU *apdu_req, + mp::odr &odr, + const char *explain_query, + FrontendSetPtr fset) +{ + Z_SearchRequest *req = apdu_req->u.searchRequest; + Z_APDU *apdu_res = 0; + //mp::wrbuf w; + + package.log("sparql", YLOG_LOG, "Explain search '%s'", explain_query ); + const char *term = explain_query + strlen(explain_query); + while ( term > explain_query && *term != ' ') + term--; + term++; + if ( ! isalpha( *term) ) + term=""; // anything non-alpha is taken to mean all + // Empty string is seen here as two double quotes "" + // so it returns all bases as well + int numbases = 0; + std::list::const_iterator it = m_sparql->db_conf.begin(); + m_frontend_sets[req->resultSetName] = fset; + fset->explaindblist.clear(); + fset->explaindblist.reserve(m_sparql->db_conf.size()); + + for (; it != m_sparql->db_conf.end(); it++) + if ( (*it)->schema.length() > 0 && // searchable db + (!*term || strcmp(term,(*it)->db.c_str())==0) ) + { // and want all, or found the matching one + numbases++; + package.log("sparql", YLOG_LOG, "Explain %d: '%s'", + numbases, (*it)->db.c_str() ); + fset->explaindblist.push_back(*it); + } + int number_returned = 0; + int next_position = 0; + Z_Records *records = 0; + int error_code = 0; + std::string addinfo; + + Odr_int number = 0; + const char *element_set_name = 0; + mp::util::piggyback_sr(req, numbases, number, &element_set_name); + if (number) + { + Z_ElementSetNames *esn; + + if (number > *req->smallSetUpperBound) + esn = req->mediumSetElementSetNames; + else + esn = req->smallSetElementSetNames; + records = explain_fetch(package, fset, + odr, req->preferredRecordSyntax, esn, + 1, number, + error_code, addinfo, + &number_returned, + &next_position); + } + + if (error_code) + { + apdu_res = odr.create_searchResponse( + apdu_req, error_code, addinfo.c_str()); + } + else + { + apdu_res = odr.create_searchResponse(apdu_req, 0, 0); + Z_SearchResponse *resp = apdu_res->u.searchResponse; + *resp->resultCount = numbases; + *resp->numberOfRecordsReturned = number_returned; + *resp->nextResultSetPosition = next_position; + resp->records = records; + } + + return apdu_res; +} + + Z_APDU *yf::SPARQL::Session::search(mp::Package &package, Z_APDU *apdu_req, mp::odr &odr, @@ -607,6 +795,9 @@ Z_APDU *yf::SPARQL::Session::search(mp::Package &package, Z_APDU *apdu_res = 0; mp::wrbuf w; + package.log("sparql", YLOG_LOG, + "search query:\n%s", sparql_query ); + int error = invoke_sparql(package, sparql_query, conf, w); if (error) { @@ -636,7 +827,7 @@ Z_APDU *yf::SPARQL::Session::search(mp::Package &package, result.doc = doc; result.conf = conf; fset->results.push_back(result); - yaz_log(YLOG_LOG, "saving sparql result xmldoc=%p", doc); + yaz_log(YLOG_DEBUG, "saving sparql result xmldoc=%p", doc); get_result(result.doc, &fset->hits, -1, 0); m_frontend_sets[req->resultSetName] = fset; @@ -764,35 +955,50 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) m_frontend_sets.erase(req->resultSetName); fset->db = db; - it = m_sparql->db_conf.begin(); - for (; it != m_sparql->db_conf.end(); it++) - if (yaz_match_glob((*it)->db.c_str(), db.c_str())) - { - mp::wrbuf addinfo_wr; - mp::wrbuf sparql_wr; - int error = - yaz_sparql_from_rpn_wrbuf((*it)->s, - addinfo_wr, sparql_wr, - req->query->u.type_1); - if (error) - { - apdu_res = odr.create_searchResponse( - apdu_req, error, - addinfo_wr.len() ? addinfo_wr.c_str() : 0); - } - else + if ( db != "info" ) + { + it = m_sparql->db_conf.begin(); + for (; it != m_sparql->db_conf.end(); it++) + if ((*it)->schema.length() > 0 + && yaz_match_glob((*it)->db.c_str(), db.c_str())) { - Z_APDU *apdu_1 = search(package, apdu_req, odr, - sparql_wr.c_str(), *it, - fset); - if (!apdu_res) - apdu_res = apdu_1; + mp::wrbuf addinfo_wr; + mp::wrbuf sparql_wr; + int error = + yaz_sparql_from_rpn_wrbuf((*it)->s, + addinfo_wr, sparql_wr, + req->query->u.type_1); + if (error) + { + apdu_res = odr.create_searchResponse( + apdu_req, error, + addinfo_wr.len() ? addinfo_wr.c_str() : 0); + } + else + { + Z_APDU *apdu_1 = search(package, apdu_req, odr, + sparql_wr.c_str(), *it, + fset); + if (!apdu_res) + apdu_res = apdu_1; + } } + if (apdu_res == 0) + { + apdu_res = odr.create_searchResponse( + apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str()); } - if (apdu_res == 0) - { - apdu_res = odr.create_searchResponse( - apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str()); + } + else + { // The magic "explain" base + yaz_log(YLOG_LOG,"About to call explain_search"); + mp::wrbuf qry; + yaz_query_to_wrbuf(qry, req->query); + apdu_res = explain_search( package, apdu_req, odr, + qry.c_str(), fset); + // TODO - Extract at least a term from the query, and + // do some filtering by that + yaz_log(YLOG_LOG,"Returned from explain_search"); } } } @@ -830,14 +1036,25 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) return; } } - Z_Records *records = fetch( - package, - fset_it->second, - odr, req->preferredRecordSyntax, esn, - *req->resultSetStartPoint, *req->numberOfRecordsRequested, - error_code, addinfo, - &number_returned, - &next_position); + Z_Records *records; + if ( fset_it->second->explaindblist.size() > 0 ) + records = explain_fetch( + package, + fset_it->second, + odr, req->preferredRecordSyntax, esn, + *req->resultSetStartPoint, *req->numberOfRecordsRequested, + error_code, addinfo, + &number_returned, + &next_position); + else + records = fetch( + package, + fset_it->second, + odr, req->preferredRecordSyntax, esn, + *req->resultSetStartPoint, *req->numberOfRecordsRequested, + error_code, addinfo, + &number_returned, + &next_position); if (error_code) { apdu_res =