From dc40821899058dd61bf0a18ef16ec136d689c156 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Wed, 20 May 2015 13:28:07 +0200 Subject: [PATCH] MPSPARQL-22: Return some kind of explain --- bibframe/triplestore.xml | 1 + src/filter_sparql.cpp | 244 ++++++++++++++++++++++++++++++++++++++-------- src/sparql.c | 42 ++++++++ src/sparql.h | 3 + 4 files changed, 252 insertions(+), 38 deletions(-) diff --git a/bibframe/triplestore.xml b/bibframe/triplestore.xml index 6450a9f..a3fb568 100644 --- a/bibframe/triplestore.xml +++ b/bibframe/triplestore.xml @@ -575,6 +575,7 @@ ?thing ?rel ?obj FILTER( str(?thing) = %s ) + diff --git a/src/filter_sparql.cpp b/src/filter_sparql.cpp index d9b51eb..2ff3cf4 100644 --- a/src/filter_sparql.cpp +++ b/src/filter_sparql.cpp @@ -88,6 +88,7 @@ namespace metaproxy_1 { Odr_int hits; std::string db; std::list results; + std::vector explaindblist; }; class SPARQL::Session { public: @@ -98,12 +99,17 @@ namespace metaproxy_1 { Z_APDU *apdu_req, mp::odr &odr, const char *sparql_query, - ConfPtr conf, FrontendSetPtr fset); + ConfPtr conf, + FrontendSetPtr fset); + Z_APDU *explain_search(mp::Package &package, + Z_APDU *apdu_req, + mp::odr &odr, + const char *sparql_query, + FrontendSetPtr fset); int invoke_sparql(mp::Package &package, const char *sparql_query, ConfPtr conf, WRBUF w); - Z_Records *fetch( Package &package, FrontendSetPtr fset, @@ -111,6 +117,13 @@ namespace metaproxy_1 { Z_ElementSetNames *esn, int start, int number, int &error_code, std::string &addinfo, int *number_returned, int *next_position); + Z_Records *explain_fetch( + Package &package, + FrontendSetPtr fset, + ODR odr, Odr_oid *preferredRecordSyntax, + Z_ElementSetNames *esn, + int start, int number, int &error_code, std::string &addinfo, + int *number_returned, int *next_position); bool m_in_use; private: bool m_support_named_result_sets; @@ -507,7 +520,6 @@ Z_Records *yf::SPARQL::Session::fetch( { if (n->type == XML_ELEMENT_NODE) { - //if (!strcmp((const char *) n->name, 
"uri")) if (!strcmp((const char *) n->name, "uri") || !strcmp((const char *) n->name, "bnode") ) { @@ -644,6 +656,138 @@ int yf::SPARQL::Session::invoke_sparql(mp::Package &package, return 0; } 
+/**
+ * Build the records of the "explain" pseudo-database: one XML record per
+ * entry of fset->explaindblist, beginning at the 1-based position start.
+ *
+ * The requested window is checked against the list before it is indexed:
+ * a start position outside the list sets error_code to
+ * YAZ_BIB1_PRESENT_REQUEST_OUT_OF_RANGE and returns 0, a window running
+ * past the end is truncated.  On success *number_returned is the number of
+ * records built and *next_position the next position (0 when exhausted).
+ * preferredRecordSyntax and esn are accepted but not consulted.
+ */
+Z_Records *yf::SPARQL::Session::explain_fetch(
+    Package &package,
+    FrontendSetPtr fset,
+    ODR odr, Odr_oid *preferredRecordSyntax,
+    Z_ElementSetNames *esn,
+    int start, int number, int &error_code, std::string &addinfo,
+    int *number_returned, int *next_position)
+{
+    const int total = (int) fset->explaindblist.size();
+    *number_returned = 0;
+    *next_position = 0;
+    if (start < 1 || (number > 0 && start > total))
+    {
+        error_code = YAZ_BIB1_PRESENT_REQUEST_OUT_OF_RANGE;
+        return 0;
+    }
+    int count = 0;   // records actually delivered, never past the list end
+    if (number > 0)
+        count = (total - start + 1 < number) ? total - start + 1 : number;
+
+    Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records));
+    rec->which = Z_Records_DBOSD;
+    rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *)
+        odr_malloc(odr, sizeof(Z_NamePlusRecordList));
+    rec->u.databaseOrSurDiagnostics->records = (Z_NamePlusRecord **)
+        odr_malloc(odr, sizeof(Z_NamePlusRecord *) * (count ? count : 1));
+    for (int i = 0; i < count; i++)
+    {
+        const int idx = start + i - 1;   // 0-based index into the list
+        ConfPtr cp = fset->explaindblist[idx];
+        package.log("sparql", YLOG_LOG, "fetch explain %d:%s",
+                    idx, cp->db.c_str());
+        mp::wrbuf w;
+        wrbuf_puts(w,"\n");
+        wrbuf_puts(w," \n");
+        wrbuf_puts(w," ");
+        wrbuf_xmlputs(w, cp->db.c_str());
+        wrbuf_puts(w,"\n");
+        wrbuf_puts(w," \n");
+        yaz_sparql_explain_indexes(cp->s, w, 2);
+        wrbuf_puts(w,"\n");
+
+        Z_NamePlusRecord *npr = (Z_NamePlusRecord *)
+            odr_malloc(odr, sizeof(Z_NamePlusRecord));
+        npr->databaseName = odr_strdup(odr, fset->db.c_str());
+        npr->which = Z_NamePlusRecord_databaseRecord;
+        npr->u.databaseRecord = z_ext_record_xml(odr, w.buf(), w.len());
+        rec->u.databaseOrSurDiagnostics->records[i] = npr;
+    }
+    rec->u.databaseOrSurDiagnostics->num_records = count;
+    *number_returned = count;
+    if (start + count <= total)
+        *next_position = start + count;
+    return rec;
+}
+
+/**
+ * Answer a search against the magic "explain" database.
+ *
+ * Every configured database with a non-empty schema (a searchable one) is
+ * one hit.  Those configurations are stored in fset->explaindblist and fset
+ * is registered under the request's result set name; piggy-backed records
+ * are produced through explain_fetch().  sparql_query is not used yet: no
+ * filtering on the query takes place.
+ */
+Z_APDU *yf::SPARQL::Session::explain_search(mp::Package &package,
+                                            Z_APDU *apdu_req,
+                                            mp::odr &odr,
+                                            const char *sparql_query,
+                                            FrontendSetPtr fset)
+{
+    Z_SearchRequest *req = apdu_req->u.searchRequest;
+    int numbases = 0;
+
+    package.log("sparql", YLOG_LOG, "Explain search" );
+    fset->explaindblist.clear();
+    fset->explaindblist.reserve(m_sparql->db_conf.size());
+    std::list<ConfPtr>::const_iterator it = m_sparql->db_conf.begin();
+    for (; it != m_sparql->db_conf.end(); ++it)
+    {
+        if ((*it)->schema.length() == 0)
+            continue;   // no schema: not a searchable db
+        numbases++;
+        package.log("sparql", YLOG_LOG, "Explain %d: '%s'",
+                    numbases, (*it)->db.c_str() );
+        fset->explaindblist.push_back(*it);
+    }
+    // Keep the set's hit count in step with the list built above.
+    fset->hits = numbases;
+    m_frontend_sets[req->resultSetName] = fset;
+
+    int number_returned = 0;
+    int next_position = 0;
+    Z_Records *records = 0;
+    int error_code = 0;
+    std::string addinfo;
+    Odr_int number = 0;
+    const char *element_set_name = 0;
+    mp::util::piggyback_sr(req, numbases, number, &element_set_name);
+    if (number)
+    {
+        Z_ElementSetNames *esn = (number > *req->smallSetUpperBound)
+            ? req->mediumSetElementSetNames : req->smallSetElementSetNames;
+        records = explain_fetch(package, fset, odr,
+                                req->preferredRecordSyntax, esn, 1, number,
+                                error_code, addinfo,
+                                &number_returned, &next_position);
+    }
+    if (error_code)
+        return odr.create_searchResponse(apdu_req, error_code,
+                                         addinfo.c_str());
+
+    Z_APDU *apdu_res = odr.create_searchResponse(apdu_req, 0, 0);
+    Z_SearchResponse *resp = apdu_res->u.searchResponse;
+    *resp->resultCount = numbases;
+    *resp->numberOfRecordsReturned = number_returned;
+    *resp->nextResultSetPosition = next_position;
+    resp->records = records;
+    return apdu_res;
+}
+
 Z_APDU *yf::SPARQL::Session::search(mp::Package &package, Z_APDU *apdu_req, mp::odr &odr, @@ -814,36 +958,49 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) 
m_frontend_sets.erase(req->resultSetName); fset->db = db; - it = m_sparql->db_conf.begin(); - for (; it != m_sparql->db_conf.end(); it++) - if ((*it)->schema.length() > 0 - && yaz_match_glob((*it)->db.c_str(), db.c_str())) - { - mp::wrbuf addinfo_wr; - mp::wrbuf sparql_wr; - int error = - yaz_sparql_from_rpn_wrbuf((*it)->s, - addinfo_wr, sparql_wr, - req->query->u.type_1); - if (error) - { - apdu_res = odr.create_searchResponse( - apdu_req, error, - addinfo_wr.len() ? addinfo_wr.c_str() : 0); - } - else + if ( db != "explain" ) + { + it = m_sparql->db_conf.begin(); + for (; it != m_sparql->db_conf.end(); it++) + if ((*it)->schema.length() > 0 + && yaz_match_glob((*it)->db.c_str(), db.c_str())) { - Z_APDU *apdu_1 = search(package, apdu_req, odr, - sparql_wr.c_str(), *it, - fset); - if (!apdu_res) - apdu_res = apdu_1; + mp::wrbuf addinfo_wr; + mp::wrbuf sparql_wr; + int error = + yaz_sparql_from_rpn_wrbuf((*it)->s, + addinfo_wr, sparql_wr, + req->query->u.type_1); + if (error) + { + apdu_res = odr.create_searchResponse( + apdu_req, error, + addinfo_wr.len() ? 
addinfo_wr.c_str() : 0); + } + else + { + Z_APDU *apdu_1 = search(package, apdu_req, odr, + sparql_wr.c_str(), *it, + fset); + if (!apdu_res) + apdu_res = apdu_1; + } } + if (apdu_res == 0) + { + apdu_res = odr.create_searchResponse( + apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str()); } - if (apdu_res == 0) - { - apdu_res = odr.create_searchResponse( - apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str()); + } + else + { // The magic "explain" base + yaz_log(YLOG_LOG,"About to call explain_search"); + const char *qry = "query"; + apdu_res = explain_search( package, apdu_req, odr, + qry, fset); + // TODO - Extract at least a term from the query, and + // do some filtering by that + yaz_log(YLOG_LOG,"Returned from explain_search"); } } } @@ -881,14 +1038,25 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req) return; } } - Z_Records *records = fetch( - package, - fset_it->second, - odr, req->preferredRecordSyntax, esn, - *req->resultSetStartPoint, *req->numberOfRecordsRequested, - error_code, addinfo, - &number_returned, - &next_position); + Z_Records *records; + if ( fset_it->second->explaindblist.size() > 0 ) + records = explain_fetch( + package, + fset_it->second, + odr, req->preferredRecordSyntax, esn, + *req->resultSetStartPoint, *req->numberOfRecordsRequested, + error_code, addinfo, + &number_returned, + &next_position); + else + records = fetch( + package, + fset_it->second, + odr, req->preferredRecordSyntax, esn, + *req->resultSetStartPoint, *req->numberOfRecordsRequested, + error_code, addinfo, + &number_returned, + &next_position); if (error_code) { apdu_res = diff --git a/src/sparql.c b/src/sparql.c index 0f38d57..6fe5e5c 100644 --- a/src/sparql.c +++ b/src/sparql.c @@ -540,6 +540,48 @@ int yaz_sparql_from_rpn_stream(yaz_sparql_t s, return errors ? 
-1 : r; } 
+/* Append indent space characters to w; a non-positive indent adds nothing. */
+static void explain_put_indent(WRBUF w, int indent)
+{
+    int i;
+    for (i = 0; i < indent; i++)
+        wrbuf_puts(w, " ");
+}
+
+/*
+ * Write an explain fragment for configuration s to w: one group of lines
+ * per entry whose pattern begins with "index.", carrying the rest of the
+ * pattern.  Every line is preceded by indent spaces; these are written one
+ * by one, so no fixed-size buffer limits indent (assert() may be compiled
+ * out and must not be the only bounds check).
+ */
+void yaz_sparql_explain_indexes( yaz_sparql_t s, WRBUF w, int indent)
+{
+    struct sparql_entry *e;
+
+    explain_put_indent(w, indent);
+    wrbuf_puts(w,"\n");
+    for (e = s->conf; e; e = e->next)
+    {
+        if (strncmp(e->pattern, "index.", 6) != 0)
+            continue;
+        explain_put_indent(w, indent);
+        wrbuf_puts(w," \n");
+        explain_put_indent(w, indent);
+        wrbuf_puts(w," ");
+        wrbuf_xmlputs(w, e->pattern + 6);
+        wrbuf_puts(w,"\n");
+        explain_put_indent(w, indent);
+        wrbuf_puts(w," ");
+        wrbuf_xmlputs(w, e->pattern + 6);
+        wrbuf_puts(w,"\n");
+        explain_put_indent(w, indent);
+        wrbuf_puts(w," \n");
+    }
+    explain_put_indent(w, indent);
+    wrbuf_puts(w,"\n");
+}
+
 /* * Local variables: * c-basic-offset: 4 diff --git a/src/sparql.h b/src/sparql.h index 907ff66..71365fb 100644 --- a/src/sparql.h +++ b/src/sparql.h @@ -49,6 +49,9 @@ int yaz_sparql_lookup_schema(yaz_sparql_t s, const char *schema); YAZ_EXPORT void yaz_sparql_include(yaz_sparql_t s, yaz_sparql_t u); +YAZ_EXPORT +void yaz_sparql_explain_indexes( yaz_sparql_t s, WRBUF w, int indent); + YAZ_END_CDECL #endif -- 1.7.10.4