Checkout submodules as necessary MPSPARQL-23
[mp-sparql-moved-to-github.git] / src / filter_sparql.cpp
index f168571..a38567d 100644 (file)
@@ -22,9 +22,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #include <yaz/srw.h>
 #include <yaz/diagbib1.h>
 #include <yaz/match_glob.h>
+#include <yaz/querytowrbuf.h>
 #include <boost/scoped_ptr.hpp>
 #include <boost/thread/mutex.hpp>
 #include <boost/thread/condition.hpp>
+#include <boost/algorithm/string.hpp>
 #include "sparql.h"
 
 #include <yaz/zgdu.h>
@@ -87,6 +89,7 @@ namespace metaproxy_1 {
             Odr_int hits;
             std::string db;
             std::list<Result> results;
+            std::vector<ConfPtr> explaindblist;
         };
         class SPARQL::Session {
         public:
@@ -97,12 +100,17 @@ namespace metaproxy_1 {
                            Z_APDU *apdu_req,
                            mp::odr &odr,
                            const char *sparql_query,
-                           ConfPtr conf, FrontendSetPtr fset);
+                           ConfPtr conf,
+                           FrontendSetPtr fset);
+            Z_APDU *explain_search(mp::Package &package,
+                           Z_APDU *apdu_req,
+                           mp::odr &odr,
+                           const char *sparql_query,
+                           FrontendSetPtr fset);
             int invoke_sparql(mp::Package &package,
                               const char *sparql_query,
                               ConfPtr conf,
                               WRBUF w);
-
             Z_Records *fetch(
                 Package &package,
                 FrontendSetPtr fset,
@@ -110,6 +118,13 @@ namespace metaproxy_1 {
                 Z_ElementSetNames *esn,
                 int start, int number, int &error_code, std::string &addinfo,
                 int *number_returned, int *next_position);
+            Z_Records *explain_fetch(
+                Package &package,
+                FrontendSetPtr fset,
+                ODR odr, Odr_oid *preferredRecordSyntax,
+                Z_ElementSetNames *esn,
+                int start, int number, int &error_code, std::string &addinfo,
+                int *number_returned, int *next_position);
             bool m_in_use;
         private:
             bool m_support_named_result_sets;
@@ -179,21 +194,29 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only,
                     conf->schema = mp::xml::get_text(attr->children);
                 else if (!strcmp((const char *) attr->name, "include"))
                 {
-                    std::string name = mp::xml::get_text(attr->children);
-                    std::list<ConfPtr>::const_iterator it = db_conf.begin();
-                    while (1)
-                        if (it == db_conf.end())
-                        {
-                            throw mp::filter::FilterException(
-                                "include db not found: " + name);
-                        }
-                        else if (name.compare((*it)->db) == 0)
-                        {
-                            yaz_sparql_include(s, (*it)->s);
-                            break;
-                        }
-                        else
-                            it++;
+                    std::vector<std::string> dbs;
+                    std::string db = mp::xml::get_text(attr->children);
+                    boost::split(dbs, db, boost::is_any_of(" \t"));
+                    size_t i;
+                    for (i = 0; i < dbs.size(); i++)
+                    {
+                        if (dbs[i].length() == 0)
+                            continue;
+                        std::list<ConfPtr>::const_iterator it = db_conf.begin();
+                        while (1)
+                            if (it == db_conf.end())
+                            {
+                                throw mp::filter::FilterException(
+                                    "include db not found: " + dbs[i]);
+                            }
+                            else if (dbs[i].compare((*it)->db) == 0)
+                            {
+                                yaz_sparql_include(s, (*it)->s);
+                                break;
+                            }
+                            else
+                                it++;
+                    }
                 }
                 else
                     throw mp::filter::FilterException(
@@ -439,6 +462,7 @@ Z_Records *yf::SPARQL::Session::fetch(
     std::list<Result>::iterator it = fset->results.begin();
     const char *schema = 0;
     bool uri_lookup = false;
+    bool fetch_logged = false;
     if (esn && esn->which == Z_ElementSetNames_generic)
         schema = esn->u.generic;
 
@@ -497,7 +521,8 @@ Z_Records *yf::SPARQL::Session::fetch(
             {
                 if (n->type == XML_ELEMENT_NODE)
                 {
-                    if (!strcmp((const char *) n->name, "uri"))
+                    if (!strcmp((const char *) n->name, "uri") ||
+                        !strcmp((const char *) n->name, "bnode") )
                     {
                         uri = mp::xml::get_text(n->children);
 
@@ -525,7 +550,18 @@ Z_Records *yf::SPARQL::Session::fetch(
                                                       uri.c_str(), schema);
                 if (!error)
                 {
-                    yaz_log(YLOG_LOG, "query=%s", query.c_str());
+                    if (!fetch_logged)
+                    { // Log the fetch query only once
+                        package.log("sparql", YLOG_LOG,
+                            "fetch query: for %s \n%s",
+                            uri.c_str(), query.c_str() );
+                        fetch_logged = true;
+                    }
+                    else
+                    {
+                        package.log("sparql", YLOG_LOG,
+                            "fetch uri:%s", uri.c_str() );
+                    }
                     error = invoke_sparql(package, query.c_str(),
                                           it->conf, w);
                 }
@@ -592,7 +628,7 @@ int yf::SPARQL::Session::invoke_sparql(mp::Package &package,
     gdu->u.HTTP_Request->content_buf = path;
     gdu->u.HTTP_Request->content_len = strlen(path);
 
-    yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query);
+    yaz_log(YLOG_DEBUG, "sparql: HTTP request\n%s", sparql_query);
 
     http_package.request() = gdu;
     http_package.move();
@@ -606,8 +642,14 @@ int yf::SPARQL::Session::invoke_sparql(mp::Package &package,
     }
     else if (gdu_resp->u.HTTP_Response->code != 200)
     {
+        Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
         wrbuf_printf(w, "sparql: HTTP error %d from backend",
-                     gdu_resp->u.HTTP_Response->code);
+                     resp->code);
+        package.log("sparql", YLOG_LOG,
+            "HTTP error %d from backend ",
+            resp->code );
+        package.log("sparql", YLOG_LOG,
+            "%.*s" , resp->content_len, resp->content_buf );
         return YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
     }
     Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
@@ -615,6 +657,134 @@ int yf::SPARQL::Session::invoke_sparql(mp::Package &package,
     return 0;
 }
 
+/*
+ * Build the records for a fetch against an "explain" result set.
+ *
+ * The result set is the list of database configurations stored in
+ * fset->explaindblist. For each requested position an XML record is
+ * produced containing the database name as <title> followed by whatever
+ * yaz_sparql_explain_indexes() emits for that configuration.
+ *
+ * start            1-based position of the first record wanted.
+ * number           number of records wanted; may exceed what is available.
+ * number_returned  out: number of records actually placed in the result.
+ * next_position    out: start + number, or 0 when that is past the end of
+ *                  the list.
+ *
+ * package, preferredRecordSyntax, esn, error_code and addinfo are accepted
+ * for signature parity with fetch() but are currently not used here.
+ *
+ * Returns an ODR-allocated Z_Records of type Z_Records_DBOSD.
+ */
+Z_Records *yf::SPARQL::Session::explain_fetch(
+    Package &package,
+    FrontendSetPtr fset,
+    ODR odr, Odr_oid *preferredRecordSyntax,
+    Z_ElementSetNames *esn,
+    int start, int number, int &error_code, std::string &addinfo,
+    int *number_returned, int *next_position)
+{
+    const int total = static_cast<int>(fset->explaindblist.size());
+
+    // Work out how many records can really be delivered BEFORE allocating.
+    // `start` and `number` originate from the client request; sizing the
+    // pointer array from a negative or huge `number` would turn into an
+    // enormous allocation request.
+    int count = 0;
+    if (start >= 1 && start <= total && number > 0)
+    {
+        count = total - start + 1;
+        if (number < count)
+            count = number;
+    }
+
+    Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records));
+    rec->which = Z_Records_DBOSD;
+    Z_NamePlusRecordList *list = (Z_NamePlusRecordList *)
+        odr_malloc(odr, sizeof(Z_NamePlusRecordList));
+    rec->u.databaseOrSurDiagnostics = list;
+    // Always allocate at least one slot so the pointer is usable even for
+    // an empty result.
+    list->records = (Z_NamePlusRecord **)
+        odr_malloc(odr, sizeof(Z_NamePlusRecord *) * (count > 0 ? count : 1));
+
+    for (int i = 0; i < count; i++)
+    {
+        const ConfPtr &cp = fset->explaindblist[start - 1 + i];
+
+        mp::wrbuf w;
+        wrbuf_puts(w,"<info>\n");
+        wrbuf_puts(w,"  <databaseInfo>\n");
+        wrbuf_puts(w,"    <title>");
+        wrbuf_xmlputs(w, cp->db.c_str());
+        wrbuf_puts(w,"</title>\n");
+        wrbuf_puts(w,"  </databaseInfo>\n");
+        yaz_sparql_explain_indexes(cp->s, w, 2);
+        wrbuf_puts(w,"</info>\n");
+
+        Z_NamePlusRecord *npr = (Z_NamePlusRecord *)
+            odr_malloc(odr, sizeof(Z_NamePlusRecord));
+        // Records are labelled with the database the result set was
+        // created for (fset->db), not with the database being described.
+        npr->databaseName = odr_strdup(odr, fset->db.c_str());
+        npr->which = Z_NamePlusRecord_databaseRecord;
+        npr->u.databaseRecord =
+            z_ext_record_xml(odr, w.buf(), w.len());
+        list->records[i] = npr;
+    }
+    list->num_records = count;
+    *number_returned = count;
+
+    // Sum in a wider type so that a large client-supplied `number` cannot
+    // overflow int.
+    const long long next = static_cast<long long>(start) + number;
+    if (next > total)
+        *next_position = 0;
+    else
+        *next_position = static_cast<int>(next);
+    return rec;
+}
+
+
+
+/*
+ * Handle a search against the magic "explain" database.
+ *
+ * The last space-separated word of explain_query is taken as the search
+ * term. If it starts with a letter, only the searchable database whose
+ * name equals the term is selected; otherwise all searchable databases
+ * (those with a non-empty schema) are selected. The selection is stored
+ * in fset->explaindblist, and fset is registered in m_frontend_sets under
+ * the request's result set name.
+ *
+ * If mp::util::piggyback_sr() reports a non-zero record count, records are
+ * built with explain_fetch() and attached to the response.
+ *
+ * package        used for logging and passed on to explain_fetch().
+ * apdu_req       the search request APDU.
+ * odr            ODR used to create the response.
+ * explain_query  textual form of the query.
+ * fset           frontend set that receives the selected databases.
+ *
+ * Returns the search response APDU.
+ */
+Z_APDU *yf::SPARQL::Session::explain_search(mp::Package &package,
+                           Z_APDU *apdu_req,
+                           mp::odr &odr,
+                           const char *explain_query,
+                           FrontendSetPtr fset)
+{
+    Z_SearchRequest *req = apdu_req->u.searchRequest;
+    Z_APDU *apdu_res = 0;
+
+    package.log("sparql", YLOG_LOG, "Explain search '%s'", explain_query );
+
+    // The term is whatever follows the last space. When there is no space
+    // at all the whole query is the term; this also keeps us inside the
+    // buffer for an empty query string.
+    const char *last_space = strrchr(explain_query, ' ');
+    const char *term = last_space ? last_space + 1 : explain_query;
+    // isalpha() is only defined for unsigned char values (and EOF), so
+    // cast to avoid undefined behaviour on bytes >= 0x80.
+    if ( ! isalpha( static_cast<unsigned char>(*term) ) )
+        term=""; // anything non-alpha is taken to mean all
+                 // Empty string is seen here as two double quotes ""
+                 // so it returns all bases as well
+
+    int numbases = 0;
+    m_frontend_sets[req->resultSetName] = fset;
+    fset->explaindblist.clear();
+    fset->explaindblist.reserve(m_sparql->db_conf.size());
+
+    std::list<ConfPtr>::const_iterator it = m_sparql->db_conf.begin();
+    for (; it != m_sparql->db_conf.end(); ++it)
+    {
+        if ((*it)->schema.length() == 0)
+            continue; // not a searchable db
+        if (*term && strcmp(term, (*it)->db.c_str()) != 0)
+            continue; // a specific db was asked for, and this is not it
+        numbases++;
+        package.log("sparql", YLOG_LOG, "Explain %d: '%s'",
+                    numbases, (*it)->db.c_str() );
+        fset->explaindblist.push_back(*it);
+    }
+
+    int number_returned = 0;
+    int next_position = 0;
+    Z_Records *records = 0;
+    int error_code = 0;
+    std::string addinfo;
+
+    Odr_int number = 0;
+    const char *element_set_name = 0;
+    // piggyback_sr presumably decides, from the request's set bounds and
+    // the hit count, how many records to return with the search response.
+    mp::util::piggyback_sr(req, numbases, number, &element_set_name);
+    if (number)
+    {
+        Z_ElementSetNames *esn;
+
+        if (number > *req->smallSetUpperBound)
+            esn = req->mediumSetElementSetNames;
+        else
+            esn = req->smallSetElementSetNames;
+        records = explain_fetch(package, fset,
+                        odr, req->preferredRecordSyntax, esn,
+                        1, number,
+                        error_code, addinfo,
+                        &number_returned,
+                        &next_position);
+    }
+
+    if (error_code)
+    {
+        apdu_res = odr.create_searchResponse(
+                apdu_req, error_code, addinfo.c_str());
+    }
+    else
+    {
+        apdu_res = odr.create_searchResponse(apdu_req, 0, 0);
+        Z_SearchResponse *resp = apdu_res->u.searchResponse;
+        *resp->resultCount = numbases;
+        *resp->numberOfRecordsReturned = number_returned;
+        *resp->nextResultSetPosition = next_position;
+        resp->records = records;
+    }
+
+    return apdu_res;
+}
+
+
 Z_APDU *yf::SPARQL::Session::search(mp::Package &package,
                                     Z_APDU *apdu_req,
                                     mp::odr &odr,
@@ -625,6 +795,9 @@ Z_APDU *yf::SPARQL::Session::search(mp::Package &package,
     Z_APDU *apdu_res = 0;
     mp::wrbuf w;
 
+    package.log("sparql", YLOG_LOG,
+        "search query:\n%s", sparql_query );
+
     int error = invoke_sparql(package, sparql_query, conf, w);
     if (error)
     {
@@ -654,7 +827,7 @@ Z_APDU *yf::SPARQL::Session::search(mp::Package &package,
             result.doc = doc;
             result.conf = conf;
             fset->results.push_back(result);
-            yaz_log(YLOG_LOG, "saving sparql result xmldoc=%p", doc);
+            yaz_log(YLOG_DEBUG, "saving sparql result xmldoc=%p", doc);
 
             get_result(result.doc, &fset->hits, -1, 0);
             m_frontend_sets[req->resultSetName] = fset;
@@ -782,36 +955,50 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
 
             m_frontend_sets.erase(req->resultSetName);
             fset->db = db;
-            it = m_sparql->db_conf.begin();
-            for (; it != m_sparql->db_conf.end(); it++)
-                if ((*it)->schema.length() > 0
-                    && yaz_match_glob((*it)->db.c_str(), db.c_str()))
-                {
-                    mp::wrbuf addinfo_wr;
-                    mp::wrbuf sparql_wr;
-                    int error =
-                        yaz_sparql_from_rpn_wrbuf((*it)->s,
-                                                  addinfo_wr, sparql_wr,
-                                                  req->query->u.type_1);
-                    if (error)
-                    {
-                        apdu_res = odr.create_searchResponse(
-                            apdu_req, error,
-                            addinfo_wr.len() ? addinfo_wr.c_str() : 0);
-                    }
-                    else
+            if ( db != "info" )
+            {
+                it = m_sparql->db_conf.begin();
+                for (; it != m_sparql->db_conf.end(); it++)
+                    if ((*it)->schema.length() > 0
+                        && yaz_match_glob((*it)->db.c_str(), db.c_str()))
                     {
-                        Z_APDU *apdu_1 = search(package, apdu_req, odr,
-                                                sparql_wr.c_str(), *it,
-                                                fset);
-                        if (!apdu_res)
-                            apdu_res = apdu_1;
+                        mp::wrbuf addinfo_wr;
+                        mp::wrbuf sparql_wr;
+                        int error =
+                            yaz_sparql_from_rpn_wrbuf((*it)->s,
+                                                    addinfo_wr, sparql_wr,
+                                                    req->query->u.type_1);
+                        if (error)
+                        {
+                            apdu_res = odr.create_searchResponse(
+                                apdu_req, error,
+                                addinfo_wr.len() ? addinfo_wr.c_str() : 0);
+                        }
+                        else
+                        {
+                            Z_APDU *apdu_1 = search(package, apdu_req, odr,
+                                                    sparql_wr.c_str(), *it,
+                                                    fset);
+                            if (!apdu_res)
+                                apdu_res = apdu_1;
+                        }
                     }
+                if (apdu_res == 0)
+                {
+                    apdu_res = odr.create_searchResponse(
+                        apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str());
                 }
-            if (apdu_res == 0)
-            {
-                apdu_res = odr.create_searchResponse(
-                    apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str());
+            }
+            else
+            { // The magic "explain" base
+                yaz_log(YLOG_LOG,"About to call explain_search");
+                mp::wrbuf qry;
+                yaz_query_to_wrbuf(qry, req->query);
+                apdu_res = explain_search( package, apdu_req, odr,
+                                           qry.c_str(), fset);
+                  // TODO - Extract at least a term from the query, and
+                  // do some filtering by that
+                yaz_log(YLOG_LOG,"Returned from explain_search");
             }
         }
     }
@@ -849,14 +1036,25 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
                 return;
             }
         }
-        Z_Records *records = fetch(
-            package,
-            fset_it->second,
-            odr, req->preferredRecordSyntax, esn,
-            *req->resultSetStartPoint, *req->numberOfRecordsRequested,
-            error_code, addinfo,
-            &number_returned,
-            &next_position);
+        Z_Records *records;
+        if ( fset_it->second->explaindblist.size() > 0 )
+            records = explain_fetch(
+                package,
+                fset_it->second,
+                odr, req->preferredRecordSyntax, esn,
+                *req->resultSetStartPoint, *req->numberOfRecordsRequested,
+                error_code, addinfo,
+                &number_returned,
+                &next_position);
+        else
+            records = fetch(
+                package,
+                fset_it->second,
+                odr, req->preferredRecordSyntax, esn,
+                *req->resultSetStartPoint, *req->numberOfRecordsRequested,
+                error_code, addinfo,
+                &number_returned,
+                &next_position);
         if (error_code)
         {
             apdu_res =