Allow no element set name again
[mp-sparql-moved-to-github.git] / src / filter_sparql.cpp
index 6bc889e..34759e5 100644 (file)
@@ -38,6 +38,7 @@ namespace metaproxy_1 {
             class Session;
             class Rep;
             class Conf;
+            class Result;
             class FrontendSet;
 
             typedef boost::shared_ptr<Session> SessionPtr;
@@ -60,6 +61,7 @@ namespace metaproxy_1 {
         public:
             std::string db;
             std::string uri;
+            std::string schema;
             yaz_sparql_t s;
             ~Conf();
         };
@@ -69,27 +71,40 @@ namespace metaproxy_1 {
             boost::mutex m_mutex;
             std::map<mp::Session,SessionPtr> m_clients;
         };
-        class SPARQL::FrontendSet {
+        class SPARQL::Result {
         public:
-            FrontendSet();
-            ~FrontendSet();
+            Result();
+            ~Result();
+        private:
+            friend class FrontendSet;
+            friend class Session;
+            ConfPtr conf;
+            xmlDoc *doc;
+        };
+        class SPARQL::FrontendSet {
         private:
             friend class Session;
             Odr_int hits;
             std::string db;
-            xmlDoc *doc;
+            std::list<Result> results;
         };
         class SPARQL::Session {
         public:
             Session(const SPARQL *);
             ~Session();
             void handle_z(Package &package, Z_APDU *apdu);
-            Z_APDU *run_sparql(mp::Package &package,
-                               Z_APDU *apdu_req,
-                               mp::odr &odr,
-                               const char *sparql_query,
-                               const char *uri);
+            Z_APDU *search(mp::Package &package,
+                           Z_APDU *apdu_req,
+                           mp::odr &odr,
+                           const char *sparql_query,
+                           ConfPtr conf, FrontendSetPtr fset);
+            int invoke_sparql(mp::Package &package,
+                              const char *sparql_query,
+                              ConfPtr conf,
+                              WRBUF w);
+
             Z_Records *fetch(
+                Package &package,
                 FrontendSetPtr fset,
                 ODR odr, Odr_oid *preferredRecordSyntax,
                 Z_ElementSetNames *esn,
@@ -104,13 +119,13 @@ namespace metaproxy_1 {
     }
 }
 
-yf::SPARQL::FrontendSet::~FrontendSet()
+yf::SPARQL::Result::~Result()
 {
     if (doc)
         xmlFreeDoc(doc);
 }
 
-yf::SPARQL::FrontendSet::FrontendSet()
+yf::SPARQL::Result::Result()
 {
     doc = 0;
 }
@@ -127,16 +142,31 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only,
                            const char *path)
 {
     const xmlNode *ptr = xmlnode->children;
+    std::string uri;
 
     for (; ptr; ptr = ptr->next)
     {
         if (ptr->type != XML_ELEMENT_NODE)
             continue;
-        if (!strcmp((const char *) ptr->name, "db"))
+        if (!strcmp((const char *) ptr->name, "defaults"))
+        {
+            const struct _xmlAttr *attr;
+            for (attr = ptr->properties; attr; attr = attr->next)
+            {
+                if (!strcmp((const char *) attr->name, "uri"))
+                    uri = mp::xml::get_text(attr->children);
+                else
+                    throw mp::filter::FilterException(
+                        "Bad attribute " + std::string((const char *)
+                                                       attr->name));
+            }
+        }
+        else if (!strcmp((const char *) ptr->name, "db"))
         {
             yaz_sparql_t s = yaz_sparql_create();
             ConfPtr conf(new Conf);
             conf->s = s;
+            conf->uri = uri;
 
             const struct _xmlAttr *attr;
             for (attr = ptr->properties; attr; attr = attr->next)
@@ -145,6 +175,8 @@ void yf::SPARQL::configure(const xmlNode *xmlnode, bool test_only,
                     conf->db = mp::xml::get_text(attr->children);
                 else if (!strcmp((const char *) attr->name, "uri"))
                     conf->uri = mp::xml::get_text(attr->children);
+                else if (!strcmp((const char *) attr->name, "schema"))
+                    conf->schema = mp::xml::get_text(attr->children);
                 else
                     throw mp::filter::FilterException(
                         "Bad attribute " + std::string((const char *)
@@ -265,37 +297,120 @@ void yf::SPARQL::release_session(Package &package) const
     }
 }
 
-static xmlNode *get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos)
+static bool get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos, xmlDoc **ndoc)
 {
     xmlNode *ptr = xmlDocGetRootElement(doc);
+    xmlNode *q0;
     Odr_int cur = 0;
-    for (; ptr; ptr = ptr->next)
-        if (ptr->type == XML_ELEMENT_NODE &&
-            !strcmp((const char *) ptr->name, "sparql"))
-            break;
-    if (ptr)
+
+    if (ndoc)
+        *ndoc = xmlNewDoc(BAD_CAST "1.0");
+
+    if (ptr->type == XML_ELEMENT_NODE &&
+        !strcmp((const char *) ptr->name, "RDF"))
     {
-        for (ptr = ptr->children; ptr; ptr = ptr->next)
-            if (ptr->type == XML_ELEMENT_NODE &&
-                !strcmp((const char *) ptr->name, "results"))
-                break;
+        if (ndoc)
+        {
+            q0 = xmlCopyNode(ptr, 2);
+            xmlDocSetRootElement(*ndoc, q0);
+        }
+        ptr = ptr->children;
+
+        while (ptr && ptr->type != XML_ELEMENT_NODE)
+            ptr = ptr->next;
+        if (ptr && ptr->type == XML_ELEMENT_NODE &&
+            !strcmp((const char *) ptr->name, "Description"))
+        {
+            xmlNode *p = ptr->children;
+
+            while (p && p->type != XML_ELEMENT_NODE)
+                p = p->next;
+            if (p && p->type == XML_ELEMENT_NODE &&
+                !strcmp((const char *) p->name, "type"))
+            { /* SELECT RESULT */
+                for (ptr = ptr->children; ptr; ptr = ptr->next)
+                    if (ptr->type == XML_ELEMENT_NODE &&
+                        !strcmp((const char *) ptr->name, "solution"))
+                    {
+                        if (cur++ == pos)
+                        {
+                            if (ndoc)
+                            {
+                                xmlNode *q1 = xmlCopyNode(ptr, 1);
+                                xmlAddChild(q0, q1);
+                            }
+                            break;
+                        }
+                    }
+            }
+            else
+            {   /* CONSTRUCT result */
+                for (; ptr; ptr = ptr->next)
+                    if (ptr->type == XML_ELEMENT_NODE &&
+                        !strcmp((const char *) ptr->name, "Description"))
+                    {
+                        if (cur++ == pos)
+                        {
+                            if (ndoc)
+                            {
+                                xmlNode *q1 = xmlCopyNode(ptr, 1);
+                                xmlAddChild(q0, q1);
+                            }
+                            return true;
+                        }
+                    }
+            }
+        }
     }
-    if (ptr)
+    else
     {
-        for (ptr = ptr->children; ptr; ptr = ptr->next)
+        for (; ptr; ptr = ptr->next)
             if (ptr->type == XML_ELEMENT_NODE &&
-                !strcmp((const char *) ptr->name, "result"))
+                !strcmp((const char *) ptr->name, "sparql"))
+                break;
+        if (ptr)
+        {
+            if (ndoc)
             {
-                if (cur++ == pos)
+                q0 = xmlCopyNode(ptr, 2);
+                xmlDocSetRootElement(*ndoc, q0);
+            }
+            for (ptr = ptr->children; ptr; ptr = ptr->next)
+                if (ptr->type == XML_ELEMENT_NODE &&
+                    !strcmp((const char *) ptr->name, "results"))
                     break;
+        }
+        if (ptr)
+        {
+            xmlNode *q1 = 0;
+            if (ndoc)
+            {
+                q1 = xmlCopyNode(ptr, 0);
+                xmlAddChild(q0, q1);
             }
+            for (ptr = ptr->children; ptr; ptr = ptr->next)
+                if (ptr->type == XML_ELEMENT_NODE &&
+                    !strcmp((const char *) ptr->name, "result"))
+                {
+                    if (cur++ == pos)
+                    {
+                        if (ndoc)
+                        {
+                            xmlNode *q2 = xmlCopyNode(ptr, 1);
+                            xmlAddChild(q1, q2);
+                        }
+                        return true;
+                    }
+                }
+        }
     }
     if (sz)
         *sz = cur;
-    return ptr;
+    return false;
 }
 
 Z_Records *yf::SPARQL::Session::fetch(
+    Package &package,
     FrontendSetPtr fset,
     ODR odr, Odr_oid *preferredRecordSyntax,
     Z_ElementSetNames *esn,
@@ -303,6 +418,32 @@ Z_Records *yf::SPARQL::Session::fetch(
     int *number_returned, int *next_position)
 {
     Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records));
+    std::list<Result>::iterator it = fset->results.begin();
+    const char *schema = 0;
+    bool uri_lookup = false;
+    if (esn && esn->which == Z_ElementSetNames_generic)
+        schema = esn->u.generic;
+
+    for (; it != fset->results.end(); it++)
+    {
+        if (yaz_sparql_lookup_schema(it->conf->s, schema))
+        {
+            uri_lookup = true;
+            break;
+        }
+        if (!schema || !strcmp(esn->u.generic, it->conf->schema.c_str()))
+            break;
+    }
+    if (it == fset->results.end())
+    {
+        rec->which = Z_Records_NSD;
+        rec->u.nonSurrogateDiagnostic =
+            zget_DefaultDiagFormat(
+                odr,
+                YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_,
+                schema);
+        return rec;
+    }
     rec->which = Z_Records_DBOSD;
     rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *)
         odr_malloc(odr, sizeof(Z_NamePlusRecordList));
@@ -316,19 +457,86 @@ Z_Records *yf::SPARQL::Session::fetch(
         Z_NamePlusRecord *npr = rec->u.databaseOrSurDiagnostics->records[i];
         npr->databaseName = odr_strdup(odr, fset->db.c_str());
         npr->which = Z_NamePlusRecord_databaseRecord;
+        xmlDoc *ndoc = 0;
 
-        xmlNode *node = get_result(fset->doc, 0, start - 1 + i);
-        if (!node)
+        if (!get_result(it->doc, 0, start - 1 + i, &ndoc))
+        {
+            if (ndoc)
+                xmlFreeDoc(ndoc);
+            break;
+        }
+        xmlNode *ndoc_root = xmlDocGetRootElement(ndoc);
+        if (!ndoc_root)
+        {
+            xmlFreeDoc(ndoc);
             break;
-        assert(node->type == XML_ELEMENT_NODE);
-        assert(!strcmp((const char *) node->name, "result"));
-        xmlNode *tmp = xmlCopyNode(node, 1);
-        xmlBufferPtr buf = xmlBufferCreate();
-        xmlNodeDump(buf, tmp->doc, tmp, 0, 0);
-        npr->u.databaseRecord =
-            z_ext_record_xml(odr, (const char *) buf->content, buf->use);
-        xmlFreeNode(tmp);
-        xmlBufferFree(buf);
+        }
+        if (uri_lookup)
+        {
+            std::string uri;
+            xmlNode *n = ndoc_root;
+            while (n)
+            {
+                if (n->type == XML_ELEMENT_NODE)
+                {
+                    if (!strcmp((const char *) n->name, "uri"))
+                    {
+                        uri = mp::xml::get_text(n->children);
+
+                    }
+                    n = n->children;
+                }
+                else
+                    n = n->next;
+            }
+            if (!uri.length())
+            {
+                rec->which = Z_Records_NSD;
+                rec->u.nonSurrogateDiagnostic =
+                    zget_DefaultDiagFormat(
+                        odr,
+                        YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS, 0);
+                xmlFreeDoc(ndoc);
+                return rec;
+            }
+            else
+            {
+                mp::wrbuf addinfo, query, w;
+                int error = yaz_sparql_from_uri_wrbuf(it->conf->s,
+                                                      addinfo, query,
+                                                      uri.c_str(), schema);
+                if (!error)
+                {
+                    yaz_log(YLOG_LOG, "query=%s", query.c_str());
+                    error = invoke_sparql(package, query.c_str(),
+                                          it->conf, w);
+                }
+                if (error)
+                {
+                    rec->which = Z_Records_NSD;
+                    rec->u.nonSurrogateDiagnostic =
+                        zget_DefaultDiagFormat(
+                            odr,
+                            error,
+                            addinfo.len() ? addinfo.c_str() : 0);
+                    xmlFreeDoc(ndoc);
+                    return rec;
+                }
+                npr->u.databaseRecord =
+                    z_ext_record_xml(odr, w.c_str(), w.len());
+            }
+        }
+        else
+        {
+            xmlBufferPtr buf = xmlBufferCreate();
+            xmlNodeDump(buf, ndoc, ndoc_root, 0, 0);
+            yaz_log(YLOG_LOG, "record %s %.*s", uri_lookup ? "uri" : "normal",
+                    (int) buf->use, (const char *) buf->content);
+            npr->u.databaseRecord =
+                z_ext_record_xml(odr, (const char *) buf->content, buf->use);
+            xmlBufferFree(buf);
+        }
+        xmlFreeDoc(ndoc);
     }
     rec->u.databaseOrSurDiagnostics->num_records = i;
     *number_returned = i;
@@ -339,20 +547,22 @@ Z_Records *yf::SPARQL::Session::fetch(
     return rec;
 }
 
-Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
-                                        Z_APDU *apdu_req,
-                                        mp::odr &odr,
-                                        const char *sparql_query,
-                                        const char *uri)
+int yf::SPARQL::Session::invoke_sparql(mp::Package &package,
+                                       const char *sparql_query,
+                                       ConfPtr conf,
+                                       WRBUF w)
 {
-    Z_SearchRequest *req = apdu_req->u.searchRequest;
     Package http_package(package.session(), package.origin());
+    mp::odr odr;
 
     http_package.copy_filter(package);
-    Z_GDU *gdu = z_get_HTTP_Request_uri(odr, uri, 0, 1);
+    Z_GDU *gdu = z_get_HTTP_Request_uri(odr, conf->uri.c_str(), 0, 1);
 
     z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
                       "Content-Type", "application/x-www-form-urlencoded");
+    z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
+                      "Accept", "application/sparql-results+xml,"
+                      "application/rdf+xml");
     const char *names[2];
     names[0] = "query";
     names[1] = 0;
@@ -370,46 +580,69 @@ Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
     http_package.move();
 
     Z_GDU *gdu_resp = http_package.response().get();
-    Z_APDU *apdu_res = 0;
+
     if (!gdu_resp || gdu_resp->which != Z_GDU_HTTP_Response)
     {
-        yaz_log(YLOG_LOG, "sparql: no HTTP response");
-        apdu_res = odr.create_searchResponse(apdu_req,
-                                             YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
-                                             "no HTTP response from backend");
+        wrbuf_puts(w, "no HTTP response from backend");
+        return YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
     }
     else if (gdu_resp->u.HTTP_Response->code != 200)
     {
-        mp::wrbuf w;
-
         wrbuf_printf(w, "sparql: HTTP error %d from backend",
                      gdu_resp->u.HTTP_Response->code);
-        apdu_res = odr.create_searchResponse(apdu_req,
-                                             YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
-                                             w.c_str());
+        return YAZ_BIB1_TEMPORARY_SYSTEM_ERROR;
+    }
+    Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
+    wrbuf_write(w, resp->content_buf, resp->content_len);
+    return 0;
+}
+
+Z_APDU *yf::SPARQL::Session::search(mp::Package &package,
+                                    Z_APDU *apdu_req,
+                                    mp::odr &odr,
+                                    const char *sparql_query,
+                                    ConfPtr conf, FrontendSetPtr fset)
+{
+    Z_SearchRequest *req = apdu_req->u.searchRequest;
+    Z_APDU *apdu_res = 0;
+    mp::wrbuf w;
+
+    int error = invoke_sparql(package, sparql_query, conf, w);
+    if (error)
+    {
+        apdu_res = odr.create_searchResponse(apdu_req, error,
+                                             w.len() ?
+                                             w.c_str() : 0);
     }
     else
     {
-        Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
-        FrontendSetPtr fset(new FrontendSet);
-
-        fset->doc = xmlParseMemory(resp->content_buf, resp->content_len);
-        fset->db = req->databaseNames[0];
-        if (!fset->doc)
-            apdu_res = odr.create_searchResponse(apdu_req,
-                                             YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
-                                             "invalid XML from backendbackend");
+        xmlDocPtr doc = xmlParseMemory(w.c_str(), w.len());
+        if (!doc)
+        {
+            apdu_res = odr.create_searchResponse(
+                apdu_req,
+                YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
+                "invalid XML from backendbackend");
+        }
         else
         {
+            Result result;
             Z_Records *records = 0;
             int number_returned = 0;
             int next_position = 0;
             int error_code = 0;
             std::string addinfo;
 
-            get_result(fset->doc, &fset->hits, -1);
+            result.doc = doc;
+            result.conf = conf;
+            fset->results.push_back(result);
+            yaz_log(YLOG_LOG, "saving sparql result xmldoc=%p", doc);
+
+            get_result(result.doc, &fset->hits, -1, 0);
             m_frontend_sets[req->resultSetName] = fset;
 
+            result.doc = 0;
+
             Odr_int number = 0;
             const char *element_set_name = 0;
             mp::util::piggyback_sr(req, fset->hits, number, &element_set_name);
@@ -421,7 +654,7 @@ Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
                     esn = req->mediumSetElementSetNames;
                 else
                     esn = req->smallSetElementSetNames;
-                records = fetch(fset,
+                records = fetch(package, fset,
                                 odr, req->preferredRecordSyntax, esn,
                                 1, number,
                                 error_code, addinfo,
@@ -508,7 +741,7 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
                         apdu_req,
                         YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF,
                         0);
-                package.response() = apdu_res;
+                package.response() = apdu;
             }
             m_frontend_sets.erase(fset_it);
         }
@@ -527,40 +760,40 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
         {
             std::string db = req->databaseNames[0];
             std::list<ConfPtr>::const_iterator it;
+            FrontendSetPtr fset(new FrontendSet);
 
+            m_frontend_sets.erase(req->resultSetName);
+            fset->db = db;
             it = m_sparql->db_conf.begin();
             for (; it != m_sparql->db_conf.end(); it++)
                 if (yaz_match_glob((*it)->db.c_str(), db.c_str()))
-                    break;
-            if (it == m_sparql->db_conf.end())
+                {
+                    mp::wrbuf addinfo_wr;
+                    mp::wrbuf sparql_wr;
+                    int error =
+                        yaz_sparql_from_rpn_wrbuf((*it)->s,
+                                                  addinfo_wr, sparql_wr,
+                                                  req->query->u.type_1);
+                    if (error)
+                    {
+                        apdu_res = odr.create_searchResponse(
+                            apdu_req, error,
+                            addinfo_wr.len() ? addinfo_wr.c_str() : 0);
+                    }
+                    else
+                    {
+                        Z_APDU *apdu_1 = search(package, apdu_req, odr,
+                                                sparql_wr.c_str(), *it,
+                                                fset);
+                        if (!apdu_res)
+                            apdu_res = apdu_1;
+                    }
+                }
+            if (apdu_res == 0)
             {
                 apdu_res = odr.create_searchResponse(
                     apdu_req, YAZ_BIB1_DATABASE_DOES_NOT_EXIST, db.c_str());
             }
-            else
-            {
-                WRBUF addinfo_wr = wrbuf_alloc();
-                WRBUF sparql_wr = wrbuf_alloc();
-                int error =
-                    yaz_sparql_from_rpn_wrbuf((*it)->s,
-                                              addinfo_wr, sparql_wr,
-                                              req->query->u.type_1);
-                if (error)
-                {
-                    apdu_res = odr.create_searchResponse(
-                        apdu_req, error,
-                        wrbuf_len(addinfo_wr) ?
-                        wrbuf_cstr(addinfo_wr) : 0);
-                }
-                else
-                {
-                    apdu_res = run_sparql(package, apdu_req, odr,
-                                          wrbuf_cstr(sparql_wr),
-                                          (*it)->uri.c_str());
-                }
-                wrbuf_destroy(addinfo_wr);
-                wrbuf_destroy(sparql_wr);
-            }
         }
     }
     else if (apdu_req->which == Z_APDU_presentRequest)
@@ -598,6 +831,7 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
             }
         }
         Z_Records *records = fetch(
+            package,
             fset_it->second,
             odr, req->preferredRecordSyntax, esn,
             *req->resultSetStartPoint, *req->numberOfRecordsRequested,