Result construction MPSPARCL-4
[mp-sparql-moved-to-github.git] / src / filter_sparql.cpp
index 8900033..8aa9bab 100644 (file)
@@ -38,9 +38,13 @@ namespace metaproxy_1 {
             class Session;
             class Rep;
             class Conf;
+            class FrontendSet;
 
             typedef boost::shared_ptr<Session> SessionPtr;
             typedef boost::shared_ptr<Conf> ConfPtr;
+
+            typedef boost::shared_ptr<FrontendSet> FrontendSetPtr;
+            typedef std::map<std::string,FrontendSetPtr> FrontendSets;
         public:
             SPARQL();
             ~SPARQL();
@@ -65,6 +69,16 @@ namespace metaproxy_1 {
             boost::mutex m_mutex;
             std::map<mp::Session,SessionPtr> m_clients;
         };
+        class SPARQL::FrontendSet {
+        public:
+            FrontendSet();
+            ~FrontendSet();
+        private:
+            friend class Session;
+            Odr_int hits;
+            std::string db;
+            xmlDoc *doc;
+        };
         class SPARQL::Session {
         public:
             Session(const SPARQL *);
@@ -75,14 +89,32 @@ namespace metaproxy_1 {
                                mp::odr &odr,
                                const char *sparql_query,
                                const char *uri);
+            Z_Records *fetch(
+                FrontendSetPtr fset,
+                ODR odr, Odr_oid *preferredRecordSyntax,
+                Z_ElementSetNames *esn,
+                int start, int number, int &error_code, std::string &addinfo,
+                int *number_returned, int *next_position);
             bool m_in_use;
         private:
             bool m_support_named_result_sets;
+            FrontendSets m_frontend_sets;
             const SPARQL *m_sparql;
         };
     }
 }
 
+// Releases the parsed XML document held by this result set, if there is one.
+yf::SPARQL::FrontendSet::~FrontendSet()
+{
+    if (doc != 0)
+    {
+        xmlFreeDoc(doc);
+    }
+}
+
+// Creates an empty result set: zero hits, empty database name, no document.
+// `hits` is initialised as well so it never holds an indeterminate value
+// before the results have been counted.
+yf::SPARQL::FrontendSet::FrontendSet()
+    : hits(0), doc(0)
+{
+}
+
 // Constructs the filter; m_p is initialised with a newly allocated Rep.
 yf::SPARQL::SPARQL() : m_p(new Rep)
 {
 }
@@ -233,12 +265,127 @@ void yf::SPARQL::release_session(Package &package) const
     }
 }
 
+static xmlNode *get_result(xmlDoc *doc, Odr_int *sz, Odr_int pos)
+{
+    xmlNode *ptr = xmlDocGetRootElement(doc);
+    Odr_int cur = 0;
+
+    if (ptr->type == XML_ELEMENT_NODE &&
+        !strcmp((const char *) ptr->name, "RDF"))
+    {
+        ptr = ptr->children;
+
+        while (ptr && ptr->type != XML_ELEMENT_NODE)
+            ptr = ptr->next;
+        if (ptr && ptr->type == XML_ELEMENT_NODE &&
+            !strcmp((const char *) ptr->name, "Description"))
+        {
+            xmlNode *p = ptr->children;
+
+            while (p && p->type != XML_ELEMENT_NODE)
+                p = p->next;
+            if (p && p->type == XML_ELEMENT_NODE &&
+                !strcmp((const char *) p->name, "type"))
+            { /* SELECT RESULT */
+                for (ptr = ptr->children; ptr; ptr = ptr->next)
+                    if (ptr->type == XML_ELEMENT_NODE &&
+                        !strcmp((const char *) ptr->name, "solution"))
+                    {
+                        if (cur++ == pos)
+                            break;
+                    }
+            }
+            else
+            {   /* CONSTRUCT result */
+                for (; ptr; ptr = ptr->next)
+                    if (ptr->type == XML_ELEMENT_NODE &&
+                        !strcmp((const char *) ptr->name, "Description"))
+                    {
+                        if (cur++ == pos)
+                            break;
+                    }
+            }
+        }
+    }
+    else
+    {
+        for (; ptr; ptr = ptr->next)
+            if (ptr->type == XML_ELEMENT_NODE &&
+                !strcmp((const char *) ptr->name, "sparql"))
+                break;
+        if (ptr)
+        {
+            for (ptr = ptr->children; ptr; ptr = ptr->next)
+                if (ptr->type == XML_ELEMENT_NODE &&
+                    !strcmp((const char *) ptr->name, "results"))
+                    break;
+        }
+        if (ptr)
+        {
+            for (ptr = ptr->children; ptr; ptr = ptr->next)
+                if (ptr->type == XML_ELEMENT_NODE &&
+                    !strcmp((const char *) ptr->name, "result"))
+                {
+                    if (cur++ == pos)
+                        break;
+                }
+        }
+    }
+    if (sz)
+        *sz = cur;
+    return ptr;
+}
+
+Z_Records *yf::SPARQL::Session::fetch(
+    FrontendSetPtr fset,
+    ODR odr, Odr_oid *preferredRecordSyntax,
+    Z_ElementSetNames *esn,
+    int start, int number, int &error_code, std::string &addinfo,
+    int *number_returned, int *next_position)
+{
+    Z_Records *rec = (Z_Records *) odr_malloc(odr, sizeof(Z_Records));
+    rec->which = Z_Records_DBOSD;
+    rec->u.databaseOrSurDiagnostics = (Z_NamePlusRecordList *)
+        odr_malloc(odr, sizeof(Z_NamePlusRecordList));
+    rec->u.databaseOrSurDiagnostics->records = (Z_NamePlusRecord **)
+        odr_malloc(odr, sizeof(Z_NamePlusRecord *) * number);
+    int i;
+    for (i = 0; i < number; i++)
+    {
+        rec->u.databaseOrSurDiagnostics->records[i] = (Z_NamePlusRecord *)
+            odr_malloc(odr, sizeof(Z_NamePlusRecord));
+        Z_NamePlusRecord *npr = rec->u.databaseOrSurDiagnostics->records[i];
+        npr->databaseName = odr_strdup(odr, fset->db.c_str());
+        npr->which = Z_NamePlusRecord_databaseRecord;
+
+        xmlNode *node = get_result(fset->doc, 0, start - 1 + i);
+        if (!node)
+            break;
+        assert(node->type == XML_ELEMENT_NODE);
+        xmlNode *tmp = xmlCopyNode(node, 1);
+        xmlBufferPtr buf = xmlBufferCreate();
+        xmlNodeDump(buf, tmp->doc, tmp, 0, 0);
+        npr->u.databaseRecord =
+            z_ext_record_xml(odr, (const char *) buf->content, buf->use);
+        xmlFreeNode(tmp);
+        xmlBufferFree(buf);
+    }
+    rec->u.databaseOrSurDiagnostics->num_records = i;
+    *number_returned = i;
+    if (start + number > fset->hits)
+        *next_position = 0;
+    else
+        *next_position = start + number;
+    return rec;
+}
+
 Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
                                         Z_APDU *apdu_req,
                                         mp::odr &odr,
                                         const char *sparql_query,
                                         const char *uri)
 {
+    Z_SearchRequest *req = apdu_req->u.searchRequest;
     Package http_package(package.session(), package.origin());
 
     http_package.copy_filter(package);
@@ -246,6 +393,8 @@ Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
 
     z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
                       "Content-Type", "application/x-www-form-urlencoded");
+    z_HTTP_header_add(odr, &gdu->u.HTTP_Request->headers,
+                      "Accept", "application/rdf+xml");
     const char *names[2];
     names[0] = "query";
     names[1] = 0;
@@ -257,22 +406,88 @@ Z_APDU *yf::SPARQL::Session::run_sparql(mp::Package &package,
     gdu->u.HTTP_Request->content_buf = path;
     gdu->u.HTTP_Request->content_len = strlen(path);
 
-
     yaz_log(YLOG_LOG, "sparql: HTTP request\n%s", sparql_query);
 
     http_package.request() = gdu;
     http_package.move();
 
     Z_GDU *gdu_resp = http_package.response().get();
-    if (gdu_resp && gdu_resp->which == Z_GDU_HTTP_Response)
+    Z_APDU *apdu_res = 0;
+    if (!gdu_resp || gdu_resp->which != Z_GDU_HTTP_Response)
     {
-        Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
+        yaz_log(YLOG_LOG, "sparql: no HTTP response");
+        apdu_res = odr.create_searchResponse(apdu_req,
+                                             YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
+                                             "no HTTP response from backend");
+    }
+    else if (gdu_resp->u.HTTP_Response->code != 200)
+    {
+        mp::wrbuf w;
+
+        wrbuf_printf(w, "sparql: HTTP error %d from backend",
+                     gdu_resp->u.HTTP_Response->code);
+        apdu_res = odr.create_searchResponse(apdu_req,
+                                             YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
+                                             w.c_str());
     }
     else
     {
-        yaz_log(YLOG_LOG, "sparql: no HTTP response");
+        Z_HTTP_Response *resp = gdu_resp->u.HTTP_Response;
+        FrontendSetPtr fset(new FrontendSet);
+
+        fset->doc = xmlParseMemory(resp->content_buf, resp->content_len);
+        fset->db = req->databaseNames[0];
+        if (!fset->doc)
+            apdu_res = odr.create_searchResponse(apdu_req,
+                                             YAZ_BIB1_TEMPORARY_SYSTEM_ERROR,
+                                             "invalid XML from backendbackend");
+        else
+        {
+            Z_Records *records = 0;
+            int number_returned = 0;
+            int next_position = 0;
+            int error_code = 0;
+            std::string addinfo;
+
+            get_result(fset->doc, &fset->hits, -1);
+            m_frontend_sets[req->resultSetName] = fset;
+
+            Odr_int number = 0;
+            const char *element_set_name = 0;
+            mp::util::piggyback_sr(req, fset->hits, number, &element_set_name);
+            if (number)
+            {
+                Z_ElementSetNames *esn;
+
+                if (number > *req->smallSetUpperBound)
+                    esn = req->mediumSetElementSetNames;
+                else
+                    esn = req->smallSetElementSetNames;
+                records = fetch(fset,
+                                odr, req->preferredRecordSyntax, esn,
+                                1, number,
+                                error_code, addinfo,
+                                &number_returned,
+                                &next_position);
+            }
+            if (error_code)
+            {
+                apdu_res =
+                    odr.create_searchResponse(
+                        apdu_req, error_code, addinfo.c_str());
+            }
+            else
+            {
+                apdu_res =
+                    odr.create_searchResponse(apdu_req, 0, 0);
+                Z_SearchResponse *resp = apdu_res->u.searchResponse;
+                *resp->resultCount = fset->hits;
+                *resp->numberOfRecordsReturned = number_returned;
+                *resp->nextResultSetPosition = next_position;
+                resp->records = records;
+            }
+        }
     }
-    Z_APDU *apdu_res = odr.create_searchResponse(apdu_req, 0, 0);
     return apdu_res;
 }
 
@@ -321,6 +536,24 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
     {
         Z_SearchRequest *req = apdu_req->u.searchRequest;
 
+        FrontendSets::iterator fset_it =
+            m_frontend_sets.find(req->resultSetName);
+        if (fset_it != m_frontend_sets.end())
+        {
+            // result set already exist
+            // if replace indicator is off: we return diagnostic if
+            // result set already exist.
+            if (*req->replaceIndicator == 0)
+            {
+                Z_APDU *apdu =
+                    odr.create_searchResponse(
+                        apdu_req,
+                        YAZ_BIB1_RESULT_SET_EXISTS_AND_REPLACE_INDICATOR_OFF,
+                        0);
+                package.response() = apdu_res;
+            }
+            m_frontend_sets.erase(fset_it);
+        }
         if (req->query->which != Z_Query_type_1)
         {
             apdu_res = odr.create_searchResponse(
@@ -372,6 +605,63 @@ void yf::SPARQL::Session::handle_z(mp::Package &package, Z_APDU *apdu_req)
             }
         }
     }
+    else if (apdu_req->which == Z_APDU_presentRequest)
+    {
+        Z_PresentRequest *req = apdu_req->u.presentRequest;
+        FrontendSets::iterator fset_it =
+            m_frontend_sets.find(req->resultSetId);
+        if (fset_it == m_frontend_sets.end())
+        {
+            apdu_res =
+                odr.create_presentResponse(
+                    apdu_req, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST,
+                    req->resultSetId);
+            package.response() = apdu_res;
+            return;
+        }
+        int number_returned = 0;
+        int next_position = 0;
+        int error_code = 0;
+        std::string addinfo;
+        Z_ElementSetNames *esn = 0;
+        if (req->recordComposition)
+        {
+            if (req->recordComposition->which == Z_RecordComp_simple)
+                esn = req->recordComposition->u.simple;
+            else
+            {
+                apdu_res =
+                    odr.create_presentResponse(
+                        apdu_req,
+                        YAZ_BIB1_ONLY_A_SINGLE_ELEMENT_SET_NAME_SUPPORTED,
+                        0);
+                package.response() = apdu_res;
+                return;
+            }
+        }
+        Z_Records *records = fetch(
+            fset_it->second,
+            odr, req->preferredRecordSyntax, esn,
+            *req->resultSetStartPoint, *req->numberOfRecordsRequested,
+            error_code, addinfo,
+            &number_returned,
+            &next_position);
+        if (error_code)
+        {
+            apdu_res =
+                odr.create_presentResponse(apdu_req, error_code,
+                                           addinfo.c_str());
+        }
+        else
+        {
+            apdu_res =
+                odr.create_presentResponse(apdu_req, 0, 0);
+            Z_PresentResponse *resp = apdu_res->u.presentResponse;
+            resp->records = records;
+            *resp->numberOfRecordsReturned = number_returned;
+            *resp->nextResultSetPosition = next_position;
+        }
+    }
     else
     {
         apdu_res = odr.create_close(apdu_req,