URL recipe in place
author Adam Dickmeiss <adam@indexdata.dk>
Thu, 30 Jun 2011 17:34:43 +0000 (19:34 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Thu, 30 Jun 2011 17:34:43 +0000 (19:34 +0200)
etc/config-zoom.xml
include/metaproxy/xmlutil.hpp
src/Makefile.am
src/filter_zoom.cpp
src/test_xmlutil.cpp [new file with mode: 0644]
src/url_recipe.cpp
src/xmlutil.cpp
xml/schema/filter_zoom.rnc
xml/schema/filter_zoom.rng
xml/schema/filter_zoom.xsd

index 6985f85..20246c1 100644 (file)
@@ -22,6 +22,7 @@
            <cclmap_ti>1=4 s=pw t=l,r</cclmap_ti>
            <requestSyntax>usmarc</requestSyntax>
            <transform>tmarc.xsl</transform>
+           <urlRecipe>http://sever.com?title=${md-title[\s+/+/g]}</urlRecipe>
            <zurl>localhost:9999/db01</zurl>
          </record>
          <record>
index ff265ef..ce17190 100644 (file)
@@ -51,7 +51,7 @@ namespace metaproxy_1 {
 
         void check_empty(const xmlNode *node);
 
-        void url_recipe_handle(xmlDoc *doc, std::string recipe);
+        std::string url_recipe_handle(xmlDoc *doc, std::string recipe);
     }
     class XMLError : public std::runtime_error {
     public:
index 903c637..7972352 100644 (file)
@@ -58,11 +58,11 @@ libmetaproxy_la_SOURCES = \
        xmlutil.cpp 
 
 
-libmetaproxy_la_LIBADD = $(YAZPPLALIB) $(BOOST_LIB) $(BOOST_THREAD_LIB)
+libmetaproxy_la_LIBADD = $(YAZPPLALIB) $(BOOST_LIB) $(BOOST_THREAD_LIB) $(BOOST_REGEX_LIB)
 
 # Rules for lib
 
-LDADD = libmetaproxy.la $(YAZPPLALIB) $(BOOST_LIB) $(BOOST_THREAD_LIB)
+LDADD = libmetaproxy.la $(YAZPPLALIB) $(BOOST_LIB) $(BOOST_THREAD_LIB) $(BOOST_REGEX_LIB)
 
 bin_PROGRAMS = metaproxy
 noinst_PROGRAMS = ex_filter_frontend_net ex_router_flexml tstdl
@@ -101,7 +101,8 @@ check_PROGRAMS = \
        test_filter_sru_to_z3950 \
        test_filter_virt_db \
        test_ses_map \
-       test_router_flexml
+       test_router_flexml \
+       test_xmlutil
 
 TESTS=$(check_PROGRAMS)
 
@@ -127,6 +128,7 @@ test_filter_sru_to_z3950_SOURCES = test_filter_sru_to_z3950.cpp
 test_filter_virt_db_SOURCES = test_filter_virt_db.cpp
 test_ses_map_SOURCES = test_ses_map.cpp
 test_router_flexml_SOURCES = test_router_flexml.cpp
+test_xmlutil_SOURCES = test_xmlutil.cpp
 
 TESTLDADD = $(LDADD) $(BOOST_TEST_LIB)
 
@@ -152,6 +154,7 @@ test_filter_virt_db_LDADD = $(TESTLDADD)
 test_router_flexml_LDADD = $(TESTLDADD)
 test_ses_map_LDADD = $(TESTLDADD)
 test_thread_pool_observer_LDADD = $(TESTLDADD)
+test_xmlutil_LDADD = $(TESTLDADD)
 
 # doxygen target
 dox:
index 39b5624..eb9f64b 100644 (file)
@@ -855,6 +855,7 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
                 }
                 
                 int rec_len;
+                xmlChar *xmlrec_buf = 0;
                 const char *rec_buf = ZOOM_record_get(recs[i], rec_type_str,
                                                       &rec_len);
                 if (rec_buf && b->xsp && enable_pz2_transform)
@@ -866,15 +867,35 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
                         rec_res = xsltApplyStylesheet(b->xsp, rec_doc, 0);
 
                         if (rec_res)
-                            xsltSaveResultToString((xmlChar **) &rec_buf, &rec_len,
+                            xsltSaveResultToString(&xmlrec_buf, &rec_len,
                                                    rec_res, b->xsp);
+                        rec_buf = (const char *) xmlrec_buf;
+                        xmlFreeDoc(rec_doc);
+                        xmlFreeDoc(rec_res);
                     }
                 }
 
                 if (rec_buf)
                 {
                     xmlDoc *doc = xmlParseMemory(rec_buf, rec_len);
-                    mp::xml::url_recipe_handle(doc, b->sptr->urlRecipe);
+                    std::string res = 
+                        mp::xml::url_recipe_handle(doc, b->sptr->urlRecipe);
+                    if (res.length())
+                    {
+                        xmlNode *ptr = xmlDocGetRootElement(doc);
+                        while (ptr && ptr->type != XML_ELEMENT_NODE)
+                            ptr = ptr->next;
+                        xmlNode *c = 
+                            xmlNewChild(ptr, 0, BAD_CAST "generated-url", 0);
+                        xmlNode * t = xmlNewText(BAD_CAST res.c_str());
+                        xmlAddChild(c, t);
+
+                        if (xmlrec_buf)
+                            xmlFree(xmlrec_buf);
+
+                        xmlDocDumpMemory(doc, &xmlrec_buf, &rec_len);
+                        rec_buf = (const char *) xmlrec_buf;
+                    }
                     xmlFreeDoc(doc);
                 }
                 if (rec_buf)
@@ -892,6 +913,8 @@ Z_Records *yf::Zoom::Frontend::get_records(Odr_int start,
                         YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS,
                         rec_type_str);
                 }
+                if (xmlrec_buf)
+                    xmlFree(xmlrec_buf);
             }
             else
             {
diff --git a/src/test_xmlutil.cpp b/src/test_xmlutil.cpp
new file mode 100644 (file)
index 0000000..13065a1
--- /dev/null
@@ -0,0 +1,116 @@
+/* This file is part of Metaproxy.
+   Copyright (C) 2005-2011 Index Data
+
+Metaproxy is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Metaproxy is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+*/
+
+#include "config.hpp"
+#include <iostream>
+#include <stdexcept>
+
+#include <metaproxy/xmlutil.hpp>
+
+#define BOOST_AUTO_TEST_MAIN
+#define BOOST_TEST_DYN_LINK
+#include <boost/test/auto_unit_test.hpp>
+
+#include <yaz/zgdu.h>
+#include <yaz/otherinfo.h>
+#include <yaz/oid_db.h>
+
+using namespace boost::unit_test;
+namespace mp = metaproxy_1;
+namespace mp_xml = metaproxy_1::xml;
+
+BOOST_AUTO_TEST_CASE( url_recipe )  // checks mp_xml::url_recipe_handle against one fixed pz:record
+{
+    try 
+    {
+        const char *xml_text = 
+            "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n"
+            "<pz:record xmlns:pz=\"http://www.indexdata.com/pazpar2/1.0\""
+            " xmlns:tmarc=\"http://www.indexdata.com/turbomarc\">\n"
+            "<pz:metadata type=\"id\">   11224466 </pz:metadata>\n"
+            "<pz:metadata type=\"oclc-number\"/>\n"
+            "<pz:metadata type=\"lccn\">   11224466 </pz:metadata>\n"
+            "<pz:metadata type=\"author\">Jack Collins</pz:metadata>\n"
+            "<pz:metadata type=\"author-title\"/>\n"
+            "<pz:metadata type=\"author-date\"/>\n"
+            "<pz:metadata type=\"date\"/>\n"
+            "<pz:metadata type=\"title\">How to program a computer</pz:metadata>\n"
+            "<pz:metadata type=\"publication-place\">Penguin</pz:metadata>\n"
+            "<pz:metadata type=\"has-fulltext\">no</pz:metadata>\n"
+            "</pz:record>\n";
+        xmlDoc *doc = xmlParseMemory(xml_text, strlen(xml_text));
+        BOOST_CHECK(doc);
+        if (doc)  // the remaining checks need a parsed document
+        {
+            std::string res;
+
+            res = mp_xml::url_recipe_handle(doc, "abc");  // no ${...}: recipe comes back unchanged
+            BOOST_CHECK(!res.compare("abc"));
+
+            res = mp_xml::url_recipe_handle(doc, "${has-fulltext[no/yes]}");  // pattern "no" -> replacement "yes"
+            std::cout << "res=" << res << std::endl;
+            BOOST_CHECK(!res.compare("yes"));
+
+            res = mp_xml::url_recipe_handle(doc, "${has-fulltext[no]}");  // pattern only: replacement is empty
+            std::cout << "res=" << res << std::endl;
+            BOOST_CHECK(!res.compare(""));
+
+            res = mp_xml::url_recipe_handle(doc, "${has-fulltext[no/]}");  // explicit empty replacement
+            std::cout << "res=" << res << std::endl;
+            BOOST_CHECK(!res.compare(""));
+
+            res = mp_xml::url_recipe_handle(doc, "${has-fulltext[n/]}");  // only the matched "n" is removed
+            std::cout << "res=" << res << std::endl;
+            BOOST_CHECK(!res.compare("o"));
+
+            res = mp_xml::url_recipe_handle(doc, "${has-fulltext}");  // no [..] part: value is inserted as is
+            std::cout << "res=" << res << std::endl;
+            BOOST_CHECK(!res.compare("no"));
+
+            res = mp_xml::url_recipe_handle(
+                doc, "http://sever.com?title=${md-title[\\s+/+/g]}");  // "md-" prefix form; 'g' turns every whitespace run into '+'
+            std::cout << "res=" << res << std::endl;
+            BOOST_CHECK(!res.compare("http://sever.com?title=How+to+program+a+computer"));
+
+            res = mp_xml::url_recipe_handle(doc, "${md-id[2/1]}");  // no 'g': only the first '2' changes
+            std::cout << "res=" << res << std::endl;
+            BOOST_CHECK(!res.compare("   11124466 "));
+
+            res = mp_xml::url_recipe_handle(doc, "${md-id[2/1/g]}");  // 'g': every '2' changes
+            std::cout << "res=" << res << std::endl;
+            BOOST_CHECK(!res.compare("   11114466 "));
+
+
+            xmlFreeDoc(doc);
+        }
+    }
+    catch ( ... ) {  // any exception from the calls above counts as a test failure
+        BOOST_CHECK (false);
+    }
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
index bba6a41..8076928 100644 (file)
@@ -16,77 +16,141 @@ along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
+#include "config.hpp"
+
+#include <boost/regex.hpp>
 #include <metaproxy/xmlutil.hpp>
 
 #include <string.h>
 
-namespace mp = metaproxy_1;
-// Doxygen doesn't like mp::xml, so we use this instead
 namespace mp_xml = metaproxy_1::xml;
 
-void mp_xml::url_recipe_handle(xmlDoc *doc, std::string recipe)
+// Expand a URL recipe against the record in doc and return the result.
+//
+// Text outside ${...} is copied verbatim.  Each
+// ${variable[pattern/replacement/mode]} is replaced by the get_text() value
+// of the "metadata" child of the root element whose "type" attribute equals
+// variable (a leading "md-" is dropped; the last matching element wins),
+// after substituting pattern (Perl regex syntax) with replacement.  Mode 'i'
+// ignores case; 'g' replaces every match instead of only the first.
+// A backslash keeps the next character from acting as a delimiter and is
+// itself kept in the text.  An empty recipe gives an empty string.
+// An invalid pattern makes the boost::regex constructor throw.
+std::string mp_xml::url_recipe_handle(xmlDoc *doc, std::string recipe)
 {
-    if (recipe.length() == 0)
-        return;
     std::string result;
+    if (recipe.length() == 0)
+        return result;
+
+    // First child of the root element: head of the metadata sibling list.
+    const xmlNode *ptr1 = xmlDocGetRootElement(doc);
+    while (ptr1 && ptr1->type != XML_ELEMENT_NODE)
+        ptr1 = ptr1->next;
+    if (ptr1)
+        ptr1 = ptr1->children;
 
-    size_t p0 = 0, p1 = 0;
+    size_t p0 = 0;
     for (;;)
     {
-        p1 = recipe.find_first_of("${", p0);
+        // find(), not find_first_of(): only the two-character sequence "${"
+        // opens a variable; a lone '$' or '{' is ordinary recipe text.
+        size_t p1 = recipe.find("${", p0);
         if (p1 == std::string::npos)
         {
             result += recipe.substr(p0);
             break;
         }
         result += recipe.substr(p0, p1 - p0);
+        p0 = p1+2;
 
         int step = 0;  // 0=variable, 1=pattern, 2=replacement, 3=mode
         std::string variable;
         std::string pattern;
         std::string replacement;
         std::string mode;
-        p0 = p1+2;
+        int c_prev = 0;
         while (p0 < recipe.length() && step < 5)
         {
             char c = recipe[p0];
-            if (c == '}')
+            // c_check is what the delimiter tests see; it is 0 right after a
+            // backslash so that an escaped character is stored as plain text.
+            int c_check = c;
+            if (c_prev == '\\')
+                c_check = 0;
+
+            if (c_check == '}')
                 step = 5;
             else if (step == 0)
             {
-                if (c == '[')
+                if (c_check == '[')
                     step = 1;
                 else
                     variable += c;
             }
+            else if (c_check == ']')
+                step = 4;
             else if (step == 1)
             {
-                if (c == '/')
+                if (c_check == '/')
                     step = 2;
                 else
                     pattern += c;
             }
             else if (step == 2)
             {
-                if (c == '/')
+                if (c_check == '/')
                     step = 3;
                 else
                     replacement += c;
             }
             else if (step == 3)
             {
-                if (c == ']')
-                    step = 4;
-                else
-                    mode += c;
+                mode += c;
             }
+            c_prev = c;
             p0++;
         }
         if (variable.length())
         {
-            ;
+            std::string text;
+            // Drop an optional "md-" prefix.  The whole prefix must match:
+            // find_first_of("md-") == 0 is true for any name starting with
+            // 'm', 'd' or '-' (e.g. "date") and would skip three characters.
+            size_t offset = 0;
+            if (variable.compare(0, 3, "md-") == 0)
+                offset = 3;
+            const xmlNode *ptr = ptr1;
+            for (; ptr; ptr = ptr->next)
+                if (ptr->type == XML_ELEMENT_NODE
+                    && !strcmp((const char *) ptr->name, "metadata"))
+                {
+                    const _xmlAttr *attr = ptr->properties;
+                    for (; attr; attr = attr->next)
+                        if (!strcmp((const char *) attr->name, "type")
+                            && attr->children
+                            && attr->children->content
+                            && !strcmp((const char *) attr->children->content,
+                                       variable.c_str() + offset))
+                        {
+                            text = mp_xml::get_text(ptr);
+                            break;
+                        }
+                }
+            boost::regex::flag_type b_mode = boost::regex::perl;
+            if (mode.find_first_of('i') != std::string::npos)
+                b_mode |= boost::regex::icase;
+            boost::regex e(pattern, b_mode);
+
+            // Without 'g' only the first match is replaced.  With 'g' use
+            // format_default (replace all matches); format_all would also
+            // make '(', ')', '?' and ':' special in the replacement text.
+            boost::match_flag_type match_mode = boost::format_first_only;
+            if (mode.find_first_of('g') != std::string::npos)
+                match_mode = boost::format_default;
+            result += regex_replace(text, e, replacement, match_mode);
         }
     }
+    return result;
 }
 
 
index 288096f..ef86b2d 100644 (file)
@@ -16,11 +16,12 @@ along with this program; if not, write to the Free Software
 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 */
 
+#include "config.hpp"
+
 #include <metaproxy/xmlutil.hpp>
 
 #include <string.h>
 
-
 namespace mp = metaproxy_1;
 // Doxygen doesn't like mp::xml, so we use this instead
 namespace mp_xml = metaproxy_1::xml;
index 62ee291..75494bb 100644 (file)
@@ -28,6 +28,7 @@ filter_zoom =
         element mp:requestSyntax { xsd:string }?,
         element mp:sru { xsd:string }?,
         element mp:transform { xsd:string }?,
+        element mp:urlRecipe { xsd:string }?,
         element mp:zurl { xsd:string },
         element mp:cfAuth { xsd:string }?,
         element mp:cfProxy { xsd:string }?,
index 85fd5a8..16f04d1 100644 (file)
                     <data type="string"/>
                   </element>
                 </optional>
+                <optional>
+                  <element name="mp:urlRecipe">
+                    <data type="string"/>
+                  </element>
+                </optional>
                 <element name="mp:zurl">
                   <data type="string"/>
                 </element>
index fb7fbbd..fe9c5a6 100644 (file)
@@ -46,6 +46,7 @@
         <xs:element minOccurs="0" ref="mp:requestSyntax"/>
         <xs:element minOccurs="0" ref="mp:sru"/>
         <xs:element minOccurs="0" ref="mp:transform"/>
+        <xs:element minOccurs="0" ref="mp:urlRecipe"/>
         <xs:element ref="mp:zurl"/>
         <xs:element minOccurs="0" ref="mp:cfAuth"/>
         <xs:element minOccurs="0" ref="mp:cfProxy"/>
@@ -68,6 +69,7 @@
   <xs:element name="requestSyntax" type="xs:string"/>
   <xs:element name="sru" type="xs:string"/>
   <xs:element name="transform" type="xs:string"/>
+  <xs:element name="urlRecipe" type="xs:string"/>
   <xs:element name="zurl" type="xs:string"/>
   <xs:element name="cfAuth" type="xs:string"/>
   <xs:element name="cfProxy" type="xs:string"/>