Sort module documentation, schema, fixes
authorAdam Dickmeiss <adam@indexdata.dk>
Tue, 20 Mar 2012 09:23:38 +0000 (10:23 +0100)
committerAdam Dickmeiss <adam@indexdata.dk>
Tue, 20 Mar 2012 09:23:38 +0000 (10:23 +0100)
doc/Makefile.am
doc/sort.xml [new file with mode: 0644]
etc/config-record-transform.xml
src/filter_sort.cpp
xml/schema/Makefile.am
xml/schema/filter_sort.rnc [new file with mode: 0644]
xml/schema/metaproxy.rnc

index cdaadc6..838f7e1 100644 (file)
@@ -23,6 +23,7 @@ XMLMAN = metaproxy.xml \
        query_rewrite.xml \
         record_transform.xml\
        session_shared.xml \
+       sort.xml \
         sru_z3950.xml\
        template.xml \
        virt_db.xml \
@@ -44,6 +45,7 @@ MANFILES = metaproxy.1 \
        multi.3mp query_rewrite.3mp \
         record_transform.3mp\
        session_shared.3mp \
+       sort.3mp \
         sru_z3950.3mp \
        template.3mp \
        virt_db.3mp \
diff --git a/doc/sort.xml b/doc/sort.xml
new file mode 100644 (file)
index 0000000..d4a0431
--- /dev/null
@@ -0,0 +1,125 @@
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V4.4//EN" 
+    "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd" [
+ <!ENTITY copyright SYSTEM "copyright.xml">
+ <!ENTITY % idcommon SYSTEM "common/common.ent">
+     %idcommon;
+]>
+<refentry id="ref-sort">
+ <refentryinfo>
+  <productname>Metaproxy</productname>
+  <info><orgname>Index Data</orgname></info>
+ </refentryinfo>
+
+ <refmeta>
+  <refentrytitle>sort</refentrytitle>
+  <manvolnum>3mp</manvolnum>
+  <refmiscinfo class="manual">Metaproxy Module</refmiscinfo>
+ </refmeta>
+ <refnamediv>
+  <refname>sort</refname>
+  <refpurpose>Metaproxy Z39.50 Sort Module</refpurpose>
+ </refnamediv>
+ <refsect1><title>DESCRIPTION</title>
+  <para>
+   This filter performs sorting of Z39.50 result sets.
+   The sorting criterion is selected via an X-Path expression. Only
+   XML records are supported. The sorting is done only for the first
+   present request following a search. The number of records to prefetch
+   is configurable. For example, if a client initially asks for 10
+   records, this module may extend that, fetch more records, and
+   return only the results in the 10-record window - after sorting.
+  </para>
+  <para>
+   The configuration is given as attributes of the element
+   <literal>sort</literal>. This element must occur exactly once. Future
+   versions of the sort module may allow multiple sort elements.
+   The attributes within sort are:
+   <variablelist>
+    <varlistentry><term>xpath</term>
+     <listitem>
+      <para>
+       Specifies the X-Path expression that picks the sorting data from
+       the record.
+      </para>
+     </listitem>
+    </varlistentry>
+    <varlistentry><term>namespaces</term>
+     <listitem>
+      <para>
+       Allows one or more namespaces to be declared with a user-defined
+       prefix. Each prefix may be referred to within the xpath expression.
+      </para>
+     </listitem>
+    </varlistentry>
+    <varlistentry><term>prefetch</term>
+     <listitem>
+      <para>
+       Number of records to prefetch.
+      </para>
+     </listitem>
+    </varlistentry>
+    <varlistentry><term>ascending</term>
+     <listitem>
+      <para>
+       Is a boolean value (false, true). If true, the sort module will
+       sort ascending. If false, the sort module will sort descending.
+       If omitted, the sort order will be ascending.
+      </para>
+     </listitem>
+    </varlistentry>
+   </variablelist>
+  </para>
+ </refsect1>
+ <refsect1><title>SCHEMA</title>
+   <literallayout><xi:include
+                    xi:href="../xml/schema/filter_sort.rnc"
+                    xi:parse="text"  
+                    xmlns:xi="http://www.w3.org/2001/XInclude" />
+   </literallayout>
+ </refsect1>
+ <refsect1><title>EXAMPLES</title>
+  <para>
+   For example, to sort MARCXML records on title, one could use:
+   <screen><![CDATA[
+<filter type="sort">
+  <sort 
+    xpath="/marc:record/marc:datafield[@tag='245']/marc:subfield[@code='a']"
+    namespaces="marc=http://www.loc.gov/MARC21/slim"
+    prefetch="5"
+    ascending="true"
+    debug="true"
+   />
+</filter>
+]]>
+   </screen>
+  </para>
+ </refsect1> 
+ <refsect1><title>SEE ALSO</title>
+  <para>
+   <citerefentry>
+    <refentrytitle>metaproxy</refentrytitle>
+    <manvolnum>1</manvolnum>
+   </citerefentry>
+  </para>
+  <para>
+   <citerefentry>
+    <refentrytitle>record_transform</refentrytitle>
+    <manvolnum>3mp</manvolnum>
+   </citerefentry>
+  </para>
+ </refsect1>
+ &copyright;
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: nxml
+nxml-child-indent: 1
+End:
+-->
index 8e8dbc3..bce4097 100644 (file)
       <filter type="log">
         <message>Front</message>
       </filter>
+      <filter type="sort">
+        <sort 
+           xpath="/marc:record/marc:datafield[@tag='245']/marc:subfield[@code='a']"
+           namespaces="marc=http://www.loc.gov/MARC21/slim"
+           prefetch="5"
+           ascending="true"
+           debug="true"
+             />
+      </filter>
       <filter type="record_transform">
         <xi:include xmlns:xi="http://www.w3.org/2001/XInclude"
                     href="retrieval-info.xml"/>
index 5f09492..784c6f9 100644 (file)
@@ -51,6 +51,7 @@ namespace metaproxy_1 {
             std::string m_xpath_expr;
             std::string m_namespaces;
             bool m_ascending;
+            bool m_debug;
             boost::mutex m_mutex;
             boost::condition m_cond_session_ready;
             std::map<mp::Session, FrontendPtr> m_clients;
@@ -62,12 +63,12 @@ namespace metaproxy_1 {
             Z_NamePlusRecord *npr;
             std::string score;
             void get_xpath(xmlDoc *doc, const char *namespaces,
-                           const char *expr);
+                           const char *expr, bool debug);
             bool register_namespaces(xmlXPathContextPtr xpathCtx,
                                      const char *nsList);
         public:
             Record(Z_NamePlusRecord *n, const char *namespaces,
-                   const char *expr);
+                   const char *expr, bool debug);
             ~Record();
             bool operator < (const Record &rhs);
         };
@@ -77,6 +78,7 @@ namespace metaproxy_1 {
             mp::odr m_odr;
             std::string namespaces;
             std::string xpath_expr;
+            bool debug;
         public:
             bool cmp(Odr_oid *syntax);
             void add(Z_NamePlusRecord *s);
@@ -84,7 +86,7 @@ namespace metaproxy_1 {
             Z_NamePlusRecord *get(int i, bool ascending);
             void sort();
             RecordList(Odr_oid *, std::string namespaces,
-                       std::string xpath_expr);
+                       std::string xpath_expr, bool debug);
             ~RecordList();
         };
         class Sort::ResultSet : boost::noncopyable {
@@ -213,7 +215,7 @@ bool yf::Sort::Record::register_namespaces(xmlXPathContextPtr xpathCtx,
 
 
 void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces,
-                                 const char *expr)
+                                 const char *expr, bool debug)
 {
     xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc);
     if (xpathCtx)
@@ -224,6 +226,8 @@ void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces,
         if (xpathObj)
         {
             xmlNodeSetPtr nodes = xpathObj->nodesetval;
+            if (debug)
+                print_xpath_nodes(nodes, yaz_log_file());
             if (nodes)
             {
                 int i;
@@ -240,7 +244,7 @@ void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces,
                     {
                         content = mp::xml::get_text(ptr);
                     }
-                    if (content.c_str())
+                    if (content.length())
                     {
                         score = content;
                         break;
@@ -255,7 +259,8 @@ void yf::Sort::Record::get_xpath(xmlDoc *doc, const char *namespaces,
 
 yf::Sort::Record::Record(Z_NamePlusRecord *n,
                          const char *namespaces,
-                         const char *expr) : npr(n) 
+                         const char *expr,
+                         bool debug) : npr(n) 
 {
     if (npr->which == Z_NamePlusRecord_databaseRecord)
     {
@@ -269,7 +274,7 @@ yf::Sort::Record::Record(Z_NamePlusRecord *n,
                 ext->u.octet_aligned->len);
             if (doc)
             {
-                get_xpath(doc, namespaces, expr);
+                get_xpath(doc, namespaces, expr, debug);
                 xmlFreeDoc(doc);
             }
         }
@@ -289,8 +294,9 @@ bool yf::Sort::Record::operator < (const Record &rhs)
 
 yf::Sort::RecordList::RecordList(Odr_oid *syntax,
                                  std::string a_namespaces,
-                                 std::string a_xpath_expr)
-    : namespaces(a_namespaces), xpath_expr(a_xpath_expr)
+                                 std::string a_xpath_expr,
+                                 bool a_debug)
+    : namespaces(a_namespaces), xpath_expr(a_xpath_expr), debug(a_debug)
 
 {
     if (syntax)
@@ -322,7 +328,7 @@ void yf::Sort::RecordList::add(Z_NamePlusRecord *s)
 {
     ODR oi = m_odr;
     Z_NamePlusRecord *npr = yaz_clone_z_NamePlusRecord(s, oi->mem);
-    Record record(npr, namespaces.c_str(), xpath_expr.c_str());
+    Record record(npr, namespaces.c_str(), xpath_expr.c_str(), debug);
     npr_list.push_back(record);
 }
 
@@ -375,7 +381,7 @@ yf::Sort::Frontend::~Frontend()
 }
 
 
-yf::Sort::Impl::Impl() : m_prefetch(20), m_ascending(true)
+yf::Sort::Impl::Impl() : m_prefetch(20), m_ascending(true), m_debug(false)
 {
 }
 
@@ -435,7 +441,7 @@ void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only,
     {
         if (ptr->type != XML_ELEMENT_NODE)
             continue;
-        if (!strcmp((const char *) ptr->name, "config"))
+        if (!strcmp((const char *) ptr->name, "sort"))
         {            
             const struct _xmlAttr *attr;
             for (attr = ptr->properties; attr; attr = attr->next)
@@ -458,18 +464,13 @@ void yf::Sort::Impl::configure(const xmlNode *ptr, bool test_only,
                 {
                     m_namespaces = mp::xml::get_text(attr->children);
                 }
-                else if (!strcmp((const char *) attr->name, "sortorder"))
+                else if (!strcmp((const char *) attr->name, "ascending"))
                 {
-                    std::string t = mp::xml::get_text(attr->children);
-                    if (t == "ascending")
-                        m_ascending = true;
-                    else if (t == "descending")
-                        m_ascending = false;
-                    else
-                        throw mp::filter::FilterException(
-                            "Bad attribute value " + t + " for attribute " +
-                            std::string((const char *) attr->name));
-
+                    m_ascending = mp::xml::get_bool(attr->children, true);
+                }
+                else if (!strcmp((const char *) attr->name, "debug"))
+                {
+                    m_debug = mp::xml::get_bool(attr->children, false);
                 }
                 else
                     throw mp::filter::FilterException(
@@ -515,7 +516,8 @@ void yf::Sort::Frontend::handle_records(mp::Package &package,
         int pos = 1;
         RecordListPtr rlp(new RecordList(syntax,
                                          m_p->m_namespaces.c_str(),
-                                         m_p->m_xpath_expr.c_str()));
+                                         m_p->m_xpath_expr.c_str(),
+                                         m_p->m_debug));
         for (i = 0; i < nprl->num_records; i++, pos++)
             rlp->add(nprl->records[i]);
 
@@ -570,6 +572,10 @@ void yf::Sort::Frontend::handle_search(mp::Package &package, Z_APDU *apdu_req)
     std::string resultSetId = req->resultSetName;
     Package b_package(package.session(), package.origin());
     mp::odr odr;
+    Odr_oid *syntax = 0;
+
+    if (req->preferredRecordSyntax)
+        syntax = odr_oiddup(odr, req->preferredRecordSyntax);
 
     b_package.copy_filter(package);
     Sets_it sets_it = m_sets.find(req->resultSetName);
@@ -600,7 +606,7 @@ void yf::Sort::Frontend::handle_search(mp::Package &package, Z_APDU *apdu_req)
         Z_SearchResponse *res = gdu_res->u.z3950->u.searchResponse;
         s->hit_count = *res->resultCount;
         handle_records(b_package, apdu_req, res->records, 1, s,
-                       req->preferredRecordSyntax, resultSetId.c_str());
+                       syntax, resultSetId.c_str());
         package.response() = gdu_res;
     }
 }
@@ -611,6 +617,11 @@ void yf::Sort::Frontend::handle_present(mp::Package &package, Z_APDU *apdu_req)
     std::string resultSetId = req->resultSetId;
     Package b_package(package.session(), package.origin());
     mp::odr odr;
+    Odr_oid *syntax = 0;
+    Odr_int start = *req->resultSetStartPoint;
+
+    if (req->preferredRecordSyntax)
+        syntax = odr_oiddup(odr, req->preferredRecordSyntax);
 
     b_package.copy_filter(package);
     Sets_it sets_it = m_sets.find(resultSetId);
@@ -666,8 +677,7 @@ void yf::Sort::Frontend::handle_present(mp::Package &package, Z_APDU *apdu_req)
     {
         Z_PresentResponse *res = gdu_res->u.z3950->u.presentResponse;
         handle_records(b_package, apdu_req, res->records, 
-                       *req->resultSetStartPoint, rset,
-                       req->preferredRecordSyntax, resultSetId.c_str());
+                       start, rset, syntax, resultSetId.c_str());
         package.response() = gdu_res;
     }
 }
index 348feec..8bfdc74 100644 (file)
@@ -18,6 +18,7 @@ filter_multi.rnc \
 filter_query_rewrite.rnc \
 filter_record_transform.rnc \
 filter_session_shared.rnc \
+filter_sort.rnc \
 filter_sru_z3950.rnc \
 filter_virt_db.rnc \
 filter_z3950_client.rnc \
diff --git a/xml/schema/filter_sort.rnc b/xml/schema/filter_sort.rnc
new file mode 100644 (file)
index 0000000..d62b844
--- /dev/null
@@ -0,0 +1,19 @@
+# Metaproxy XML config file schema
+
+namespace mp = "http://indexdata.com/metaproxy"
+
+filter_sort =
+  attribute type { "sort" },
+  attribute id { xsd:NCName }?,
+  attribute name { xsd:NCName }?,
+  element mp:sort {
+    attribute prefetch { xsd:integer }?,
+    attribute xpath { xsd:string },
+    attribute namespaces { xsd:string }?,
+    attribute ascending { xsd:boolean }?,
+    attribute debug { xsd:boolean }?
+  }
+
+
+
+
index 3442c35..b82b7ae 100644 (file)
@@ -36,6 +36,7 @@ include "filter_multi.rnc"
 include "filter_query_rewrite.rnc"
 include "filter_record_transform.rnc"
 include "filter_session_shared.rnc"
+include "filter_sort.rnc"
 include "filter_sru_z3950.rnc"
 include "filter_virt_db.rnc"
 include "filter_z3950_client.rnc"
@@ -83,6 +84,7 @@ filter =
     | filter_query_rewrite
     | filter_record_transform
     | filter_session_shared
+    | filter_sort
     | filter_sru_z3950
     | filter_virt_db
     | filter_z3950_client