Added a few ids for sections
[yaz-moved-to-github.git] / doc / tools.xml
index d47683f..7f12685 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $Id: tools.xml,v 1.50 2006-06-13 16:01:51 adam Exp $ -->
+<!-- $Id: tools.xml,v 1.58 2007-05-04 08:06:24 adam Exp $ -->
  <chapter id="tools"><title>Supporting Tools</title>
   
   <para>
     <literallayout>
      query ::= top-set query-struct.
 
-     top-set ::= &lsqb; '@attrset' string &rsqb;
+     top-set ::= [ '@attrset' string ]
 
      query-struct ::= attr-spec | simple | complex | '@term' term-type query
 
-     attr-spec ::= '@attr' &lsqb; string &rsqb; string query-struct
+     attr-spec ::= '@attr' [ string ] string query-struct
 
      complex ::= operator query-struct query-struct.
 
 
     <sect3 id="pqf-examples"><title>PQF queries</title>
 
-     <example><title>PQF queries using simple terms</title>
+     <example id="example.pqf.simple.terms">
+      <title>PQF queries using simple terms</title>
       <para>
        <screen>
        dylan
        </screen>
       </para>
      </example>
-     <example><title>PQF boolean operators</title>
+     <example id="pqf.example.pqf.boolean.operators">
+      <title>PQF boolean operators</title>
       <para>
        <screen>
        @or "dylan" "zimmerman"
        </screen>
       </para>
      </example>
-     <example><title>PQF references to result sets</title>
+     <example id="example.pqf.result.sets">
+      <title>PQF references to result sets</title>
       <para>
        <screen>
        @set Result-1
        </screen>
       </para>
      </example>
-     <example><title>Attributes for terms</title>
+     <example id="example.pqf.attributes">
+      <title>Attributes for terms</title>
       <para>
        <screen>
        @attr 1=4 computer
        </screen>
       </para>
      </example>
-     <example><title>PQF Proximity queries</title>
+     <example id="example.pqf.proximity">
+      <title>PQF Proximity queries</title>
       <para>
        <screen>
        @prox 0 3 1 2 k 2 dylan zimmerman
        </para></note>
       </para>
      </example>
-     <example><title>PQF specification of search term</title>
+     <example id="example.pqf.search.term.type">
+      <title>PQF specification of search term type</title>
       <para>
        <screen>
        @term string "a UTF-8 string, maybe?"
        </screen>
       </para>
      </example>
-     <example><title>PQF mixed queries</title>
+     <example id="example.pqf.mixed.queries">
+      <title>PQF mixed queries</title>
       <para>
        <screen>
        @or @and bob dylan @set Result-1
      license, it is included as a supplement to &yaz;.
     </para>
 
-    <sect3><title>CCL Syntax</title>
+    <sect3 id="ccl.syntax">
+     <title>CCL Syntax</title>
 
      <para>
       The CCL parser obeys the following grammar for the FIND argument.
       The syntax is annotated in the lines prefixed by
-      <literal>&dash;&dash;</literal>.
+      <literal>--</literal>.
      </para>
 
      <screen>
 
      </screen>
      
-     <example><title>CCL queries</title>
+     <example id="example.ccl.queries">
+      <title>CCL queries</title>
       <para>
        The following queries are all valid:
       </para>
      </example>
      
     </sect3>
-    <sect3><title>CCL Qualifiers</title>
+    <sect3 id="ccl.qualifiers">
+     <title>CCL Qualifiers</title>
      
      <para>
       Qualifiers are used to direct the search to a particular searchable
       lines in a CCL profile: qualifier specification,
       qualifier alias, comments and directives.
      </para>
-     <sect4><title id="qualifier-specification">Qualifier specification</title>
+     <sect4 id="ccl.qualifier.specification">
+      <title>Qualifier specification</title>
       <para>
        A qualifier specification is of the form:
       </para>
        or <literal>c</literal> for completeness.
        The attributes for the special qualifier name <literal>term</literal>
        are used when no CCL qualifier is given in a query.
-       <table><title>Common Bib-1 attributes</title>
+       <table id="ccl.common.bib1.attributes">
+       <title>Common Bib-1 attributes</title>
        <tgroup cols="2">
         <colspec colwidth="2*" colname="type"></colspec>
         <colspec colwidth="9*" colname="description"></colspec>
        which are used in combination with certain types.
        The special combinations are:
        
-       <table><title>Special attribute combos</title>
+       <table id="ccl.special.attribute.combos">
+       <title>Special attribute combos</title>
        <tgroup cols="2">
         <colspec colwidth="2*" colname="name"></colspec>
         <colspec colwidth="9*" colname="description"></colspec>
        </tgroup>
        </table>
       </para>
-      <example><title>CCL profile</title>
+      <example id="example.ccl.profile"><title>CCL profile</title>
        <para>
        Consider the following definition:
        </para>
        </para>
       </example>
      </sect4>
-     <sect4><title>Qualifier alias</title>
+     <sect4 id="ccl.qualifier.alias">
+      <title>Qualifier alias</title>
       <para>
        A qualifier alias is of the form:
       </para>
       </para>
      </sect4>
 
-     <sect4><title>Comments</title>
+     <sect4 id="ccl.comments">
+      <title>Comments</title>
       <para>
        Lines with white space or lines that begin with
        character <literal>#</literal> are treated as comments.
       </para>
      </sect4>
 
-     <sect4><title>Directives</title>
+     <sect4 id="ccl.directives">
+      <title>Directives</title>
       <para>
        Directive specifications take the form
       </para>
       <para><literal>@</literal><replaceable>directive</replaceable> <replaceable>value</replaceable>
       </para>
-      <table><title>CCL directives</title>
+      <table id="ccl.directives.table">
+       <title>CCL directives</title>
        <tgroup cols="3">
        <colspec colwidth="2*" colname="name"></colspec>
        <colspec colwidth="8*" colname="description"></colspec>
       </table>
      </sect4>
     </sect3>
-    <sect3><title>CCL API</title>
+    <sect3 id="ccl.api">
+     <title>CCL API</title>
      <para>
       All public definitions can be found in the header file
       <filename>ccl.h</filename>. A profile identifier is of type
@@ -936,7 +954,7 @@ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str,
      </para>
     </sect3>
    </sect2>
-   <sect2 id="tools.cql"><title>CQL</title>
+   <sect2 id="cql"><title>CQL</title>
     <para>
      <ulink url="&url.cql;">CQL</ulink>
       - Common Query Language - was defined for the
@@ -986,7 +1004,7 @@ struct ccl_rpn_node *ccl_find_str (CCL_bibset bibset, const char *str,
       </listitem>
      </itemizedlist>
     </para>
-    <sect3 id="tools.cql.parsing"><title>CQL parsing</title>
+    <sect3 id="cql.parsing"><title>CQL parsing</title>
      <para>
       A CQL parser is represented by the <literal>CQL_parser</literal>
       handle. Its contents should be considered &yaz; internal (private).
@@ -1032,7 +1050,7 @@ int cql_parser_stdio(CQL_parser cp, FILE *f);
      </para>
     </sect3>
     
-    <sect3 id="tools.cql.tree"><title>CQL tree</title>
+    <sect3 id="cql.tree"><title>CQL tree</title>
      <para>
       If the query string is valid, the CQL parser
       generates a tree representing the structure of the
@@ -1136,7 +1154,7 @@ struct cql_node {
      </para>
 
     </sect3>
-    <sect3 id="tools.cql.pqf"><title>CQL to PQF conversion</title>
+    <sect3 id="cql.to.pqf"><title>CQL to PQF conversion</title>
      <para>
      Conversion to PQF (and Z39.50 RPN) is complicated by the fact
       that the resulting RPN depends on the Z39.50 target
@@ -1218,7 +1236,7 @@ int cql_transform_FILE(cql_transform_t ct,
       open <literal>FILE</literal>.
      </para>
     </sect3>
-    <sect3 id="tools.cql.map">
+    <sect3 id="cql.to.rpn">
      <title>Specification of CQL to RPN mappings</title>
      <para>
       The file supplied to functions 
@@ -1245,6 +1263,13 @@ int cql_transform_FILE(cql_transform_t ct,
       <replaceable>value</replaceable> the attribute value.
      </para>
      <para>
+      The character <literal>*</literal> (asterisk) has special meaning
+      when used in the RPN pattern.
+      Each occurrence of <literal>*</literal> is substituted with the
+      CQL matching name (index, relation, qualifier etc).
+      This facility can be used to copy a CQL name verbatim to the RPN result.
+     </para>
+     <para>
       The following CQL patterns are recognized:
       <variablelist>
        <varlistentry><term>
@@ -1265,6 +1290,11 @@ int cql_transform_FILE(cql_transform_t ct,
           <literal>http://www.loc.gov/zing/cql/cql-indexes/v1.0/</literal>.
           If this pattern is not defined, the mapping will fail.
          </para>
+         <para>
+          The pattern, 
+          <literal>index.</literal><replaceable>set</replaceable><literal>.*</literal>
+          is used when no other index pattern is matched.
+        </para>
         </listitem>
        </varlistentry>
        <varlistentry><term>
@@ -1367,28 +1397,41 @@ int cql_transform_FILE(cql_transform_t ct,
          </para>
         </listitem>
        </varlistentry>
+
+       <varlistentry><term>
+         <literal>set</literal>
+        </term>
+        <listitem>
+         <para>
+          This specification defines a default CQL context set for index names.
+          The value on the right hand side is the URI for the set.
+         </para>
+        </listitem>
+       </varlistentry>
+
       </variablelist>
      </para>
-     <example><title>CQL to RPN mapping file</title>
+     <example id="example.cql.to.rpn.mapping">
+      <title>CQL to RPN mapping file</title>
       <para>
        This simple file defines two context sets, three indexes and three
        relations, a position pattern and a default structure.
       </para>
       <programlisting><![CDATA[
-       set.cql    = http://www.loc.gov/zing/cql/context-sets/cql/v1.1/
-       set.dc     = http://www.loc.gov/zing/cql/dc-indexes/v1.0/
+       set.cql  = http://www.loc.gov/zing/cql/context-sets/cql/v1.1/
+       set.dc   = http://www.loc.gov/zing/cql/dc-indexes/v1.0/
 
        index.cql.serverChoice = 1=1016
        index.dc.title         = 1=4
        index.dc.subject       = 1=21
   
-       relation.<                 = 2=1
-       relation.eq                = 2=3
-       relation.scr               = 2=3
+       relation.<             = 2=1
+       relation.eq            = 2=3
+       relation.scr           = 2=3
 
-       position.any               = 3=3 6=1
+       position.any           = 3=3 6=1
 
-       structure.*                = 4=1
+       structure.*            = 4=1
 ]]>
       </programlisting>
       <para>
@@ -1423,8 +1466,51 @@ int cql_transform_FILE(cql_transform_t ct,
        </screen>
       </para>
      </example>
+     <example id="example.cql.to.rpn.string">
+      <title>CQL to RPN string attributes</title>
+      <para>
+       In this example we allow any index to be passed to RPN as
+       a use attribute.
+      </para>
+      <programlisting><![CDATA[
+       # Identifiers for prefixes used in this file. (index.*)
+       set.cql  = info:srw/cql-context-set/1/cql-v1.1
+       set.rpn  = http://bogus/rpn
+       set      = http://bogus/rpn
+
+       # The default index when none is specified by the query
+       index.cql.serverChoice     = 1=any
+
+       index.rpn.*                = 1=*
+       relation.eq                = 2=3
+       structure.*                = 4=1
+       position.any               = 3=3
+]]>
+      </programlisting>
+      <para>
+       The <literal>http://bogus/rpn</literal> context set is also the default
+       so we can make queries such as
+       <screen>
+        title = a
+       </screen>
+       which is converted to
+       <screen>
+        @attr 2=3 @attr 4=1 @attr 3=3 @attr 1=title "a"
+       </screen>
+      </para>
+     </example>
+     <example id="example.cql.to.rpn.bathprofile">
+      <title>CQL to RPN using Bath Profile</title>
+      <para>
+       The file <filename>etc/pqf.properties</filename> has mappings from
+       the Bath Profile and Dublin Core to RPN.
+       If YAZ is installed as a package it's usually located
+       in <filename>/usr/share/yaz/etc</filename> and part of the
+       development package, such as <literal>libyaz-dev</literal>.
+      </para>
+     </example>
     </sect3>
-    <sect3 id="tools.cql.xcql"><title>CQL to XCQL conversion</title>
+    <sect3 id="cql.xcql"><title>CQL to XCQL conversion</title>
      <para>
       Conversion from CQL to XCQL is trivial and does not
       require a mapping to be defined.
@@ -1950,7 +2036,7 @@ typedef struct oident
 
   </sect1>
   
-  <sect1 id="tools.marc"><title>MARC</title>
+  <sect1 id="marc"><title>MARC</title>
    
    <para>
     YAZ provides a fast utility that decodes MARC records and
@@ -2012,7 +2098,7 @@ typedef struct oident
      </varlistentry>
 
      <varlistentry>
-      <term>YAZ_MARC_MARXML</term>
+      <term>YAZ_MARC_MARCXML</term>
       <listitem>
        <para>
        The resulting record is converted to MARCXML.
@@ -2038,7 +2124,7 @@ typedef struct oident
     stores the resulting record in a WRBUF handle (WRBUF is a simple string
     type).
    </para>
-   <example>
+   <example id="example.marc.display">
     <title>Display of MARC record</title>
     <para>
      The following program snippet illustrates how the MARC API may
@@ -2061,6 +2147,267 @@ typedef struct oident
    </example>
   </sect1>
 
+  <sect1 id="tools.retrieval">
+   <title>Retrieval Facility</title>
+   <para>
+    YAZ version 2.1.20 or later includes a Retrieval facility tool
+    which allows an SRU/Z39.50 server to describe itself and perform record
+    conversions. The idea is the following:
+    
+    <itemizedlist>
+     <listitem>
+      <para>
+       An SRU/Z39.50 client sends a retrieval request which includes
+       a combination of the following parameters: syntax (format),
+       schema (or element set name).
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       The retrieval facility is invoked with parameters in a
+       server/proxy. The retrieval facility matches the parameters against a set of
+       "supported" retrieval types.
+       If there is no match, the retrieval signals an error
+       (syntax and / or schema not supported).
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       For a successful match, the backend is invoked with the same
+       or altered retrieval parameters (syntax, schema). If
+       a record is received from the backend, it is converted to the
+       frontend name / syntax.
+      </para>
+     </listitem>
+
+     <listitem>
+      <para>
+       The resulting record is sent back to the client and tagged with
+       the frontend syntax / schema.
+      </para>
+     </listitem>
+
+    </itemizedlist>
+   </para>
+   <para>
+    The Retrieval facility is driven by an XML configuration. The
+    configuration is neither Z39.50 ZeeRex nor SRU ZeeRex. But it
+    should be easy to generate both of them from the XML configuration.
+    (unfortunately the two versions
+    of ZeeRex differ substantially in this regard).
+   </para>
+   <sect2 id="tools.retrieval.format">
+    <title>Retrieval XML format</title>
+    <para>
+     All elements should be covered by namespace 
+     <literal>http://indexdata.com/yaz</literal> .
+     The root element node must be <literal>retrievalinfo</literal>.
+    </para>
+    <para>
+     The <literal>retrievalinfo</literal> must include one or
+     more <literal>retrieval</literal> elements. Each 
+    <literal>retrieval</literal> defines a specific combination of
+     syntax, name and identifier supported by this retrieval service.
+    </para>
+    <para>
+     The <literal>retrieval</literal> element may include any of the
+     following attributes:
+     <variablelist>
+      <varlistentry><term><literal>syntax</literal> (REQUIRED)</term>
+       <listitem>
+        <para>
+         Defines the record syntax. Possible values are any
+         of the names defined in YAZ' OID database or a raw
+         OID in (n.n ... n).
+        </para>
+       </listitem>
+      </varlistentry>
+      <varlistentry><term><literal>name</literal> (OPTIONAL)</term>
+       <listitem>
+        <para>
+         Defines the name of the retrieval format. This can be
+         any string. For SRU, the value is equivalent to schema (short-hand);
+         for Z39.50 it's equivalent to simple element set name. 
+        </para>
+       </listitem>
+      </varlistentry>
+      <varlistentry><term><literal>identifier</literal> (OPTIONAL)</term>
+       <listitem>
+        <para>
+         Defines the URI schema name of the retrieval format. This can be
+         any string. For SRU, the value is equivalent to URI schema.
+         For Z39.50, there is no equivalent.
+        </para>
+       </listitem>
+      </varlistentry>
+     </variablelist>
+    </para>
+    <para>
+     The <literal>retrieval</literal> may include one 
+     <literal>backend</literal> element. If a <literal>backend</literal>
+     element is given, it specifies how the records are retrieved by
+     some backend and how the records are converted from the backend to
+     the "frontend".
+    </para>
+    <para>
+     The attributes, <literal>name</literal> and <literal>syntax</literal>
+     may be specified for the <literal>backend</literal> element. The
+     semantics of these attributes are equivalent to those for the
+     <literal>retrieval</literal>. However, these values are passed to
+     the "backend".
+    </para>
+    <para>
+     The <literal>backend</literal> element may include one or more
+     conversion instructions (as child elements). The supported
+     conversions are:
+     <variablelist>
+      <varlistentry><term><literal>marc</literal></term>
+       <listitem>
+        <para>
+         The <literal>marc</literal> element specifies a conversion 
+         to - and from ISO2709 encoded MARC and 
+         <ulink url="&url.marcxml;">&marcxml;</ulink>/MarcXchange.
+         The following attributes may be specified:
+
+         <variablelist>
+          <varlistentry><term><literal>inputformat</literal> (REQUIRED)</term>
+           <listitem>
+            <para>
+             Format of input. Supported values are 
+            <literal>marc</literal> (for ISO2709); and <literal>xml</literal>
+             for MARCXML/MarcXchange.
+            </para>
+           </listitem>
+          </varlistentry>
+
+          <varlistentry><term><literal>outputformat</literal> (REQUIRED)</term>
+           <listitem>
+            <para>
+             Format of output. Supported values are 
+            <literal>line</literal> (MARC line format); 
+            <literal>marcxml</literal> (for MARCXML),
+            <literal>marc</literal> (ISO2709),
+            <literal>marcxchange</literal> (for MarcXchange).
+            </para>
+           </listitem>
+          </varlistentry>
+
+          <varlistentry><term><literal>inputcharset</literal> (OPTIONAL)</term>
+           <listitem>
+            <para>
+             Encoding of input. For XML input formats, this need not
+             be given, but for ISO2709 based inputformats, this should
+             be set to the encoding used. For MARC21 records, a common
+             inputcharset value  would be <literal>marc-8</literal>.
+            </para>
+           </listitem>
+          </varlistentry>
+
+          <varlistentry><term><literal>outputcharset</literal> (OPTIONAL)</term>
+           <listitem>
+            <para>
+             Encoding of output. If outputformat is XML based, it is
+             strongly recommended to use <literal>utf-8</literal>.
+            </para>
+           </listitem>
+          </varlistentry>
+
+         </variablelist>
+        </para>
+       </listitem>
+      </varlistentry>
+      <varlistentry><term><literal>xslt</literal></term>
+       <listitem>
+        <para>
+         The <literal>xslt</literal> element specifies a conversion
+         via &xslt;. The following attributes may be specified:
+
+         <variablelist>
+          <varlistentry><term><literal>stylesheet</literal> (REQUIRED)</term>
+           <listitem>
+            <para>
+             Stylesheet file.
+            </para>
+           </listitem>
+          </varlistentry>
+         </variablelist>
+
+        </para>
+       </listitem>
+      </varlistentry>
+     </variablelist>
+    </para>
+   </sect2>
+   <sect2 id="tools.retrieval.examples">
+    <title>Retrieval Facility Examples</title>
+    <example id="tools.retrieval.marc21">
+     <title>MARC21 backend</title>
+     <para>
+      A typical way to use the retrieval facility is to enable XML
+      for servers that only support ISO2709 encoded MARC21 records.
+     </para>
+     <programlisting><![CDATA[
+     <retrievalinfo>
+       <retrieval syntax="usmarc" name="F"/>
+       <retrieval syntax="usmarc" name="B"/>
+       <retrieval syntax="xml" name="marcxml"
+                 identifier="info:srw/schema/1/marcxml-v1.1">
+         <backend syntax="usmarc" name="F">
+          <marc inputformat="marc" outputformat="marcxml"
+                inputcharset="marc-8"/>
+        </backend>
+       </retrieval>
+       <retrieval syntax="xml" name="dc">
+         <backend syntax="usmarc" name="F">
+          <marc inputformat="marc" outputformat="marcxml"
+                inputcharset="marc-8"/>
+           <xslt stylesheet="MARC21slim2DC.xsl"/>
+        </backend>
+       </retrieval>
+     </retrievalinfo>
+]]>
+     </programlisting>
+     <para>
+      This means that our frontend supports:
+      <itemizedlist>
+       <listitem>
+        <para>
+         MARC21 F(ull) records.
+        </para>
+       </listitem>
+       <listitem>
+        <para>
+         MARC21 B(rief) records.
+        </para>
+       </listitem>
+
+       <listitem>
+        <para>
+         MARCXML records.
+        </para>
+       </listitem>
+
+       <listitem>
+        <para>
+         Dublin core records.
+        </para>
+       </listitem>
+      </itemizedlist>
+     </para>
+    </example>
+   </sect2>
+   <sect2 id="tools.retrieval.api">
+    <title>API</title>
+    <para>
+     It should be easy to use the retrieval systems from applications. Refer
+     to the headers
+     <filename>yaz/retrieval.h</filename> and 
+     <filename>yaz/record_conv.h</filename>.
+    </para>
+   </sect2>
+  </sect1>
  </chapter>
  
  <!-- Keep this comment at the end of the file