Move declaration to start of block

[yaz-moved-to-github.git] / doc / tools.xml
diff --git a/doc/tools.xml b/doc/tools.xml

index 5e55e41..4e91c27 100644 (file)
--- a/doc/tools.xml
+++ b/doc/tools.xml
@@ -1,4 +1,4 @@
-<!-- $Id: tools.xml,v 1.27 2003-07-11 09:51:13 mike Exp $ -->
+<!-- $Id: tools.xml,v 1.36 2004-07-26 12:16:56 mike Exp $ -->
   <chapter id="tools"><title>Supporting Tools</title>
    
    <para>
@@ -131,7 +131,7 @@
  
       top-set ::= &lsqb; '@attrset' string &rsqb;
  
-     query-struct ::= attr-spec | simple | complex | '@term' term-type
+     query-struct ::= attr-spec | simple | complex | '@term' term-type query
  
       attr-spec ::= '@attr' &lsqb; string &rsqb; string query-struct
  
@@ -173,11 +173,15 @@
      <para>
       The @attr operator is followed by an attribute specification 
       (<literal>attr-spec</literal> above). The specification consists
-     of optional an attribute set, an attribute type-value pair and
-     a sub query. The attribute type-value pair is packed in one string:
-     an attribute type, a dash, followed by an attribute value. 
+     of an optional attribute set, an attribute type-value pair and
+     a sub-query. The attribute type-value pair is packed in one string:
+     an attribute type, an equals sign, and an attribute value, like this:
+     <literal>@attr 1=1003</literal>.
       The type is always an integer but the value may be either an
       integer or a string (if it doesn't start with a digit character).
+     A string attribute-value is encoded as a Type-1 ``complex''
+     attribute with the list of values containing the single string
+     specified, and including no semantic indicators.
      </para>
  
      <para>
@@ -297,101 +301,111 @@
  
      <sect3 id="pqf-examples"><title>PQF queries</title>
  
-     <para>Queries using simple terms.
-      <screen>
-      dylan
-      "bob dylan"
-      </screen>
-     </para>
-     <para>Boolean operators.
-      <screen>
-       @or "dylan" "zimmerman"
-       @and @or dylan zimmerman when
-       @and when @or dylan zimmerman
-      </screen>
-     </para>
-     <para>
-      Reference to result sets.
-      <screen>
-       @set Result-1
-       @and @set seta setb
-      </screen>
-     </para>
-     <para>
-      Attributes for terms.
-      <screen>
-       @attr 1=4 computer
-       @attr 1=4 @attr 4=1 "self portrait"
-       @attr exp1 @attr 1=1 CategoryList
-       @attr gils 1=2008 Copenhagen
-       @attr 1=/book/title computer
-      </screen>
-     </para>
-     <para>
-      Proximity.
-      <screen>
-       @prox 0 3 1 2 k 2 dylan zimmerman
-      </screen>
-      <note><para>
-      Here the parameters 0, 3, 1, 2, k and 2 represent exclusion,
-      distance, ordered, relation, which-code and unit-code, in that
-      order.  So:
-      <itemizedlist>
-        <listitem><para>
-         exclusion = 0: the proximity condition must hold
-        </para></listitem>
-        <listitem><para>
-         distance = 3: the terms must be three units apart
-        </para></listitem>
-        <listitem><para>
-         ordered = 1: they must occur in the order they are specified
-        </para></listitem>
-        <listitem><para>
-         relation = 2: lessThanOrEqual (to the distance of 3 units)
-        </para></listitem>
-        <listitem><para>
-         which-code is ``known'', so the standard unit-codes are used
-        </para></listitem>
-        <listitem><para>
-         unit-code = 2: word.
-        </para></listitem>
-      </itemizedlist>
-      So the whole proximity query means that the words
-      <literal>dylan</literal> and <literal>zimmerman</literal> must
-      both occur in the record, in that order, differing in position
-      by three or fewer words (i.e. with two or fewer words between
-      them.)  The query would find ``Bob Dylan, aka. Robert
-      Zimmerman'', but not ``Bob Dylan, born as Robert Zimmerman''
-      since the distance in this case is four.
-      </para></note>
-     </para>
-     <para>
-      Specifying term type.
-      <screen>
-       @term string "a UTF-8 string, maybe?"
-      </screen>
-     </para>
-     <para>Mixed queries
-      <screen>
-       @or @and bob dylan @set Result-1
-       
-       @attr 4=1 @and @attr 1=1 "bob dylan" @attr 1=4 "slow train coming"
-       
-       @and @attr 2=4 @attr gils 1=2038 -114 @attr 2=2 @attr gils 1=2039 -109
+     <example><title>PQF queries using simple terms</title>
+      <para>
+       <screen>
+       dylan
+       "bob dylan"
+       </screen>
+      </para>
+     </example>
+     <example><title>PQF boolean operators</title>
+      <para>
+       <screen>
+       @or "dylan" "zimmerman"
+       @and @or dylan zimmerman when
+       @and when @or dylan zimmerman
+       </screen>
+      </para>
+     </example>
+     <example><title>PQF references to result sets</title>
+      <para>
+       <screen>
+       @set Result-1
+       @and @set seta setb
+       </screen>
+      </para>
+     </example>
+     <example><title>Attributes for terms</title>
+      <para>
+       <screen>
+       @attr 1=4 computer
+       @attr 1=4 @attr 4=1 "self portrait"
+       @attrset exp1 @attr 1=1 CategoryList
+       @attr gils 1=2008 Copenhagen
+       @attr 1=/book/title computer
+       </screen>
+      </para>
+     </example>
+     <example><title>PQF Proximity queries</title>
+      <para>
+       <screen>
+       @prox 0 3 1 2 k 2 dylan zimmerman
+       </screen>
+       <note><para>
+        Here the parameters 0, 3, 1, 2, k and 2 represent exclusion,
+        distance, ordered, relation, which-code and unit-code, in that
+        order.  So:
+        <itemizedlist>
+         <listitem><para>
+           exclusion = 0: the proximity condition must hold
+          </para></listitem>
+         <listitem><para>
+           distance = 3: the terms must be three units apart
+          </para></listitem>
+         <listitem><para>
+           ordered = 1: they must occur in the order they are specified
+          </para></listitem>
+         <listitem><para>
+           relation = 2: lessThanOrEqual (to the distance of 3 units)
+          </para></listitem>
+         <listitem><para>
+           which-code is ``known'', so the standard unit-codes are used
+          </para></listitem>
+         <listitem><para>
+           unit-code = 2: word.
+          </para></listitem>
+        </itemizedlist>
+        So the whole proximity query means that the words
+        <literal>dylan</literal> and <literal>zimmerman</literal> must
+        both occur in the record, in that order, differing in position
+        by three or fewer words (i.e. with two or fewer words between
+        them.)  The query would find ``Bob Dylan, aka. Robert
+        Zimmerman'', but not ``Bob Dylan, born as Robert Zimmerman''
+        since the distance in this case is four.
+       </para></note>
+      </para>
+     </example>
+     <example><title>PQF specification of search term</title>
+      <para>
+       <screen>
+       @term string "a UTF-8 string, maybe?"
+       </screen>
+      </para>
+     </example>
+     <example><title>PQF mixed queries</title>
+      <para>
+       <screen>
+       @or @and bob dylan @set Result-1
+       
+       @attr 4=1 @and @attr 1=1 "bob dylan" @attr 1=4 "slow train coming"
+       
+       @and @attr 2=4 @attr gils 1=2038 -114 @attr 2=2 @attr gils 1=2039 -109
        </screen>
-      <note>
+       <note>
         <para>
-         The last of these examples is a spatial search: in
-         <ulink url="http://www.gils.net/prof_v2.html#sec_7_4"
+        The last of these examples is a spatial search: in
+        <ulink url="http://www.gils.net/prof_v2.html#sec_7_4"
           >the GILS attribute set</ulink>,
-         access point
-         2038 indicates West Bounding Coordinate and
-         2030 indicates East Bounding Coordinate,
-         so the query is for areas extending from -114 degrees
-         to no more than -109 degrees.
+        access point
+        2038 indicates West Bounding Coordinate and
+        2030 indicates East Bounding Coordinate,
+        so the query is for areas extending from -114 degrees
+        to no more than -109 degrees.
         </para>
-      </note>
-     </para>
+       </note>
+      </para>
+     </example>
      </sect3>
     </sect2>
     <sect2 id="CCL"><title>CCL</title>
@@ -407,8 +421,7 @@
      </para>
  
      <para>
-     The <ulink url="http://europagate.dtv.dk/">EUROPAGATE</ulink>
-     research project working under the Libraries programme
+     The EUROPAGATE research project working under the Libraries programme
       of the European Commission's DG XIII has, amongst other useful tools,
       implemented a general-purpose CCL parser which produces an output
       structure that can be trivially converted to the internal RPN
@@ -759,7 +772,7 @@
         be an alias for <replaceable>q1</replaceable>, 
         <replaceable>q2</replaceable>... such that the CCL
         query <replaceable>q=x</replaceable> is equivalent to
-       <replaceable>q1=x or w2=x or ...</replaceable>.
+       <replaceable>q1=x or q2=x or ...</replaceable>.
        </para>
       </sect4>
  
@@ -1195,9 +1208,9 @@ int cql_transform_error(cql_transform_t ct, char **addinfop);
        error-code and sets the string-pointer at
        <literal>*addinfop</literal> to point to a string containing
        additional information about the error that occurred: for
-      example, if the error code is 15 (``Illegal or unsupported index
+      example, if the error code is 15 (``Illegal or unsupported context
        set''), the additional information is the name of the requested
-      index set that was not recognised.
+      context set that was not recognised.
       </para>
       <para>
        The SRW error-codes may be translated into brief human-readable
@@ -1253,26 +1266,37 @@ int cql_transform_FILE(cql_transform_t ct,
        The following CQL patterns are recognized:
        <variablelist>
         <varlistentry><term>
-         <literal>qualifier.</literal><replaceable>set</replaceable><literal>.</literal><replaceable>name</replaceable>
+         <literal>index.</literal><replaceable>set</replaceable><literal>.</literal><replaceable>name</replaceable>
          </term>
          <listitem>
           <para>
-          This pattern is invoked when a CQL qualifier, such as 
+          This pattern is invoked when a CQL index, such as 
            dc.title is converted. <replaceable>set</replaceable>
-          and <replaceable>name</replaceable> is the index set and qualifier
+          and <replaceable>name</replaceable> are the context set and index
            name respectively.
            Typically, the RPN specifies an equivalent use attribute.
           </para>
           <para>
-          For terms not bound by a qualifier the pattern
-          <literal>qualifier.srw.serverChoice</literal> is used.
-          Here, the prefix <literal>srw</literal> is defined as
-          <literal>http://www.loc.gov/zing/cql/srw-indexes/v1.0/</literal>.
+          For terms not bound by an index the pattern
+          <literal>index.cql.serverChoice</literal> is used.
+          Here, the prefix <literal>cql</literal> is defined as
+          <literal>http://www.loc.gov/zing/cql/cql-indexes/v1.0/</literal>.
            If this pattern is not defined, the mapping will fail.
           </para>
          </listitem>
         </varlistentry>
         <varlistentry><term>
+         <literal>qualifier.</literal><replaceable>set</replaceable><literal>.</literal><replaceable>name</replaceable>
+        (DEPRECATED)
+        </term>
+        <listitem>
+         <para>
+         For backwards compatibility, this is recognised as a synonym of
+          <literal>index.</literal><replaceable>set</replaceable><literal>.</literal><replaceable>name</replaceable>
+         </para>
+        </listitem>
+       </varlistentry>
+       <varlistentry><term>
           <literal>relation.</literal><replaceable>relation</replaceable>
          </term>
          <listitem>
@@ -1354,10 +1378,10 @@ int cql_transform_FILE(cql_transform_t ct,
          </term>
          <listitem>
           <para>
-          This specification defines a CQL index set for a given prefix.
+          This specification defines a CQL context set for a given prefix.
            The value on the right hand side is the URI for the set - 
            <emphasis>not</emphasis> RPN. All prefixes used in
-          qualifier patterns must be defined this way.
+          index patterns must be defined this way.
           </para>
          </listitem>
         </varlistentry>
@@ -1365,16 +1389,16 @@ int cql_transform_FILE(cql_transform_t ct,
       </para>
       <example><title>CQL to RPN mapping file</title>
        <para>
-       This simple file defines two index sets, three qualifiers and three
+       This simple file defines two context sets, three indexes and three
         relations, a position pattern and a default structure.
        </para>
        <programlisting><![CDATA[
-       set.srw    = http://www.loc.gov/zing/cql/srw-indexes/v1.0/
+       set.cql    = http://www.loc.gov/zing/cql/context-sets/cql/v1.1/
         set.dc     = http://www.loc.gov/zing/cql/dc-indexes/v1.0/
  
-       qualifier.srw.serverChoice = 1=1016
-       qualifier.dc.title         = 1=4
-       qualifier.dc.subject       = 1=21
+       index.cql.serverChoice = 1=1016
+       index.dc.title         = 1=4
+       index.dc.subject       = 1=21
    
         relation.<                 = 2=1
         relation.eq                = 2=3
@@ -1394,7 +1418,7 @@ int cql_transform_FILE(cql_transform_t ct,
         <screen>
          @attr 1=1016 @attr 2=3 @attr 4=1 @attr 3=3 @attr 6=1 "computer"
         </screen>
-       by rules <literal>qualifier.srw.serverChoice</literal>,
+       by rules <literal>index.cql.serverChoice</literal>,
         <literal>relation.scr</literal>, <literal>structure.*</literal>,
         <literal>position.any</literal>.
        </para>
@@ -1744,7 +1768,7 @@ typedef struct oident
      release the associated memory again. For the structures describing the
      Z39.50 PDUs and related structures, it is convenient to use the
      memory-management system of the &odr; subsystem (see
-    <link linkend="odr-use">Using ODR</link>). However, in some circumstances
+    <xref linkend="odr.use"/>). However, in some circumstances
      where you might otherwise benefit from using a simple nibble memory
      management system, it may be impractical to use
      <function>odr_malloc()</function> and <function>odr_reset()</function>.
@@ -1794,6 +1818,117 @@ typedef struct oident
     </para>
  
    </sect1>
+
+  <sect1 id="tools.marc"><title>MARC</title>
+   
+   <para>
+    YAZ provides a fast utility that decodes MARC records and
+    encodes to a varity of output formats. The MARC records must
+    be encoded in ISO2709.
+   </para>
+   <synopsis><![CDATA[
+    #include <yaz/marcdisp.h>
+
+    /* create handler */
+    yaz_marc_t yaz_marc_create(void);
+    /* destroy */
+    void yaz_marc_destroy(yaz_marc_t mt);
+
+    /* set XML mode YAZ_MARC_LINE, YAZ_MARC_SIMPLEXML, ... */
+    void yaz_marc_xml(yaz_marc_t mt, int xmlmode);
+    #define YAZ_MARC_LINE      0
+    #define YAZ_MARC_SIMPLEXML 1
+    #define YAZ_MARC_OAIMARC   2
+    #define YAZ_MARC_MARCXML   3
+    #define YAZ_MARC_ISO2709   4
+
+    /* supply iconv handle for character set conversion .. */
+    void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd);
+
+    /* set debug level, 0=none, 1=more, 2=even more, .. */
+    void yaz_marc_debug(yaz_marc_t mt, int level);
+
+    /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure.
+    On success, result in *result with size *rsize. */
+    int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
+                             char **result, int *rsize);
+
+    /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure.
+       On success, result in WRBUF */
+    int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf,
+                               int bsize, WRBUF wrbuf);
+]]>
+   </synopsis>
+   <para>
+    A MARC conversion handle must be created by using
+    <function>yaz_marc_create</function> and destroyed
+    by calling <function>yaz_marc_destroy</function>.
+  </para>
+   <para>
+    All other function operate on a <literal>yaz_marc_t</literal> handle.
+    The output is specified by a call to <function>yaz_marc_xml</function>.
+    The <literal>xmlmode</literal> must be one of
+    <variablelist>
+     <varlistentry>
+      <term>YAZ_MARC_LINE</term>
+      <listitem>
+       <para>
+       A simple line-by-line format suitable for display but not
+       recommend for further (machine) processing.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term>YAZ_MARC_MARXML</term>
+      <listitem>
+       <para>
+       The resulting record is converted to MARCXML.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term>YAZ_MARC_ISO2709</term>
+      <listitem>
+       <para>
+       The resulting record is converted to ISO2709 (MARC).
+       </para>
+      </listitem>
+     </varlistentry>
+    </variablelist>
+   </para>
+   <para>
+    The actual conversion functions are 
+    <function>yaz_marc_decode_buf</function> and
+    <function>yaz_marc_decode_wrbuf</function> which decodes and encodes
+    a MARC record. The former function operates on simple buffers, the
+    stores the resulting record in a WRBUF handle (WRBUF is a simple string
+    type).
+   </para>
+   <example>
+    <title>Display of MARC record</title>
+    <para>
+     The followint program snippet illustrates how the MARC API may
+     be used to convert a MARC record to the line-by-line format:
+     <programlisting><![CDATA[
+      void print_marc(const char *marc_buf, int marc_buf_size)
+      {
+         char *result;      /* for result buf */
+         int result_len;    /* for size of result */
+         yaz_marc_t mt = yaz_marc_create();
+         yaz_marc_xml(mt, YAZ_MARC_LINE);
+         yaz_marc_decode_buf(mt, marc_buf, marc_buf_size,
+                             &result, &result_len);
+         fwrite(result, result_len, 1, stdout);
+         yaz_marc_destroy(mt);  /* note that result is now freed... */
+      }
+]]>
+      </programlisting>
+    </para>
+   </example>
+  </sect1>
+
   </chapter>
   
   <!-- Keep this comment at the end of the file