Document CCL case to reflect reality

[yaz-moved-to-github.git] / doc / tools.xml
diff --git a/doc/tools.xml b/doc/tools.xml

index 4cfcf1d..ec50220 100644 (file)
--- a/doc/tools.xml
+++ b/doc/tools.xml
@@ -1,4 +1,3 @@
-<!-- $Id: tools.xml,v 1.66 2008-02-28 09:40:42 adam Exp $ -->
   <chapter id="tools"><title>Supporting Tools</title>
    
    <para>
@@ -641,7 +640,7 @@
         </table>
        </para>
        <para>
-       Refer to the complete
+       Refer to <xref linkend="bib1"/> or the complete
         <ulink url="&url.z39.50.attset.bib1;">list of Bib-1 attributes</ulink>
        </para>
        <para>
@@ -680,6 +679,17 @@
            </entry>
           </row>
           
+         <row><entry><literal>s=ag</literal></entry><entry>
+           Tokens that appear as phrases (with blanks in them) get
+           structure phrase attached. Tokens that appear as words
+           get structure word attached. Phrases and words are
+           ANDed. This is a variant of s=al and s=pw, with the main
+           difference that words are not split (with operator AND)
+           but instead kept in one RPN token. This facility appeared
+           in YAZ 4.2.38.
+          </entry>
+         </row>
+
           <row><entry><literal>r=o</literal></entry><entry>
            Allows ranges and the operators greater-than, less-than, ...
             equals.
@@ -732,6 +742,24 @@
             set to both left&amp;right.
            </entry>
           </row>
+
+         <row><entry><literal>t=x</literal></entry><entry>
+           Allows masking anywhere in a term, thus fully supporting
+           # (mask one character) and ? (zero or more of any).
+           If masking is used, truncation is set to 102 (regexp-1 in term)
+           and the term is converted accordingly to a regular expression.
+          </entry>
+         </row>
+
+         <row><entry><literal>t=z</literal></entry><entry>
+           Allows masking anywhere in a term, thus fully supporting
+           # (mask one character) and ? (zero or more of any).
+           If masking is used, truncation is set to 104 (Z39.58 in term)
+           and the term is converted accordingly to a Z39.58 masking term -
+           actually the same truncation as CCL itself.
+          </entry>
+         </row>
+
          </tbody>
         </tgroup>
         </table>
@@ -846,9 +874,9 @@
          <row>
           <entry>case</entry>
          <entry>Specifies if CCL operators and qualifiers should be
-          compared with case sensitivity or not. Specify 0 for
-          case sensitive; 1 for case insensitive.</entry>
-         <entry><literal>0</literal></entry>
+          compared with case sensitivity or not. Specify 1 for
+          case sensitive; 0 for case insensitive.</entry>
+         <entry><literal>1</literal></entry>
          </row>
  
          <row>
@@ -1726,9 +1754,9 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
     <screen>
      NMEM nmem_create(void);
      void nmem_destroy(NMEM n);
-    void *nmem_malloc(NMEM n, int size);
+    void *nmem_malloc(NMEM n, size_t size);
      void nmem_reset(NMEM n);
-    int nmem_total(NMEM n);
+    size_t nmem_total(NMEM n);
      void nmem_init(void);
      void nmem_exit(void);
     </screen>
@@ -1913,9 +1941,9 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
    <sect1 id="marc"><title>MARC</title>
     
     <para>
-    YAZ provides a fast utility that decodes MARC records and
-    encodes to a varity of output formats. The MARC records must
-    be encoded in ISO2709.
+    YAZ provides a fast utility for working with MARC records.
+    Early versions of the MARC utility only allowed decoding of ISO2709.
+    Today the utility can both decode from and encode to a variety of formats.
     </para>
     <synopsis><![CDATA[
      #include <yaz/marcdisp.h>
@@ -1933,6 +1961,8 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
      #define YAZ_MARC_MARCXML   3
      #define YAZ_MARC_ISO2709   4
      #define YAZ_MARC_XCHANGE   5
+    #define YAZ_MARC_CHECK     6
+    #define YAZ_MARC_TURBOMARC 7
  
      /* supply iconv handle for character set conversion .. */
      void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd);
@@ -1942,15 +1972,22 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
  
      /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure.
      On success, result in *result with size *rsize. */
-    int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
-                             char **result, int *rsize);
+    int yaz_marc_decode_buf(yaz_marc_t mt, const char *buf, int bsize,
+                            const char **result, size_t *rsize);
  
      /* decode MARC in buf of size bsize. Returns >0 on success; <=0 on failure.
         On success, result in WRBUF */
-    int yaz_marc_decode_wrbuf (yaz_marc_t mt, const char *buf,
-                               int bsize, WRBUF wrbuf);
+    int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf,
+                              int bsize, WRBUF wrbuf);
  ]]>
     </synopsis>
+   <note>
+    <para>
+     The synopsis is just a basic subset of all functionality. Refer
+     to the actual header file <filename>marcdisp.h</filename> for
+     details.
+    </para>
+   </note>
     <para>
      A MARC conversion handle must be created by using
      <function>yaz_marc_create</function> and destroyed
@@ -1975,7 +2012,7 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
        <term>YAZ_MARC_MARCXML</term>
        <listitem>
         <para>
-       The resulting record is converted to MARCXML.
+       <ulink url="&url.marcxml;">MARCXML</ulink>.
         </para>
        </listitem>
       </varlistentry>
@@ -1984,10 +2021,41 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
        <term>YAZ_MARC_ISO2709</term>
        <listitem>
         <para>
-       The resulting record is converted to ISO2709 (MARC).
+       ISO2709 (sometimes just referred to as "MARC").
         </para>
        </listitem>
       </varlistentry>
+
+     <varlistentry>
+      <term>YAZ_MARC_XCHANGE</term>
+      <listitem>
+       <para>
+       <ulink url="&url.marcxchange;">MarcXchange</ulink>.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term>YAZ_MARC_CHECK</term>
+      <listitem>
+       <para>
+       Pseudo format for validation only. Does not generate
+       any real output except diagnostics.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term>YAZ_MARC_TURBOMARC</term>
+      <listitem>
+       <para>
+       XML format with same semantics as MARCXML but more compact
+       and geared towards fast processing with XSLT. Refer to
+       <xref linkend="tools.turbomarc"/> for more information.
+       </para>
+      </listitem>
+     </varlistentry>
+
      </variablelist>
     </para>
     <para>
@@ -2001,13 +2069,13 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
     <example id="example.marc.display">
      <title>Display of MARC record</title>
      <para>
-     The followint program snippet illustrates how the MARC API may
+     The following program snippet illustrates how the MARC API may
       be used to convert a MARC record to the line-by-line format:
       <programlisting><![CDATA[
        void print_marc(const char *marc_buf, int marc_buf_size)
        {
           char *result;      /* for result buf */
-         int result_len;    /* for size of result */
+         size_t result_len;    /* for size of result */
           yaz_marc_t mt = yaz_marc_create();
           yaz_marc_xml(mt, YAZ_MARC_LINE);
           yaz_marc_decode_buf(mt, marc_buf, marc_buf_size,
@@ -2019,6 +2087,71 @@ void cql_to_xml_stdio(struct cql_node *cn, FILE *f);
        </programlisting>
      </para>
     </example>
+   <sect2 id="tools.turbomarc">
+    <title>TurboMARC</title>
+    <para>
+     TurboMARC is yet another XML encoding of a MARC record. The format
+     was designed for fast processing with XSLT.
+    </para>
+    <para>
+     Applications like
+     Pazpar2 use XSLT to convert an XML encoded MARC record to an internal
+     representation. This conversion mostly checks the tag of a MARC field
+     to determine the basic rules in the conversion. This check is
+     costly when that tag is encoded as an attribute, as in MARCXML.
+     Having the tag value as the element name instead makes processing
+     many times faster (at least for Libxslt).
+    </para>
+    <para>
+     TurboMARC is encoded as follows:
+     <itemizedlist>
+      <listitem><para>
+       Record elements are part of the namespace
+       "<literal>http://www.indexdata.com/turbomarc</literal>".
+       </para></listitem>
+      <listitem><para>
+       A record is enclosed in element <literal>r</literal>.
+       </para></listitem>
+      <listitem><para>
+       A collection of records is enclosed in element
+       <literal>collection</literal>.
+       </para></listitem>
+      <listitem><para>
+       The leader is encoded as element <literal>l</literal> with the 
+       leader content as its (text) value.
+       </para></listitem>
+      <listitem><para>
+       A control field is encoded as element <literal>c</literal> concatenated
+       with the tag value of the control field if the tag value
+       matches the regular expression <literal>[a-zA-Z0-9]*</literal>.
+       If the tag value does not match the regular expression
+       <literal>[a-zA-Z0-9]*</literal> the control field is encoded
+       as element <literal>c</literal> and attribute <literal>code</literal>
+       will hold the tag value.
+       This rule ensures that in the rare cases where a tag value might
+       result in non-well-formed XML, YAZ encodes it as a coded attribute
+       (as in MARCXML).
+       </para>
+       <para>
+       The control field content is the text value of this element.
+       Indicators are encoded as attribute names
+       <literal>i1</literal>, <literal>i2</literal>, etc., and
+       corresponding values for each indicator.
+       </para></listitem>
+      <listitem><para>
+       A data field is encoded as element <literal>d</literal> concatenated
+       with the tag value of the data field or using the attribute
+       <literal>code</literal> as described in the rules for control fields.
+       The children of the data field element are subfield elements.
+       Each subfield element is encoded as <literal>s</literal>
+       concatenated with the subfield code.
+       The text of the subfield element is the contents of the subfield.
+       Indicators are encoded as attributes for the data field element similar
+       to the encoding for control fields.
+       </para></listitem>
+     </itemizedlist>
+    </para>
+   </sect2>
    </sect1>
  
    <sect1 id="tools.retrieval">