ZOOM option: step. More verbose GFS SRW server display
[yaz-moved-to-github.git] / doc / tools.xml
index 051daf2..7f1997c 100644 (file)
@@ -1,4 +1,4 @@
-<!-- $Id: tools.xml,v 1.16 2003-01-23 20:26:37 adam Exp $ -->
+<!-- $Id: tools.xml,v 1.22 2003-03-18 13:30:21 adam Exp $ -->
  <chapter id="tools"><title>Supporting Tools</title>
   
   <para>
  <chapter id="tools"><title>Supporting Tools</title>
   
   <para>
     </para>
 
     <para>
     </para>
 
     <para>
-     Z39.50 version 3 defines various encoding of terms.
-     Use the @term operator to indicate the encoding type:
-     <literal>general</literal>, <literal>numeric</literal>,
-     <literal>string</literal> (for InternationalString), ..
+     Version 3 of the Z39.50 specification defines various encodings of terms.
+     Use <literal>@term </literal> <replaceable>type</replaceable>,
+     where <replaceable>type</replaceable> is one of: <literal>general</literal>,
+     <literal>numeric</literal>, <literal>string</literal>
+     (for InternationalString), ..
      If no term type has been given, the <literal>general</literal> form
      is used which is the only encoding allowed in both version 2 - and 3
      of the Z39.50 standard.
     </para>
     
      If no term type has been given, the <literal>general</literal> form
      is used which is the only encoding allowed in both version 2 - and 3
      of the Z39.50 standard.
     </para>
     
-    <para>
-     The following are all examples of valid queries in the PQF.
-    </para>
-
-    <screen>
-     dylan
-
-     "bob dylan"
-
-     @or "dylan" "zimmerman"
-
-     @set Result-1
-
-     @or @and bob dylan @set Result-1
-
-     @attr 1=4 computer
-
-     @attr 4=1 @and @attr 1=1 "bob dylan" @attr 1=4 "slow train coming"
-
-     @attr 4=1 @attr 1=4 "self portrait"
-
-     @prox 0 3 1 2 k 2 dylan zimmerman
-
-     @and @attr 2=4 @attr gils 1=2038 -114 @attr 2=2 @attr gils 1=2039 -109
-
-     @term string "a UTF-8 string, maybe?"
-
-     @attr 1=/book/title computer
-    </screen>
+    <example><title>PQF queries</title>
 
 
+     <para>Queries using simple terms.
+      <screen>
+      dylan
+      "bob dylan"
+      </screen>
+     </para>
+     <para>Boolean operators.
+      <screen>
+       @or "dylan" "zimmerman"
+       @and @or dylan zimmerman when
+       @and when @or dylan zimmerman
+      </screen>
+     </para>
+     <para>
+      Reference to result sets.
+      <screen>
+       @set Result-1
+       @and @set seta setb
+      </screen>
+     </para>
+     <para>
+      Attributes for terms.
+      <screen>
+       @attr 1=4 computer
+       @attr 1=4 @attr 4=1 "self portrait"
+       @attr exp1 @attr 1=1 CategoryList
+       @attr gils 1=2008 Copenhagen
+       @attr 1=/book/title computer
+      </screen>
+     </para>
+     <para>
+      Proximity.
+      <screen>
+       @prox 0 3 1 2 k 2 dylan zimmerman
+       </screen>
+      </para>
+     <para>
+      Specifying term type.
+      <screen>
+       @term string "a UTF-8 string, maybe?"
+      </screen>
+     </para>
+     <para>Mixed queries
+      <screen>
+       @or @and bob dylan @set Result-1
+       
+       @attr 4=1 @and @attr 1=1 "bob dylan" @attr 1=4 "slow train coming"
+       
+       @and @attr 2=4 @attr gils 1=2038 -114 @attr 2=2 @attr gils 1=2039 -109
+      </screen>
+     </para>
+    </example>
    </sect2>
    <sect2 id="CCL"><title>Common Command Language</title>
 
    </sect2>
    <sect2 id="CCL"><title>Common Command Language</title>
 
       -- Proximity operator
 
      </screen>
       -- Proximity operator
 
      </screen>
-
-     <para>
-      The following queries are all valid:
-     </para>
-
-     <screen>
-      dylan
-
-      "bob dylan"
-
-      dylan or zimmerman
-
-      set=1
-
-      (dylan and bob) or set=1
-
-     </screen>
-     <para>
-      Assuming that the qualifiers <literal>ti</literal>, <literal>au</literal>
-      and <literal>date</literal> are defined we may use:
-     </para>
-
-     <screen>
-      ti=self portrait
-
-      au=(bob dylan and slow train coming)
-
-      date>1980 and (ti=((self portrait)))
-
-     </screen>
-
+     
+     <example><title>CCL queries</title>
+      <para>
+       The following queries are all valid:
+      </para>
+      
+      <screen>
+       dylan
+       
+       "bob dylan"
+       
+       dylan or zimmerman
+       
+       set=1
+       
+       (dylan and bob) or set=1
+       
+      </screen>
+      <para>
+       Assuming that the qualifiers <literal>ti</literal>,
+       <literal>au</literal>
+       and <literal>date</literal> are defined we may use:
+      </para>
+      
+      <screen>
+       ti=self portrait
+       
+       au=(bob dylan and slow train coming)
+
+       date>1980 and (ti=((self portrait)))
+       
+      </screen>
+     </example>
+     
     </sect3>
     <sect3><title>CCL Qualifiers</title>
     </sect3>
     <sect3><title>CCL Qualifiers</title>
-
+     
      <para>
       Qualifiers are used to direct the search to a particular searchable
       index, such as title (ti) and author indexes (au). The CCL standard
      <para>
       Qualifiers are used to direct the search to a particular searchable
       index, such as title (ti) and author indexes (au). The CCL standard
      </para>
 
      <para>
      </para>
 
      <para>
-      Consider a scenario where the target support ranked searches in the
-      title-index. In this case, the user could specify
-     </para>
-
-     <screen>
-      ti,ranked=knuth computer
-     </screen>
-     <para>
-      and the <literal>ranked</literal> would map to relation=relevance
-      (2=102) and the <literal>ti</literal> would map to title (1=4).
-     </para>
-
-     <para>
-      A "profile" with a set predefined CCL qualifiers can be read from a
-      file. The YAZ client reads its CCL qualifiers from a file named
+      A CCL profile is a set of predefined CCL qualifiers that may be
+      read from a file.
+      The YAZ client reads its CCL qualifiers from a file named
       <filename>default.bib</filename>. Each line in the file has the form:
      </para>
 
      <para>
       <replaceable>qualifier-name</replaceable>  
       <filename>default.bib</filename>. Each line in the file has the form:
      </para>
 
      <para>
       <replaceable>qualifier-name</replaceable>  
-      <replaceable>type</replaceable>=<replaceable>val</replaceable>
-      <replaceable>type</replaceable>=<replaceable>val</replaceable> ...
+      [<replaceable>attributeset</replaceable><literal>,</literal>]<replaceable>type</replaceable><literal>=</literal><replaceable>val</replaceable>
+      [<replaceable>attributeset</replaceable><literal>,</literal>]<replaceable>type</replaceable><literal>=</literal><replaceable>val</replaceable> ...      
      </para>
 
      <para>
       where <replaceable>qualifier-name</replaceable> is the name of the
       qualifier to be used (eg. <literal>ti</literal>),
      </para>
 
      <para>
       where <replaceable>qualifier-name</replaceable> is the name of the
       qualifier to be used (eg. <literal>ti</literal>),
-      <replaceable>type</replaceable> is a BIB-1 category type and
-      <replaceable>val</replaceable> is the corresponding BIB-1 attribute
-      value.
-      The <replaceable>type</replaceable> can be either numeric or it may be
-      either <literal>u</literal> (use), <literal>r</literal> (relation),
-      <literal>p</literal> (position), <literal>s</literal> (structure),
-      <literal>t</literal> (truncation) or <literal>c</literal> (completeness).
-      The <replaceable>qualifier-name</replaceable> <literal>term</literal>
-      has a special meaning.
-      The types and values for this definition is used when
-      <emphasis>no</emphasis> qualifiers are present.
-     </para>
-
-     <para>
-      Consider the following definition:
-     </para>
-
-     <screen>
-      ti       u=4 s=1
-      au       u=1 s=1
-      term     s=105
-     </screen>
-     <para>
-      Two qualifiers are defined, <literal>ti</literal> and
-      <literal>au</literal>.
-      They both set the structure-attribute to phrase (1).
-      <literal>ti</literal>
-      sets the use-attribute to 4. <literal>au</literal> sets the
-      use-attribute to 1.
-      When no qualifiers are used in the query the structure-attribute is
-      set to free-form-text (105).
+      <replaceable>type</replaceable> is the attribute type in the attribute
+      set (Bib-1 is used if no attribute set is given) and
+      <replaceable>val</replaceable> is the attribute value.
+      The <replaceable>type</replaceable> can be specified either as an
+      integer or as a single letter:
+      <literal>u</literal> for use,
+      <literal>r</literal> for relation, <literal>p</literal> for position,
+      <literal>s</literal> for structure, <literal>t</literal> for truncation
+      or <literal>c</literal> for completeness.
+      The attributes for the special qualifier name <literal>term</literal>
+      are used when no CCL qualifier is given in a query.
      </para>
 
      </para>
 
+     <example><title>CCL profile</title>
+      <para>
+       Consider the following definition:
+      </para>
+      
+      <screen>
+       ti       u=4 s=1
+       au       u=1 s=1
+       term     s=105
+       ranked   r=102
+      </screen>
+      <para>
+       Three qualifiers are defined, <literal>ti</literal>, 
+       <literal>au</literal> and <literal>ranked</literal>.
+       <literal>ti</literal> and <literal>au</literal> both set 
+       structure attribute to phrase (s=1).
+       <literal>ti</literal>
+       sets the use-attribute to 4. <literal>au</literal> sets the
+       use-attribute to 1.
+       When no qualifiers are used in the query the structure-attribute is
+       set to free-form-text (105).
+      </para>
+      <para>
+       You can combine attributes. To search for "ranked title" you
+       can do 
+       <screen>
+        ti,ranked=knuth computer
+       </screen>
+       which will use "relation is ranked", "use is title", "structure is
+       phrase".
+      </para>
+     </example>
+     
     </sect3>
     <sect3><title>CCL API</title>
      <para>
     </sect3>
     <sect3><title>CCL API</title>
      <para>
@@ -600,7 +629,7 @@ struct cql_node {
             struct cql_node *right;
             struct cql_node *modifiers;
             struct cql_node *prefixes;
             struct cql_node *right;
             struct cql_node *modifiers;
             struct cql_node *prefixes;
-        } bool;
+        } boolean;
         struct {
             char *name;
             char *value;
         struct {
             char *name;
             char *value;
@@ -707,7 +736,7 @@ struct cql_node {
       Conversion to PQF (and Z39.50 RPN) is tricky by the fact
       that the resulting RPN depends on the Z39.50 target
       capabilities (combinations of supported attributes). 
       Conversion to PQF (and Z39.50 RPN) is tricky by the fact
       that the resulting RPN depends on the Z39.50 target
       capabilities (combinations of supported attributes). 
-      Furthermore, the CQL and SRW operates on index prefixes
+      In addition, CQL and SRW operate on index prefixes
       (URI or strings), whereas the RPN uses Object Identifiers
       for attribute sets.
      </para>
       (URI or strings), whereas the RPN uses Object Identifiers
       for attribute sets.
      </para>
@@ -721,7 +750,7 @@ cql_transform_t cql_transform_open_fname(const char *fname);
 void cql_transform_close(cql_transform_t ct);
       </synopsis>
       The first two functions create a transformation handle from
 void cql_transform_close(cql_transform_t ct);
       </synopsis>
       The first two functions create a transformation handle from
-      either an already open FILE or from a filename.
+      either an already open FILE or from a filename respectively.
      </para>
      <para>
       The handle is destroyed by <function>cql_transform_close</function> 
      </para>
      <para>
       The handle is destroyed by <function>cql_transform_close</function> 
@@ -762,7 +791,202 @@ int cql_transform_FILE(cql_transform_t ct,
       open <literal>FILE</literal>.
      </para>
     </sect3>
       open <literal>FILE</literal>.
      </para>
     </sect3>
-    <sect3 id="toolq.cql.xcql"><title>CQL to XCQL conversion</title>
+    <sect3 id="tools.cql.map">
+     <title>Specification of CQL to RPN mapping</title>
+     <para>
+      The file supplied to functions 
+      <function>cql_transform_open_FILE</function>,
+      <function>cql_transform_open_fname</function> follows
+      a structure found in many Unix utilities.
+      It consists of mapping specifications - one per line.
+      Lines starting with <literal>#</literal> are ignored (comments).
+     </para>
+     <para>
+      Each line is of the form
+      <literallayout>
+       <replaceable>CQL pattern</replaceable><literal> = </literal> <replaceable> RPN equivalent</replaceable>
+      </literallayout>
+     </para>
+     <para>
+      An RPN pattern is a simple attribute list. Each attribute pair
+      takes the form:
+      <literallayout>
+       [<replaceable>set</replaceable>] <replaceable>type</replaceable><literal>=</literal><replaceable>value</replaceable>
+      </literallayout>
+      The attribute <replaceable>set</replaceable> is optional.
+      The <replaceable>type</replaceable> is the attribute type,
+      <replaceable>value</replaceable> the attribute value.
+     </para>
+     <para>
+      The following CQL patterns are recognized:
+      <variablelist>
+       <varlistentry><term>
+         <literal>qualifier.</literal><replaceable>set</replaceable><literal>.</literal><replaceable>name</replaceable>
+        </term>
+        <listitem>
+         <para>
+          This pattern is invoked when a CQL qualifier, such as 
+          dc.title is converted. <replaceable>set</replaceable>
+          and <replaceable>name</replaceable> are the index set and qualifier
+          name respectively.
+          Typically, the RPN specifies an equivalent use attribute.
+         </para>
+         <para>
+          For terms not bound by a qualifier the pattern
+          <literal>qualifier.srw.serverChoice</literal> is used.
+          Here, the prefix <literal>srw</literal> is defined as
+          <literal>http://www.loc.gov/zing/cql/srw-indexes/v1.0/</literal>.
+          If this pattern is not defined, the mapping will fail.
+         </para>
+        </listitem>
+       </varlistentry>
+       <varlistentry><term>
+         <literal>relation.</literal><replaceable>relation</replaceable>
+        </term>
+        <listitem>
+         <para>
+          This pattern specifies how a CQL relation is mapped to RPN.
+          <replaceable>relation</replaceable> is the name of the relation
+          operator. Since <literal>=</literal> is used as
+          separator between CQL pattern and RPN, CQL relations
+          including <literal>=</literal> cannot be
+          used directly. To avoid a conflict, the names
+          <literal>ge</literal>,
+          <literal>eq</literal>,
+          <literal>le</literal>,
+          must be used for CQL operators, greater-than-or-equal,
+          equal, less-than-or-equal respectively.
+          The RPN pattern is supposed to include a relation attribute.
+         </para>
+         <para>
+          For terms not bound by a relation, the pattern
+          <literal>relation.scr</literal> is used. If the pattern
+          is not defined, the mapping will fail.
+         </para>
+         <para>
+          The special pattern, <literal>relation.*</literal> is used
+          when no other relation pattern is matched.
+         </para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry><term>
+         <literal>relationModifier.</literal><replaceable>mod</replaceable>
+        </term>
+        <listitem>
+         <para>
+          This pattern specifies how a CQL relation modifier is mapped to RPN.
+          The RPN pattern is usually a relation attribute.
+         </para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry><term>
+         <literal>structure.</literal><replaceable>type</replaceable>
+        </term>
+        <listitem>
+         <para>
+          This pattern specifies how a CQL structure is mapped to RPN.
+          Note that this CQL pattern is somewhat similar to
+          CQL pattern <literal>relation</literal>. 
+          The <replaceable>type</replaceable> is a CQL relation.
+         </para>
+         <para>
+          The pattern, <literal>structure.*</literal> is used
+          when no other structure pattern is matched.
+          Usually, the RPN equivalent specifies a structure attribute.
+         </para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry><term>
+         <literal>position.</literal><replaceable>type</replaceable>
+        </term>
+        <listitem>
+         <para>
+          This pattern specifies how the anchor (position) of
+          CQL is mapped to RPN.
+          The <replaceable>type</replaceable> is one
+          of <literal>first</literal>, <literal>any</literal>,
+          <literal>last</literal>, <literal>firstAndLast</literal>.
+         </para>
+         <para>
+          The pattern, <literal>position.*</literal> is used
+          when no other position pattern is matched.
+         </para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry><term>
+         <literal>set.</literal><replaceable>prefix</replaceable>
+        </term>
+        <listitem>
+         <para>
+          This specification defines a CQL index set for a given prefix.
+          The value on the right hand side is the URI for the set - 
+          <emphasis>not</emphasis> RPN. All prefixes used in
+          qualifier patterns must be defined this way.
+         </para>
+        </listitem>
+       </varlistentry>
+      </variablelist>
+     </para>
+     <example><title>CQL to RPN mapping file</title>
+      <para>
+       This simple file defines two index sets, three qualifiers and three
+       relations, a position pattern and a default structure.
+      </para>
+      <programlisting><![CDATA[
+       set.srw    = http://www.loc.gov/zing/cql/srw-indexes/v1.0/
+       set.dc     = http://www.loc.gov/zing/cql/dc-indexes/v1.0/
+
+       qualifier.srw.serverChoice = 1=1016
+       qualifier.dc.title         = 1=4
+       qualifier.dc.subject       = 1=21
+  
+       relation.<                 = 2=1
+       relation.eq                = 2=3
+       relation.scr               = 2=3
+
+       position.any               = 3=3 6=1
+
+       structure.*                = 4=1
+]]>
+      </programlisting>
+      <para>
+       With the mappings above, the CQL query
+       <screen>
+        computer
+       </screen>
+       is converted to the PQF:
+       <screen>
+        @attr 1=1016 @attr 2=3 @attr 4=1 @attr 3=3 @attr 6=1 "computer"
+       </screen>
+       by rules <literal>qualifier.srw.serverChoice</literal>,
+       <literal>relation.scr</literal>, <literal>structure.*</literal>,
+       <literal>position.any</literal>.
+      </para>
+      <para>
+       CQL query
+       <screen>
+        computer^
+       </screen>
+       is rejected, since <literal>position.last</literal> is
+       undefined.
+      </para>
+      <para>
+       CQL query
+       <screen>
+        >my = "http://www.loc.gov/zing/cql/dc-indexes/v1.0/" my.title = x
+       </screen>
+       is converted to
+       <screen>
+        @attr 1=4 @attr 2=3 @attr 4=1 @attr 3=3 @attr 6=1 "x"
+       </screen>
+      </para>
+     </example>
+    </sect3>
+    <sect3 id="tools.cql.xcql"><title>CQL to XCQL conversion</title>
      <para>
       Conversion from CQL to XCQL is trivial and does not
       require a mapping to be defined.
      <para>
       Conversion from CQL to XCQL is trivial and does not
       require a mapping to be defined.