Added charmap facility to delete leading articles

[idzebra-moved-to-github.git] / doc / recordmodel.xml
diff --git a/doc/recordmodel.xml b/doc/recordmodel.xml

index 8ad29e8..5846b4b 100644 (file)
--- a/doc/recordmodel.xml
+++ b/doc/recordmodel.xml
@@ -1,5 +1,5 @@
   <chapter id="record-model">
   <chapter id="record-model">
-  <!-- $Id: recordmodel.xml,v 1.12 2002-10-30 11:09:39 adam Exp $ -->
+  <!-- $Id: recordmodel.xml,v 1.19 2004-09-14 14:38:07 quinn Exp $ -->
    <title>The Record Model</title>
    
    <para>
    <title>The Record Model</title>
    
    <para>
@@ -33,7 +33,7 @@
  
     <itemizedlist>
      <listitem>
  
     <itemizedlist>
      <listitem>
-
+     
       <para>
        When records are accessed by the system, they are represented
        in their local, or native format. This might be SGML or HTML files,
       <para>
        When records are accessed by the system, they are represented
        in their local, or native format. This might be SGML or HTML files,
@@ -477,13 +477,14 @@
       <variablelist>
  
        <varlistentry>
       <variablelist>
  
        <varlistentry>
-       <term>begin <emphasis>type [parameter ... ]</emphasis></term>
+       <term>begin <replaceable>type [parameter ... ]</replaceable></term>
         <listitem>
          <para>
           Begin a new
         <listitem>
          <para>
           Begin a new
-         data element. The type is one of the following:
+         data element. The <replaceable>type</replaceable> is one of
+         the following:
           <variablelist>
           <variablelist>
-
+          
            <varlistentry>
             <term>record</term>
             <listitem>
            <varlistentry>
             <term>record</term>
             <listitem>
@@ -492,7 +493,7 @@
               name of the schema that describes the structure of the record, eg.
               <literal>gils</literal> or <literal>wais</literal> (see below).
               The <literal>begin record</literal> call should precede
               name of the schema that describes the structure of the record, eg.
               <literal>gils</literal> or <literal>wais</literal> (see below).
               The <literal>begin record</literal> call should precede
-             any other use of the <emphasis>begin</emphasis> statement.
+             any other use of the <replaceable>begin</replaceable> statement.
              </para>
             </listitem>
            </varlistentry>
              </para>
             </listitem>
            </varlistentry>
@@ -512,7 +513,7 @@
             <listitem>
              <para>
               Begin a new node in a variant tree. The parameters are
             <listitem>
              <para>
               Begin a new node in a variant tree. The parameters are
-             <emphasis>class type value</emphasis>.
+             <replaceable>class type value</replaceable>.
              </para>
             </listitem>
            </varlistentry>
              </para>
             </listitem>
            </varlistentry>
@@ -521,7 +522,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>data</term>
+       <term>data <replaceable>parameter</replaceable></term>
         <listitem>
          <para>
           Create a data element. The concatenated arguments make
         <listitem>
          <para>
           Create a data element. The concatenated arguments make
@@ -530,28 +531,41 @@
           the layout (whitespace) of the data should be retained for
           transmission.
           The option <literal>-element</literal>
           the layout (whitespace) of the data should be retained for
           transmission.
           The option <literal>-element</literal>
-         <emphasis>tag</emphasis> wraps the data up in
-         the <emphasis>tag</emphasis>.
+         <replaceable>tag</replaceable> wraps the data up in
+         the <replaceable>tag</replaceable>.
           The use of the <literal>-element</literal> option is equivalent to
           The use of the <literal>-element</literal> option is equivalent to
-         preceding the command with a <emphasis>begin
-          element</emphasis> command, and following
-         it with the <emphasis>end</emphasis> command.
+         preceding the command with a <replaceable>begin
+          element</replaceable> command, and following
+         it with the <replaceable>end</replaceable> command.
          </para>
         </listitem>
        </varlistentry>
        <varlistentry>
          </para>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>end <emphasis>[type]</emphasis></term>
+       <term>end <replaceable>[type]</replaceable></term>
         <listitem>
          <para>
           Close a tagged element. If no parameter is given,
           the last element on the stack is terminated.
           The first parameter, if any, is a type name, similar
         <listitem>
          <para>
           Close a tagged element. If no parameter is given,
           the last element on the stack is terminated.
           The first parameter, if any, is a type name, similar
-         to the <emphasis>begin</emphasis> statement.
-         For the <emphasis>element</emphasis> type, a tag
+         to the <replaceable>begin</replaceable> statement.
+         For the <replaceable>element</replaceable> type, a tag
           name can be provided to terminate a specific tag.
          </para>
         </listitem>
        </varlistentry>
           name can be provided to terminate a specific tag.
          </para>
         </listitem>
        </varlistentry>
+
+      <varlistentry>
+       <term>unread <replaceable>no</replaceable></term>
+       <listitem>
+        <para>
+         Move the input pointer to the offset of the first character that
+         matches the rule given by <replaceable>no</replaceable>.
+         The first rule from left-to-right is numbered zero,
+         the second rule is numbered 1 and so on.
+        </para>
+       </listitem>
+      </varlistentry>
+
       </variablelist>
      </para>
  
       </variablelist>
      </para>
  
@@ -571,23 +585,21 @@
        /^Subject:/ BODY /$/ { data -element title $1 }
        /^Date:/ BODY /$/    { data -element lastModified $1 }
        /\n\n/ BODY END      {
        /^Subject:/ BODY /$/ { data -element title $1 }
        /^Date:/ BODY /$/    { data -element lastModified $1 }
        /\n\n/ BODY END      {
-      begin element bodyOfDisplay
-      begin variant body iana "text/plain"
-      data -text $1
-      end record
+         begin element bodyOfDisplay
+         begin variant body iana "text/plain"
+         data -text $1
+         end record
        }
       </screen>
  
      </para>
  
      <para>
        }
       </screen>
  
      </para>
  
      <para>
-     If Zebra is compiled with support for Tcl (Tool Command Language)
-     enabled, the statements described above are supplemented with a complete
+     If Zebra is compiled with support for Tcl enabled, the statements
+     described above are supplemented with a complete
       scripting environment, including control structures (conditional
       expressions and loop constructs), and powerful string manipulation
       scripting environment, including control structures (conditional
       expressions and loop constructs), and powerful string manipulation
-     mechanisms for modifying the elements of a record. Tcl is a popular
-     scripting environment, with several tutorials available both online
-     and in hardcopy.
+     mechanisms for modifying the elements of a record.
      </para>
  
     </sect2>
      </para>
  
     </sect2>
@@ -692,35 +704,35 @@
       Which of the two elements are transmitted to the client by the server
       depends on the specifications provided by the client, if any.
      </para>
       Which of the two elements are transmitted to the client by the server
       depends on the specifications provided by the client, if any.
      </para>
-
+    
      <para>
       In practice, each variant node is associated with a triple of class,
       type, value, corresponding to the variant mechanism of Z39.50.
      </para>
      <para>
       In practice, each variant node is associated with a triple of class,
       type, value, corresponding to the variant mechanism of Z39.50.
      </para>
-
+    
     </sect2>
     </sect2>
-
+   
     <sect2>
      <title>Data Elements</title>
     <sect2>
      <title>Data Elements</title>
-
+    
      <para>
       Data nodes have no children (they are always leaf nodes in the record
       tree).
      </para>
      <para>
       Data nodes have no children (they are always leaf nodes in the record
       tree).
      </para>
-
+    
      <!--
      <!--
-      FIXME! Documentation needs extension here about types of nodes - numerical,
-      textual, etc., plus the various types of inclusion notes.
-     </para>
+    FIXME! Documentation needs extension here about types of nodes - numerical,
+    textual, etc., plus the various types of inclusion notes.
+   </para>
      -->
      
     </sect2>
      -->
      
     </sect2>
-
+   
    </sect1>
    </sect1>
-
+  
    <sect1 id="data-model">
     <title>Configuring Your Data Model</title>
    <sect1 id="data-model">
     <title>Configuring Your Data Model</title>
-
+   
     <para>
      The following sections describe the configuration files that govern
      the internal management of data records. The system searches for the files
     <para>
      The following sections describe the configuration files that govern
      the internal management of data records. The system searches for the files
@@ -770,7 +782,7 @@
          known.
         </para>
        </listitem>
          known.
         </para>
        </listitem>
-
+      
        <listitem>
         <para>
          The variant set which is used in the profile. This provides a
        <listitem>
         <para>
          The variant set which is used in the profile. This provides a
@@ -884,12 +896,12 @@
      <para>
       The file may contain the following directives:
      </para>
      <para>
       The file may contain the following directives:
      </para>
-
+    
      <para>
       <variablelist>
      <para>
       <variablelist>
-
+      
        <varlistentry>
        <varlistentry>
-       <term>name <emphasis>symbolic-name</emphasis></term>
+       <term>name <replaceable>symbolic-name</replaceable></term>
         <listitem>
          <para>
           (m) This provides a shorthand name or
         <listitem>
          <para>
           (m) This provides a shorthand name or
@@ -898,17 +910,17 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>reference <emphasis>OID-name</emphasis></term>
+       <term>reference <replaceable>OID-name</replaceable></term>
         <listitem>
          <para>
           (m) The reference name of the OID for the profile.
           The reference names can be found in the <emphasis>util</emphasis>
         <listitem>
          <para>
           (m) The reference name of the OID for the profile.
           The reference names can be found in the <emphasis>util</emphasis>
-         module of <emphasis>YAZ</emphasis>.
+         module of YAZ.
          </para>
         </listitem>
        </varlistentry>
        <varlistentry>
          </para>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>attset <emphasis>filename</emphasis></term>
+       <term>attset <replaceable>filename</replaceable></term>
         <listitem>
          <para>
           (m) The attribute set that is used for
         <listitem>
          <para>
           (m) The attribute set that is used for
@@ -917,7 +929,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>tagset <emphasis>filename</emphasis></term>
+       <term>tagset <replaceable>filename</replaceable></term>
         <listitem>
          <para>
           (o) The tag set (if any) that describe
         <listitem>
          <para>
           (o) The tag set (if any) that describe
@@ -926,7 +938,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>varset <emphasis>filename</emphasis></term>
+       <term>varset <replaceable>filename</replaceable></term>
         <listitem>
          <para>
           (o) The variant set used in the profile.
         <listitem>
          <para>
           (o) The variant set used in the profile.
@@ -934,25 +946,27 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>maptab <emphasis>filename</emphasis></term>
+       <term>maptab <replaceable>filename</replaceable></term>
         <listitem>
          <para>
           (o,r) This points to a
           conversion table that might be used if the client asks for the record
           in a different schema from the native one.
          </para>
         <listitem>
          <para>
           (o,r) This points to a
           conversion table that might be used if the client asks for the record
           in a different schema from the native one.
          </para>
-       </listitem></varlistentry>
+       </listitem>
+      </varlistentry>
        <varlistentry>
        <varlistentry>
-       <term>marc <emphasis>filename</emphasis></term>
+       <term>marc <replaceable>filename</replaceable></term>
         <listitem>
          <para>
           (o) Points to a file containing parameters
         <listitem>
          <para>
           (o) Points to a file containing parameters
-         for representing the record contents in the ISO2709 syntax. Read the
-         description of the MARC representation facility below.
+         for representing the record contents in the ISO2709 syntax.
+         Read the description of the MARC representation facility below.
          </para>
          </para>
-       </listitem></varlistentry>
+       </listitem>
+      </varlistentry>
        <varlistentry>
        <varlistentry>
-       <term>esetname <emphasis>name filename</emphasis></term>
+       <term>esetname <replaceable>name filename</replaceable></term>
         <listitem>
          <para>
           (o,r) Associates the
         <listitem>
          <para>
           (o,r) Associates the
@@ -960,9 +974,10 @@
           given in place of the filename, this corresponds to a null mapping for
           the given element set name.
          </para>
           given in place of the filename, this corresponds to a null mapping for
           the given element set name.
          </para>
-       </listitem></varlistentry>
+       </listitem>
+      </varlistentry>
        <varlistentry>
        <varlistentry>
-       <term>any <emphasis>tags</emphasis></term>
+       <term>any <replaceable>tags</replaceable></term>
         <listitem>
          <para>
           (o) This directive specifies a list of attributes
         <listitem>
          <para>
           (o) This directive specifies a list of attributes
@@ -972,49 +987,74 @@
           provides an efficient way of supporting free-text searching across all
           elements. However, it does increase the size of the index
           significantly. The attributes can be qualified with a structure, as in
           provides an efficient way of supporting free-text searching across all
           elements. However, it does increase the size of the index
           significantly. The attributes can be qualified with a structure, as in
-         the <emphasis>elm</emphasis> directive below.
+         the <replaceable>elm</replaceable> directive below.
          </para>
          </para>
-       </listitem></varlistentry>
+       </listitem>
+      </varlistentry>
        <varlistentry>
        <varlistentry>
-       <term>elm <emphasis>path name attributes</emphasis></term>
+       <term>elm <replaceable>path name attributes</replaceable></term>
         <listitem>
          <para>
           (o,r) Adds an element to the abstract record syntax of the schema.
         <listitem>
          <para>
           (o,r) Adds an element to the abstract record syntax of the schema.
-         The <emphasis>path</emphasis> follows the
+         The <replaceable>path</replaceable> follows the
           syntax which is suggested by the Z39.50 document - that is, a sequence
           syntax which is suggested by the Z39.50 document - that is, a sequence
-         of tags separated by slashes (/). Each tag is given as a
+         of tags separated by slashes (&#x2f;). Each tag is given as a
           comma-separated pair of tag type and -value surrounded by parentheses.
           comma-separated pair of tag type and -value surrounded by parentheses.
-         The <emphasis>name</emphasis> is the name of the element, and
-         the <emphasis>attributes</emphasis>
+         The <replaceable>name</replaceable> is the name of the element, and
+         the <replaceable>attributes</replaceable>
           specifies which attributes to use when indexing the element in a
           comma-separated list.
           A ! in place of the attribute name is equivalent to
           specifying an attribute name identical to the element name.
           A - in place of the attribute name
           specifies that no indexing is to take place for the given element.
           specifies which attributes to use when indexing the element in a
           comma-separated list.
           A ! in place of the attribute name is equivalent to
           specifying an attribute name identical to the element name.
           A - in place of the attribute name
           specifies that no indexing is to take place for the given element.
-         The attributes can be qualified with <emphasis>field
-          types</emphasis> to specify which
+         The attributes can be qualified with <replaceable>field
+          types</replaceable> to specify which
           character set should govern the indexing procedure for that field.
           The same data element may be indexed into several different
           fields, using different character set definitions.
           See the <xref linkend="field-structure-and-character-sets"/>.
           character set should govern the indexing procedure for that field.
           The same data element may be indexed into several different
           fields, using different character set definitions.
           See the <xref linkend="field-structure-and-character-sets"/>.
-         The default field type is "w" for <emphasis>word</emphasis>.
+         The default field type is <literal>w</literal> for
+         <emphasis>word</emphasis>.
          </para>
          </para>
-       </listitem></varlistentry>
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>xelm <replaceable>xpath attributes</replaceable></term>
+       <listitem>
+        <para>
+         Specifies indexing for record nodes given by
+         <replaceable>xpath</replaceable>. Unlike directive
+         elm, this directive allows you to index attribute
+         contents. The <replaceable>xpath</replaceable> uses
+         a syntax similar to XPath. The <replaceable>attributes</replaceable>
+         have the same syntax and meaning as for directive elm, except that operator
+         ! refers to the nodes selected by <replaceable>xpath</replaceable>.
+         <!--
+         xelm   /         !:w                 default index
+         xelm   //        !:w                 additional index
+         xelm   /gils/title/@att    myatt:w   index attribute @att in myatt
+         xelm   title/@att          myatt:w   same meaning.
+         -->
+        </para>
+       </listitem>
+      </varlistentry>
+      
        <varlistentry>
        <varlistentry>
-       <term>encoding <emphasis>encodingname</emphasis></term>
+       <term>encoding <replaceable>encodingname</replaceable></term>
         <listitem>
          <para>
           This directive specifies character encoding for external records.
           For records such as XML that specify encoding within the
           file via a header, this directive is ignored.
           If neither this directive is given, nor an encoding is set
         <listitem>
          <para>
           This directive specifies character encoding for external records.
           For records such as XML that specify encoding within the
           file via a header, this directive is ignored.
           If neither this directive is given, nor an encoding is set
-         within external records, ISO-8859-1 encoding is assmed.
+         within external records, ISO-8859-1 encoding is assumed.
           </para>
         </listitem>
        </varlistentry>
        <varlistentry>
           </para>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>xpath <emphasis>enable/disable</emphasis></term>
+       <term>xpath <literal>enable</literal>/<literal>disable</literal></term>
         <listitem>
          <para>
           If this directive is followed by <literal>enable</literal>,
         <listitem>
          <para>
           If this directive is followed by <literal>enable</literal>,
@@ -1024,6 +1064,103 @@
          </para>
         </listitem>
        </varlistentry>
          </para>
         </listitem>
        </varlistentry>
+
+      <!-- Adam's version 
+      <varlistentry>
+       <term>systag <replaceable>systemtag</replaceable> <replaceable>element</replaceable></term>
+       <listitem>
+        <para>
+         This directive maps system information to an element during
+         retrieval. This information is dynamically created. The
+         following system tags are defined
+         <variablelist>
+          <varlistentry>
+           <term>size</term>
+           <listitem>
+            <para>
+             Size of record in bytes. By default this
+             is mapped to element <literal>size</literal>.
+            </para>
+           </listitem>
+          </varlistentry>
+
+          <varlistentry>
+           <term>rank</term>
+           <listitem>
+            <para>
+             Score/rank of record. By default this
+             is mapped to element <literal>rank</literal>.
+             If no score was calculated for the record (non-ranked
+             search), this directive is ignored.
+            </para>
+           </listitem>
+          </varlistentry>
+          
+          <varlistentry>
+           <term>sysno</term>
+           <listitem> 
+            <para>
+             Zebra's system number (record ID) for the
+             record. By default this is mapped to element
+             <literal>localControlNumber</literal>.
+            </para>
+           </listitem>
+          </varlistentry>
+         </variablelist>
+         If you do not want a particular system tag to be applied,
+         then set the resulting element to something undefined in the
+         abs file (such as <literal>none</literal>).
+        </para>
+       </listitem>
+      </varlistentry>
+      -->
+
+      <!-- Mike's version -->
+      <varlistentry>
+       <term>
+        systag
+        <replaceable>systemTag</replaceable>
+        <replaceable>actualTag</replaceable>
+       </term>
+       <listitem>
+       <para>
+        Specifies what information, if any, Zebra should
+        automatically include in retrieval records for the 
+        <quote>system fields</quote> that it supports.
+        <replaceable>systemTag</replaceable> may
+        be any of the following:
+        <variablelist>
+         <varlistentry>
+          <term><literal>rank</literal></term>
+          <listitem><para>
+           An integer indicating the relevance-ranking score
+           assigned to the record.
+          </para></listitem>
+         </varlistentry>
+         <varlistentry>
+          <term><literal>sysno</literal></term>
+          <listitem><para>
+           An automatically generated identifier for the record,
+           unique within this database.  It is represented by the
+           <literal>&lt;localControlNumber&gt;</literal> element in
+           XML and the <literal>(1,14)</literal> tag in GRS-1.
+          </para></listitem>
+         </varlistentry>
+         <varlistentry>
+          <term><literal>size</literal></term>
+          <listitem><para>
+           The size, in bytes, of the retrieved record.
+          </para></listitem>
+         </varlistentry>
+        </variablelist>
+       </para>
+       <para>
+        The <replaceable>actualTag</replaceable> parameter may be
+        <literal>none</literal> to indicate that the named element
+        should be omitted from retrieval records.
+       </para>
+       </listitem>
+      </varlistentry>
       </variablelist>
      </para>
      
       </variablelist>
      </para>
      
@@ -1058,23 +1195,23 @@
        esetname G gils-g.est
        esetname F @
  
        esetname G gils-g.est
        esetname F @
  
-      elm (1,10)              rank                        -
-      elm (1,12)              url                         -
-      elm (1,14)              localControlNumber     Local-number
-      elm (1,16)              dateOfLastModification Date/time-last-modified
-      elm (2,1)               title                       w:!,p:!
-      elm (4,1)               controlIdentifier      Identifier-standard
-      elm (2,6)               abstract               Abstract
-      elm (4,51)              purpose                     !
-      elm (4,52)              originator                  - 
-      elm (4,53)              accessConstraints           !
-      elm (4,54)              useConstraints              !
-      elm (4,70)              availability                -
-      elm (4,70)/(4,90)       distributor                 -
-      elm (4,70)/(4,90)/(2,7) distributorName             !
-      elm (4,70)/(4,90)/(2,10 distributorOrganization     !
-      elm (4,70)/(4,90)/(4,2) distributorStreetAddress    !
-      elm (4,70)/(4,90)/(4,3) distributorCity             !
+      elm (1,10)               rank                        -
+      elm (1,12)               url                         -
+      elm (1,14)               localControlNumber     Local-number
+      elm (1,16)               dateOfLastModification Date/time-last-modified
+      elm (2,1)                title                       w:!,p:!
+      elm (4,1)                controlIdentifier      Identifier-standard
+      elm (2,6)                abstract               Abstract
+      elm (4,51)               purpose                     !
+      elm (4,52)               originator                  - 
+      elm (4,53)               accessConstraints           !
+      elm (4,54)               useConstraints              !
+      elm (4,70)               availability                -
+      elm (4,70)/(4,90)        distributor                 -
+      elm (4,70)/(4,90)/(2,7)  distributorName             !
+      elm (4,70)/(4,90)/(2,10) distributorOrganization     !
+      elm (4,70)/(4,90)/(4,2)  distributorStreetAddress    !
+      elm (4,70)/(4,90)/(4,3)  distributorCity             !
       </screen>
  
      </para>
       </screen>
  
      </para>
@@ -1085,7 +1222,7 @@
      <title>The Attribute Set (.att) Files</title>
  
      <para>
      <title>The Attribute Set (.att) Files</title>
  
      <para>
-     This file type describes the <emphasis>Use</emphasis> elements of
+     This file type describes the <replaceable>Use</replaceable> elements of
       an attribute set. 
       It contains the following directives. 
      </para>
       an attribute set. 
       It contains the following directives. 
      </para>
@@ -1093,7 +1230,7 @@
      <para>
       <variablelist>
        <varlistentry>
      <para>
       <variablelist>
        <varlistentry>
-       <term>name <emphasis>symbolic-name</emphasis></term>
+       <term>name <replaceable>symbolic-name</replaceable></term>
         <listitem>
          <para>
           (m) This provides a shorthand name or
         <listitem>
          <para>
           (m) This provides a shorthand name or
@@ -1102,24 +1239,24 @@
          </para>
         </listitem></varlistentry>
        <varlistentry>
          </para>
         </listitem></varlistentry>
        <varlistentry>
-       <term>reference <emphasis>OID-name</emphasis></term>
+       <term>reference <replaceable>OID-name</replaceable></term>
         <listitem>
          <para>
           (m) The reference name of the OID for
           the attribute set.
         <listitem>
          <para>
           (m) The reference name of the OID for
           the attribute set.
-         The reference names can be found in the <emphasis>util</emphasis>
-         module of <emphasis>YAZ</emphasis>.
+         The reference names can be found in the <replaceable>util</replaceable>
+         module of YAZ.
          </para>
         </listitem></varlistentry>
        <varlistentry>
          </para>
         </listitem></varlistentry>
        <varlistentry>
-       <term>include <emphasis>filename</emphasis></term>
+       <term>include <replaceable>filename</replaceable></term>
         <listitem>
          <para>
           (o,r) This directive is used to
           include another attribute set as a part of the current one. This is
           used when a new attribute set is defined as an extension to another
           set. For instance, many new attribute sets are defined as extensions
         <listitem>
          <para>
           (o,r) This directive is used to
           include another attribute set as a part of the current one. This is
           used when a new attribute set is defined as an extension to another
           set. For instance, many new attribute sets are defined as extensions
-         to the <emphasis>bib-1</emphasis> set.
+         to the <replaceable>bib-1</replaceable> set.
           This is an important feature of the retrieval
           system of Z39.50, as it ensures the highest possible level of
           interoperability, as those access points of your database which are
           This is an important feature of the retrieval
           system of Z39.50, as it ensures the highest possible level of
           interoperability, as those access points of your database which are
@@ -1129,15 +1266,15 @@
         </listitem></varlistentry>
        <varlistentry>
         <term>att
         </listitem></varlistentry>
        <varlistentry>
         <term>att
-        <emphasis>att-value att-name [local-value]</emphasis></term>
+        <replaceable>att-value att-name [local-value]</replaceable></term>
         <listitem>
          <para>
           (o,r) This
           repeatable directive introduces a new attribute to the set. The
           attribute value is stored in the index (unless a
         <listitem>
          <para>
           (o,r) This
           repeatable directive introduces a new attribute to the set. The
           attribute value is stored in the index (unless a
-         <emphasis>local-value</emphasis> is
+         <replaceable>local-value</replaceable> is
           given, in which case this is stored). The name is used to refer to the
           given, in which case this is stored). The name is used to refer to the
-         attribute from the <emphasis>abstract syntax</emphasis>. 
+         attribute from the <replaceable>abstract syntax</replaceable>. 
          </para>
         </listitem></varlistentry>
       </variablelist>
          </para>
         </listitem></varlistentry>
       </variablelist>
@@ -1649,174 +1786,216 @@
       special-purpose fields such as WWW-style linkages (URx).
      </para>
  
       special-purpose fields such as WWW-style linkages (URx).
      </para>
  
-    <para>
-     The field types, and hence character sets, are associated with data
-     elements by the .abs files (see above).
-     The file <literal>default.idx</literal>
-     provides the association between field type codes (as used in the .abs
-     files) and the character map files (with the .chr suffix). The format
-     of the .idx file is as follows
-    </para>
-
-    <para>
-     <variablelist>
-
-      <varlistentry>
-       <term>index <emphasis>field type code</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a new search index code.
-         The argument is a one-character code to be used in the
-         .abs files to select this particular index type. An index, roughly,
-         corresponds to a particular structure attribute during search. Refer
-         to <xref linkend="search"/>.
-        </para>
-       </listitem></varlistentry>
-      <varlistentry>
-       <term>sort <emphasis>field code type</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a 
-         sort index. The argument is a one-character code to be used in the
-         .abs fie to select this particular index type. The corresponding
-         use attribute must be used in the sort request to refer to this
-         particular sort index. The corresponding character map (see below)
-         is used in the sort process.
-        </para>
-       </listitem></varlistentry>
-      <varlistentry>
-       <term>completeness <emphasis>boolean</emphasis></term>
-       <listitem>
-        <para>
-         This directive enables or disables complete field indexing.
-         The value of the <emphasis>boolean</emphasis> should be 0
-         (disable) or 1. If completeness is enabled, the index entry will
-         contain the complete contents of the field (up to a limit), with words
-         (non-space characters) separated by single space characters
-         (normalized to " " on display). When completeness is
-         disabled, each word is indexed as a separate entry. Complete subfield
-         indexing is most useful for fields which are typically browsed (eg.
-         titles, authors, or subjects), or instances where a match on a
-         complete subfield is essential (eg. exact title searching). For fields
-         where completeness is disabled, the search engine will interpret a
-         search containing space characters as a word proximity search.
-        </para>
-       </listitem></varlistentry>
-      <varlistentry>
-       <term>charmap <emphasis>filename</emphasis></term>
-       <listitem>
-        <para>
-         This is the filename of the character
-         map to be used for this index for field type.
-        </para>
-       </listitem></varlistentry>
-     </variablelist>
-    </para>
-
-    <para>
-     The contents of the character map files are structured as follows:
-    </para>
-
-    <para>
-     <variablelist>
-
-      <varlistentry>
-       <term>lowercase <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the basic value set of the field type.
-         The format is an ordered list (without spaces) of the
-         characters which may occur in "words" of the given type.
-         The order of the entries in the list determines the
-         sort order of the index. In addition to single characters, the
-         following combinations are legal:
-        </para>
-
-        <para>
-
-         <itemizedlist>
-          <listitem>
-           <para>
-            Backslashes may be used to introduce three-digit octal, or
-            two-digit hex representations of single characters
-            (preceded by <literal>x</literal>).
-            In addition, the combinations
-            \\, \\r, \\n, \\t, \\s (space &mdash; remember that real
-            space-characters may not occur in the value definition), and
-            \\ are recognized, with their usual interpretation.
-           </para>
-          </listitem>
-
-          <listitem>
-           <para>
-            Curly braces {} may be used to enclose ranges of single
-            characters (possibly using the escape convention described in the
-            preceding point), eg. {a-z} to introduce the
-            standard range of ASCII characters.
-            Note that the interpretation of such a range depends on
-            the concrete representation in your local, physical character set.
-           </para>
-          </listitem>
-
-          <listitem>
-           <para>
-            paranthesises () may be used to enclose multi-byte characters -
-            eg. diacritics or special national combinations (eg. Spanish
-            "ll"). When found in the input stream (or a search term),
-            these characters are viewed and sorted as a single character, with a
-            sorting value depending on the position of the group in the value
-            statement.
-           </para>
-          </listitem>
+    <sect3 id="default-idx-file">
+     <title>The default.idx file</title>
+     <para>
+      The field types, and hence character sets, are associated with data
+      elements by the .abs files (see above).
+      The file <literal>default.idx</literal>
+      provides the association between field type codes (as used in the .abs
+      files) and the character map files (with the .chr suffix). The format
+      of the .idx file is as follows:
+     </para>
  
  
-         </itemizedlist>
+     <para>
+      <variablelist>
+
+       <varlistentry>
+       <term>index <emphasis>field type code</emphasis></term>
+       <listitem>
+        <para>
+         This directive introduces a new search index code.
+         The argument is a one-character code to be used in the
+         .abs files to select this particular index type. An index, roughly,
+         corresponds to a particular structure attribute during search. Refer
+         to <xref linkend="search"/>.
+        </para>
+       </listitem></varlistentry>
+       <varlistentry>
+       <term>sort <emphasis>field type code</emphasis></term>
+       <listitem>
+        <para>
+         This directive introduces a 
+         sort index. The argument is a one-character code to be used in the
+         .abs file to select this particular index type. The corresponding
+         use attribute must be used in the sort request to refer to this
+         particular sort index. The corresponding character map (see below)
+         is used in the sort process.
+        </para>
+       </listitem></varlistentry>
+       <varlistentry>
+       <term>completeness <emphasis>boolean</emphasis></term>
+       <listitem>
+        <para>
+         This directive enables or disables complete field indexing.
+         The value of the <emphasis>boolean</emphasis> should be 0
+         (disable) or 1. If completeness is enabled, the index entry will
+         contain the complete contents of the field (up to a limit), with words
+         (non-space characters) separated by single space characters
+         (normalized to " " on display). When completeness is
+         disabled, each word is indexed as a separate entry. Complete subfield
+         indexing is most useful for fields which are typically browsed (eg.
+         titles, authors, or subjects), or instances where a match on a
+         complete subfield is essential (eg. exact title searching). For fields
+         where completeness is disabled, the search engine will interpret a
+         search containing space characters as a word proximity search.
+        </para>
+       </listitem></varlistentry>
+       <varlistentry>
+       <term>charmap <emphasis>filename</emphasis></term>
+       <listitem>
+        <para>
+         This is the filename of the character
+         map to be used for this index or field type.
+        </para>
+       </listitem></varlistentry>
+      </variablelist>
+     </para>
+    </sect3>
  
  
-        </para>
-       </listitem></varlistentry>
-      <varlistentry>
-       <term>uppercase <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the
-         upper-case equivalencis to the value set (if any). The number and
-         order of the entries in the list should be the same as in the
-         <literal>lowercase</literal> directive.
-        </para>
-       </listitem></varlistentry>
-      <varlistentry>
-       <term>space <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the character
-         which separate words in the input stream. Depending on the
-         completeness mode of the field in question, these characters either
-         terminate an index entry, or delimit individual "words" in
-         the input stream. The order of the elements is not significant &mdash;
-         otherwise the representation is the same as for the
-         <literal>uppercase</literal> and <literal>lowercase</literal>
-         directives.
-        </para>
-       </listitem></varlistentry>
-      <varlistentry>
-       <term>map <emphasis>value-set</emphasis>
-        <emphasis>target</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a
-         mapping between each of the members of the value-set on the left to
-         the character on the right. The character on the right must occur in
-         the value set (the <literal>lowercase</literal> directive) of
-         the character set, but
-         it may be a paranthesis-enclosed multi-octet character. This directive
-         may be used to map diacritics to their base characters, or to map
-         HTML-style character-representations to their natural form, etc.
-        </para>
-       </listitem></varlistentry>
-     </variablelist>
-    </para>
+    <sect3 id="character-map-files">
+     <title>The character map file format</title>
+     <para>
+      The contents of the character map files are structured as follows:
+     </para>
  
  
+     <para>
+      <variablelist>
+
+       <varlistentry>
+       <term>lowercase <emphasis>value-set</emphasis></term>
+       <listitem>
+        <para>
+         This directive introduces the basic value set of the field type.
+         The format is an ordered list (without spaces) of the
+         characters which may occur in "words" of the given type.
+         The order of the entries in the list determines the
+         sort order of the index. In addition to single characters, the
+         following combinations are legal:
+        </para>
+
+        <para>
+
+         <itemizedlist>
+          <listitem>
+           <para>
+            Backslashes may be used to introduce three-digit octal, or
+            two-digit hex representations of single characters
+            (preceded by <literal>x</literal>).
+            In addition, the combinations
+            \r, \n, \t, \s (space &mdash; remember that real
+            space-characters may not occur in the value definition), and
+            \\ are recognized, with their usual interpretation.
+           </para>
+          </listitem>
+
+          <listitem>
+           <para>
+            Curly braces {} may be used to enclose ranges of single
+            characters (possibly using the escape convention described in the
+            preceding point), eg. {a-z} to introduce the
+            standard range of ASCII characters.
+            Note that the interpretation of such a range depends on
+            the concrete representation in your local, physical character set.
+           </para>
+          </listitem>
+
+          <listitem>
+           <para>
+            Parentheses () may be used to enclose multi-byte characters -
+            eg. diacritics or special national combinations (eg. Spanish
+            "ll"). When found in the input stream (or a search term),
+            these characters are viewed and sorted as a single character, with a
+            sorting value depending on the position of the group in the value
+            statement.
+           </para>
+          </listitem>
+
+         </itemizedlist>
+
+        </para>
+       </listitem></varlistentry>
+       <varlistentry>
+       <term>uppercase <emphasis>value-set</emphasis></term>
+       <listitem>
+        <para>
+         This directive introduces the
+         upper-case equivalents to the value set (if any). The number and
+         order of the entries in the list should be the same as in the
+         <literal>lowercase</literal> directive.
+        </para>
+       </listitem></varlistentry>
+       <varlistentry>
+       <term>space <emphasis>value-set</emphasis></term>
+       <listitem>
+        <para>
+         This directive introduces the characters
+         which separate words in the input stream. Depending on the
+         completeness mode of the field in question, these characters either
+         terminate an index entry, or delimit individual "words" in
+         the input stream. The order of the elements is not significant &mdash;
+         otherwise the representation is the same as for the
+         <literal>uppercase</literal> and <literal>lowercase</literal>
+         directives.
+        </para>
+       </listitem></varlistentry>
+       <varlistentry>
+       <term>map <emphasis>value-set</emphasis>
+        <emphasis>target</emphasis></term>
+       <listitem>
+        <para>
+         This directive introduces a
+         mapping between each of the members of the value-set on the left to
+         the character on the right. The character on the right must occur in
+         the value set (the <literal>lowercase</literal> directive) of
+         the character set, but
+         it may be a parenthesis-enclosed multi-octet character. This directive
+         may be used to map diacritics to their base characters, or to map
+         HTML-style character-representations to their natural form, etc. The map directive
+         can also be used to ignore leading articles in searching and/or sorting, and to perform
+         other special transformations. See section <xref linkend="leading-articles"/>.
+        </para>
+       </listitem></varlistentry>
+      </variablelist>
+     </para>
+    </sect3>
+    <sect3 id="leading-articles">
+     <title>Ignoring leading articles</title>
+     <para>
+      In addition to specifying sort orders, space (blank) handling, and upper/lowercase folding,
+      you can also use the character map files to make Zebra ignore leading articles in sorting
+      records, or when doing complete field searching.
+     </para>
+     <para>
+      This is done using the <literal>map</literal> directive in the character map file. In a
+      nutshell, what you do is map certain sequences of characters, when they occur
+      <emphasis>at the beginning of a field</emphasis>, to a space. Assuming that the character "@" is
+      defined as a space character in your file, you can do:
+      <screen>
+       map (^The\s) @
+       map (^the\s) @
+      </screen>
+      The effect of these directives is to map either 'the' or 'The', followed by a space
+      character, to a space. The hat ^ character denotes beginning-of-field only when
+      complete-subfield indexing or sort indexing is taking place; otherwise, it is treated just
+      as any other character.
+     </para>
+     <para>
+      Because the <literal>default.idx</literal> file can be used to associate different
+      character maps with different indexing types -- and you can create additional indexing
+      types, should the need arise -- it is possible to specify that leading articles should be
+      ignored either in sorting, in complete-field searching, or both.
+     </para>
+     <para>
+      If you ignore certain prefixes in sorting, then these will be eliminated from the index,
+      and sorting will take place as if they weren't there. However, if you set the system up
+      to ignore certain prefixes in <emphasis>searching</emphasis>, then these are deleted both
+      from the indexes and from query terms, when the client specifies complete-field
+      searching. This has the effect that a search for 'the science journal' and 'science
+      journal' would both produce the same results.
+     </para>
+    </sect3>
     </sect2>
     </sect2>
-
    </sect1>
  
    <sect1 id="formats">
    </sect1>
  
    <sect1 id="formats">