Honor position attribute, i.e. allow first-in-field search. To

[idzebra-moved-to-github.git] / doc / recordmodel-grs.xml
diff --git a/doc/recordmodel-grs.xml b/doc/recordmodel-grs.xml

index a2c798e..bd11180 100644 (file)
--- a/doc/recordmodel-grs.xml
+++ b/doc/recordmodel-grs.xml
@@ -1,7 +1,6 @@
- <chapter id="record-model-grs">
-  <!-- $Id: recordmodel-grs.xml,v 1.3 2006-04-25 12:26:26 marc Exp $ -->
+ <chapter id="grs">
+  <!-- $Id: recordmodel-grs.xml,v 1.4 2006-09-03 21:37:27 adam Exp $ -->
    <title>GRS Record Model and Filter Modules</title>
    <title>GRS Record Model and Filter Modules</title>
-  
  
    <para>
     The record model described in this chapter applies to the fundamental,
  
    <para>
     The record model described in this chapter applies to the fundamental,
@@ -11,7 +10,7 @@
    </para>
  
  
    </para>
  
  
-  <sect1 id="grs-record-filters">
+  <section id="grs-filters">
     <title>GRS Record Filters</title>
     <para>
      Many basic subtypes of the <emphasis>grs</emphasis> type are
     <title>GRS Record Filters</title>
     <para>
      Many basic subtypes of the <emphasis>grs</emphasis> type are
@@ -21,120 +20,116 @@
     <para>
      <variablelist>
       <varlistentry>
     <para>
      <variablelist>
       <varlistentry>
-      <term>grs.sgml</term>
+      <term><literal>grs.sgml</literal></term>
        <listitem>
         <para>
          This is the canonical input format
          described <xref linkend="grs-canonical-format"/>. It is using
          simple SGML-like syntax. 
         </para>
        <listitem>
         <para>
          This is the canonical input format
          described <xref linkend="grs-canonical-format"/>. It is using
          simple SGML-like syntax. 
         </para>
-       <!--
-       <para>
-         <literal>libidzebra1.4-mod-grs-sgml not packaged yet ??</literal>
-       </para>
-       -->
        </listitem>
       </varlistentry>
       <varlistentry>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.marc<!--.<emphasis>abstract syntax</emphasis>--></term>
+      <term><literal>grs.marc.</literal><replaceable>type</replaceable></term>
        <listitem>
         <para>
          This allows Zebra to read
          records in the ISO2709 (MARC) encoding standard. 
        <listitem>
         <para>
          This allows Zebra to read
          records in the ISO2709 (MARC) encoding standard. 
-        <!-- In this case, the
-        last parameter <emphasis>abstract syntax</emphasis> names the
+        The last parameter <replaceable>type</replaceable> names the
          <literal>.abs</literal> file (see below)
          which describes the specific MARC structure of the input record as
          <literal>.abs</literal> file (see below)
          which describes the specific MARC structure of the input record as
-        well as the indexing rules. -->
+        well as the indexing rules.
+       </para>
+       <para>The <literal>grs.marc</literal> filter uses an internal representation
+       which is not XML conformant. In particular, MARC tags are
+       presented as elements with the same name, and XML element names
+       may not start with digits. Therefore this filter is only
+       suitable for systems returning GRS-1 and MARC records. For XML
+       use the <literal>grs.marcxml</literal> filter instead (see below).
         </para>
         <para>
           The loadable <literal>grs.marc</literal> filter module
           is packaged in the GNU/Debian package
         </para>
         <para>
           The loadable <literal>grs.marc</literal> filter module
           is packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-marc</literal>
-        </para>
+        <literal>libidzebra2.0-mod-grs-marc</literal>
+       </para>
        </listitem>
       </varlistentry>
       <varlistentry>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.marcxml<!--.<emphasis>abstract syntax</emphasis>--></term>
+      <term><literal>grs.marcxml.</literal><replaceable>type</replaceable></term>
        <listitem>
         <para>
        <listitem>
         <para>
-        This allows Zebra to read
-        records in the ISO2709??? (MARCXML) encoding standard.
+        This allows Zebra to read ISO2709 encoded records.
+        The last parameter <replaceable>type</replaceable> names the
+        <literal>.abs</literal> file (see below)
+        which describes the specific MARC structure of the input record as
+        well as the indexing rules.
         </para>
         <para>
         </para>
         <para>
-         The loadable <literal>grs.marcxml</literal> filter module
-         is also contained in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-marc</literal>
-        </para>
-      </listitem>
-     </varlistentry>
-     <varlistentry>
-      <term>grs.danbib</term>
-      <listitem>
-       <para>
-        The <literal>grs.danbib</literal> filter parses DanBib
-        records, a danish MARC record variant called DANMARC.
-        DanBib is the Danish Union Catalogue hosted by the
-        Danish Bibliographic Centre (DBC).
+       The internal representation for <literal>grs.marcxml</literal>
+       is the same as for <ulink url="&url.marcxml;">MARCXML</ulink>.
+       It is slightly more complicated to work with than
+       <literal>grs.marc</literal> but is XML conformant.
         </para>
         </para>
-       <para>The loadable  <literal>grs.danbib</literal> filter module
-         is packages in the GNU/Debian package 
-         <literal>libidzebra1.4-mod-grs-danbib</literal>.
+       <para>
+       The loadable <literal>grs.marcxml</literal> filter module
+       is also contained in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-marc</literal>
         </para>
        </listitem>
       </varlistentry>
       <varlistentry>
         </para>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.xml</term>
+      <term><literal>grs.xml</literal></term>
        <listitem>
         <para>
        <listitem>
         <para>
-        This filter reads XML records and uses <ulink url="http://expat.sourceforge.net/">Expat</ulink> to
+        This filter reads XML records and uses
+       <ulink url="http://expat.sourceforge.net/">Expat</ulink> to
          parse them and convert them into IDZebra's internal 
          <literal>grs</literal> record model.
          parse them and convert them into IDZebra's internal 
          <literal>grs</literal> record model.
-        Only one record per file
-        is supported. The filter is only available if Zebra/YAZ
-        is compiled with EXPAT support.
+        Only one record per file is supported, due to the fact that XML does
+       not allow two documents to "follow" each other (there is no way
+       to know when a document is finished).
+       This filter is only available if Zebra is compiled with EXPAT support.
         </para>
         <para>
         </para>
         <para>
-         The loadable <literal>grs.xml</literal> filter module
-         is packagged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-xml</literal>
+       The loadable <literal>grs.xml</literal> filter module
+       is packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-xml</literal>
          </para>
        </listitem>
       </varlistentry>
       <varlistentry>
          </para>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.regx<!--.<emphasis>filter</emphasis>--></term>
+      <term><literal>grs.regx.</literal><replaceable>filter</replaceable></term>
        <listitem>
         <para>
          This enables a user-supplied Regular Expressions input
        <listitem>
         <para>
          This enables a user-supplied Regular Expressions input
-        filter described in
-        <xref linkend="grs-regx-tcl"/>.
+        filter described in <xref linkend="grs-regx-tcl"/>.
         </para>
         <para>
         </para>
         <para>
-         The loadable  <literal>grs.regx</literal> filter module
-         is packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-regx</literal>
-        </para>
+       The loadable <literal>grs.regx</literal> filter module
+       is packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-regx</literal>
+       </para>
        </listitem>
       </varlistentry>
       <varlistentry>
        </listitem>
       </varlistentry>
       <varlistentry>
-      <term>grs.tcl<!--.<emphasis>filter</emphasis>--></term>
+      <term><literal>grs.tcl.</literal><replaceable>filter</replaceable></term>
        <listitem>
         <para>
          Similar to grs.regx but using Tcl for rules, described in 
          <xref linkend="grs-regx-tcl"/>.
         </para>
         <para>
        <listitem>
         <para>
          Similar to grs.regx but using Tcl for rules, described in 
          <xref linkend="grs-regx-tcl"/>.
         </para>
         <para>
-         The loadable <literal>grs.tcl</literal> filter module
-         is also packaged in the GNU/Debian package
-        <literal>libidzebra1.4-mod-grs-regx</literal>
-        </para>
+       The loadable <literal>grs.tcl</literal> filter module
+       is also packaged in the GNU/Debian package
+        <literal>libidzebra2.0-mod-grs-regx</literal>
+       </para>
        </listitem>
       </varlistentry>
  
      </variablelist>
     </para>
  
        </listitem>
       </varlistentry>
  
      </variablelist>
     </para>
  
-   <sect2 id="grs-canonical-format">
+   <section id="grs-canonical-format">
      <title>GRS Canonical Input Format</title>
  
      <para>
      <title>GRS Canonical Input Format</title>
  
      <para>
@@ -207,7 +202,7 @@
       structured data element such a <emphasis>Supplier</emphasis> element.
      </para>
  
       structured data element such a <emphasis>Supplier</emphasis> element.
      </para>
  
-    <sect3>
+    <section id="grs-record-root">
       <title>Record Root</title>
  
       <para>
       <title>Record Root</title>
  
       <para>
@@ -234,9 +229,9 @@
  
       </para>
  
  
       </para>
  
-    </sect3>
+    </section>
  
  
-    <sect3><!-- ### we shouldn't make such a big deal about this -->
+    <section id="grs-variants">
       <title>Variants</title>
  
       <para>
       <title>Variants</title>
  
       <para>
@@ -272,7 +267,7 @@
        The available values for the <emphasis>class</emphasis> and
        <emphasis>type</emphasis> fields are given by the variant set
        that is associated with the current schema
        The available values for the <emphasis>class</emphasis> and
        <emphasis>type</emphasis> fields are given by the variant set
        that is associated with the current schema
-      (see <xref linkend="variant-set"/>).
+      (see <xref linkend="grs-var-files"/>).
       </para>
  
       <para>
       </para>
  
       <para>
@@ -331,11 +326,11 @@
        of the end-user.
       </para>
  
        of the end-user.
       </para>
  
-    </sect3>
+    </section>
  
  
-   </sect2>
+   </section>
  
  
-   <sect2 id="grs-regx-tcl">
+   <section id="grs-regx-tcl">
      <title>GRS REGX And TCL Input Filters</title>
  
      <para>
      <title>GRS REGX And TCL Input Filters</title>
  
      <para>
@@ -578,11 +573,11 @@
       mechanisms for modifying the elements of a record.
      </para>
  
       mechanisms for modifying the elements of a record.
      </para>
  
-   </sect2>
+   </section>
  
  
-  </sect1>
+  </section>
  
  
-  <sect1 id="grs-internal-representation">
+  <section id="grs-internal-representation">
     <title>GRS Internal Record Representation</title>
  
     <para>
     <title>GRS Internal Record Representation</title>
  
     <para>
@@ -633,7 +628,7 @@
      different tag path.
     </para>
  
      different tag path.
     </para>
  
-   <sect2>
+   <section id="grs-tagged-elements">
      <title>Tagged Elements</title>
  
      <para>
      <title>Tagged Elements</title>
  
      <para>
@@ -650,9 +645,9 @@
       reached from the root of the record).
      </para>
  
       reached from the root of the record).
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-variant-details">
      <title>Variants</title>
  
      <para>
      <title>Variants</title>
  
      <para>
@@ -686,9 +681,9 @@
       type, value, corresponding to the variant mechanism of Z39.50.
      </para>
      
       type, value, corresponding to the variant mechanism of Z39.50.
      </para>
      
-   </sect2>
+   </section>
     
     
-   <sect2>
+   <section id="grs-data-elements">
      <title>Data Elements</title>
      
      <para>
      <title>Data Elements</title>
      
      <para>
@@ -702,11 +697,11 @@
     </para>
      -->
      
     </para>
      -->
      
-   </sect2>
+   </section>
     
     
-  </sect1>
+  </section>
    
    
-  <sect1 id="record-model-grs-conf">
+  <section id="grs-conf">
     <title>GRS Record Model Configuration</title>
     
     <para>
     <title>GRS Record Model Configuration</title>
     
     <para>
@@ -717,7 +712,7 @@
      setting in the <literal>zebra.cfg</literal> file.
     </para>
  
      setting in the <literal>zebra.cfg</literal> file.
     </para>
  
-   <sect2>
+   <section id="grs-abstract-syntax">
      <title>The Abstract Syntax</title>
  
      <para>
      <title>The Abstract Syntax</title>
  
      <para>
@@ -810,9 +805,9 @@
       describe the given objects.
      </para>
  
       describe the given objects.
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-configuration-files">
      <title>The Configuration Files</title>
  
      <para>
      <title>The Configuration Files</title>
  
      <para>
@@ -841,9 +836,9 @@
       mandatory (m).
      </para>
      
       mandatory (m).
      </para>
      
-   </sect2>
+   </section>
     
     
-   <sect2 id="abs-file">
+   <section id="abs-file">
      <title>The Abstract Syntax (.abs) Files</title>
      
      <para>
      <title>The Abstract Syntax (.abs) Files</title>
      
      <para>
@@ -954,7 +949,7 @@
         </listitem>
        </varlistentry>
        <varlistentry>
         </listitem>
        </varlistentry>
        <varlistentry>
-       <term>any <replaceable>tags</replaceable></term>
+       <term>all <replaceable>tags</replaceable></term>
         <listitem>
          <para>
           (o) This directive specifies a list of attributes
         <listitem>
          <para>
           (o) This directive specifies a list of attributes
@@ -981,16 +976,16 @@
           the <replaceable>attributes</replaceable>
           specifies which attributes to use when indexing the element in a
           comma-separated list.
           the <replaceable>attributes</replaceable>
           specifies which attributes to use when indexing the element in a
           comma-separated list.
-         A ! in place of the attribute name is equivalent to
-         specifying an attribute name identical to the element name.
-         A - in place of the attribute name
+         A <literal>!</literal> in place of the attribute name is equivalent
+        to specifying an attribute name identical to the element name.
+         A <literal>-</literal> in place of the attribute name
           specifies that no indexing is to take place for the given element.
           The attributes can be qualified with <replaceable>field
            types</replaceable> to specify which
           character set should govern the indexing procedure for that field.
           The same data element may be indexed into several different
           fields, using different character set definitions.
           specifies that no indexing is to take place for the given element.
           The attributes can be qualified with <replaceable>field
            types</replaceable> to specify which
           character set should govern the indexing procedure for that field.
           The same data element may be indexed into several different
           fields, using different character set definitions.
-         See the <xref linkend="field-structure-and-character-sets"/>.
+         See the <xref linkend="fields-and-charsets"/>.
           The default field type is <literal>w</literal> for
           <emphasis>word</emphasis>.
          </para>
           The default field type is <literal>w</literal> for
           <emphasis>word</emphasis>.
          </para>
@@ -1208,9 +1203,9 @@
  
      </para>
  
  
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2 id="attset-files">
+   <section id="attset-files">
      <title>The Attribute Set (.att) Files</title>
  
      <para>
      <title>The Attribute Set (.att) Files</title>
  
      <para>
@@ -1294,9 +1289,9 @@
  
      </para>
  
  
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-tag-files">
      <title>The Tag Set (.tag) Files</title>
  
      <para>
      <title>The Tag Set (.tag) Files</title>
  
      <para>
@@ -1452,9 +1447,9 @@
       </screen>
      </para>
  
       </screen>
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2 id="variant-set">
+   <section id="grs-var-files">
      <title>The Variant Set (.var) Files</title>
  
      <para>
      <title>The Variant Set (.var) Files</title>
  
      <para>
@@ -1533,9 +1528,9 @@
  
      </para>
  
  
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-est-files">
      <title>The Element Set (.est) Files</title>
  
      <para>
      <title>The Element Set (.est) Files</title>
  
      <para>
@@ -1673,9 +1668,9 @@
  
      </para>
  
  
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2 id="schema-mapping">
+   <section id="schema-mapping">
      <title>The Schema Mapping (.map) Files</title>
  
      <para>
      <title>The Schema Mapping (.map) Files</title>
  
      <para>
@@ -1737,9 +1732,9 @@
       </variablelist>
      </para>
  
       </variablelist>
      </para>
  
-   </sect2>
+   </section>
  
  
-   <sect2>
+   <section id="grs-mar-files">
      <title>The MARC (ISO2709) Representation (.mar) Files</title>
  
      <para>
      <title>The MARC (ISO2709) Representation (.mar) Files</title>
  
      <para>
@@ -1754,253 +1749,10 @@
        handled by the system.</emphasis>
      -->
  
        handled by the system.</emphasis>
      -->
  
-   </sect2>
-
-   <sect2 id="field-structure-and-character-sets">
-    <title>Field Structure and Character Sets
-    </title>
-
-    <para>
-     In order to provide a flexible approach to national character set
-     handling, Zebra allows the administrator to configure the set up the
-     system to handle any 8-bit character set &mdash; including sets that
-     require multi-octet diacritics or other multi-octet characters. The
-     definition of a character set includes a specification of the
-     permissible values, their sort order (this affects the display in the
-     SCAN function), and relationships between upper- and lowercase
-     characters. Finally, the definition includes the specification of
-     space characters for the set.
-    </para>
-
-    <para>
-     The operator can define different character sets for different fields,
-     typical examples being standard text fields, numerical fields, and
-     special-purpose fields such as WWW-style linkages (URx).
-    </para>
-
-    <sect3 id="default-idx-file">
-     <title>The default.idx file</title>
-     <para>
-      The field types, and hence character sets, are associated with data
-      elements by the .abs files (see above).
-      The file <literal>default.idx</literal>
-      provides the association between field type codes (as used in the .abs
-      files) and the character map files (with the .chr suffix). The format
-      of the .idx file is as follows
-     </para>
-
-     <para>
-      <variablelist>
-
-       <varlistentry>
-       <term>index <emphasis>field type code</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a new search index code.
-         The argument is a one-character code to be used in the
-         .abs files to select this particular index type. An index, roughly,
-         corresponds to a particular structure attribute during search. Refer
-         to <xref linkend="search"/>.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>sort <emphasis>field code type</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a 
-         sort index. The argument is a one-character code to be used in the
-         .abs fie to select this particular index type. The corresponding
-         use attribute must be used in the sort request to refer to this
-         particular sort index. The corresponding character map (see below)
-         is used in the sort process.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>completeness <emphasis>boolean</emphasis></term>
-       <listitem>
-        <para>
-         This directive enables or disables complete field indexing.
-         The value of the <emphasis>boolean</emphasis> should be 0
-         (disable) or 1. If completeness is enabled, the index entry will
-         contain the complete contents of the field (up to a limit), with words
-         (non-space characters) separated by single space characters
-         (normalized to " " on display). When completeness is
-         disabled, each word is indexed as a separate entry. Complete subfield
-         indexing is most useful for fields which are typically browsed (eg.
-         titles, authors, or subjects), or instances where a match on a
-         complete subfield is essential (eg. exact title searching). For fields
-         where completeness is disabled, the search engine will interpret a
-         search containing space characters as a word proximity search.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>charmap <emphasis>filename</emphasis></term>
-       <listitem>
-        <para>
-         This is the filename of the character
-         map to be used for this index for field type.
-        </para>
-       </listitem></varlistentry>
-      </variablelist>
-     </para>
-    </sect3>
-
-    <sect3 id="character-map-files">
-     <title>The character map file format</title>
-     <para>
-      The contents of the character map files are structured as follows:
-     </para>
+   </section>
+  </section>
  
  
-     <para>
-      <variablelist>
-
-       <varlistentry>
-       <term>lowercase <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the basic value set of the field type.
-         The format is an ordered list (without spaces) of the
-         characters which may occur in "words" of the given type.
-         The order of the entries in the list determines the
-         sort order of the index. In addition to single characters, the
-         following combinations are legal:
-        </para>
-
-        <para>
-
-         <itemizedlist>
-          <listitem>
-           <para>
-            Backslashes may be used to introduce three-digit octal, or
-            two-digit hex representations of single characters
-            (preceded by <literal>x</literal>).
-            In addition, the combinations
-            \\, \\r, \\n, \\t, \\s (space &mdash; remember that real
-            space-characters may not occur in the value definition), and
-            \\ are recognized, with their usual interpretation.
-           </para>
-          </listitem>
-
-          <listitem>
-           <para>
-            Curly braces {} may be used to enclose ranges of single
-            characters (possibly using the escape convention described in the
-            preceding point), eg. {a-z} to introduce the
-            standard range of ASCII characters.
-            Note that the interpretation of such a range depends on
-            the concrete representation in your local, physical character set.
-           </para>
-          </listitem>
-
-          <listitem>
-           <para>
-            paranthesises () may be used to enclose multi-byte characters -
-            eg. diacritics or special national combinations (eg. Spanish
-            "ll"). When found in the input stream (or a search term),
-            these characters are viewed and sorted as a single character, with a
-            sorting value depending on the position of the group in the value
-            statement.
-           </para>
-          </listitem>
-
-         </itemizedlist>
-
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>uppercase <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the
-         upper-case equivalencis to the value set (if any). The number and
-         order of the entries in the list should be the same as in the
-         <literal>lowercase</literal> directive.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>space <emphasis>value-set</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces the character
-         which separate words in the input stream. Depending on the
-         completeness mode of the field in question, these characters either
-         terminate an index entry, or delimit individual "words" in
-         the input stream. The order of the elements is not significant &mdash;
-         otherwise the representation is the same as for the
-         <literal>uppercase</literal> and <literal>lowercase</literal>
-         directives.
-        </para>
-       </listitem></varlistentry>
-       <varlistentry>
-       <term>map <emphasis>value-set</emphasis>
-        <emphasis>target</emphasis></term>
-       <listitem>
-        <para>
-         This directive introduces a mapping between each of the
-         members of the value-set on the left to the character on the
-         right. The character on the right must occur in the value
-         set (the <literal>lowercase</literal> directive) of the
-         character set, but it may be a paranthesis-enclosed
-         multi-octet character. This directive may be used to map
-         diacritics to their base characters, or to map HTML-style
-         character-representations to their natural form, etc. The
-         map directive can also be used to ignore leading articles in
-         searching and/or sorting, and to perform other special
-         transformations. See section <xref
-         linkend="leading-articles"/>.
-        </para>
-       </listitem></varlistentry>
-      </variablelist>
-     </para>
-    </sect3>
-    <sect3 id="leading-articles">
-     <title>Ignoring leading articles</title>
-     <para>
-      In addition to specifying sort orders, space (blank) handling,
-      and upper/lowercase folding, you can also use the character map
-      files to make Zebra ignore leading articles in sorting records,
-      or when doing complete field searching.
-     </para>
-     <para>
-      This is done using the <literal>map</literal> directive in the
-      character map file. In a nutshell, what you do is map certain
-      sequences of characters, when they occur <emphasis> in the
-      beginning of a field</emphasis>, to a space. Assuming that the
-      character "@" is defined as a space character in your file, you
-      can do:
-      <screen>
-       map (^The\s) @
-       map (^the\s) @
-      </screen>
-      The effect of these directives is to map either 'the' or 'The',
-      followed by a space character, to a space. The hat ^ character
-      denotes beginning-of-field only when complete-subfield indexing
-      or sort indexing is taking place; otherwise, it is treated just
-      as any other character.
-     </para>
-     <para>
-      Because the <literal>default.idx</literal> file can be used to
-      associate different character maps with different indexing types
-      -- and you can create additional indexing types, should the need
-      arise -- it is possible to specify that leading articles should
-      be ignored either in sorting, in complete-field searching, or
-      both.
-     </para>
-     <para>
-      If you ignore certain prefixes in sorting, then these will be
-      eliminated from the index, and sorting will take place as if
-      they weren't there. However, if you set the system up to ignore
-      certain prefixes in <emphasis>searching</emphasis>, then these
-      are deleted both from the indexes and from query terms, when the
-      client specifies complete-field searching. This has the effect
-      that a search for 'the science journal' and 'science journal'
-      would both produce the same results.
-     </para>
-    </sect3>
-   </sect2>
-  </sect1>
-
-  <sect1 id="grs-exchange-formats">
+  <section id="grs-exchange-formats">
     <title>GRS Exchange Formats</title>
  
     <para>
     <title>GRS Exchange Formats</title>
  
     <para>
@@ -2085,8 +1837,326 @@
     
      </itemizedlist>
     </para>
     
      </itemizedlist>
     </para>
-  </sect1>
+  </section>
+  
+  <section id="grs-extended-marc-indexing">
+   <title>Extended indexing of MARC records</title>
+   
+   <para>Extended indexing of MARC records will help you if you need to
+    index a combination of subfields, index only a part of a whole field,
+    or use embedded fields of a MARC record during the indexing process.
+   </para>
+   
+   <para>Extended indexing of MARC records additionally allows:
+    <itemizedlist>
+     
+     <listitem>
+      <para>to index data in the LEADER of a MARC record</para>
+     </listitem>
+     
+     <listitem>
+      <para>to index data in control fields (with fixed length)</para>
+     </listitem>
+     
+     <listitem>
+      <para>to use the values of indicators during indexing</para>
+     </listitem>
+     
+     <listitem>
+      <para>to index linked fields for UNIMARC-based formats</para>
+     </listitem>
+     
+    </itemizedlist>
+   </para>
+   
+   <note><para>Compared with the simple indexing process, extended indexing
+     may increase the indexing time for MARC records by a factor of
+     about 2-3.</para></note>
+   
+   <section id="formula">
+    <title>The index-formula</title>
+    
+    <para>At the beginning, we have to define the term
+     <emphasis>index-formula</emphasis> for MARC records. This term helps
+     to understand the notation of extended indexing of MARC records by Zebra.
+     Our definition is based on the document
+     <ulink url="http://www.rba.ru/rusmarc/soft/Z39-50.htm">"The table
+      of conformity for Z39.50 use attributes and RUSMARC fields"</ulink>.
+     The document is available only in Russian.</para>
+    
+    <para>
+     The <emphasis>index-formula</emphasis> is a combination of
+     subfields presented in the following way:
+    </para>
+    
+    <screen>
+     71-00$a, $g, $h ($c){.$b ($c)} , (1)
+    </screen>
+    
+    <para>
+     Zebra supports the Bib-1 right truncation attribute.
+     In this case, the <emphasis>index-formula</emphasis> (1) consists of
+     the forms below, defined in the same way as (1):</para>
+    
+    <screen>
+     71-00$a, $g, $h
+     71-00$a, $g
+     71-00$a
+    </screen>
+    
+    <note>
+     <para>The original MARC record may lack some of the elements included in the <emphasis>index-formula</emphasis>.
+     </para>
+    </note>
+    
+    <para>This notation includes the following operands:
+     <variablelist>
+      
+      <varlistentry>
+       <term>#</term>
+       <listitem><para>It means a whitespace character.</para></listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>-</term>
+       <listitem><para>The position may contain any value defined by
+        the MARC format.
+        For example, <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        70-#1$a, $g , (2)
+       </screen>
+       
+       <para>includes</para> 
+       
+       <screen>
+        700#1$a, $g
+        701#1$a, $g
+        702#1$a, $g
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>{...}</term>
+       <listitem>
+       <para>The repeatable elements are defined in curly brackets {}.
+        For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        71-00$a, $g, $h ($c){.$b ($c)} , (3)
+       </screen>
+       
+       <para>includes</para>
+       
+       <screen>
+        71-00$a, $g, $h ($c). $b ($c)
+        71-00$a, $g, $h ($c). $b ($c). $b ($c)
+        71-00$a, $g, $h ($c). $b ($c). $b ($c). $b ($c)
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+     </variablelist>
+     
+     <note>
+      <para>
+       All other operands are the same as those accepted in the MARC world.
+      </para>
+     </note>
+    </para>
+   </section>
+   
+   <section id="notation">
+    <title>Notation of <emphasis>index-formula</emphasis> for Zebra</title>
+    
+    
+    <para>Extended indexing overloads the <literal>path</literal> of an
+     <literal>elm</literal> definition in the abstract syntax file of Zebra
+     (<literal>.abs</literal> file). It means that names beginning with
+     <literal>"mc-"</literal> are interpreted by Zebra as an
+     <emphasis>index-formula</emphasis>. The database index is created and
+     linked with an <emphasis>access point</emphasis> (Bib-1 use attribute)
+     according to this formula.</para>
+    
+    <para>For example, <emphasis>index-formula</emphasis></para>
+    
+    <screen>
+     71-00$a, $g, $h ($c){.$b ($c)} , (4)
+    </screen>
+    
+    <para>in <literal>.abs</literal> file looks like:</para>
+    
+    <screen>
+     mc-71.00_$a,_$g,_$h_(_$c_){.$b_(_$c_)}
+    </screen>
+    
+    
+    <para>The notation of <emphasis>index-formula</emphasis> uses the operands:
+     <variablelist>
+      
+      <varlistentry>
+       <term>_</term>
+       <listitem><para>It means a whitespace character.</para></listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>.</term>
+       <listitem><para>The position may contain any value defined by
+        the MARC format. For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        70-#1$a, $g , (5)
+       </screen>
+       
+       <para>matches <literal>mc-70._1_$a,_$g_</literal> and includes</para>
+       
+       <screen>
+        700_1_$a,_$g_
+        701_1_$a,_$g_
+        702_1_$a,_$g_
+       </screen>
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>{...}</term>
+       <listitem><para>The repeatable elements are defined in
+        curly brackets {}. For example,
+        <emphasis>index-formula</emphasis></para>
+       
+       <screen>
+        71-00$a, $g, $h ($c){.$b ($c)} , (6)
+       </screen>
+       
+       <para>matches 
+        <literal>mc-71.00_$a,_$g,_$h_(_$c_){.$b_(_$c_)}</literal> and
+        includes</para>
+       
+       <screen>
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_)
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_).$b_(_$c_)
+        71.00_$a,_$g,_$h_(_$c_).$b_(_$c_).$b_(_$c_).$b_(_$c_)
+       </screen>
+       </listitem>
+      </varlistentry>
+      
+      <varlistentry>
+       <term>&#60;...&#62;</term>
+       <listitem><para>An embedded <emphasis>index-formula</emphasis> (for
+        linked fields) is enclosed in &#60;&#62;. For example,
+        <emphasis>index-formula</emphasis>
+       </para>
+       
+       <screen>
+        4--#-$170-#1$a, $g ($c) , (7)
+       </screen>
+       
+       <para>matches
+        <literal>mc-4.._._$1&#60;70._1_$a,_$g_(_$c_)&#62;_</literal> and
+        includes</para>
+       
+       <screen>
+        463_._$1&#60;70._1_$a,_$g_(_$c_)&#62;_
+       </screen>
+       
+       </listitem>
+      </varlistentry>
+     </variablelist>
+    </para>
+    
+    <note>
+     <para>All other operands are the same as those accepted in the MARC world.</para>
+    </note>
+    
+    <section id="grs-examples">
+     <title>Examples</title>
+     
+     <para>
+      <orderedlist>
+       
+       <listitem>
+       
+       <para>indexing LEADER</para>
+       
+       <para>You need to use the keyword "ldr" to index the leader. For example,
+        to index data from the 6th and 7th positions of the LEADER:</para>
+       
+       <screen>
+        elm mc-ldr[6] Record-type !
+        elm mc-ldr[7] Bib-level   !
+       </screen>
+       
+       </listitem>
+       
+       <listitem>
+       
+       <para>indexing data from control fields</para>
+       
+       <para>indexing date (the time added to database)</para>
+       
+       <screen>
+        elm mc-008[0-5] Date/time-added-to-db !        
+       </screen>
+       
+       <para>or for RUSMARC (this data is included in the 100th field)</para>
+       
+       <screen>
+        elm mc-100___$a[0-7]_ Date/time-added-to-db !
+       </screen>
+       
+       </listitem>
+       
+       <listitem>
+       
+       <para>using indicators while indexing</para>
+
+       <para>For RUSMARC <emphasis>index-formula</emphasis>
+        <literal>70-#1$a, $g</literal> matches</para>
+       
+       <screen>
+        elm mc-70._1_$a,_$g_ Author !:w,!:p
+       </screen>
+       
+       <para>When Zebra finds a field according to the
+        <literal>"70."</literal> pattern it checks the indicators. In this
+        case the value of the first indicator doesn't matter, but the value of
+        the second one must be whitespace; otherwise the field is not
+        indexed.</para>
+       </listitem>
+       
+       <listitem>
+       
+       <para>indexing embedded (linked) fields for UNIMARC based
+        formats</para>
+       
+       <para>For RUSMARC <emphasis>index-formula</emphasis> 
+        <literal>4--#-$170-#1$a, $g ($c)</literal> matches</para>
+       
+       <screen><![CDATA[
+        elm mc-4.._._$1<70._1_$a,_$g_(_$c_)>_ Author !:w,!:p
+        ]]></screen>
+       
+       <para>Data are extracted from the record if the field matches the
+        <literal>"4.._."</literal> pattern and the data in the linked field
+        match the embedded
+        <emphasis>index-formula</emphasis>
+        <literal>70._1_$a,_$g_(_$c_)</literal>.</para>
+       
+       </listitem>
+       
+      </orderedlist>
+     </para>
+     
+     
+    </section>
+   </section>
  
  
+  </section>
+  
   </chapter>
   <!-- Keep this comment at the end of the file
   Local variables:
   </chapter>
   <!-- Keep this comment at the end of the file
   Local variables: