Fix documentation of chr's equivalent directive ZEB-672

[idzebra-moved-to-github.git] / doc / field-structure.xml
diff --git a/doc/field-structure.xml b/doc/field-structure.xml

index bf02d35..a19838e 100644 (file)
--- a/doc/field-structure.xml
+++ b/doc/field-structure.xml
@@ -1,5 +1,4 @@
   <chapter id="fields-and-charsets">
-  <!-- $Id: field-structure.xml,v 1.13 2007-12-19 09:30:29 adam Exp $ -->
    <title>Field Structure and Character Sets
    </title>
    
@@ -22,15 +21,15 @@
    </para>
  
    <para>
-   Zebra 1.3 and Zebra 2.0 series require that the field type is
-   a single character, e.g. <literal>w</literal> (for word), and
-   <literal>p</literal> for phrase. Zebra 2.1 allows field types to
-   be any string. This allows for greater flexibility - in particular
+   Zebra 1.3 and Zebra versions 2.0.18 and earlier required that the field
+   type is a single character, e.g. <literal>w</literal> (for word), and
+   <literal>p</literal> for phrase. Zebra 2.0.20 and later allow field types 
+   to be any string. This allows for greater flexibility - in particular
     per-locale (language) fields can be defined.
    </para>
  
    <para>
-   Version 2.1 of Zebra can also be configured - per field - to use the
+   Version 2.0.20 of Zebra can also be configured - per field - to use the
     <ulink url="&url.icu;">ICU</ulink> library to perform tokenization and
     normalization of strings. This is an alternative to the "charmap"
     files which has been part of Zebra since its first release.
@@ -85,9 +84,9 @@
         (non-space characters) separated by single space characters
         (normalized to " " on display). When completeness is
         disabled, each word is indexed as a separate entry. Complete subfield
-       indexing is most useful for fields which are typically browsed (eg.
+       indexing is most useful for fields which are typically browsed (e.g.,
         titles, authors, or subjects), or instances where a match on a
-       complete subfield is essential (eg. exact title searching). For fields
+       complete subfield is essential (e.g., exact title searching). For fields
         where completeness is disabled, the search engine will interpret a
         search containing space characters as a word proximity search.
         </para>
@@ -137,7 +136,7 @@
        </listitem></varlistentry>
      </variablelist>
     </para>
-   <example>
+   <example id="field-types">
      <title>Field types</title>
      <para>
       Following are three excerpts of the standard
@@ -147,7 +146,7 @@
       to them:
       <screen>
       # Traditional word index
-     # Used if completenss is 'incomplete field' (@attr 6=1) and
+     # Used if completeness is 'incomplete field' (@attr 6=1) and
       # structure is word/phrase/word-list/free-form-text/document-text
       index w
       completeness 0
@@ -296,7 +295,7 @@
           <para>
            Curly braces {} may be used to enclose ranges of single
            characters (possibly using the escape convention described in the
-          preceding point), eg. {a-z} to introduce the
+          preceding point), e.g., {a-z} to introduce the
            standard range of ASCII characters.
            Note that the interpretation of such a range depends on
            the concrete representation in your local, physical character set.
@@ -305,8 +304,8 @@
  
          <listitem>
           <para>
-          paranthesises () may be used to enclose multi-byte characters -
-          eg. diacritics or special national combinations (eg. Spanish
+          parentheses () may be used to enclose multi-byte characters -
+          e.g., diacritics or special national combinations (e.g., Spanish
            "ll"). When found in the input stream (or a search term),
            these characters are viewed and sorted as a single character, with a
            sorting value depending on the position of the group in the value
@@ -437,21 +436,22 @@
        <term>equivalent <replaceable>value-set</replaceable></term>
        <listitem>
         <para>
-       This directive introduces equivalence classes of characters
-       and/or strings for sorting purposes only. It resembles the map
-       directive, but does not affect search and retrieval indexing,
-       but only sorting order under present requests. 
+       This directive introduces equivalence classes of strings for
+       searching purposes only. It's a one-to-many
+       conversion that takes place only during search before the map
+       directive kicks in.
         </para>
         <para>
-        For example, <literal>scan.chr</literal> contains the following
-        equivalent sorting instructions, which can be uncommented:
+        For example, given:
          <screen><![CDATA[
-         # equivalent æä(ae)
-         # equivalent øö(oe)
-         # equivalent å(aa)
-         # equivalent uü
+         equivalent æä(ae)
          ]]></screen>
         </para>
+       <para>
+        a search for <literal>äsel</literal> will match any of
+        <literal>æsel</literal>, <literal>äsel</literal> and
+        <literal>aesel</literal>.
+       </para>
        </listitem></varlistentry>
      </variablelist>
     </para>
@@ -480,7 +480,7 @@
       Use the yaz-icu program to test your icuchain rules.
      </para>
     </tip>
-   <example><title>Indexing Greek text</title>
+   <example id="indexing-greek-example"><title>Indexing Greek text</title>
      <para>
       Consider a system where all "regular" text is to be indexed
       using as Greek (locale: EL).
@@ -513,10 +513,10 @@
      which is an ICU chain version of <filename>default.idx</filename>.
     </para>
  
-   <example><title>MARCXML indexing using ICU</title>
+   <example id="indexing-marcxml-example"><title>MARCXML indexing using ICU</title>
      <para>
       The directory <filename>examples/marcxml</filename> includes
-     a complete sample with MARCXML recordst that are DOM XML indexed 
+     a complete sample with MARCXML records that are DOM XML indexed 
       using ICU chain rules. Study the
       <filename>README</filename> in the <filename>marcxml</filename>
       directory for details.
@@ -534,7 +534,7 @@
   sgml-always-quote-attributes:t
   sgml-indent-step:1
   sgml-indent-data:t
- sgml-parent-document: "zebra.xml"
+ sgml-parent-document: "idzebra.xml"
   sgml-local-catalogs: nil
   sgml-namecase-general:t
   End: