X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=doc%2Ffield-structure.xml;h=a19838e57e8e00271e3491be1e0a5f7fbc645a69;hp=49ce9a02ce27cd6dc3f053c3f8820640294d864f;hb=HEAD;hpb=641b06ee918ab2d4f899177b7412507d7969b4c6

diff --git a/doc/field-structure.xml b/doc/field-structure.xml
index 49ce9a0..a19838e 100644
--- a/doc/field-structure.xml
+++ b/doc/field-structure.xml
@@ -1,5 +1,4 @@
  <chapter id="fields-and-charsets">
-  <!-- $Id: field-structure.xml,v 1.15 2008-03-05 09:33:23 adam Exp $ -->
   <title>Field Structure and Character Sets
   </title>
   
@@ -30,7 +29,7 @@
   </para>
 
   <para>
-   Version 2.1 of Zebra can also be configured - per field - to use the
+   Version 2.0.20 of Zebra can also be configured - per field - to use the
    <ulink url="&url.icu;">ICU</ulink> library to perform tokenization and
    normalization of strings. This is an alternative to the "charmap"
    files which has been part of Zebra since its first release.
@@ -85,9 +84,9 @@
 	(non-space characters) separated by single space characters
 	(normalized to " " on display). When completeness is
 	disabled, each word is indexed as a separate entry. Complete subfield
-	indexing is most useful for fields which are typically browsed (eg.
+	indexing is most useful for fields which are typically browsed (e.g.,
 	titles, authors, or subjects), or instances where a match on a
-	complete subfield is essential (eg. exact title searching). For fields
+	complete subfield is essential (e.g., exact title searching). For fields
 	where completeness is disabled, the search engine will interpret a
 	search containing space characters as a word proximity search.
        </para>
@@ -147,7 +146,7 @@
      to them:
      <screen>
      # Traditional word index
-     # Used if completenss is 'incomplete field' (@attr 6=1) and
+     # Used if completeness is 'incomplete field' (@attr 6=1) and
      # structure is word/phrase/word-list/free-form-text/document-text
      index w
      completeness 0
@@ -296,7 +295,7 @@
 	  <para>
 	   Curly braces {} may be used to enclose ranges of single
 	   characters (possibly using the escape convention described in the
-	   preceding point), eg. {a-z} to introduce the
+	   preceding point), e.g., {a-z} to introduce the
 	   standard range of ASCII characters.
 	   Note that the interpretation of such a range depends on
 	   the concrete representation in your local, physical character set.
@@ -305,8 +304,8 @@
 
 	 <listitem>
 	  <para>
-	   paranthesises () may be used to enclose multi-byte characters -
-	   eg. diacritics or special national combinations (eg. Spanish
+	   parentheses () may be used to enclose multi-byte characters -
+	   e.g., diacritics or special national combinations (e.g., Spanish
 	   "ll"). When found in the input stream (or a search term),
 	   these characters are viewed and sorted as a single character, with a
 	   sorting value depending on the position of the group in the value
@@ -437,21 +436,22 @@
       <term>equivalent <replaceable>value-set</replaceable></term>
       <listitem>
        <para>
-	This directive introduces equivalence classes of characters
-	and/or strings for sorting purposes only. It resembles the map
-	directive, but does not affect search and retrieval indexing,
-	but only sorting order under present requests. 
+	This directive introduces equivalence classes of strings for
+	searching purposes only. It's a one-to-many
+	conversion that takes place only during search before the map
+	directive kicks in.
        </para>
        <para>
-        For example, <literal>scan.chr</literal> contains the following
-        equivalent sorting instructions, which can be uncommented:
+	 For example, given:
         <screen><![CDATA[
-         # equivalent æä(ae)
-         # equivalent øö(oe)
-         # equivalent å(aa)
-         # equivalent uü
+         equivalent æä(ae)
         ]]></screen>
        </para>
+       <para>
+	 a search for <literal>äsel</literal> will match any of
+	 <literal>æsel</literal>, <literal>äsel</literal> and
+	 <literal>aesel</literal>.
+       </para>
       </listitem></varlistentry>
     </variablelist>
    </para>
@@ -516,7 +516,7 @@
    <example id="indexing-marcxml-example"><title>MARCXML indexing using ICU</title>
     <para>
      The directory <filename>examples/marcxml</filename> includes
-     a complete sample with MARCXML recordst that are DOM XML indexed 
+     a complete sample with MARCXML records that are DOM XML indexed 
      using ICU chain rules. Study the
      <filename>README</filename> in the <filename>marcxml</filename>
      directory for details.
@@ -534,7 +534,7 @@
  sgml-always-quote-attributes:t
  sgml-indent-step:1
  sgml-indent-data:t
- sgml-parent-document: "zebra.xml"
+ sgml-parent-document: "idzebra.xml"
  sgml-local-catalogs: nil
  sgml-namecase-general:t
  End: