Added charmap facility to delete leading articles

[idzebra-moved-to-github.git] / doc / examples.xml
diff --git a/doc/examples.xml b/doc/examples.xml

index 10cbeb5..f8f9a20 100644 (file)
--- a/doc/examples.xml
+++ b/doc/examples.xml
@@ -1,5 +1,5 @@
  <chapter id="examples">
  <chapter id="examples">
- <!-- $Id: examples.xml,v 1.17 2002-11-08 17:00:57 mike Exp $ -->
+ <!-- $Id: examples.xml,v 1.19 2002-12-30 12:56:07 adam Exp $ -->
   <title>Example Configurations</title>
  
   <sect1>
   <title>Example Configurations</title>
  
   <sect1>
@@ -19,23 +19,35 @@
  
      <listitem>
       <para>
  
      <listitem>
       <para>
-      Where to find subsidiary configuration files, including
-      <literal>default.idx</literal>
+      Where to find subsidiary configuration files, including both
+      those that are named explicitly and a few ``magic'' files such
+      as <literal>default.idx</literal>,
        which specifies the default indexing rules.
       </para>
      </listitem>
  
      <listitem>
       <para>
        which specifies the default indexing rules.
       </para>
      </listitem>
  
      <listitem>
       <para>
-      What attribute sets to recognise in searches.
+      What record schemas to support.  (Subsidiary files specify how
+      to index the contents of records in those schemas, and what
+      format to use when presenting records in those schemas to client
+      software.)
       </para>
      </listitem>
  
      <listitem>
       <para>
       </para>
      </listitem>
  
      <listitem>
       <para>
-      Policy details such as what record type to expect, what
-      low-level indexing algorithm to use, how to identify potential
-      duplicate records, etc.
+      What attribute sets to recognise in searches.  (Subsidiary files
+      specify how to interpret the attributes in terms
+      of the indexes that are created on the records.)
+     </para>
+    </listitem>
+
+    <listitem>
+     <para>
+      Policy details such as what type of input format to expect when
+      adding new records, what low-level indexing algorithm to use,
+      how to identify potential duplicate records, etc.
       </para>
      </listitem>
  
       </para>
      </listitem>
  
@@ -69,6 +81,10 @@
     <literal>dino.tree</literal>.)
     Type <literal>make records/dino.xml</literal>
     to make the XML data file.
     <literal>dino.tree</literal>.)
     Type <literal>make records/dino.xml</literal>
     to make the XML data file.
+   (Or you could just type <literal>make dino</literal> to build the XML
+   data file, create the database and populate it with the taxonomic
+   records all in one shot - but then you wouldn't learn anything,
+   would you?  :-)
    </para>
    <para>
     Now we need to create a Zebra database to hold and index the XML
    </para>
    <para>
     Now we need to create a Zebra database to hold and index the XML
@@ -76,7 +92,7 @@
     Zebra indexer, <literal>zebraidx</literal>, which is
     driven by the <literal>zebra.cfg</literal> configuration file.
     For our purposes, we don't need any
     Zebra indexer, <literal>zebraidx</literal>, which is
     driven by the <literal>zebra.cfg</literal> configuration file.
     For our purposes, we don't need any
-   special behaviour - we can use the defaults - so we start with a
+   special behaviour - we can use the defaults - so we can start with a
     minimal file that just tells <literal>zebraidx</literal> where to
     find the default indexing rules, and how to parse the records:
     <screen>
     minimal file that just tells <literal>zebraidx</literal> where to
     find the default indexing rules, and how to parse the records:
     <screen>
@@ -108,7 +124,7 @@
     XPath-based boolean queries and fetch the XML records that satisfy
     them:
     <screen>
     XPath-based boolean queries and fetch the XML records that satisfy
     them:
     <screen>
-    $ yaz-client tcp:@:9999
+    $ yaz-client @:9999
      Connecting...Ok.
      Z&gt; find @attr 1=/Zthes/termName Sauroposeidon
      Number of hits: 1
      Connecting...Ok.
      Z&gt; find @attr 1=/Zthes/termName Sauroposeidon
      Number of hits: 1
@@ -118,6 +134,7 @@
       &lt;termId&gt;22&lt;/termId&gt;
       &lt;termName&gt;Sauroposeidon&lt;/termName&gt;
       &lt;termType&gt;PT&lt;/termType&gt;
       &lt;termId&gt;22&lt;/termId&gt;
       &lt;termName&gt;Sauroposeidon&lt;/termName&gt;
       &lt;termType&gt;PT&lt;/termType&gt;
+     &lt;termNote&gt;The tallest known dinosaur (18m)&lt;/termNote&gt;
       &lt;relation&gt;
        &lt;relationType&gt;BT&lt;/relationType&gt;
        &lt;termId&gt;21&lt;/termId&gt;
       &lt;relation&gt;
        &lt;relationType&gt;BT&lt;/relationType&gt;
        &lt;termId&gt;21&lt;/termId&gt;
@@ -126,7 +143,7 @@
       &lt;/relation&gt;
  
        &lt;idzebra xmlns="http://www.indexdata.dk/zebra/"&gt;
       &lt;/relation&gt;
  
        &lt;idzebra xmlns="http://www.indexdata.dk/zebra/"&gt;
-       &lt;size&gt;245&lt;/size&gt;
+       &lt;size&gt;300&lt;/size&gt;
         &lt;localnumber&gt;23&lt;/localnumber&gt;
         &lt;filename&gt;records/dino.xml&lt;/filename&gt;
        &lt;/idzebra&gt;
         &lt;localnumber&gt;23&lt;/localnumber&gt;
         &lt;filename&gt;records/dino.xml&lt;/filename&gt;
        &lt;/idzebra&gt;
@@ -134,7 +151,7 @@
     </screen>
    </para>
    <para>
     </screen>
    </para>
    <para>
-   Now wasn't that easy?
+   Now wasn't that nice and easy?
    </para>
   </sect1>
  
    </para>
   </sect1>
  
@@ -158,7 +175,7 @@
     significantly because it ties searching semantics to the physical
     structure of the searched records.  You can't use the same search
     specification to search two databases if their internal
     significantly because it ties searching semantics to the physical
     structure of the searched records.  You can't use the same search
     specification to search two databases if their internal
-   representations are different.  Consider an alternative taxonomy
+   representations are different.  Consider a different taxonomy
     database in which the records have taxon names specified
     inside a <literal>&lt;name&gt;</literal> element nested within a
     <literal>&lt;identification&gt;</literal> element
     database in which the records have taxon names specified
     inside a <literal>&lt;name&gt;</literal> element nested within a
     <literal>&lt;identification&gt;</literal> element
@@ -175,8 +192,8 @@
     said about implementation: in a given database, an access point
     might be implemented as an index, a path into physical records, an
     algorithm for interrogating relational tables or whatever works.
     said about implementation: in a given database, an access point
     might be implemented as an index, a path into physical records, an
     algorithm for interrogating relational tables or whatever works.
-   The key point is that the semantics of an access point are fixed
-   and well defined.
+   The only important point is that the semantics of an access
+   point are fixed and well defined.
    </para>
    <para>
     For convenience, access points are gathered into <firstterm>attribute
    </para>
    <para>
     For convenience, access points are gathered into <firstterm>attribute
@@ -192,7 +209,7 @@
     In practice, the BIB-1 attribute set has tended to be a dumping
     ground for all sorts of access points, so that, for example, it
     includes some geospatial access points as well as strictly
     In practice, the BIB-1 attribute set has tended to be a dumping
     ground for all sorts of access points, so that, for example, it
     includes some geospatial access points as well as strictly
-   bibliographic ones.  Nevertheless, the key point is that this model
+   bibliographic ones.  Nevertheless, this model
     allows a layer of abstraction over the physical representation of
     records in databases.
    </para>
     allows a layer of abstraction over the physical representation of
     records in databases.
    </para>
@@ -213,24 +230,89 @@
     This is a two-step process.  First, we need to tell Zebra that we
     want to support the BIB-1 attribute set.  Then we need to tell it
     which elements of its record pertain to access point 4.
     This is a two-step process.  First, we need to tell Zebra that we
     want to support the BIB-1 attribute set.  Then we need to tell it
     which elements of its record pertain to access point 4.
-  </para>
-  <para>
+   </para>
+   <para>
     We need to create an <link linkend="abs-file">Abstract Syntax
     file</link> named after the document element of the records we're
     We need to create an <link linkend="abs-file">Abstract Syntax
     file</link> named after the document element of the records we're
-   working with, plus a <literal>.abs</literal> suffix - in this case,
-   <literal>Zthes.abs</literal> - as follows:
-  </para>
-  <itemizedlist>
-   <listitem>
-    <para>
-     
-    </para>
-   </listitem>
-   <listitem>
-    <para>
-    </para>
-   </listitem>
-  </itemizedlist>
+    working with, plus a <literal>.abs</literal> suffix - in this case,
+    <literal>Zthes.abs</literal> - as follows:
+   </para>
+   <programlistingco>
+    <areaspec>
+     <area id="attset.zthes" coords="2"/>
+     <area id="attset.attset" coords="3"/>
+     <area id="termId" coords="7"/>
+     <area id="termName" coords="8"/>
+    </areaspec>
+    <programlisting>
+attset zthes.att
+attset bib1.att
+xpath enable
+systag sysno none
+
+xelm /Zthes/termId              termId:w
+xelm /Zthes/termName            termName:w,title:w
+xelm /Zthes/termQualifier       termQualifier:w
+xelm /Zthes/termType            termType:w
+xelm /Zthes/termLanguage        termLanguage:w
+xelm /Zthes/termNote            termNote:w
+xelm /Zthes/termCreatedDate     termCreatedDate:w
+xelm /Zthes/termCreatedBy       termCreatedBy:w
+xelm /Zthes/termModifiedDate    termModifiedDate:w
+xelm /Zthes/termModifiedBy      termModifiedBy:w
+    </programlisting>
+   <calloutlist>
+    <callout arearefs="attset.zthes">
+     <para>
+      Declare Thesaurus attribute set. See <filename>zthes.att</filename>.
+     </para>
+    </callout>
+    <callout arearefs="attset.attset">
+     <para>
+      Declare Bib-1 attribute set. See <filename>bib1.att</filename> in
+      Zebra's <filename>tab</filename> directory.
+     </para>
+    </callout>
+    <callout arearefs="termId">
+     <para>
+      This xelm directive selects contents of nodes by XPath expression
+      <literal>/Zthes/termId</literal>. The contents (CDATA) will be
+      word searchable by Zthes attribute termId (value 1001).
+     </para>
+    </callout>
+    <callout arearefs="termName">
+     <para>
+      Make <literal>termName</literal> word searchable by both
+      Zthes attribute termName (1002) and Bib-1 attribute title (4).
+     </para>
+    </callout>
+   </calloutlist>
+  </programlistingco>
+   <para>
+    After re-indexing, we can search the database using the Bib-1
+    title attribute, as follows:
+    <screen>
+Z&gt; form xml
+Z&gt; f @attr 1=4 Eoraptor
+Sent searchRequest.
+Received SearchResponse.
+Search was a success.
+Number of hits: 1, setno 1
+SearchResult-1: Eoraptor(1)
+records returned: 0
+Elapsed: 0.106896
+Z&gt; s
+Sent presentRequest (1+1).
+Records: 1
+[Default]Record type: XML
+&lt;Zthes&gt;
+ &lt;termId&gt;2&lt;/termId&gt;
+ &lt;termName&gt;Eoraptor&lt;/termName&gt;
+ &lt;termType&gt;PT&lt;/termType&gt;
+ &lt;termNote&gt;The most basal known dinosaur&lt;/termNote&gt;
+ ...
+    </screen>
+   </para>
   </sect1>
  </chapter>
  
   </sect1>
  </chapter>