Fix broken xml entity.

[idzebra-moved-to-github.git] / doc / tutorial.xml
diff --git a/doc/tutorial.xml b/doc/tutorial.xml

index edcd5fd..8d79bd8 100644 (file)
--- a/doc/tutorial.xml
+++ b/doc/tutorial.xml
@@ -1,7 +1,7 @@
   <chapter id="tutorial">
    <title>Tutorial</title>
  
-  
+
    <sect1 id="tutorial-oai">
     <title>A first &acro.oai; indexing example</title>
  
@@ -15,22 +15,22 @@
      Go to the <literal>examples/oai-pmh</literal> subdirectory of the
      distribution archive, or make a deep copy of the Debian installation
      directory
-    <literal>/usr/share/idzebra-2.0.-examples/oai-pmh</literal>. 
+    <literal>/usr/share/idzebra-2.0-examples/oai-pmh</literal>.
      An XML file containing multiple &acro.oai;
      records is located in the  sub
-    directory <literal>examples/oai-pmh/data</literal>. 
+    directory <literal>examples/oai-pmh/data</literal>.
     </para>
-   <para> 
+   <para>
      Additional OAI test records can be downloaded by running a shell
-    script (you may want to abort the script when you have waitet
-    longer than your coffe brews ..).
+    script (you may want to abort the script when you have waited
+    longer than your coffee brews ..).
      <screen>
       cd data
       ./fetch_OAI_data.sh
       cd ../
      </screen>
     </para>
-   <para> 
+   <para>
      To index these &acro.oai; records, type:
      <screen>
       zebraidx-2.0 -c conf/zebra.cfg init
@@ -40,24 +40,24 @@
      In case you have not installed zebra yet but have compiled the
      binaries from this tarball, use the following command form:
      <screen>
-     ../../index/zebraidx -c conf/zebra.cfg this and that 
+     ../../index/zebraidx -c conf/zebra.cfg this and that
      </screen>
      On some systems the &zebra; binaries are installed under the
     generic names, so you need to use the following command form:
      <screen>
-     zebraidx -c conf/zebra.cfg this and that 
+     zebraidx -c conf/zebra.cfg this and that
      </screen>
     </para>
-   
+
     <para>
      In this command, the word <literal>update</literal> is followed
      by the name of a directory: <literal>zebraidx</literal> updates all
-    files in the hierarchy rooted at <literal>data</literal>. 
-    The command option 
+    files in the hierarchy rooted at <literal>data</literal>.
+    The command option
      <literal>-c conf/zebra.cfg</literal> points to the proper
      configuration file.
     </para>
-   
+
     <para>
      You might ask yourself how &acro.xml; content is indexed using &acro.xslt;
      stylesheets: to satisfy your curiosity, you might want to run the
@@ -86,17 +86,17 @@
      return records in the &acro.xml; format only. The indexing machine
      did the splitting into individual records just behind the scenes.
     </para>
-   
+
  
    </sect1>
  
    <sect1 id="tutorial-oai-sru-pqf">
     <title>Searching the &acro.oai; database by web service</title>
-   
+
     <para>
     &zebra; has a built-in web service, which is close to the
      &acro.sru; standard web service. We use it to access our new
-    database using any   &acro.xml; enabled web browser. 
+    database using any   &acro.xml; enabled web browser.
      This service is using the  &acro.pqf; query language.
      In a later
      section we show how to run a fully compliant  &acro.sru; server,
@@ -105,7 +105,7 @@
  
     <para>
      Searching and retrieving &acro.xml; records is easy. For example,
-    you can point your browser to one of the following url's to
+    you can point your browser to one of the following URLs to
      search for the term <literal>the</literal>. Just point your
      browser at this link:
      <ulink
@@ -115,14 +115,14 @@
  
     <warning>
      <para>
-     These URL's woun't work unless you have indexed the example data
+     These URLs won't work unless you have indexed the example data
      and started a &zebra; server as outlined in the previous section.
      </para>
     </warning>
  
     <para>
      In case we actually want to retrieve one record, we need to alter
-    our URl to the following
+    our URL to the following
      <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc">
       http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc
      </ulink>
@@ -138,7 +138,7 @@
  
     <!--
     relation tests:
-   
+
     <ulink url="">
  
     http://localhost:9999/?version=1.1&amp;operation=searchRetrieve
@@ -153,23 +153,23 @@
     &zebra; uses &acro.xslt; stylesheets for both &acro.xml; record
      indexing and
     display retrieval. In this example installation, there are two
-    retrieval schema's defined in 
-    <literal>conf/dom-conf.xml</literal>: 
-    the <literal>dc</literal> schema implemented in 
+    retrieval schemas defined in
+    <literal>conf/dom-conf.xml</literal>:
+    the <literal>dc</literal> schema implemented in
      <literal>conf/oai2dc.xsl</literal>, and
-    the <literal>zebra</literal> schema implemented in 
-    <literal>conf/oai2zebra.xsl</literal>. 
-    The URL's for acessing both are the same, except for the different
+    the <literal>zebra</literal> schema implemented in
+    <literal>conf/oai2zebra.xsl</literal>.
+    The URLs for accessing both are the same, except for the different
      value of the <literal>recordSchema</literal> parameter:
      <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc">
       http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc
-    </ulink>    
+    </ulink>
      and
      <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra">
       http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra
-    </ulink>    
+    </ulink>
      For the curious, one can see that the &acro.xslt; transformations
-    really do the magic.  
+    really do the magic.
      <screen>
       xsltproc conf/oai2dc.xsl data/debug-record.xml
       xsltproc conf/oai2zebra.xsl data/debug-record.xml
@@ -196,7 +196,7 @@
      original stored &acro.oai; &acro.xml; record.
      <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::data">
       http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::data
-    </ulink>    
+    </ulink>
     </para>
  
    </sect1>
@@ -208,27 +208,27 @@
      The &acro.oai; indexing example defines many different index
      names, a study of the <literal>conf/oai2index.xsl</literal>
      stylesheet reveals the following word type indexes (i.e. those
-    swith suffix <literal>:w</literal>):
+    with suffix <literal>:w</literal>):
      <screen>
-     any:w 
-     dc_title:w
-     dc_creator:w
-     dc_subject:w
-     dc_description:w
-     dc_contributor:w
-     dc_publisher:w
-     dc_language:w
-     dc_rights:w
+     any:w
+     title:w
+     author:w
+     subject:w
+     description:w
+     contributor:w
+     publisher:w
+     language:w
+     rights:w
      </screen>
-    By default, searches do access the <literal>anr:w</literal> index,
+    By default, searches do access the <literal>any:w</literal> index,
      but we can direct searches to any access point by constructing the
      correct &acro.pqf; query. For example, to search in titles only,
      we use
      <ulink
       url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=@attr
-     1=dc_title the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc">
+     1=title the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc">
       http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=@attr
-     1=dc_title the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc
+     1=title the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc
      </ulink>
     </para>
  
@@ -236,49 +236,49 @@
     Similarly, we can direct searches to the other indexes defined. Or we
      can create boolean combinations of searches on different
      indexes. In this case we search for <literal>the</literal> in
-    <literal>dc_title</literal> and for <literal>fish</literal> in 
-    <literal>dc_description</literal> using the query 
-    <literal>@and @attr 1=dc_title the @attr 1=dc_description fish</literal>.
+    <literal>title</literal> and for <literal>fish</literal> in
+    <literal>description</literal> using the query
+    <literal>@and @attr 1=title the @attr 1=description fish</literal>.
      <ulink
       url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=@and
-     @attr 1=dc_title the
-     @attr 1=dc_description
+     @attr 1=title the
+     @attr 1=description
       fish&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc">
       http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=@and
-     @attr 1=dc_title the
-     @attr 1=dc_description fish&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc
+     @attr 1=title the
+     @attr 1=description fish&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=dc
      </ulink>
     </para>
  
  
    </sect1>
  
-  <sect1 id="tutorial-oai-sru-zebra-indexess">
+  <sect1 id="tutorial-oai-sru-zebra-indexes">
     <title>Investigating the content of the indexes</title>
  
     <para>
-    How doess the magic work? What is inside the indexes? Why is a certain
-    record foound by a search, and another not?. The answer is in the
-    inverterd indexes. You can easily investigate them using the
+    How does the magic work? What is inside the indexes? Why is a certain
+    record found by a search, and another not? The answer is in the
+    inverted indexes. You can easily investigate them using the
      special &zebra; schema
      <literal>zebra::index::fieldname</literal>. In this example you
-    can see that the <literal>dc_title</literal> index has both word
+    can see that the <literal>title</literal> index has both word
      (type <literal>:w</literal>) and phrase (type
-    <literal>:p</literal>) 
-    indexed fields, 
-    <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::index::dc_title">
-     http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::index::dc_title
-    </ulink>    
+    <literal>:p</literal>)
+    indexed fields,
+    <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::index::title">
+     http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::index::title
+    </ulink>
     </para>
  
     <para>
      But where in the indexes did the term match for the query occur?
      Easily answered with the special  &zebra; schema
-    <literal>zebra::snippet</literal>. The matching terma are
-    encapsulated by <literal>&lt;s&gt;</literal> tags. 
+    <literal>zebra::snippet</literal>. The matching terms are
+    encapsulated by <literal>&lt;s&gt;</literal> tags.
      <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::snippet">
       http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::snippet
-    </ulink>    
+    </ulink>
     </para>
  
     <para>
@@ -286,19 +286,19 @@
      found inside my hit set? Try the special  &zebra; schema
      <literal>zebra::facet::fieldname:type</literal>. In this case, we
      investigate additional search terms for the
-    <literal>dc_title:w</literal> index.
-    <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::facet::dc_title:w">
-     http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::facet::dc_title:w
-    </ulink>    
+    <literal>title:w</literal> index.
+    <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::facet::title:w">
+     http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::facet::title:w
+    </ulink>
     </para>
  
     <para>
      One can ask for multiple facets. Here, we want them from phrase
      indexes of type
      <literal>:p</literal>.
-    <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::facet::dc_publisher:p,dc_title:p">
-     http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::facet::dc_publisher:p,dc_title:p
-    </ulink>    
+    <ulink url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::facet::publisher:p,title:p">
+     http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;x-pquery=the&amp;startRecord=1&amp;maximumRecords=1&amp;recordSchema=zebra::facet::publisher:p,title:p
+    </ulink>
     </para>
  
    </sect1>
@@ -310,13 +310,13 @@
     <para>
      The &acro.sru; specification mandates that the &acro.cql; query
     language is supported and properly configured. Also, the server
-    needs to be able to emmit a proper  &acro.explain; &acro.xml;
+    needs to be able to emit a proper  &acro.explain; &acro.xml;
      record, which is used to determine the capabilities of the
      specific server instance.
     </para>
  
     <para>
-    In this example configuration we expoit the similarities between
+    In this example configuration we exploit the similarities between
      the &acro.explain; record and the &acro.cql; query language
     configuration: we generate the latter from the former using an
      &acro.xslt; transformation.
@@ -326,7 +326,7 @@
     </para>
  
     <para>
-    The we are all set to start the &acro.sru;/acro.z3950; server including 
+    We are all set to start the &acro.sru;/&acro.z3950; server including
      &acro.pqf; and &acro.cql; query configuration. It uses the &yaz; frontend
      server configuration - just type
      <screen>
@@ -337,7 +337,7 @@
     <para>
      First, we'd like to be sure that we can see the  &acro.explain;
      &acro.xml; response correctly. You might use either of these equivalent
-    requests: 
+    requests:
      <ulink
       url="http://localhost:9999">http://localhost:9999
      </ulink>
@@ -349,7 +349,7 @@
     </para>
  
     <para>
-    Now we can issue true &acro.sru; requests. For example, 
+    Now we can issue true &acro.sru; requests. For example,
      <literal>dc.title=the
       and dc.description=fish</literal> results in the following page
      <ulink
@@ -362,24 +362,24 @@
     </para>
  
     <para>
-    Scan of indexes is a part of the  &acro.sru; server business. For example, 
+    Scan of indexes is a part of the  &acro.sru; server business. For example,
      scanning the <literal>dc.title</literal> index gives us an idea
      what search terms are found there
      <ulink
       url="http://localhost:9999/?version=1.1&amp;operation=scan&amp;scanClause=dc.title=fish">
       http://localhost:9999/?version=1.1&amp;operation=scan&amp;scanClause=dc.title=fish
      </ulink>,
-    whereas 
+    whereas
      <ulink
       url="http://localhost:9999/?version=1.1&amp;operation=scan&amp;scanClause=dc.identifier=fish">
-     http://localhost:9999/?version=1.1&amp;operation=scan&amp;scanClause=dc.identifier=fish 
+     http://localhost:9999/?version=1.1&amp;operation=scan&amp;scanClause=dc.identifier=fish
      </ulink>
-    accesses the indexed indentifiers.
+    accesses the indexed identifiers.
     </para>
  
     <para>
-    In addition, all &zebra; internal special elemen sets or record
-    schema's of the form   
+    In addition, all &zebra; internal special element sets or record
+    schemas of the form
      <literal>zebra::</literal> just work right out of the box
      <ulink
       url="http://localhost:9999/?version=1.1&amp;operation=searchRetrieve&amp;query=dc.title=the
@@ -397,21 +397,21 @@
  
    <sect1 id="tutorial-oai-z3950">
     <title>Searching the &acro.oai; database by &acro.z3950; protocol</title>
-   
+
     <para>
      In this section we repeat the searches and presents we have done so
     far using the binary &acro.z3950; protocol. You can use any
-    &acro.z3950; client. 
+    &acro.z3950; client.
      For instance, you can use the demo command-line client that comes
-    with &yaz;. 
+    with &yaz;.
     </para>
     <para>
-    Connecting to the server is done by the command 
+    Connecting to the server is done by the command
      <screen>
       yaz-client localhost:9999
      </screen>
     </para>
-   
+
     <para>
      When the client has connected, you can type:
      <screen>
@@ -422,43 +422,43 @@
       Z> show 1+1
      </screen>
     </para>
-   
+
     <para>
      &acro.z3950; presents using presentation stylesheets:
      <screen>
       Z> elements dc
       Z> show 2+1
-     
+
       Z> elements zebra
       Z> show 3+1
      </screen>
     </para>
  
     <para>
-    &acro.z3950; buildin Zebra presents (in this configuration only if 
+    &acro.z3950; built-in Zebra presents (in this configuration only if
      started without yaz-frontendserver):
-    
+
      <screen>
       Z> elements zebra::meta
       Z> show 4+1
-     
+
       Z> elements zebra::meta::sysno
       Z> show 5+1
-     
+
       Z> format sutrs
       Z> show 5+1
       Z> format xml
-     
+
       Z> elements zebra::index
       Z> show 6+1
-     
+
       Z> elements zebra::snippet
       Z> show 7+1
-     
+
       Z> elements zebra::facet::any:w
       Z> show 1+1
-     
-     Z> elements zebra::facet::dc_publisher:p,dc_title:p
+
+     Z> elements zebra::facet::publisher:p,title:p
       Z> show 1+1
      </screen>
     </para>
@@ -477,15 +477,15 @@
  
       Z> find @attr 1=oai_setspec @attr 4=3 7374617475733D756E707562
       Z> show 1+1
-     
-     Z> find @attr 1=dc_title communication
+
+     Z> find @attr 1=title communication
       Z> show 1+1
-     
-     Z> find @attr 1=dc_identifier @attr 4=3  
+
+     Z> find @attr 1=identifier @attr 4=3
       http://resolver.caltech.edu/CaltechCSTR:1986.5228-tr-86
       Z> show 1+1
      </screen>
-    etc, etc. 
+    etc, etc.
     </para>
  
     <para>
@@ -498,8 +498,8 @@
       Z> scan @attr 1=oai_datestamp @attr 4=3 1
       Z> scan @attr 1=oai_setspec @attr 4=3 2000
       Z>
-     Z> scan @attr 1=dc_title communication
-     Z> scan @attr 1=dc_identifier @attr 4=3 a
+     Z> scan @attr 1=title communication
+     Z> scan @attr 1=identifier @attr 4=3 a
      </screen>
     </para>
  
@@ -510,7 +510,7 @@
       Z> querytype cql
       Z> elements dc
       Z>
-     Z> find harry 
+     Z> find harry
       Z>
       Z> find dc.creator = the
       Z> find dc.creator = the
@@ -520,15 +520,15 @@
       Z> find dc.title &gt; some
       Z>
       Z> find dc.identifier="http://resolver.caltech.edu/CaltechCSTR:1978.2276-tr-78"
-     Z> find dc.relation = something 
+     Z> find dc.relation = something
      </screen>
     </para>
  
     <!--
-   etc, etc. Notice that  all indexes defined by 'type="0"' in the 
-   indexing style  sheet must be searched using the 'eq' 
-   relation.    
-   
+   etc, etc. Notice that  all indexes defined by 'type="0"' in the
+   indexing style  sheet must be searched using the 'eq'
+   relation.
+
     Z> find title <> and
  
     fails as well.  ???
@@ -536,34 +536,34 @@
  
     <tip>
      <para>
-     &acro.z3950; scan using server side CQL conversion - 
-     unfortunately, this will _never_ work as it is not supported by the 
+     &acro.z3950; scan using server side CQL conversion -
+     unfortunately, this will _never_ work as it is not supported by the
       &acro.z3950; standard.
-     If you want to use scan using server side CQL conversion, you need to  
+     If you want to use scan using server side CQL conversion, you need to
      make an SRW connection using  yaz-client, or an
       SRU connection using REST Web Services - any browser will do.
      </para>
     </tip>
  
     <tip>
-    <para>    
-     All indexes defined by 'type="0"' in the 
-     indexing style  sheet must be searched using the '@attr 4=3' 
-     structure attribute instruction.   
+    <para>
+     All indexes defined by 'type="0"' in the
+     indexing style  sheet must be searched using the '@attr 4=3'
+     structure attribute instruction.
      </para>
     </tip>
  
     <para>
      Notice that searching and scan on indexes
-    <literal>dc_contributor</literal>,  <literal>dc_language</literal>, 
-    <literal>dc_rights</literal>, and <literal>dc_source</literal> 
-    might fail, simply because none of the records in the small example set 
+    <literal>contributor</literal>,  <literal>language</literal>,
+    <literal>rights</literal>, and <literal>source</literal>
+    might fail, simply because none of the records in the small example set
      have these fields set, and consequently, these indexes might not
-    been created. 
+    have been created.
     </para>
-   
+
    </sect1>
-  
+
   </chapter>
  
  
@@ -576,7 +576,7 @@
   sgml-always-quote-attributes:t
   sgml-indent-step:1
   sgml-indent-data:t
- sgml-parent-document: "zebra.xml"
+ sgml-parent-document: "idzebra.xml"
   sgml-local-catalogs: nil
   sgml-namecase-general:t
   End: