Update m4

[idzebra-moved-to-github.git] / doc / administration.xml
diff --git a/doc/administration.xml b/doc/administration.xml

index a1a7da2..762ba7d 100644 (file)
--- a/doc/administration.xml
+++ b/doc/administration.xml
@@ -1,5 +1,4 @@
  <chapter id="administration">
- <!-- $Id: administration.xml,v 1.51 2007-05-24 13:44:09 adam Exp $ -->
   <title>Administrating &zebra;</title>
   <!-- ### It's a bit daft that this chapter (which describes half of
            the configuration-file formats) is separated from
@@ -300,6 +299,32 @@
       </varlistentry>
  
       <varlistentry>
+      <term>index: <replaceable>filename</replaceable></term>
+      <listitem>
+       <para>
+       Defines the filename which holds fields structure
+       definitions. If omitted, the file <filename>default.idx</filename>
+       is read.
+       Refer to <xref linkend="default-idx-file"/> for
+       more information.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
+      <term>sortmax: <replaceable>integer</replaceable></term>
+      <listitem>
+       <para>
+    Specifies the maximum number of records that will be sorted
+    in a result set.  If the result set contains more than 
+    <replaceable>integer</replaceable> records, records after the
+    limit will not be sorted.  If omitted, the default value is
+    1,000.
+       </para>
+      </listitem>
+     </varlistentry>
+
+     <varlistentry>
        <term>staticrank: <replaceable>integer</replaceable></term>
        <listitem>
         <para>
@@ -313,13 +338,13 @@
  
  
       <varlistentry>
-      <term>estimatehits:: <replaceable>integer</replaceable></term>
+      <term>estimatehits: <replaceable>integer</replaceable></term>
        <listitem>
         <para>
-       Controls whether &zebra; should calculate approximite hit counts and
+       Controls whether &zebra; should calculate approximate hit counts and
         at which hit count it is to be enabled.
-       A value of 0 disables approximiate hit counts.
-       For a positive value approximaite hit count is enabled
+       A value of 0 disables approximate hit counts.
+       For a positive value approximate hit count is enabled
         if it is known to be larger than <replaceable>integer</replaceable>.
         </para>
         <para>
@@ -413,7 +438,7 @@
       <replaceable>permstring</replaceable></term>
       <listitem>
        <para>
-       Specifies permissions (priviledge) for a user that are allowed
+       Specifies permissions (privilege) for a user that are allowed
         to access &zebra; via the passwd system. There are two kinds
         of permissions currently: read (r) and write(w). By default
         users not listed in a permission directive are given the read
@@ -428,13 +453,53 @@
      </varlistentry>
  
      <varlistentry>
-      <term>dbaccess <replaceable>accessfile</replaceable></term>
+      <term>dbaccess: <replaceable>accessfile</replaceable></term>
        <listitem>
          <para>
           Names a file which lists database subscriptions for individual users.
-         The access file should consists of lines of the form <literal>username:
-         dbnames</literal>, where dbnames is a list of database names, seprated by
-         '+'. No whitespace is allowed in the database list.
+         The access file should consists of lines of the form
+          <literal>username: dbnames</literal>, where dbnames is a list of
+          database names, separated by '+'. No whitespace is allowed in the
+          database list.
+       </para>
+      </listitem>
+    </varlistentry>
+
+    <varlistentry>
+      <term>encoding: <replaceable>charsetname</replaceable></term>
+      <listitem>
+        <para>
+         Tells &zebra; to interpret the terms in Z39.50 queries as
+         having been encoded using the specified character
+         encoding.  The default is <literal>ISO-8859-1</literal>; one
+         useful alternative is <literal>UTF-8</literal>.
+       </para>
+      </listitem>
+    </varlistentry>
+
+    <varlistentry>
+      <term>storeKeys: <replaceable>value</replaceable></term>
+      <listitem>
+        <para>
+          Specifies whether &zebra; keeps a copy of indexed keys.
+          Use a value of 1 to enable; 0 to disable. If storeKeys setting is
+          omitted, it is enabled. Enabled storeKeys
+          are required for updating and deleting records.  Disable only 
+          storeKeys to save space and only plan to index data once.
+       </para>
+      </listitem>
+    </varlistentry>
+
+    <varlistentry>
+      <term>storeData: <replaceable>value</replaceable></term>
+      <listitem>
+        <para>
+          Specifies whether &zebra; keeps a copy of indexed records.
+          Use a value of 1 to enable; 0 to disable. If storeData setting is
+          omitted, it is enabled. A storeData setting of 0 (disabled) makes
+          Zebra fetch records from the original locaction in the file 
+          system using filename, file offset and file length. For the
+          DOM and ALVIS filter, the storeData setting is ignored.
         </para>
        </listitem>
      </varlistentry>
@@ -761,9 +826,7 @@
     The value of the <literal>register</literal> setting is a sequence
     of tokens. Each token takes the form:
     
-   <screen>
-    <emphasis>dir</emphasis><literal>:</literal><emphasis>size</emphasis>. 
-   </screen>
+   <emphasis>dir</emphasis><literal>:</literal><emphasis>size</emphasis> 
     
     The <emphasis>dir</emphasis> specifies a directory in which index files
     will be stored and the <emphasis>size</emphasis> specifies the maximum
@@ -776,19 +839,21 @@
     <literal>k</literal> for kilobytes.
     <literal>M</literal> for megabytes,
     <literal>G</literal> for gigabytes.
+   Specifying a negative value disables the checking (it still needs the unit, 
+   use <literal>-1b</literal>).
    </para>
    
    <para>
-   For instance, if you have allocated two disks for your register, and
+   For instance, if you have allocated three disks for your register, and
     the first disk is mounted
-   on <literal>/d1</literal> and has 2GB of free space and the
-   second, mounted on <literal>/d2</literal> has 3.6 GB, you could
-   put this entry in your configuration file:
+   on <literal>/d1</literal> and has 2GB of free space, the
+   second, mounted on <literal>/d2</literal> has 3.6 GB, and the third,
+   on which you have more space than you bother to worry about, mounted on 
+   <literal>/d3</literal> you could put this entry in your configuration file:
     
     <screen>
-    register: /d1:2G /d2:3600M
+    register: /d1:2G /d2:3600M /d3:-1b
     </screen>
-   
    </para>
    
    <para>
@@ -1015,7 +1080,7 @@
    <title>Static Ranking</title>
    
     <para>
-    &zebra; uses internally inverted indexes to look up term occurencies
+    &zebra; uses internally inverted indexes to look up term frequencies
      in documents. Multiple queries from different indexes can be
      combined by the binary boolean operations <literal>AND</literal>, 
      <literal>OR</literal> and/or <literal>NOT</literal> (which
@@ -1106,7 +1171,7 @@
     <para>
       The default <literal>rank-1</literal> ranking module implements a 
       TF/IDF (Term Frequecy over Inverse Document Frequency) like
-     algorithm. In contrast to the usual defintion of TF/IDF
+     algorithm. In contrast to the usual definition of TF/IDF
       algorithms, which only considers searching in one full-text
       index, this one works on multiple indexes at the same time.
       More precisely, 
@@ -1523,9 +1588,18 @@ where g = rset_count(terms[i]->rset) is the count of all documents in this speci
      is able to parse &acro.xml; records, you may use any of the two
      declarations (but not both simultaneously!)
      <screen>    
-     recordType: grs.xml
-     # recordType: alvis.filter_alvis_config.xml
+     recordType: dom.filter_dom_conf.xml
+     # recordType: grs.xml
      </screen>
+    Notice the difference to the specific instructions
+    <screen>    
+     recordType.xml: dom.filter_dom_conf.xml
+     # recordType.xml: grs.xml
+    </screen> 
+    which only work when indexing XML files from the filesystem using
+    the <literal>*.xml</literal> naming convention.
+   </para>
+   <para>
      To enable transaction safe shadow indexing,
      which is extra important for this kind of operation, set
      <screen>
@@ -1597,16 +1671,18 @@ where g = rset_count(terms[i]->rset) is the count of all documents in this speci
           <entry><literal>&acro.xml; string</literal></entry>
           <entry>An &acro.xml; formatted string containing the record</entry>
          </row>
-        <row>
-         <entry><literal>syntax</literal></entry>
-         <entry><literal>'xml'</literal></entry>
-         <entry>Only &acro.xml; record syntax is supported</entry>
-        </row>
+       <row>
+       <entry><literal>syntax</literal></entry>
+       <entry><literal>'xml'</literal></entry>
+       <entry>XML/SUTRS/MARC. GRS-1 not supported.
+        The default filter (record type) as given by recordType in
+        zebra.cfg is used to parse the record.</entry>
+       </row>
          <row>
           <entry><literal>recordIdOpaque</literal></entry>
           <entry><literal>string</literal></entry>
           <entry>
-         Optional  client-supplied, opaque record
+         Optional client-supplied, opaque record
           identifier used under insert operations.
          </entry>
          </row>
@@ -1804,6 +1880,76 @@ where g = rset_count(terms[i]->rset) is the count of all documents in this speci
      </screen>  
      </para>
      </sect2>
+
+   <sect2 id="administration-extended-services-debugging">
+    <title>Extended services debugging guide</title>
+    <para>
+     When debugging ES over PHP we recommend the following order of tests:
+    </para>
+
+    <itemizedlist>
+     <listitem>
+      <para>
+       Make sure you have a nice record on your filesystem, which you can 
+       index from the filesystem by use of the zebraidx command.
+       Do it exactly as you planned, using one of the GRS-1 filters,
+       or the DOMXML filter. 
+       When this works, proceed.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       Check that your server setup is OK before you even coded one single 
+       line PHP using ES.
+       Take the same record form the file system, and send as ES via 
+       <literal>yaz-client</literal> like described in
+       <xref linkend="administration-extended-services-yaz-client"/>,
+       and
+       remember the <literal>-a</literal> option which tells you what
+       goes over the wire! Notice also the section on permissions:
+       try 
+       <screen>
+        perm.anonymous: rw
+       </screen>
+       in <literal>zebra.cfg</literal> to make sure you do not run into 
+       permission  problems (but never expose such an insecure setup on the 
+       internet!!!). Then, make sure to set the general
+       <literal>recordType</literal> instruction, pointing correctly
+       to the GRS-1 filters,
+       or the DOMXML filters.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       If you insist on using the <literal>sysno</literal> in the 
+       <literal>recordIdNumber</literal> setting, 
+       please make sure you do only updates and deletes. Zebra's internal 
+       system number is not allowed for
+       <literal>recordInsert</literal> or 
+       <literal>specialUpdate</literal> actions 
+       which result in fresh record inserts.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       If <literal>shadow register</literal> is enabled in your 
+       <literal>zebra.cfg</literal>, you must remember running the 
+       <screen>
+        Z> adm-commit
+       </screen>
+       command as well.
+      </para>
+     </listitem>
+     <listitem>
+      <para>
+       If this works, then proceed to do the same thing in your PHP script.
+      </para>
+     </listitem>
+    </itemizedlist>
+
+
+   </sect2>
+
   </sect1>
  
  </chapter>