Deal with double unlocks (which does happen sometimes)

[idzebra-moved-to-github.git] / doc / administration.xml
diff --git a/doc/administration.xml b/doc/administration.xml

index eee315e..503b032 100644 (file)
--- a/doc/administration.xml
+++ b/doc/administration.xml
@@ -1,5 +1,5 @@
  <chapter id="administration">
  <chapter id="administration">
- <!-- $Id: administration.xml,v 1.23 2006-02-15 12:08:47 marc Exp $ -->
+ <!-- $Id: administration.xml,v 1.41 2006-06-30 10:58:41 marc Exp $ -->
   <title>Administrating Zebra</title>
   <!-- ### It's a bit daft that this chapter (which describes half of
            the configuration-file formats) is separated from
   <title>Administrating Zebra</title>
   <!-- ### It's a bit daft that this chapter (which describes half of
            the configuration-file formats) is separated from
@@ -356,7 +356,7 @@
         Specifies a file with description of user accounts for Zebra.
         File format is similar to that used by the passwd directive except
         that the passwords are encrypted. Use Apache's htpasswd or similar
         Specifies a file with description of user accounts for Zebra.
         File format is similar to that used by the passwd directive except
         that the passwords are encrypted. Use Apache's htpasswd or similar
-       for maintenanace.
+       for maintenance.
        </para>
       </listitem>
      </varlistentry>
        </para>
       </listitem>
      </varlistentry>
@@ -370,7 +370,7 @@
         to access Zebra via the passwd system. There are two kinds
         of permissions currently: read (r) and write(w). By default
         users not listed in a permission directive are given the read
         to access Zebra via the passwd system. There are two kinds
         of permissions currently: read (r) and write(w). By default
         users not listed in a permission directive are given the read
-       priviledge. To specify permissions for a user with no
+       privilege. To specify permissions for a user with no
         username, or Z39.50 anonymous style use
         <literal>anonymous</literal>. The permstring consists of
         a sequence of characters. Include character <literal>w</literal>
         username, or Z39.50 anonymous style use
         <literal>anonymous</literal>. The permstring consists of
         a sequence of characters. Include character <literal>w</literal>
@@ -402,7 +402,7 @@
    <para>
     The default behavior of the Zebra system is to reference the
     records from their original location, i.e. where they were found when you
    <para>
     The default behavior of the Zebra system is to reference the
     records from their original location, i.e. where they were found when you
-   ran <literal>zebraidx</literal>.
+   run <literal>zebraidx</literal>.
     That is, when a client wishes to retrieve a record
     following a search operation, the files are accessed from the place
     where you originally put them - if you remove the files (without
     That is, when a client wishes to retrieve a record
     following a search operation, the files are accessed from the place
     where you originally put them - if you remove the files (without
@@ -923,7 +923,48 @@
  
  
   <sect1 id="administration-ranking">
  
  
   <sect1 id="administration-ranking">
-  <title>Static and Dynamic Ranking</title>
+  <title>Relevance Ranking and Sorting of Result Sets</title>
+
+  <sect2>
+   <title>Overview</title>
+   <para>
+    The default ordering of a result set is left up to the server,
+    which inside Zebra means sorting in ascending document ID order. 
+    This is not always the order in which humans want to browse the
+    sometimes quite large hit sets. Ranking and sorting come to the rescue.
+   </para>
+
+   <para> 
+    In cases where a good presentation ordering can be computed at
+    indexing time, we can use a fixed <literal>static ranking</literal>
+    scheme, which is provided for the <literal>alvis</literal>
+    indexing filter. This defines a fixed ordering of hit lists,
+    independently of the query issued. 
+   </para>
+
+   <para>
+    There are cases, however, where relevance of hit set documents is
+    highly dependent on the query processed.
+    Simply put, <literal>dynamic relevance ranking</literal> 
+    sorts a set of retrieved records such that those most likely to be
+    relevant to your request are retrieved first. 
+    Internally, Zebra retrieves all documents that satisfy your
+    query, and re-orders the hit list to arrange them based on
+    a measurement of similarity between your query and the content of
+    each record. 
+   </para>
+
+   <para>
+    Finally, there are situations where hit sets of documents should be
+    <literal>sorted</literal> during query time according to the
+    lexicographical ordering of certain sort indexes created at
+    indexing time.
+   </para>
+  </sect2>
+
+
+ <sect2 id="administration-ranking-static">
+  <title>Static Ranking</title>
    
     <para>
     Zebra uses internally inverted indexes to look up term occurrences
    
     <para>
     Zebra uses internally inverted indexes to look up term occurrences
@@ -948,81 +989,778 @@
      <screen>
      staticrank: 1 
      </screen> 
      <screen>
      staticrank: 1 
      </screen> 
-    directive in the main core Zebra config file, the internal document
-    keys used for ordering are augmented by a preceeding integer, which
+    directive in the main core Zebra configuration file, the internal document
+    keys used for ordering are augmented by a preceding integer, which
      contains the static rank of a given document, and the index lists
      are ordered 
      first by ascending static rank,
      then by ascending document <literal>ID</literal>.
      contains the static rank of a given document, and the index lists
      are ordered 
      first by ascending static rank,
      then by ascending document <literal>ID</literal>.
-   </para>
-   <para>
-    This implies that the default rank <literal>0</literal> 
-    is the best rank at the
-    beginning of the list, and <literal>max int</literal> 
-    is the worst static rank.
+    Zero
+    is the ``best'' rank, as it occurs at the
+    beginning of the list; higher numbers represent worse scores.
     </para>
     <para>
      The experimental <literal>alvis</literal> filter provides a
      directive to fetch static rank information out of the indexed XML
     </para>
     <para>
      The experimental <literal>alvis</literal> filter provides a
      directive to fetch static rank information out of the indexed XML
-    records, thus making <emphasis>all</emphasis> hit sets orderd
+    records, thus making <emphasis>all</emphasis> hit sets ordered
      after <emphasis>ascending</emphasis> static
      rank, and for those doc's which have the same static rank, ordered
      after <emphasis>ascending</emphasis> doc <literal>ID</literal>.
      after <emphasis>ascending</emphasis> static
      rank, and for those doc's which have the same static rank, ordered
      after <emphasis>ascending</emphasis> doc <literal>ID</literal>.
-    See <xref linkend="record-model-alvisxslt"/> for the glory details.
+    See <xref linkend="record-model-alvisxslt"/> for the gory details.
     </para>
     </para>
+    </sect2>
+
+
+ <sect2 id="administration-ranking-dynamic">
+  <title>Dynamic Ranking</title>
     <para>
     <para>
-    If one wants to do a little fiddeling with the static rank order,
-    one has to invoke additional re-ranking/re-ordering using dynamic 
-    reranking or score functions. These functions return positive
-    interger scores, where <emphasis>highest</emphasis> score is 
-    <emphasis>best</emphasis>, which means that the
-    hit sets will be sorted according to
-    <emphasis>decending</emphasis> 
+    In order to fiddle with the static rank order, it is necessary to
+    invoke additional re-ranking/re-ordering using dynamic
+    ranking or score functions. These functions return positive
+    integer scores, where <emphasis>highest</emphasis> score is 
+    ``best'';
+    hit sets are sorted according to <emphasis>descending</emphasis> 
      scores (in contrast
      to the index lists which are sorted according to
      scores (in contrast
      to the index lists which are sorted according to
-    <emphasis>ascending</emphasis> rank  number and document ID).
+    ascending rank number and document ID).
     </para>
     </para>
-   <!--
     <para>
     <para>
-    Those are defined in the zebra C source files 
-    <screen>     
-    "rank-1" : zebra/index/rank1.c  
-               default TF/IDF like zebra dynamic ranking
-    "rank-static" : zebra/index/rankstatic.c
-               do-nothing dummy static ranking (this is just to prove
-               that the static rank can be used in dynamic ranking functions)  
-     "zvrank" : zebra/index/zvrank.c
-               many different dynamic TF/IDF ranking functions 
-    </screen> 
+    Dynamic ranking is enabled by a directive like one of the
+    following in the zebra configuration file (use only one of these at a time!):
+    <screen> 
+    rank: rank-1        # default TF/IDF like
+    rank: rank-static   # dummy do-nothing
+    </screen>
     </para>
     </para>
-   -->
+ 
     <para>
     <para>
-    Those are in the zebra config file enabled by a directive like (use
-    only one of these a time!):
-    <screen> 
-    rank: rank-1        # default
-    rank: rank-static   # dummy 
-    rank: zvrank        # TDF-IDF like
+    Dynamic ranking is done at query time rather than
+    indexing time (this is why we
+    call it ``dynamic ranking'' in the first place).
+    It is invoked by adding
+    the Bib-1 relation attribute with
+    value ``relevance'' to the PQF query (that is,
+    <literal>@attr&nbsp;2=102</literal>, see also  
+    <ulink url="&url.z39.50;bib1.html">
+     The BIB-1 Attribute Set Semantics</ulink>, also in 
+      <ulink url="&url.z39.50.attset.bib1;">HTML</ulink>). 
+    To find all articles with the word <literal>Eoraptor</literal> in
+    the title, and present them relevance ranked, issue the PQF query:
+    <screen>
+     @attr 2=102 @attr 1=4 Eoraptor
      </screen>
      </screen>
-    Notice that the <literal>rank-1</literal> and
-    <literal>zvrank</literal> do not use the static rank 
-    information in the list keys, and will produce the same ordering
-    with our without static ranking enabled.
     </para>
     </para>
+
+    <sect3 id="administration-ranking-dynamic-rank1">
+     <title>Dynamically ranking using PQF queries with the 'rank-1' 
+      algorithm</title>
+
     <para>
     <para>
+     The default <literal>rank-1</literal> ranking module implements a 
+     TF/IDF (Term Frequency over Inverse Document Frequency) like
+     algorithm. In contrast to the usual definition of TF/IDF
+     algorithms, which only considers searching in one full-text
+     index, this one works on multiple indexes at the same time.
+     More precisely, 
+     Zebra does boolean queries and searches in specific addressed
+     indexes (there are inverted indexes pointing from terms in the
+     dictionary to documents and term positions inside documents). 
+     It works like this:
+     <variablelist>
+      <varlistentry>
+       <term>Query Components</term>
+       <listitem>
+        <para>
+         First, the boolean query is dismantled into its principal components,
+         i.e. atomic queries where one term is looked up in one index.
+         For example, the query
+         <screen>
+        @attr 2=102 @and @attr 1=1010 Utah @attr 1=1018 Springer
+         </screen>
+         is a boolean AND between the atomic parts
+         <screen>
+       @attr 2=102 @attr 1=1010 Utah
+         </screen>
+          and
+         <screen>
+       @attr 2=102 @attr 1=1018 Springer
+         </screen>
+         which are each processed separately.
+        </para>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term>Atomic hit lists</term>
+       <listitem>
+        <para>
+         Second, for each atomic query, the hit list of documents is
+         computed.
+        </para>
+        <para>
+         In this example, two hit lists, one for each index  
+         <literal>@attr 1=1010</literal>  and  
+         <literal>@attr 1=1018</literal> are computed.
+        </para>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term>Atomic scores</term>
+       <listitem>
+        <para>
+         Third, each document in the hit list is assigned a score (_if_ ranking
+         is enabled and requested in the query)  using a TF/IDF scheme.
+        </para>
+        <para>
+         In this example, both atomic parts of the query assign the magic
+         <literal>@attr 2=102</literal> relevance attribute, and are
+         to be used in the relevance ranking functions. 
+        </para>
+        <para>
+         It is possible to apply dynamic ranking on only parts of the
+         PQF query: 
+         <screen>
+          @and @attr 2=102 @attr 1=1010 Utah @attr 1=1018 Springer
+         </screen>
+         searches for all documents which have the term 'Utah' in the
+         body of text, and which have the term 'Springer' in the publisher
+         field, and sorts them in the order of the relevance ranking made on
+         the body-of-text index only. 
+        </para>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term>Hit list merging</term>
+       <listitem>
+        <para>
+         Fourth, the atomic hit lists are merged according to the boolean
+         conditions to a final hit list of documents to be returned.
+        </para>
+        <para>
+        This step is always performed, regardless of whether
+        dynamic ranking is enabled or not.
+        </para>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term>Document score computation</term>
+       <listitem>
+        <para>
+         Fifth, the total score of a document is computed as a linear
+         combination of the atomic scores of the atomic hit lists.
+        </para>
+        <para>
+         Ranking weights may be used to pass a value to a ranking
+         algorithm, using the non-standard BIB-1 attribute type 9.
+         This allows one branch of a query to use one value while
+         another branch uses a different one.  For example, we can search
+         for <literal>utah</literal> in the 
+         <literal>@attr 1=4</literal> index with weight 30, and for
+         <literal>city</literal> in the <literal>@attr 1=1010</literal> index with weight 20:
+         <screen>
+         @attr 2=102 @or @attr 9=30 @attr 1=4 utah @attr 9=20 @attr 1=1010 city
+         </screen>
+        </para>
+        <para>
+         The default weight is 34
+         (intended as roughly sqrt(1000)), as the Z39.50 standard prescribes that the top score
+         is 1000 and the bottom score is 0, encoded in integers.
+        </para>
+        <warning>
+         <para>
+          The ranking-weight feature is experimental. It may change in future
+          releases of zebra. 
+         </para>
+        </warning>
+       </listitem>
+      </varlistentry>
+
+      <varlistentry>
+       <term>Re-sorting of hit list</term>
+       <listitem>
+        <para>
+         Finally, the final hit list is re-ordered according to scores.
+        </para>
+       </listitem>
+      </varlistentry>
+     </variablelist>
+ 
+
+<!--
+Still need to describe the exact TF/IDF formula. Here's the info, need -->
+<!--to extract it in human readable form .. MC
+
+static int calc (void *set_handle, zint sysno, zint staticrank,
+                 int *stop_flag)
+{
+    int i, lo, divisor, score = 0;
+    struct rank_set_info *si = (struct rank_set_info *) set_handle;
+
+    if (!si->no_rank_entries)
+        return -1;   /* ranking not enabled for any terms */
+
+    for (i = 0; i < si->no_entries; i++)
+    {
+        yaz_log(log_level, "calc: i=%d rank_flag=%d lo=%d",
+                i, si->entries[i].rank_flag, si->entries[i].local_occur);
+        if (si->entries[i].rank_flag && (lo = si->entries[i].local_occur))
+            score += (8+log2_int (lo)) * si->entries[i].global_inv *
+                si->entries[i].rank_weight;
+    }
+    divisor = si->no_rank_entries * (8+log2_int (si->last_pos/si->no_entries));
+    score = score / divisor;
+    yaz_log(log_level, "calc sysno=" ZINT_FORMAT " score=%d", sysno, score);
+    if (score > 1000)
+        score = 1000;
+    /* reset the counts for the next term */
+    for (i = 0; i < si->no_entries; i++)
+        si->entries[i].local_occur = 0;
+    return score;
+}
+
+
+where lo = si->entries[i].local_occur is the local documents term-within-index frequency, si->entries[i].global_inv represents the IDF part (computed in static void *begin()), and
+si->entries[i].rank_weight is the weight assigned per index (default 34, or set in the @attr 9=xyz magic)
+
+Finally, the IDF part is computed as:
+
+static void *begin (struct zebra_register *reg,
+                    void *class_handle, RSET rset, NMEM nmem,
+                    TERMID *terms, int numterms)
+{
+    struct rank_set_info *si =
+        (struct rank_set_info *) nmem_malloc (nmem,sizeof(*si));
+    int i;
+
+    yaz_log(log_level, "rank-1 begin");
+    si->no_entries = numterms;
+    si->no_rank_entries = 0;
+    si->nmem=nmem;
+    si->entries = (struct rank_term_info *)
+        nmem_malloc (si->nmem, sizeof(*si->entries)*numterms);
+    for (i = 0; i < numterms; i++)
+    {
+        zint g = rset_count(terms[i]->rset);
+        yaz_log(log_level, "i=%d flags=%s '%s'", i,
+                terms[i]->flags, terms[i]->name );
+        if  (!strncmp (terms[i]->flags, "rank,", 5))
+        {
+            const char *cp = strstr(terms[i]->flags+4, ",w=");
+            si->entries[i].rank_flag = 1;
+            if (cp)
+                si->entries[i].rank_weight = atoi (cp+3);
+            else
+              si->entries[i].rank_weight = 34; /* sqrroot of 1000 */
+            yaz_log(log_level, " i=%d weight=%d g="ZINT_FORMAT, i,
+                     si->entries[i].rank_weight, g);
+            (si->no_rank_entries)++;
+        }
+        else
+            si->entries[i].rank_flag = 0;
+        si->entries[i].local_occur = 0;  /* FIXME */
+        si->entries[i].global_occur = g;
+        si->entries[i].global_inv = 32 - log2_int (g);
+        yaz_log(log_level, " global_inv = %d g = " ZINT_FORMAT,
+                (int) (32-log2_int (g)), g);
+        si->entries[i].term = terms[i];
+        si->entries[i].term_index=i;
+        terms[i]->rankpriv = &(si->entries[i]);
+    }
+    return si;
+}
+
+
+where g = rset_count(terms[i]->rset) is the count of all documents in this specific index hit list, and the IDF part then is
+
+ si->entries[i].global_inv = 32 - log2_int (g);
+   -->
+
+   </para>
+
+
+    <para>
+    The <literal>rank-1</literal> algorithm
+    does not use the static rank 
+    information in the list keys, and will produce the same ordering
+    with or without static ranking enabled.
+    </para>
+ 
+    </sect3>
+
+    <!--
+    <sect3 id="administration-ranking-dynamic-rankstatic">
+     <title>Dynamically ranking PQF queries with the 'rank-static' 
+      algorithm</title>
+    <para>
      The dummy <literal>rank-static</literal> reranking/scoring
      function returns just 
      <literal>score = max int - staticrank</literal>
      The dummy <literal>rank-static</literal> reranking/scoring
      function returns just 
      <literal>score = max int - staticrank</literal>
-    in order to preserve the ordering of hit sets with and without it's
-    call.
-     Obviously, to combine static and dynamic ranking usefully, one wants
+    in order to preserve the static ordering of hit sets that would
+    have been produced had it not been invoked.
+    Obviously, to combine static and dynamic ranking usefully,
+    it is necessary
      to make a new ranking 
      to make a new ranking 
-    function, which is left
+    function; this is left
      as an exercise for the reader. 
     </para>
      as an exercise for the reader. 
     </para>
+    </sect3>
+    -->
+
+ 
+   <warning>
+     <para>
+      <literal>Dynamic ranking</literal> is not compatible
+      with <literal>estimated hit sizes</literal>, as all documents in
+      a hit set must be accessed to compute the correct placing in a
+      ranking sorted list. Therefore the relation attribute setting
+      <literal>@attr&nbsp;2=102</literal> clashes with 
+      <literal>@attr&nbsp;9=integer</literal>. 
+     </para>
+   </warning>  
+
+   <!--
+    we might want to add ranking like this:
+    UNPUBLISHED:
+    Simple BM25 Extension to Multiple Weighted Fields
+    Stephen Robertson, Hugo Zaragoza and Michael Taylor
+    Microsoft Research
+    ser@microsoft.com
+    hugoz@microsoft.com
+    mitaylor@microsoft.com
+   -->
+
+
+    <sect3 id="administration-ranking-dynamic-cql">
+     <title>Dynamically ranking CQL queries</title>
+     <para>
+      Dynamic ranking can be enabled during server side CQL
+      query expansion by adding <literal>@attr&nbsp;2=102</literal>
+      chunks to the CQL config file. For example
+      <screen>
+       relationModifier.relevant               = 2=102
+      </screen>
+      invokes dynamic ranking each time a CQL query of the form 
+      <screen>
+       Z> querytype cql
+       Z> f alvis.text =/relevant house
+      </screen>
+      is issued. Dynamic ranking can also be automatically used on
+      specific CQL indexes by (for example) setting
+      <screen>
+       index.alvis.text                        = 1=text 2=102
+      </screen>
+      which then invokes dynamic ranking each time a CQL query of the form 
+      <screen>
+       Z> querytype cql
+       Z> f alvis.text = house
+      </screen>
+      is issued.
+     </para>
+     
+    </sect3>
+
+    </sect2>
+
+
+ <sect2 id="administration-ranking-sorting">
+  <title>Sorting</title>
+   <para>
+     Zebra sorts efficiently using special sorting indexes
+     (type=<literal>s</literal>), so each sortable index must be known
+     at indexing time, specified in the configuration of record
+     indexing.  For example, to enable sorting according to the BIB-1
+     <literal>Date/time-added-to-db</literal> field, one could add the line
+     <screen>
+        xelm /*/@created               Date/time-added-to-db:s
+     </screen>
+     to any <literal>.abs</literal> record-indexing configuration file.
+     Similarly, one could add an indexing element of the form
+     <screen><![CDATA[       
+      <z:index name="date-modified" type="s">
+       <xsl:value-of select="some/xpath"/>
+      </z:index>
+      ]]></screen>
+     to any <literal>alvis</literal>-filter indexing stylesheet.
+     </para>
+     <para>
+      Sorting can be specified at searching time using a query term
+      carrying the non-standard
+      BIB-1 attribute-type <literal>7</literal>.  This removes the
+      need to send a Z39.50 <literal>Sort Request</literal>
+      separately, and can dramatically improve latency when the client
+      and server are on separate networks.
+      The sorting part of the query is separate from the rest of the
+      query - the actual search specification - and must be combined
+      with it using OR.
+     </para>
+     <para>
+      A sorting subquery needs two attributes: an index (such as a
+      BIB-1 type-1 attribute) specifying which index to sort on, and a
+      type-7 attribute whose value is <literal>1</literal> for
+      ascending sorting, or <literal>2</literal> for descending.  The
+      term associated with the sorting attribute is the priority of
+      the sort key, where <literal>0</literal> specifies the primary
+      sort key, <literal>1</literal> the secondary sort key, and so
+      on.
+     </para>
+    <para>For example, a search for water, sort by title (ascending),
+    is expressed by the PQF query
+     <screen>
+     @or @attr 1=1016 water @attr 7=1 @attr 1=4 0
+     </screen>
+      whereas a search for water, sort by title ascending, 
+     then date descending would be
+     <screen>
+     @or @or @attr 1=1016 water @attr 7=1 @attr 1=4 0 @attr 7=2 @attr 1=30 1
+     </screen>
+    </para>
+    <para>
+     Notice the fundamental differences between <literal>dynamic
+     ranking</literal> and <literal>sorting</literal>: there can be
+     only one ranking function defined and configured; but multiple
+     sorting indexes can be specified dynamically at search
+     time. Ranking does not need to use specific indexes, so
+     dynamic ranking can be enabled and disabled without
+     re-indexing; whereas, sorting indexes need to be
+     defined before indexing.
+     </para>
+
+ </sect2>
+
+
+ </sect1>
+
+ <sect1 id="administration-extended-services">
+  <title>Extended Services: Remote Insert, Update and Delete</title>
+  
+   <note><para>
+     Extended services are only supported when accessing the Zebra
+     server using the <ulink url="&url.z39.50;">Z39.50</ulink>
+     protocol. The <ulink url="&url.sru;">SRU</ulink> protocol does
+     not support extended services.
+    </para></note>
+
+  <para>
+    The extended services are not enabled by default in zebra - due to the
+    fact that they modify the system. Zebra can be configured
+    to allow anybody to
+    search, and to allow only updates for a particular admin user
+    in the main zebra configuration file <filename>zebra.cfg</filename>.
+    For user <literal>admin</literal>, you could use:
+    <screen>
+     perm.anonymous: r
+     perm.admin: rw
+     passwd: passwordfile
+    </screen>
+    And in the password file 
+    <filename>passwordfile</filename>, you have to specify users and
+    encrypted passwords as colon separated strings. 
+    Use a tool like <filename>htpasswd</filename> 
+    to maintain the encrypted passwords. 
+    <screen> 
+     admin:secret
+    </screen>
+    It is essential to configure  Zebra to store records internally, 
+    and to support
+    modifications and deletion of records:
+    <screen>
+     storeData: 1
+     storeKeys: 1
+    </screen>
+    The general record type should be set to any record filter which
+    is able to parse XML records; you may use either of the two
+    declarations (but not both simultaneously!):
+    <screen>    
+     recordType: grs.xml
+     # recordType: alvis.filter_alvis_config.xml
+    </screen>
+    To enable transaction safe shadow indexing,
+    which is extra important for this kind of operation, set
+    <screen>
+     shadow: directoryname: size (e.g. 1000M)
+    </screen>
+   </para>
+   <note><para>It is not possible to carry information about record types or
+    similar to Zebra when using extended services, due to
+    limitations of the <ulink url="&url.z39.50;">Z39.50</ulink>
+    protocol. Therefore, indexing filters can not be chosen on a
+    per-record basis. One and only one general XML indexing filter
+    must be defined.</para>
+    <!-- but because it is represented as an OID, we would need some
+    form of proprietary mapping scheme between record type strings and
+    OIDs. -->
+    <!--
+    However, as a minimum, it would be extremely useful to enable
+    people to use MARC21, assuming grs.marcxml.marc21 as a record
+    type.  
+    -->
+   </note>
+
+
+   <sect2 id="administration-extended-services-z3950">
+    <title>Extended services in the Z39.50 protocol</title>
+
+    <para>
+     The <ulink url="&url.z39.50;">Z39.50</ulink> standard allows
+     servers to accept special binary <emphasis>extended services</emphasis>
+     protocol packages, which may be used to insert, update and delete
+     records into servers. These carry  control and update
+     information to the servers, which are encoded in seven package fields: 
+    </para>
+
+
+     <table id="administration-extended-services-z3950-table"
+      frame="all" rowsep="1" colsep="1" align="center">
+
+      <caption>Extended services Z39.50 Package Fields</caption>
+       <thead>
+       <tr>
+         <td>Parameter</td>
+         <td>Value</td>
+         <td>Notes</td>
+        </tr>
+      </thead>
+       <tbody>
+        <tr>
+         <td><literal>type</literal></td>
+         <td><literal>'update'</literal></td>
+         <td>Must be set to trigger extended services</td>
+        </tr>
+        <tr>
+         <td><literal>action</literal></td>
+         <td><literal>string</literal></td>
+        <td>
+         Extended service action type with 
+         one of four possible values: <literal>recordInsert</literal>,
+         <literal>recordReplace</literal>,
+         <literal>recordDelete</literal>,
+         and <literal>specialUpdate</literal>
+        </td>
+        </tr>
+        <tr>
+         <td><literal>record</literal></td>
+         <td><literal>XML string</literal></td>
+         <td>An XML formatted string containing the record</td>
+        </tr>
+        <tr>
+         <td><literal>syntax</literal></td>
+         <td><literal>'xml'</literal></td>
+         <td>Only XML record syntax is supported</td>
+        </tr>
+        <tr>
+         <td><literal>recordIdOpaque</literal></td>
+         <td><literal>string</literal></td>
+         <td>
+         Optional  client-supplied, opaque record
+         identifier used under insert operations.
+        </td>
+        </tr>
+        <tr>
+         <td><literal>recordIdNumber </literal></td>
+         <td><literal>positive number</literal></td>
+         <td>Zebra's internal system number, only for update
+         actions.
+        </td>
+        </tr>
+        <tr>
+         <td><literal>databaseName</literal></td>
+         <td><literal>database identifier</literal></td>
+        <td>
+         The name of the database to which the extended services should be 
+         applied.
+        </td>
+        </tr>
+        </tbody>
+     </table>
+
+
+   <para>
+    The <literal>action</literal> parameter can be any of 
+    <literal>recordInsert</literal> (will fail if the record already exists),
+    <literal>recordReplace</literal> (will fail if the record does not exist),
+    <literal>recordDelete</literal> (will fail if the record does not
+       exist), and
+    <literal>specialUpdate</literal> (will insert or update the record
+       as needed).
+   </para>
+
+    <para>
+     During a  <literal>recordInsert</literal> action, the
+     usual rules for internal record ID generation apply, unless an
+     optional <literal>recordIdNumber</literal> Zebra internal ID or a
+    <literal>recordIdOpaque</literal> string identifier is assigned. 
+     The default ID generation is
+     configured using the <literal>recordId:</literal> from
+     <filename>zebra.cfg</filename>.     
+    </para>
+
+   <para>
+    The actions <literal>recordReplace</literal> or
+    <literal>recordDelete</literal> need specification of the additional 
+    <literal>recordIdNumber</literal> parameter, which must be an
+    existing Zebra internal system ID number, or the optional 
+     <literal>recordIdOpaque</literal> string parameter.
+    </para>
+
+    <para>
+     When retrieving existing
+     records indexed with GRS indexing filters, the Zebra internal 
+     ID number is returned in the field
+    <literal>/*/id:idzebra/localnumber</literal> in the namespace
+    <literal>xmlns:id="http://www.indexdata.dk/zebra/"</literal>,
+    where it can be picked up for later record updates or deletes. 
+    </para>
+    <para>
+     Records indexed with the <literal>alvis</literal> filter
+     have similar means to discover the internal Zebra ID.
+    </para>
+ 
+   <para>
+     The <literal>recordIdOpaque</literal> string parameter
+     is a client-supplied, opaque record
+     identifier, which may be  used under 
+     insert, update and delete operations. The
+     client software is responsible for assigning these to
+     records.      This identifier will
+     replace Zebra's own automagic identifier generation with a unique
+     mapping from <literal>recordIdOpaque</literal> to the 
+     Zebra internal <literal>recordIdNumber</literal>.
+     <emphasis>The opaque <literal>recordIdOpaque</literal> string
+     identifiers
+      are not visible in retrieval records, nor are
+      searchable, so the value of this parameter is
+      questionable. It serves mostly as a convenient mapping from
+      application domain string identifiers to Zebra internal IDs.
+     </emphasis> 
+    </para>
+   </sect2>
+
     
     
+ <sect2 id="administration-extended-services-yaz-client">
+  <title>Extended services from yaz-client</title>
+
+   <para>
+    We can now start a yaz-client admin session and create a database:
+   <screen>
+    <![CDATA[
+     $ yaz-client localhost:9999 -u admin/secret
+     Z> adm-create
+     ]]>
+   </screen>
+    Now that the <literal>Default</literal> database has been created,
+    we can insert an XML file (esdd0006.grs
+    from example/gils/records) and index it:
+   <screen>  
+    <![CDATA[
+     Z> update insert id1234 esdd0006.grs
+     ]]>
+   </screen>
+    The 3rd parameter - <literal>id1234</literal> here -
+      is the  <literal>recordIdOpaque</literal> package field.
+   </para>
+   <para>
+    Actually, we should have a way to specify "no opaque record id" for
+    yaz-client's update command. We'll fix that.
+   </para>
+   <para>
+    The newly inserted record can be searched as usual:
+    <screen>
+    <![CDATA[
+     Z> f utah
+     Sent searchRequest.
+     Received SearchResponse.
+     Search was a success.
+     Number of hits: 1, setno 1
+     SearchResult-1: term=utah cnt=1
+     records returned: 0
+     Elapsed: 0.014179
+     ]]>
+    </screen>
+   </para>
+   <para>
+     Let's delete the beast, using the same 
+     <literal>recordIdOpaque</literal> string parameter:
+    <screen>
+    <![CDATA[
+     Z> update delete id1234
+     No last record (update ignored)
+     Z> update delete 1 esdd0006.grs
+     Got extended services response
+     Status: done
+     Elapsed: 0.072441
+     Z> f utah
+     Sent searchRequest.
+     Received SearchResponse.
+     Search was a success.
+     Number of hits: 0, setno 2
+     SearchResult-1: term=utah cnt=0
+     records returned: 0
+     Elapsed: 0.013610
+     ]]>
+     </screen>
+    </para>
+    <para>
+    If the shadow register is enabled in your
+    <filename>zebra.cfg</filename>,
+    you must run the adm-commit command
+    <screen>
+    <![CDATA[
+     Z> adm-commit
+     ]]>
+    </screen>
+     after each update session in order to write your changes from the
+     shadow to the live register space.
+   </para>
+ </sect2>
+
+  
+ <sect2 id="administration-extended-services-yaz-php">
+  <title>Extended services from yaz-php</title>
+
+   <para>
+    Extended services are also available from the YAZ PHP client layer. An
+    example of a YAZ-PHP extended service transaction is given here:
+    <screen>
+    <![CDATA[
+     $record = '<record><title>A fine specimen of a record</title></record>';
+
+     $options = array('action' => 'recordInsert',
+                      'syntax' => 'xml',
+                      'record' => $record,
+                      'databaseName' => 'mydatabase'
+                     );
+
+     yaz_es($yaz, 'update', $options);
+     yaz_es($yaz, 'commit', array());
+     yaz_wait();
+
+     if ($error = yaz_error($yaz))
+       echo "$error";
+     ]]>
+    </screen>  
+    </para>
+    </sect2>
   </sect1>
  
   </sect1>
  
+
+  <sect1 id="gfs-config">
+   <title>YAZ Frontend Virtual Hosts</title>
+    <para>
+     <command>zebrasrv</command> uses the YAZ server frontend and does
+     support multiple virtual servers behind multiple listening sockets.
+    </para>
+    &zebrasrv-virtual;
+ 
+   <para>
+    See the section "Virtual Hosts" in the YAZ manual:
+    <filename>http://www.indexdata.dk/yaz/doc/server.vhosts.tkl</filename>
+   </para>
+ </sect1>
+
+
   
  </chapter>
  
   
  </chapter>