Zebra Attribute Extension Approximative Limit changed from type 9 to

[idzebra-moved-to-github.git] / doc / querymodel.xml
diff --git a/doc/querymodel.xml b/doc/querymodel.xml

index 1d2c8b5..831eeff 100644 (file)
--- a/doc/querymodel.xml
+++ b/doc/querymodel.xml
@@ -1,5 +1,5 @@
   <chapter id="querymodel">
-  <!-- $Id: querymodel.xml,v 1.18 2006-06-29 16:02:12 heikki Exp $ -->
+  <!-- $Id: querymodel.xml,v 1.23 2006-07-31 12:26:55 adam Exp $ -->
    <title>Query Model</title>
    
    <sect1 id="querymodel-overview">
@@ -24,7 +24,7 @@
      <para>
       Since the <literal>type-1 (RPN)</literal> 
       query structure has no direct, useful string
-     representation, every origin application needs to provide some
+     representation, every client application needs to provide some
       form of mapping from a local query notation or representation to it.
      </para>
      
@@ -152,7 +152,8 @@
      The <ulink url="&url.yaz.pqf;">PQF grammar</ulink>
      is documented in the YAZ manual, and shall not be
      repeated here. This textual PQF representation
-    is always during search mapped to the equivalent Zebra internal
+    is not transmitted to Zebra during search; instead, the client
+    maps it to the equivalent Z39.50 binary 
      query parse tree. 
     </para>
     
@@ -209,7 +210,7 @@
           <td><literal>bib-1</literal></td>
           <td>Standard PQF query language attribute set which defines the
            semantics of Z39.50 searching. In addition, all of the
-          non-use attributes (type 2-9) define the hard-wired 
+          non-use attributes (types 2-11) define the hard-wired 
            Zebra internal query
            processing.</td>
           <td>default</td>
@@ -226,7 +227,7 @@
           <td><literal>idxpath</literal></td>
           <td>Hardwired XPATH like attribute set, only available for
               indexing with the GRS record model</td>
-         <td>depreciated</td>
+         <td>deprecated</td>
          </tr>
          -->
         </tbody>
@@ -349,7 +350,7 @@
       </para>
       <para>
        Atomic (APT) queries are always leaf nodes in the PQF query tree. 
-      UN-supplied non-use attributes type 2-9 are either inherited from
+      Unsupplied non-use attributes (types 2-11) are either inherited from
        higher nodes in the query tree, or are set to Zebra's default values.
        See <xref linkend="querymodel-bib1"/> for details. 
       </para>
@@ -445,7 +446,9 @@
       
       <para>
        Defining a named result set and re-using it in the next query,
-      using <literal>yaz-client</literal>. 
+      using <literal>yaz-client</literal>. Notice that the client, not
+      the server, assigns the string <literal>'1'</literal> to the
+      named result set. 
        <screen>
         Z> f @attr 1=4 mozart
         ...
@@ -454,11 +457,6 @@
         Z> f @and @set 1 @attr 1=4 amadeus
         ...
         Number of hits: 14, setno 2
-       ...
-       Z> f @attr 1=1016 beethoven
-       ...
-       Number of hits: 26, setno 3
-       ...
        </screen>
       </para>
       
@@ -609,7 +607,8 @@
       </para>
       <para>
        Escaping PQF keywords and other non-parseable XPath constructs
-      with <literal>'{ }'</literal> to prevent syntax errors:
+      with <literal>'{ }'</literal> to prevent client-side PQF parsing
+      syntax errors:
        <screen>
         Z> find @attr {1=/root/first[@attr='danish']} content
         Z> find @attr {1=/record/@set} oai
@@ -787,13 +786,35 @@
       <filename>tab/dan1.att</filename>,
       <filename>tab/explain.att</filename>, and
       <filename>tab/gils.att</filename>.
+     </para>
+    <para>
+      For example, a few of the <literal>Bib-1</literal> use
+      attributes from the <filename>tab/bib1.att</filename> file are:
+      <screen>
+       att 1               Personal-name
+       att 2               Corporate-name
+       att 3               Conference-name
+       att 4               Title
+       ...
+       att 1009            Subject-name-personal
+       att 1010            Body-of-text
+       att 1011            Date/time-added-to-db
+       ...
+       att 1016            Any
+       att 1017            Server-choice
+       att 1018            Publisher
+       ...
+       att 1035            Anywhere
+       att 1036            Author-Title-Subject
+      </screen>
+     </para>
+    <para>
       New attribute sets can be added by adding new 
       <filename>tab/*.att</filename> configuration files, which need to
-     be sourced in the main configuration <filename>zebra.cfg</filename>.
+     be sourced in the main configuration <filename>zebra.cfg</filename>. 
       </para>
-
      <para>
-     In addition, Zebra allows the access of 
+      In addition, Zebra allows the access of 
       <emphasis>internal index names</emphasis> and <emphasis>dynamic
       XPath</emphasis> as use attributes; see
        <xref linkend="querymodel-use-string"/> and 
@@ -996,7 +1017,7 @@
          <tr>
           <td>Any position in field</td>
           <td>3</td>
-         <td>default</td>
+         <td>supported</td>
          </tr>
         </tbody>
       </table>
@@ -1004,9 +1025,9 @@
      <para>
        The position attribute values <literal>first in field (1)</literal>,
        and <literal>first in subfield(2)</literal> are unsupported.
-      Using them does not trigger an error, but silent defaults to 
-      <literal>any position in field (3)</literal>.
-      <!-- It should -->
+      Using them silently maps to 
+      <literal>any position in field (3)</literal>. A proper diagnostic
+      should have been issued.
        </para>
      </sect3>
      
@@ -1351,7 +1372,7 @@
          <tr>
           <td>Complete subfield</td>
           <td>2</td>
-         <td>depreciated</td>
+         <td>deprecated</td>
          </tr>
          <tr>
           <td>Complete field</td>
@@ -1537,9 +1558,21 @@
       </screen>
      </para>
      
+
+    <!--
+    Zebra Extension Term Set Attribute
+    From the manual text, I can not see what is the point with this feature.
+    I think it makes more sense when there are multiple terms in a query, or
+    something...
+    
+    We decided 2006-06-03 to disable this feature, as it is covered by
+    scan within a resultset. Better use resources to upgrade this
+    feature for good performance.
+    -->
+
+    <!--
      <sect3 id="querymodel-zebra-attr-estimation">
       <title>Zebra Extension Term Set Attribute (type 8)</title>
-    </sect3>
      <para>
       The Term Set feature is a facility that allows a search to store
       hitting terms in a "pseudo" resultset; thus a search (as usual) +
@@ -1559,6 +1592,9 @@
       The model has one serious flaw: we don't know the size of term
       set. Experimental. Do not use in production code.
      </warning>
+    </sect3>
+    -->
+
  
      <sect3 id="querymodel-zebra-attr-weight">
       <title>Zebra Extension Rank Weight Attribute (type 9)</title>
@@ -1577,31 +1613,46 @@
      </para>
  
      <sect3 id="querymodel-zebra-attr-limit">
-     <title>Zebra Extension Approximative Limit Attribute (type 9)</title>
+     <title>Zebra Extension Approximative Limit Attribute (type 11)</title>
      </sect3>
      <para>
-     Newer Zebra versions normally estimate hit count for every APT
+     Zebra  computes - unless otherwise configured -
+     the exact hit count for every APT
       (leaf) in the query tree. These hit counts are returned as part of
       the searchResult-1 facility in the binary encoded Z39.50 search
       response packages.
      </para>
      <para>
-     By setting a limit for the APT we can make Zebra turn into
-     approximate hit count when a certain hit count limit is
-     reached. A value of zero means exact hit count.
+     By setting an estimation limit on the result set size of the APT
+     leaves, Zebra stops processing the result set when the limit
+     is reached.
+     Hit counts under this limit are still precise, but hit counts over it
+     are estimated using the statistics gathered from the chopped
+     result set.
+    </para>
+    <para>
+     Specifying a limit of <literal>0</literal> results in exact hit counts.
      </para>
      <para>
       For example, we might be interested in exact hit count for a, but
       for b we allow hit count estimates for 1000 and higher. 
       <screen>
-      Z> find @and a @attr 9=1000 b
+      Z> find @and a @attr 11=1000 b
       </screen>
      </para>
      <note>
       The estimated hit count facility makes searches faster, as one
       only needs to process large hit lists partially.
+     It is mostly used in huge databases, where you want to trade
+     exactness of hit counts for speed of execution. 
      </note>
      <warning>
+     Do not use approximative hit count limits
+     in conjunction with relevance ranking, as re-sorting of the
+     result set obviously only works when the entire result set has
+     been processed. 
+    </warning>
+    <warning>
       This facility clashes with rank weight, because there all
       documents in the hit lists need to be examined for scoring and
       re-sorting.
@@ -1710,11 +1761,11 @@
      </warning>
  
      <sect3 id="querymodel-zebra-attr-approx">
-     <title>Zebra Extension Approximative Limit (type 9)</title>
+     <title>Zebra Extension Approximative Limit (type 11)</title>
      </sect3>
      <para>
       The <literal>Zebra Extension Approximative Limit (type
-      9)</literal> is a way to enable approximate
+      11)</literal> is a way to enable approximate
       hit counts for <literal>scan</literal> hit counts, in the same
       way as for <literal>search</literal> hit counts. 
      </para>
@@ -1747,7 +1798,7 @@
       main Zebra configuration file <filename>zebra.cfg</filename>
       directive <literal>attset: idxpath.att</literal> must be enabled.
      </para>
-    <warning>The <literal>idxpath</literal> is depreciated, may not be
+    <warning>The <literal>idxpath</literal> is deprecated, may not be
       supported in future Zebra versions, and should definitely
       not be used in production code.
      </warning>
@@ -1780,31 +1831,31 @@
           <td>XPATH Begin</td>
           <td>1</td>
           <td>_XPATH_BEGIN</td>
-         <td>depreciated</td>
+         <td>deprecated</td>
          </tr>
          <tr>
           <td>XPATH End</td>
           <td>2</td>
           <td>_XPATH_END</td>
-         <td>depreciated</td>
+         <td>deprecated</td>
          </tr>
          <tr>
           <td>XPATH CData</td>
           <td>1016</td>
           <td>_XPATH_CDATA</td>
-         <td>depreciated</td>
+         <td>deprecated</td>
          </tr>
          <tr>
           <td>XPATH Attribute Name</td>
           <td>3</td>
           <td>_XPATH_ATTR_NAME</td>
-         <td>depreciated</td>
+         <td>deprecated</td>
          </tr>
          <tr>
           <td>XPATH Attribute CData</td>
           <td>1015</td>
           <td>_XPATH_ATTR_CDATA</td>
-         <td>depreciated</td>
+         <td>deprecated</td>
          </tr>
         </tbody>
       </table>
@@ -2325,6 +2376,8 @@
       The next plus character marks the end of the section.
       Currently Zebra only supports one specifier, the error tolerance,
       which consists one digit. 
+     <!-- TODO Nice thing, but what does
+     that error tolerance digit *mean*? Maybe an example would be nice? -->
      </para>
  
      <para>