added acronym entities

[idzebra-moved-to-github.git] / doc / recordmodel-alvisxslt.xml
diff --git a/doc/recordmodel-alvisxslt.xml b/doc/recordmodel-alvisxslt.xml

index 6032aa1..328bbce 100644 (file)
--- a/doc/recordmodel-alvisxslt.xml
+++ b/doc/recordmodel-alvisxslt.xml
@@ -1,15 +1,15 @@
   <chapter id="record-model-alvisxslt">
-  <!-- $Id: recordmodel-alvisxslt.xml,v 1.6 2006-04-24 12:53:03 marc Exp $ -->
-  <title>ALVIS XML Record Model and Filter Module</title>
+  <!-- $Id: recordmodel-alvisxslt.xml,v 1.14 2007-02-02 09:58:39 marc Exp $ -->
+  <title>ALVIS &xml; Record Model and Filter Module</title>
    
  
    <para>
     The record model described in this chapter applies to the fundamental,
-   structured XML
+   structured &xml;
     record type <literal>alvis</literal>, introduced in
-   <xref linkend="componentmodulesalvis"/>. The ALVIS XML record model
+   <xref linkend="componentmodulesalvis"/>. The ALVIS &xml; record model
    is experimental, and its inner workings might change in future
-   releases of the Zebra Information Server.
+   releases of the &zebra; Information Server.
    </para>
  
    <para> This filter has been developed under the 
@@ -19,10 +19,10 @@
    </para>
    
    
-  <sect1 id="record-model-alvisxslt-filter">
+  <section id="record-model-alvisxslt-filter">
     <title>ALVIS Record Filter</title>
     <para>
-    The experimental, loadable  Alvis XML/XSLT filter module
+    The experimental, loadable  Alvis &xml;/XSLT filter module
     <literal>mod-alvis.so</literal> is packaged in the GNU/Debian package
      <literal>libidzebra1.4-mod-alvis</literal>.
      It is invoked by the <filename>zebra.cfg</filename> configuration statement
@@ -35,7 +35,7 @@
      path <filename>db/filter_alvis_conf.xml</filename>.
     </para>
     <para>The Alvis XSLT filter configuration file must be
-    valid XML. It might look like this (This example is
+    valid &xml;. It might look like this (This example is
      used for indexing and display of OAI harvested records):
      <screen>
      &lt;?xml version="1.0" encoding="UTF-8"?&gt;
@@ -58,7 +58,7 @@
      unique, these are the literal <literal>schema</literal> or 
      <literal>element set</literal> names used in 
        <ulink url="http://www.loc.gov/standards/sru/srw/">SRW</ulink>,
-      <ulink url="http://www.loc.gov/standards/sru/">SRU</ulink> and
+      <ulink url="&url.sru;">SRU</ulink> and
      Z39.50 protocol queries.
      The paths in the <literal>stylesheet</literal> attributes
     are relative to Zebra's working directory, or absolute to file
@@ -66,7 +66,7 @@
     </para>
     <para>
      The <literal>&lt;split level="2"/&gt;</literal> decides where the
-    XML Reader shall split the
+    &xml; Reader shall split the
      collections of records into individual records, which then are
      loaded into DOM, and have the indexing XSLT stylesheet applied.
     </para>
@@ -76,22 +76,22 @@
      <literal>identifier="http://indexdata.dk/zebra/xslt/1"</literal>.
     </para>
  
-   <sect2 id="record-model-alvisxslt-internal">
+   <section id="record-model-alvisxslt-internal">
      <title>ALVIS Internal Record Representation</title>   
-    <para>When indexing, an XML Reader is invoked to split the input
-    files into suitable record XML pieces. Each record piece is then
-    transformed to an XML DOM structure, which is essentially the
+    <para>When indexing, an &xml; Reader is invoked to split the input
+    files into suitable record &xml; pieces. Each record piece is then
+    transformed to an &xml; DOM structure, which is essentially the
      record model. Only XSLT transformations can be applied during
      index, search and retrieval. Consequently, output formats are
-    restricted to whatever XSLT can deliver from the record XML
-    structure, be it other XML formats, HTML, or plain text. In case
+    restricted to whatever XSLT can deliver from the record &xml;
+    structure, be it other &xml; formats, HTML, or plain text. In case
      you have <literal>libxslt1</literal> running with EXSLT support,
      you can use this functionality inside the Alvis
      filter configuration XSLT stylesheets.
      </para>
-   </sect2>
+   </section>
  
-   <sect2 id="record-model-alvisxslt-canonical">
+   <section id="record-model-alvisxslt-canonical">
      <title>ALVIS Canonical Indexing Format</title>   
      <para>The output of the indexing XSLT stylesheets must contain
      certain elements in the magic 
@@ -113,27 +113,27 @@
             z:id="oai:JTRS:CP-3290---Volume-I" 
             z:rank="47896"
             z:type="update"&gt;
-       &lt;z:index name="oai:identifier" type="0"&gt;
+       &lt;z:index name="oai_identifier" type="0"&gt;
                  oai:JTRS:CP-3290---Volume-I&lt;/z:index&gt;
-       &lt;z:index name="oai:datestamp" type="0"&gt;2004-07-09&lt;/z:index&gt;
-       &lt;z:index name="oai:setspec" type="0"&gt;jtrs&lt;/z:index&gt;
-       &lt;z:index name="dc:all" type="w"&gt;
-          &lt;z:index name="dc:title" type="w"&gt;Proceedings of the 4th 
+       &lt;z:index name="oai_datestamp" type="0"&gt;2004-07-09&lt;/z:index&gt;
+       &lt;z:index name="oai_setspec" type="0"&gt;jtrs&lt;/z:index&gt;
+       &lt;z:index name="dc_all" type="w"&gt;
+          &lt;z:index name="dc_title" type="w"&gt;Proceedings of the 4th 
                  International Conference and Exhibition:
                  World Congress on Superconductivity - Volume I&lt;/z:index&gt;
-          &lt;z:index name="dc:creator" type="w"&gt;Kumar Krishen and *Calvin
+          &lt;z:index name="dc_creator" type="w"&gt;Kumar Krishen and *Calvin
                  Burnham, Editors&lt;/z:index&gt;
         &lt;/z:index&gt;
       &lt;/z:record&gt;
       </screen>
      </para>
-    <para>This means the following: From the original XML file 
-     <literal>one-record.xml</literal> (or from the XML record DOM of the
+    <para>This means the following: From the original &xml; file 
+     <literal>one-record.xml</literal> (or from the &xml; record DOM of the
      same form coming from a split input file), the indexing
-     stylesheet produces an indexing XML record, which is defined by
+     stylesheet produces an indexing &xml; record, which is defined by
       the <literal>record</literal> element in the magic namespace
       <literal>xmlns:z="http://indexdata.dk/zebra/xslt/1"</literal>.
-     Zebra uses the content of 
+     &zebra; uses the content of 
       <literal>z:id="oai:JTRS:CP-3290---Volume-I"</literal> as internal
       record ID, and - in case static ranking is set - the content of 
       <literal>z:rank="47896"</literal> as static rank. Following the
@@ -148,12 +148,12 @@
      </para>
      <para>In this example, the following literal indexes are constructed:
       <screen>
-       oai:identifier
-       oai:datestamp
-       oai:setspec
-       dc:all
-       dc:title
-       dc:creator
+       oai_identifier
+       oai_datestamp
+       oai_setspec
+       dc_all
+       dc_title
+       dc_creator
       </screen>
       where the indexing type is defined in the 
       <literal>type</literal> attribute 
@@ -195,11 +195,11 @@
        Z> elem dc
        Z> form xml
        Z>
-      Z> f @attr 1=dc:creator Kumar
-      Z> scan @attr 1=dc:creator adam
+      Z> f @attr 1=dc_creator Kumar
+      Z> scan @attr 1=dc_creator adam
        Z>
-      Z> f @attr 1=dc:title @attr 4=2 "proceeding congress superconductivity"
-      Z> scan @attr 1=dc:title abc
+      Z> f @attr 1=dc_title @attr 4=2 "proceeding congress superconductivity"
+      Z> scan @attr 1=dc_title abc
        ]]>
       </screen>
       or the proprietary
@@ -208,18 +208,14 @@
       SRU, and SRW
       <screen>
        <![CDATA[
-      http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=%40attr+1%3Ddc%3Acreator+%40attr+4%3D6+%22the
-      http://localhost:9999/?version=1.1&operation=scan&x-pScanClause=@attr+1=dc:date+@attr+4=2+a
+      http://localhost:9999/?version=1.1&operation=searchRetrieve&x-pquery=%40attr+1%3Ddc_creator+%40attr+4%3D6+%22the
+      http://localhost:9999/?version=1.1&operation=scan&x-pScanClause=@attr+1=dc_date+@attr+4=2+a
        ]]>
       </screen>
-     See <xref linkend="server-sru"/> for more information on SRU/SRW
-     configuration, and <xref linkend="gfs-config"/> or 
-     <ulink url="http://www.indexdata.dk/yaz/doc/tools.tkl#tools.cql">
-      the YAZ manual CQL section</ulink>
-     for the details
-     of the YAZ frontend server 
-     <ulink url="http://www.loc.gov/standards/sru/cql/">CQL</ulink>
-     configuration. 
+     See <xref linkend="zebrasrv-sru"/> for more information on SRU/SRW
+     configuration, and <xref linkend="gfs-config"/> or the YAZ
+     <ulink url="&url.yaz.cql;">CQL section</ulink>
+     for the details of the YAZ frontend server.
      </para>
      <para>
       Notice that there are no <filename>*.abs</filename>,
@@ -227,20 +223,20 @@
      filter configuration files involved in this process, and that the
       literal index names are used during search and retrieval.
      </para>
-   </sect2>
-  </sect1>
+   </section>
+  </section>
  
  
-  <sect1 id="record-model-alvisxslt-conf">
+  <section id="record-model-alvisxslt-conf">
     <title>ALVIS Record Model Configuration</title>
  
  
-  <sect2 id="record-model-alvisxslt-index">
+  <section id="record-model-alvisxslt-index">
     <title>ALVIS Indexing Configuration</title>
      <para>
       As mentioned above, there can be only one indexing
       stylesheet, and configuration of the indexing process is a synonym
-     of writing an XSLT stylesheet which produces XML output containing the
+     of writing an XSLT stylesheet which produces &xml; output containing the
       magic elements discussed in  
       <xref linkend="record-model-alvisxslt-internal"/>. 
      Obviously, there are millions of different ways to accomplish this
@@ -250,19 +246,19 @@
      <para>
       Stylesheets can be written in the <emphasis>pull</emphasis> or
       the <emphasis>push</emphasis> style: <emphasis>pull</emphasis>
-     means that the output XML structure is taken as starting point of
+     means that the output &xml; structure is taken as starting point of
       the internal structure of the XSLT stylesheet, and portions of
-     the input XML are <emphasis>pulled</emphasis> out and inserted
-     into the right spots of the output XML structure. On the other
+     the input &xml; are <emphasis>pulled</emphasis> out and inserted
+     into the right spots of the output &xml; structure. On the other
      side, <emphasis>push</emphasis> XSLT stylesheets are recursively
       calling their template definitions, a process which is commanded
-     by the input XML structure, and avake to produce some output XML
+     by the input &xml; structure, and awake to produce some output &xml;
      whenever some special conditions in the input stylesheets are
       met. The <emphasis>pull</emphasis> type is well-suited for input
-     XML with strong and well-defined structure and semantcs, like the
+     &xml; with strong and well-defined structure and semantics, like the
       following OAI indexing example, whereas the
       <emphasis>push</emphasis> type might be the only possible way to
-     sort out deeply recursive input XML formats.
+     sort out deeply recursive input &xml; formats.
      </para>
      <para> 
       A <emphasis>pull</emphasis> stylesheet example used to index
@@ -293,7 +289,7 @@
  
           <!-- OAI indexing templates -->
           <xsl:template match="oai:record/oai:header/oai:identifier">
-          <z:index name="oai:identifier" type="0">
+          <z:index name="oai_identifier" type="0">
             <xsl:value-of select="."/>
            </z:index>    
           </xsl:template>
@@ -302,7 +298,7 @@
  
           <!-- DC specific indexing templates -->
           <xsl:template match="oai:record/oai:metadata/oai_dc:dc/dc:title">
-          <z:index name="dc:title" type="w">
+          <z:index name="dc_title" type="w">
             <xsl:value-of select="."/>
            </z:index>
           </xsl:template>
@@ -317,16 +313,16 @@
       Notice also,
       that the names and types of the indexes can be defined in the
       indexing XSLT stylesheet <emphasis>dynamically according to
-     content in the original XML records</emphasis>, which has
+     content in the original &xml; records</emphasis>, which has
      opportunities for great power and wizardry as well as grand
       disaster.  
      </para>
      <para>
       The following excerpt of a <emphasis>push</emphasis> stylesheet
       <emphasis>might</emphasis> 
-     be a good idea according to your strict control of the XML
+     be a good idea according to your strict control of the &xml;
      input format (due to rigorous checking against well-defined and
-     tight RelaxNG or XML Schema's, for example):
+     tight RelaxNG or &xml; Schemas, for example):
       <screen>
        <![CDATA[
        <xsl:template name="element-name-indexes">     
@@ -337,11 +333,11 @@
        ]]>
       </screen>
       This template creates indexes which have the name of the working 
-     node of any input  XML file, and assigns a '1' to the index.
+     node of any input  &xml; file, and assigns a '1' to the index.
       The example query 
       <literal>find @attr 1=xyz 1</literal> 
       finds all files which contain at least one
-     <literal>xyz</literal> XML element. In case you can not control
+     <literal>xyz</literal> &xml; element. In case you can not control
       which element names the input files contain, you might ask for
       disaster and bad karma using this technique.
      </para>
@@ -373,24 +369,24 @@
       to suffering and pain, and universal
      disintegration of your project schedule.
      </para>
-  </sect2>
+  </section>
  
-  <sect2 id="record-model-alvisxslt-elementset">
+  <section id="record-model-alvisxslt-elementset">
     <title>ALVIS Exchange Formats</title>
     <para>
       An exchange format can be anything which can be the outcome of an
       XSLT transformation, as far as the stylesheet is registered in
       the main Alvis XSLT filter configuration file, see
       <xref linkend="record-model-alvisxslt-filter"/>.
-     In principle anything that can be expressed in  XML, HTML, and
+     In principle anything that can be expressed in  &xml;, HTML, and
       TEXT can be the output of a <literal>schema</literal> or 
      <literal>element set</literal> directive during search, as long as
       the information comes from the 
-     <emphasis>original input record XML DOM tree</emphasis>
-     (and not the transformed and <emphasis>indexed</emphasis> XML!!).
+     <emphasis>original input record &xml; DOM tree</emphasis>
+     (and not the transformed and <emphasis>indexed</emphasis> &xml;!!).
      </para>
      <para>
-     In addition, internal administrative information from the Zebra
+     In addition, internal administrative information from the &zebra;
       indexer can be accessed during record retrieval. The following
       example is a summary of the possibilities:
       <screen>
@@ -422,9 +418,9 @@
       </screen>
      </para>
  
-  </sect2>
+  </section>
  
-  <sect2 id="record-model-alvisxslt-example">
+  <section id="record-model-alvisxslt-example">
     <title>ALVIS Filter OAI Indexing Example</title>
     <para>
       The sourcecode tarball contains a working Alvis filter example in
@@ -444,9 +440,9 @@
       <ulink url="http://www.oaforum.org/tutorial/">
        http://www.oaforum.org/tutorial/</ulink>.
      </para>
-   </sect2>
+   </section>
  
-  </sect1>
+  </section>
  
    
   </chapter>
@@ -491,12 +487,12 @@ c)  Main "alvis" XSLT filter config file:
    and so on.
  
  - in db/ a cql2pqf.txt yaz-client config file 
-  which is also used in the yaz-server <ulink url="http://www.loc.gov/standards/sru/cql/">CQL</ulink>-to-PQF process
+  which is also used in the yaz-server <ulink url="&url.cql;">CQL</ulink>-to-PQF process
  
     see: http://www.indexdata.com/yaz/doc/tools.tkl#tools.cql.map
  
  - in db/ an indexing XSLT stylesheet. This is a PULL-type XSLT thing,
-  as it constructs the new XML structure by pulling data out of the
+  as it constructs the new &xml; structure by pulling data out of the
    respective elements/attributes of the old structure.
  
    Notice the special zebra namespace, and the special elements in this
@@ -506,7 +502,7 @@ c)  Main "alvis" XSLT filter config file:
    indicates that a new record with given id and static rank has to be updated. 
  
    <z:index name="title" type="w">
-   encloses all the text/XML which shall be indexed in the index named
+   encloses all the text/&xml; which shall be indexed in the index named
     "title" and of index type "w" (see  file default.idx in your zebra
     installation) 
  
@@ -519,9 +515,17 @@ c)  Main "alvis" XSLT filter config file:
  
  
  
- <!-- Keep this Emacs mode comment at the end of the file
-Local variables:
-mode: nxml
-End:
--->
-
+ <!-- Keep this comment at the end of the file
+ Local variables:
+ mode: sgml
+ sgml-omittag:t
+ sgml-shorttag:t
+ sgml-minimize-attributes:nil
+ sgml-always-quote-attributes:t
+ sgml-indent-step:1
+ sgml-indent-data:t
+ sgml-parent-document: "zebra.xml"
+ sgml-local-catalogs: nil
+ sgml-namecase-general:t
+ End:
+ -->