added example of MARCXML indexing with chopping of sort indexes according to 'ind2...
authorMarc Cromme <marc@indexdata.dk>
Thu, 8 Mar 2007 11:24:50 +0000 (11:24 +0000)
committerMarc Cromme <marc@indexdata.dk>
Thu, 8 Mar 2007 11:24:50 +0000 (11:24 +0000)
doc/recordmodel-domxml.xml
test/xslt/Makefile.am
test/xslt/dom-config-one.xml
test/xslt/dom-index-element-chop.xsl [new file with mode: 0644]
test/xslt/zebra-dom.cfg

index a9b85db..1dd8559 100644 (file)
@@ -1,7 +1,7 @@
 <chapter id="record-model-domxml">
-  <!-- $Id: recordmodel-domxml.xml,v 1.10 2007-03-01 11:21:20 marc Exp $ -->
+  <!-- $Id: recordmodel-domxml.xml,v 1.11 2007-03-08 11:24:50 marc Exp $ -->
   <title>&dom; &xml; Record Model and Filter Module</title>
-
+  
   <para>
    The record model described in this chapter applies to the fundamental,
    structured &xml;
       ]]>
      </screen>
     </para>
+  </section>
+
+
+  <section id="record-model-domxml-index-marc">
+   <title>&dom; Indexing &marcxml;</title>
+    <para>
+      The &dom; filter allows indexing of both binary &marc; records
+      and &marcxml; records, depending on its configuration.
+      A typical &marcxml; record might look like this:
+      <screen>  
+      <![CDATA[
+      <record xmlns="http://www.loc.gov/MARC21/slim">
+       <rank>42</rank>
+       <leader>00366nam  22001698a 4500</leader>
+       <controlfield tag="001">   11224466   </controlfield>
+       <controlfield tag="003">DLC  </controlfield>
+       <controlfield tag="005">00000000000000.0  </controlfield>
+       <controlfield tag="008">910710c19910701nju           00010 eng    </controlfield>
+       <datafield tag="010" ind1=" " ind2=" ">
+         <subfield code="a">   11224466 </subfield>
+       </datafield>
+       <datafield tag="040" ind1=" " ind2=" ">
+         <subfield code="a">DLC</subfield>
+         <subfield code="c">DLC</subfield>
+       </datafield>
+       <datafield tag="050" ind1="0" ind2="0">
+         <subfield code="a">123-xyz</subfield>
+       </datafield>
+       <datafield tag="100" ind1="1" ind2="0">
+         <subfield code="a">Jack Collins</subfield>
+       </datafield>
+       <datafield tag="245" ind1="1" ind2="0">
+         <subfield code="a">How to program a computer</subfield>
+       </datafield>
+       <datafield tag="260" ind1="1" ind2=" ">
+         <subfield code="a">Penguin</subfield>
+       </datafield>
+       <datafield tag="263" ind1=" " ind2=" ">
+         <subfield code="a">8710</subfield>
+       </datafield>
+       <datafield tag="300" ind1=" " ind2=" ">
+         <subfield code="a">p. cm.</subfield>
+       </datafield>
+      </record>
+      ]]>
+      </screen>
+    </para>
+
+    <para>
+      String manipulation is easy to perform in the &dom;
+      filter. For example, if you want to drop some leading articles
+      in the indexing of sort fields, you might want to pick out the
+      &marcxml; indicator attributes to chop off leading substrings. If
+      the above &xml; example would have an indicator
+      <literal>ind2="7"</literal> in the title field
+      <literal>245</literal>, i.e.
+      <screen>
+      <![CDATA[
+       <datafield tag="245" ind1="1" ind2="7">
+         <subfield code="a">How to program a computer</subfield>
+       </datafield>
+      ]]>
+      </screen>
+      one could write a template taking into account this information
+      to chop the first <literal>7</literal> characters from the
+      sorting index <literal>title:s</literal>. Note that the &xslt;
+      <literal>substring()</literal> function counts character positions
+      from 1, so the text to keep starts at position
+      <literal>$chop + 1</literal>:
+      <screen>
+      <![CDATA[
+      <xsl:template match="m:datafield[@tag='245']">
+        <xsl:variable name="chop">
+          <xsl:choose>
+            <xsl:when test="not(number(@ind2))">0</xsl:when>
+            <xsl:otherwise><xsl:value-of select="number(@ind2)"/></xsl:otherwise>
+          </xsl:choose>
+        </xsl:variable>
+
+        <z:index name="title:w title:p any:w">
+           <xsl:value-of select="m:subfield[@code='a']"/>
+        </z:index>
+
+        <z:index name="title:s">
+          <xsl:value-of select="substring(m:subfield[@code='a'], $chop + 1)"/>
+        </z:index>
+
+      </xsl:template> 
+      ]]>
+      </screen>
+      The output of the above &marcxml; and &xslt; excerpt would then be:
+      <screen>  
+      <![CDATA[
+        <z:index name="title:w title:p any:w">How to program a computer</z:index>
+        <z:index name="title:s">program a computer</z:index>
+      ]]>
+      </screen>
+      and the record would be sorted in the title index under 'P', not 'H'.
+    </para>
+  </section>
+
+
+  <section id="record-model-domxml-index-wizzard">
+   <title>&dom; Indexing Wizardry</title>
     <para>
-     Notice also,
-     that the names and types of the indexes can be defined in the
+     The names and types of the indexes can be defined in the
      indexing &xslt; stylesheet <emphasis>dynamically according to
      content in the original &xml; records</emphasis>, which has
      opportunities for great power and wizardry as well as grande
index 95c1a95..afc0557 100644 (file)
@@ -1,4 +1,4 @@
-# $Id: Makefile.am,v 1.15 2007-03-06 09:24:34 marc Exp $
+# $Id: Makefile.am,v 1.16 2007-03-08 11:24:50 marc Exp $
 
 check_PROGRAMS = xslt1 xslt2 xslt3 xslt4 xslt5 dom1
 TESTS = $(check_PROGRAMS)
@@ -9,6 +9,7 @@ EXTRA_DIST= \
      dom-config-one.xml \
      dom-config-skipped.xml \
      dom-index-element.xsl \
+     dom-index-element-chop.xsl \
      dom-index-pi.xsl \
      dom-index-skipped.xsl \
      id.xsl \
index 5dde67c..c7bcaf3 100644 (file)
@@ -1,15 +1,16 @@
 <dom>
-  <!-- $Id: dom-config-one.xml,v 1.2 2007-02-15 13:01:00 marc Exp $ -->
+  <!-- $Id: dom-config-one.xml,v 1.3 2007-03-08 11:24:50 marc Exp $ -->
+  <input syntax="xml">
+    <xmlreader level="0"/>
+    <xslt stylesheet="id.xsl"/>
+  </input>
   <extract name="index">
-      <xslt stylesheet="dom-index-element.xsl"/>
+    <xslt stylesheet="dom-index-element-chop.xsl"/>
+    <!-- <xslt stylesheet="dom-index-element.xsl"/> -->
   </extract>
   <retrieve name="F">
     <xslt stylesheet="id.xsl"/>
   </retrieve>
-  <input syntax="xml">
-    <xmlreader level="0"/>
-    <xslt stylesheet="id.xsl"/>
-  </input>
 </dom>
 
    
diff --git a/test/xslt/dom-index-element-chop.xsl b/test/xslt/dom-index-element-chop.xsl
new file mode 100644 (file)
index 0000000..ec4ce64
--- /dev/null
@@ -0,0 +1,44 @@
+<xsl:stylesheet xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+  xmlns:m="http://www.loc.gov/MARC21/slim"
+  xmlns:z="http://indexdata.com/zebra-2.0"
+  exclude-result-prefixes="m z"
+  version="1.0">
+  <!-- $Id: dom-index-element-chop.xsl,v 1.1 2007-03-08 11:24:50 marc Exp $ -->
+  <xsl:output indent="yes" method="xml" version="1.0" encoding="UTF-8"/>
+  
+
+  <!-- Override the built-in text rule so text nodes that no other template
+       handles are not copied into the output. -->
+  <xsl:template match="text()"/>
+
+
+  <!-- Root MARCXML record: wrap the index output in a z:record element whose
+       z:id is taken from controlfield 001 and whose z:rank is taken from the
+       rank element, both whitespace-normalized. -->
+  <xsl:template match="/m:record">
+    <z:record z:id="{normalize-space(m:controlfield[@tag='001'])}"
+        z:rank="{normalize-space(m:rank)}">
+      <!-- Process the child nodes so the field templates below can match. -->
+      <xsl:apply-templates/>
+    </z:record>
+  </xsl:template>
+
+  <!-- Controlfield 001: emit its whitespace-normalized content into the
+       "control" index. -->
+  <xsl:template match="m:controlfield[@tag='001']">
+    <z:index name="control">
+      <xsl:value-of select="normalize-space(.)"/>
+    </z:index>
+  </xsl:template>
+  
+  <xsl:template match="m:datafield[@tag='245']">
+    <!-- Title field: $chop is the number of leading characters to drop from
+         the sort key, read from @ind2 (blank, non-numeric or 0 gives 0). -->
+    <xsl:variable name="chop">
+      <xsl:choose>
+        <xsl:when test="not(number(@ind2))">0</xsl:when>
+        <xsl:otherwise><xsl:value-of select="number(@ind2)"/></xsl:otherwise>
+      </xsl:choose>
+    </xsl:variable>
+    <z:index name="title:w title:p any:w">
+      <xsl:value-of select="m:subfield[@code='a']"/>
+    </z:index>
+    <!-- substring() is 1-based, so start at $chop + 1 to skip $chop chars. -->
+    <z:index name="title:s">
+      <xsl:value-of select="substring(m:subfield[@code='a'], $chop + 1)"/>
+    </z:index>
+  </xsl:template>
+
+</xsl:stylesheet>
index a912991..fc73eda 100644 (file)
@@ -2,9 +2,9 @@ profilePath: ${srcdir:-.}:${srcdir:-.}/../../tab
 
 modulePath: ../../index/.libs
 
-recordType: dom.dom-config-skipped.xml
-#recordType: dom.dom-config-col.xml
+#recordType: dom.dom-config-skipped.xml
+recordType: dom.dom-config-col.xml
 
-perm.anonymous: rw
+#perm.anonymous: rw