Added yaz-icu man page
authorAdam Dickmeiss <adam@indexdata.dk>
Mon, 12 Nov 2007 11:13:05 +0000 (11:13 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Mon, 12 Nov 2007 11:13:05 +0000 (11:13 +0000)
doc/Makefile.am
doc/yaz-icu-man.xml [new file with mode: 0644]

index 76a2fac..6b54e5b 100644 (file)
@@ -1,4 +1,4 @@
-## $Id: Makefile.am,v 1.83 2007-07-10 09:42:46 adam Exp $
+## $Id: Makefile.am,v 1.84 2007-11-12 11:13:05 adam Exp $
 
 SUBDIRS = common
 
@@ -13,11 +13,12 @@ HTMLFILES = index.html
 
 MANFILES=yaz-client.1 yaz-ztest.8 \
        yaz-config.8 yaz.7 zoomsh.1 yaz-asncomp.1 \
-       yaz-marcdump.1 yaz-iconv.1 yaz-log.7 yaz-illclient.1
+       yaz-marcdump.1 yaz-iconv.1 yaz-log.7 \
+       yaz-illclient.1 yaz-icu.1
 REFFILES=yaz-client-man.xml yaz-ztest-man.xml yaz-config-man.xml \
        yaz-man.xml zoomsh-man.xml yaz-asncomp-man.xml \
        yaz-marcdump-man.xml yaz-iconv-man.xml yaz-log-man.xml \
-       yaz-illclient-man.xml
+       yaz-illclient-man.xml yaz-icu-man.xml
 
 SUPPORTFILES=entities.ent apilayer.obj local.ent.in
 
@@ -60,6 +61,9 @@ yaz-illclient.1: yaz-illclient-man.xml
 yaz-log.7: yaz-log-man.xml
        $(MAN_COMPILE) $(srcdir)/yaz-log-man.xml
 
+yaz-icu.1: yaz-icu-man.xml
+       $(MAN_COMPILE) $(srcdir)/yaz-icu-man.xml
+
 $(HTMLFILES): $(XMLFILES)
        rm -f *.html
        $(HTML_COMPILE) $(srcdir)/yaz.xml
diff --git a/doc/yaz-icu-man.xml b/doc/yaz-icu-man.xml
new file mode 100644 (file)
index 0000000..90d78c6
--- /dev/null
@@ -0,0 +1,257 @@
+<!DOCTYPE refentry PUBLIC "-//OASIS//DTD DocBook V4.1//EN"
+ "http://www.oasis-open.org/docbook/xml/4.1/docbookx.dtd"
+[
+     <!ENTITY % local SYSTEM "local.ent">
+     %local;
+     <!ENTITY % entities SYSTEM "entities.ent">
+     %entities;
+     <!ENTITY % idcommon SYSTEM "common/common.ent">
+     %idcommon;
+]>
+<!-- $Id: yaz-icu-man.xml,v 1.1 2007-11-12 11:13:05 adam Exp $ -->
+<refentry id="zoomsh">
+ <refentryinfo>
+  <productname>YAZ</productname>
+  <productnumber>&version;</productnumber>
+ </refentryinfo>
+ <refmeta>
+  <refentrytitle>yaz-icu</refentrytitle>
+  <manvolnum>1</manvolnum>
+ </refmeta>
+ <refnamediv>
+  <refname>yaz-icu</refname>
+  <refpurpose>YAZ ICU utility</refpurpose>
+ </refnamediv>
+
+ <refsynopsisdiv>
+  <cmdsynopsis>
+   <command>yaz-icu</command>
+   <arg choice="opt" rep="repeat">commands</arg>
+   <arg>-c <replaceable>config</replaceable></arg>
+   <arg>-p <replaceable>opt</replaceable></arg>
+   <arg>-x</arg>
+  </cmdsynopsis>
+ </refsynopsisdiv>
+ <refsect1><title>DESCRIPTION</title>
+  <para>
+   <command>yaz-icu</command> is utility which demonstrates 
+   the ICU chain module of yaz. (<filename>yaz/icu.h</filename>).
+  </para>
+ </refsect1>
+
+ <refsect1><title>OPTIONS</title>
+  <variablelist>
+   <varlistentry>
+    <term>-c <replaceable>config</replaceable></term>
+    <listitem><para>
+      Specifies the file containing ICU chain configuration
+      which is XML based.
+     </para></listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>-p <replaceable>type</replaceable></term>
+    <listitem><para>
+      Specifies extra information to be printed about the ICU system.
+      If <replaceable>type</replaceable> is <literal>c</literal>
+      then ICU converters are printed. 
+      If <replaceable>type</replaceable> is <literal>l</literal>
+      available locates are printed.
+      If <replaceable>type</replaceable> is <literal>t</literal>
+      available transliterators are printed.
+     </para></listitem>
+   </varlistentry>
+
+   <varlistentry>
+    <term>-x <replaceable>config</replaceable></term>
+    <listitem><para>
+      Specifies that output should be XML based rather than
+      "text" based.
+     </para></listitem>
+   </varlistentry>
+
+  </variablelist>
+ </refsect1>
+ <refsect1><title>ICU chain configuration</title>
+  <para>
+   The ICU chain configuration speicifies one or more rules to convert
+   text data into tokens. The configuration format is XML based.
+  </para>
+  <para>
+   The toplevel element must be named <literal>icu_chain</literal>.
+   The <literal>icu_chain</literal> element has one required attribute
+   <literal>locale</literal> which specifies the ICU locale to be used
+   in the conversion steps.
+  </para>
+  <para>
+   The <literal>icu_chain</literal> element must include elements where
+   each element specifies a conversion step. The conversion is performed
+   in the order in which the conversion steps are specified.
+   Each conversion element takes one attribute: <literal>rule</literal>
+   which serves as argument to the conversion step.
+  </para>
+  <para>
+   The following conversion elements are available:
+   
+   <variablelist>
+    <varlistentry>
+     <term>casemap</term>
+     <listitem><para>
+       Converts case and rule specifies how:
+
+       <variablelist>
+       <varlistentry>
+        <term>l</term>
+        <listitem>
+         <para>Lowercase using ICU function u_strToLower. </para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry>
+        <term>u</term>
+        <listitem>
+         <para>Upper case using ICU function u_strToUpper.</para>
+        </listitem>
+       </varlistentry>
+       
+       <varlistentry>
+        <term>t</term>
+        <listitem>
+         <para>To title using UCU function u_strToTitle.</para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry>
+        <term>f</term>
+        <listitem>
+         <para>Fold case using ICU function u_strFoldCase.</para>
+        </listitem>
+       </varlistentry>
+       
+       </variablelist>
+      </para></listitem>
+    </varlistentry>
+
+    <varlistentry>
+     <term>display</term>
+     <listitem><para>
+       This is a meta step which specifies that a term/token is to
+       be displayed. This term is retrieved in an application
+       using function icu_chain_token_display (<filename>yaz/icu.h</filename>).
+      </para></listitem>
+    </varlistentry>
+    
+    <varlistentry>
+     <term>transform</term>
+     <listitem><para>
+       Specifies an ICU transform rule. The rule attribute is the
+       custom transformation rule to be used. This is a text based format
+       which is offered by the ICU transform system. See
+       <ulink url="&url.icu.transform;">ICU Transforms</ulink> for
+       more information.
+      </para></listitem>
+    </varlistentry>
+    
+    <varlistentry>
+     <term>tokenize</term>
+     <listitem><para>
+       Breaks / tokenizes a string into components using
+       ICU functions ubrk_open, ubrk_setText, .. . The rule is
+       one of:
+       <variablelist>
+       <varlistentry>
+        <term>l</term>
+        <listitem>
+         <para>Line. ICU: UBRK_LINE.</para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry>
+        <term>s</term>
+        <listitem>
+         <para>Sentence. ICU: UBRK_SENTENCE.</para>
+        </listitem>
+       </varlistentry>
+       
+       <varlistentry>
+        <term>w</term>
+        <listitem>
+         <para>Word. ICU: UBRK_WORD.</para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry>
+        <term>c</term>
+        <listitem>
+         <para>Character. ICU: UBRK_CHARACTER.</para>
+        </listitem>
+       </varlistentry>
+
+       <varlistentry>
+        <term>t</term>
+        <listitem>
+         <para>Title. ICU: UBRK_TITLE.</para>
+        </listitem>
+       </varlistentry>
+
+       </variablelist>
+      </para></listitem>
+    </varlistentry>
+    
+   </variablelist>
+   
+  </para>
+ </refsect1>
+ <refsect1><title>EXAMPLES</title>
+  <para>
+   The following command analyzes text in file <filename>text</filename>
+   using ICU chain configuration <filename>chain.xml</filename>:
+   <screen>
+    cat text | yaz-icu -c chain.xml
+   </screen>
+   The chain.xml might look as follows:
+    <screen><![CDATA[
+<icu_chain locale="en">
+  <transform rule="[:Control:] Any-Remove"/>
+  <tokenize rule="w"/>
+  <transform rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
+  <display/>
+  <casemap rule="l"/>
+</icu_chain>
+]]>
+   </screen>
+  </para>
+ </refsect1>
+ <refsect1><title>SEE ALSO</title>
+  <para>
+   <citerefentry>
+    <refentrytitle>yaz</refentrytitle>
+    <manvolnum>7</manvolnum>
+   </citerefentry>
+  </para>
+  <para>
+   <ulink url="&url.icu;">ICU Home</ulink>
+  </para>
+  <para>
+   <ulink url="&url.icu.transform;">ICU Transforms</ulink>
+  </para>
+ </refsect1>
+</refentry>
+
+<!-- Keep this comment at the end of the file
+Local variables:
+mode: sgml
+sgml-omittag:t
+sgml-shorttag:t
+sgml-minimize-attributes:nil
+sgml-always-quote-attributes:t
+sgml-indent-step:1
+sgml-indent-data:t
+sgml-parent-document:nil
+sgml-local-catalogs: nil
+sgml-namecase-general:t
+End:
+-->