New metadata facility "icurule" for normalizing metadata text PAZ-1002
[pazpar2-moved-to-github.git] / test / test_icu.cfg
index f343817..1d8cc9c 100644 (file)
@@ -1,45 +1,51 @@
 <?xml version="1.0" encoding="UTF-8"?>
 <pazpar2 xmlns="http://www.indexdata.com/pazpar2/1.0">
-  <!-- Used by test_http.sh -->
+  <!-- Used by test_icu.sh -->
   <server>
     <listen port="9763"/>
     <proxy host="localhost"/>
     <settings src="z3950_indexdata_com_marc.xml"/>
     
-    <relevance>
-      <icu_chain locale="en">
-       <transform rule="[:Control:] Any-Remove"/>
-       <tokenize rule="l"/>
-       <transform rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
-       <casemap rule="l"/>
-       <stemming rule="english" />
-      </icu_chain>
-    </relevance>
-
-    <sort>
-      <icu_chain locale="en">
-       <transform rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
-       <casemap rule="l"/>
-      </icu_chain>
-    </sort>
-    
-    <icu_chain id="mergekey" locale="en">
+    <icu_chain id="relevance" locale="en">
+      <transform rule="[:Control:] Any-Remove"/>
       <tokenize rule="l"/>
+      <transform rule="[[:WhiteSpace:][:Punctuation:]] Remove"/>
+      <casemap rule="l"/>
+      <stemming rule="english" />
+    </icu_chain>
+
+    <icu_chain id="sort" locale="en">
       <transform rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
       <casemap rule="l"/>
     </icu_chain>
+
+    <mergekey><!-- the deprecated format -->    
+      <icu_chain locale="en">
+        <tokenize rule="l"/>
+        <transform rule="[[:Control:][:WhiteSpace:][:Punctuation:]] Remove"/>
+        <casemap rule="l"/>
+      </icu_chain>
+    </mergekey>
     
     <icu_chain id="facet" locale="en">
-      <transform rule="Title"/>
+      <transliterate>[[:WhiteSpace:][,.!;]]* } [$] > ;</transliterate>
     </icu_chain>
 
     <icu_chain id="mychain" locale="en">
+      <transliterate>[[:WhiteSpace:][,.!;]]* } [$] > ;</transliterate>
       <display/>
-      <transform rule="Title"/>
+      <casemap rule="l"/>
     </icu_chain>
-    
+
+    <icu_chain id="facet-author" locale="en">
+      <transliterate>
+       [,]* } [$] > ;
+      </transliterate>
+    </icu_chain>
+
     <service>
       <timeout session="30" z3950_operation="20" z3950_session="40"/>
+      <rank debug="yes"/>
       <metadata name="url" merge="unique"/>
       <metadata name="title" brief="yes" sortkey="skiparticle" merge="longest" rank="6"/>
       <metadata name="title-remainder" brief="yes" merge="longest" rank="5"/>
       <metadata name="date" brief="yes" sortkey="numeric" type="year" merge="range"
                termlist="yes"/>
       <metadata name="author" brief="yes" termlist="yes"
-                merge="longest" rank="2" facetrule="mychain"/>
+                merge="longest" rank="2" facetrule="facet-author" />
       <metadata name="subject" merge="unique" termlist="yes" rank="3"/>
       <metadata name="id"/>
       <metadata name="lccn" merge="unique"/>
-      <metadata name="description" brief="yes" merge="longest" rank="3"/>
+      <metadata name="description" brief="yes" merge="longest" rank="3" icurule="mychain"/>
       
       <metadata name="test-usersetting" brief="yes" setting="postproc"/>
       <metadata name="test" setting="parameter"/>