Renamed ICU chain functions to names that relate to their operation
[yaz-moved-to-github.git] / test / tst_icu_I18N.c
index 717e760..07aab52 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: tst_icu_I18N.c,v 1.5 2007-10-24 13:23:34 marc Exp $
+/* $Id: tst_icu_I18N.c,v 1.10 2007-10-29 10:22:23 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
    This file is part of Pazpar2.
@@ -493,15 +493,13 @@ void test_icu_I18N_chain(int argc, char **argv)
     struct icu_chain * chain = 0;
     
 
-    const char * xml_str = "<icu_chain id=\"en:word\" locale=\"en\">"
+    const char * xml_str = "<icu>"
         "<normalize rule=\"[:Control:] Any-Remove\"/>"
         "<tokenize rule=\"l\"/>"
         "<normalize rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
         "<display/>"
         "<casemap rule=\"l\"/>"
-        "<index/>"
-        "<sortkey/>"
-        "</icu_chain>";
+        "</icu>";
 
     
     xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
@@ -511,40 +509,7 @@ void test_icu_I18N_chain(int argc, char **argv)
     // printf("ICU chain:\ninput: '%s'\n", en_str);
 
 
-    chain = icu_chain_xml_config(xml_node, &status);
-
-#if 0
-    chain  = icu_chain_create((uint8_t *) "en:word", (uint8_t *) "en");
-    step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
-                                 (const uint8_t *) "[:Control:] Any-Remove",
-                                 &status);
-    step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
-                                 (const uint8_t *) "s",
-                                 &status);
-    step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
-                                 (const uint8_t *) "l",
-                                 &status);
-    step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
-                                 (const uint8_t *)
-                                 "[[:WhiteSpace:][:Punctuation:]] Any-Remove",
-                                 &status);
-    step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
-                                 (const uint8_t *)"",
-                                 &status);
-/*     step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize, */
-/*                                  (const uint8_t *) "Lower", */
-/*                                  &status); */
-    step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
-                                 (const uint8_t *) "l",
-                                 &status);
-    step = icu_chain_insert_step(chain, ICU_chain_step_type_index,
-                                 (const uint8_t *)"",
-                                 &status);
-/*     step = icu_chain_insert_step(chain, ICU_chain_step_type_sortkey, */
-/*                                  (const uint8_t *)"", */
-/*                                  &status); */
-    
-#endif
+    chain = icu_chain_xml_config(xml_node, (uint8_t *) "en", 0, &status);
 
     xmlFreeDoc(doc);
     YAZ_CHECK(chain);
@@ -554,12 +519,12 @@ void test_icu_I18N_chain(int argc, char **argv)
     while (icu_chain_next_token(chain, &status)){
         ;
         // printf("%d '%s' '%s'\n",
-        //       icu_chain_get_token_count(chain),
-        //       icu_chain_get_norm(chain),
-        //       icu_chain_get_display(chain));
+        //       icu_chain_token_number(chain),
+        //       icu_chain_token_norm(chain),
+        //       icu_chain_token_display(chain));
     }
 
-    YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
+    YAZ_CHECK_EQ(icu_chain_token_number(chain), 7);
 
 
     YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
@@ -567,13 +532,13 @@ void test_icu_I18N_chain(int argc, char **argv)
     while (icu_chain_next_token(chain, &status)){
         ;
         //printf("%d '%s' '%s'\n",
-        //       icu_chain_get_token_count(chain),
-        //       icu_chain_get_norm(chain),
-        //       icu_chain_get_display(chain));
+        //       icu_chain_token_number(chain),
+        //       icu_chain_token_norm(chain),
+        //       icu_chain_token_display(chain));
     }
 
 
-    YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3);
+    YAZ_CHECK_EQ(icu_chain_token_number(chain), 3);
 
     icu_chain_destroy(chain);
 }
@@ -584,7 +549,7 @@ void test_bug_1140(void)
     UErrorCode status = U_ZERO_ERROR;
     struct icu_chain * chain = 0;
     
-    const char * xml_str = "<icu_chain id=\"en:word\" locale=\"en\">"
+    const char * xml_str = "<icu>"
 
         /* if the first rule is normalize instead. Then it works */
 #if 0
@@ -594,16 +559,14 @@ void test_bug_1140(void)
         "<normalize rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
         "<display/>"
         "<casemap rule=\"l\"/>"
-        "<index/>"
-        "<sortkey/>"
-        "</icu_chain>";
+        "</icu>";
 
     
     xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
     xmlNode *xml_node = xmlDocGetRootElement(doc);
     YAZ_CHECK(xml_node);
 
-    chain = icu_chain_xml_config(xml_node, &status);
+    chain = icu_chain_xml_config(xml_node, (uint8_t *) "en", 0, &status);
 
     xmlFreeDoc(doc);
     YAZ_CHECK(chain);
@@ -615,27 +578,27 @@ void test_bug_1140(void)
     while (icu_chain_next_token(chain, &status)){    
         ;
         //printf("%d '%s' '%s'\n",
-        //       icu_chain_get_token_count(chain),
-        //       icu_chain_get_norm(chain),
-        //       icu_chain_get_display(chain));
+        //       icu_chain_token_number(chain),
+        //       icu_chain_token_norm(chain),
+        //       icu_chain_token_display(chain));
 
     }
     
 
-    YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 7);
+    YAZ_CHECK_EQ(icu_chain_token_number(chain), 7);
 
     YAZ_CHECK(icu_chain_assign_cstr(chain, "what is this?", &status));
 
     while (icu_chain_next_token(chain, &status)){
        ;
        //printf("%d '%s' '%s'\n",
-       //        icu_chain_get_token_count(chain),
-       //        icu_chain_get_norm(chain),
-       //        icu_chain_get_display(chain));
+       //        icu_chain_token_number(chain),
+       //        icu_chain_token_norm(chain),
+       //        icu_chain_token_display(chain));
     }
 
     /* we expect 'what' 'is' 'this', i.e. 3 tokens */
-    YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 3);
+    YAZ_CHECK_EQ(icu_chain_token_number(chain), 3);
 
     icu_chain_destroy(chain);
 }
@@ -647,17 +610,16 @@ void test_chain_empty_token(void)
     UErrorCode status = U_ZERO_ERROR;
     struct icu_chain * chain = 0;
 
-    const char * xml_str = "<icu_chain id=\"en:word\" locale=\"en\">"
+    const char * xml_str = "<icu>"
         "<tokenize rule=\"w\"/>"
         "<normalize rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>"
-        "<index/>"
-        "</icu_chain>";
+        "</icu>";
     
     xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
     xmlNode *xml_node = xmlDocGetRootElement(doc);
     YAZ_CHECK(xml_node);
 
-    chain = icu_chain_xml_config(xml_node, &status);
+    chain = icu_chain_xml_config(xml_node, (uint8_t *) "en", 0, &status);
 
     xmlFreeDoc(doc);
     YAZ_CHECK(chain);
@@ -669,12 +631,53 @@ void test_chain_empty_token(void)
     while (icu_chain_next_token(chain, &status)){
         ;
         //printf("%d '%s' '%s'\n",
-        //       icu_chain_get_token_count(chain),
-        //       icu_chain_get_norm(chain),
-        //       icu_chain_get_display(chain));
+        //       icu_chain_token_number(chain),
+        //       icu_chain_token_norm(chain),
+        //       icu_chain_token_display(chain));
+    }
+
+    YAZ_CHECK_EQ(icu_chain_token_number(chain), 8);
+
+    icu_chain_destroy(chain);
+}
+
+void test_chain_empty_chain(void)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    struct icu_chain * chain = 0;
+
+    const char * xml_str = "<icu>"
+        "</icu>";
+    
+    const char * src8 = "some 5487 weired !ยค%&(/& sTuFf";
+    char * dest8 = 0;
+
+    xmlDoc *doc = xmlParseMemory(xml_str, strlen(xml_str));
+    xmlNode *xml_node = xmlDocGetRootElement(doc);
+    YAZ_CHECK(xml_node);
+
+    chain = icu_chain_xml_config(xml_node, (uint8_t *) "en", 0, &status);
+
+    xmlFreeDoc(doc);
+    YAZ_CHECK(chain);
+    
+    YAZ_CHECK(icu_chain_assign_cstr(
+                  chain,  src8,
+                  &status));
+
+    while (icu_chain_next_token(chain, &status)){
+        ;
+        //printf("%d '%s' '%s'\n",
+        //       icu_chain_token_number(chain),
+        //       icu_chain_token_norm(chain),
+        //       icu_chain_token_display(chain));
     }
 
-    YAZ_CHECK_EQ(icu_chain_get_token_count(chain), 8);
+    YAZ_CHECK_EQ(icu_chain_token_number(chain), 1);
+
+    dest8 = icu_chain_token_norm(chain);
+    YAZ_CHECK_EQ(strcmp(src8, dest8), 0);
+    
 
     icu_chain_destroy(chain);
 }
@@ -697,6 +700,7 @@ int main(int argc, char **argv)
     test_icu_I18N_tokenizer(argc, argv);
     test_icu_I18N_chain(argc, argv);
     test_chain_empty_token();
+    test_chain_empty_chain();
     test_bug_1140();
 
 #else /* HAVE_ICU */