Happy new year
[yaz-moved-to-github.git] / util / yaz-icu.c
index c510617..25e4de0 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2009 Index Data
+ * Copyright (C) 1995-2011 Index Data
  * See the file LICENSE for details.
  */
 
@@ -138,19 +138,20 @@ static void print_icu_converters(const struct config_t *p_config)
     if (p_config->xmloutput)
         fprintf(config.outfile, "<converters count=\"%d\" default=\"%s\">\n",
                 count, ucnv_getDefaultName());
-    else {    
+    else
+    {    
         fprintf(config.outfile, "Available ICU converters: %d\n", count);
         fprintf(config.outfile, "Default ICU Converter is: '%s'\n", 
                 ucnv_getDefaultName());
     }
     
-    for(i=0;i<count;i++)
+    for (i = 0; i < count; i++)
     {
         if (p_config->xmloutput)
             fprintf(config.outfile, "<converter id=\"%s\"/>\n", 
                     ucnv_getAvailableName(i));
         else     
-            fprintf(config.outfile, "%s ", ucnv_getAvailableName(i));
+            fprintf(config.outfile, "%s\n", ucnv_getAvailableName(i));
     }
     
     if (p_config->xmloutput)
@@ -161,29 +162,27 @@ static void print_icu_converters(const struct config_t *p_config)
 
 static void print_icu_transliterators(const struct config_t *p_config)
 {
-    int32_t buf_cap = 128;
-    char buf[128];
-    int32_t i;
-    int32_t count = utrans_countAvailableIDs();
-    
+    UErrorCode status = U_ZERO_ERROR; /* ICU calls bail out at once if the incoming status is a failure */
+    UEnumeration *en = utrans_openIDs(&status);
+    int32_t count = uenum_count(en, &status);
+    const char *name;
+    int32_t length;
+
     if (p_config->xmloutput)
         fprintf(config.outfile, "<transliterators count=\"%d\">\n",  count);
     else 
         fprintf(config.outfile, "Available ICU transliterators: %d\n", count);
-    
-    for(i = 0; i <count; i++)
+
+    while ((name = uenum_next(en, &length, &status)))
     {
-        utrans_getAvailableID(i, buf, buf_cap);
         if (p_config->xmloutput)
-            fprintf(config.outfile, "<transliterator id=\"%s\"/>\n", buf);
+            fprintf(config.outfile, "<transliterator id=\"%s\"/>\n", name);
         else
-            fprintf(config.outfile, " %s", buf);
+            fprintf(config.outfile, "%s\n", name);
     }
-    
+    uenum_close(en);
     if (p_config->xmloutput)
-    {
         fprintf(config.outfile, "</transliterators>\n");
-    }
     else
     {
         fprintf(config.outfile, "\n\nUnicode Set Patterns:\n"
@@ -223,10 +222,8 @@ static void print_icu_transliterators(const struct config_t *p_config)
                 "   [A-Za-z]; Lower(); Latin-Katakana; Katakana-Hiragana (transforms latin and katagana to hiragana)\n"
                 "   [[:separator:][:start punctuation:][:initial punctuation:]] Remove \n"
                 "\n"
-                "see http://icu.sourceforge.net/userguide/Transform.html\n"
-                "    http://www.unicode.org/Public/UNIDATA/UCD.html\n"
-                "    http://icu.sourceforge.net/userguide/Transform.html\n"
-                "    http://icu.sourceforge.net/userguide/TransformRule.html\n"
+                "see http://userguide.icu-project.org/transforms/general\n"
+                "    http://www.unicode.org/reports/tr44/\n"
             );
         
         
@@ -283,8 +280,13 @@ static void print_icu_xml_locales(const struct config_t *p_config)
         fprintf(config.outfile, "<locales count=\"%d\" default=\"%s\" collations=\"%d\">\n", 
                 count, uloc_getDefault(), ucol_countAvailable());
     }
+    else
+    {
+        fprintf(config.outfile, "Available ICU locales: %d\n", count);
+        fprintf(config.outfile, "Default locale is: %s\n",  uloc_getDefault());
+    }
   
-    for(i=0;i<count;i++) 
+    for (i = 0; i < count; i++) 
     {
 
         keyword_len 
@@ -390,14 +392,14 @@ static void print_icu_xml_locales(const struct config_t *p_config)
             fprintf(config.outfile, "\n");
         }
         else
-            fprintf(config.outfile, "%s ", uloc_getAvailable(i));
+            fprintf(config.outfile, "%s\n", uloc_getAvailable(i));
     }
     if (p_config->xmloutput)
         fprintf(config.outfile, "</locales>\n");
     else
         fprintf(config.outfile, "\n");
 
-    if(U_FAILURE(status))
+    if (U_FAILURE(status))
     {
         fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status));
         exit(2);
@@ -443,7 +445,6 @@ static void process_text_file(const struct config_t *p_config)
     long unsigned int line_count = 0;    
     
     UErrorCode status = U_ZERO_ERROR;
-    int success = 0;
     
     if (!xml_node)
     {   
@@ -454,11 +455,12 @@ static void process_text_file(const struct config_t *p_config)
 
     config.chain = icu_chain_xml_config(xml_node, 1, &status);
 
-    if (config.chain && U_SUCCESS(status))
-        success = 1;
-    else {   
+    if (!config.chain || !U_SUCCESS(status))
+    {   
         printf("Could not set up ICU chain from config file '%s' \n",
                 config.conffile);
+        if (!U_SUCCESS(status))
+            printf("ICU Error: %d %s\n", status, u_errorName(status));
         exit(1);
     }
 
@@ -471,33 +473,43 @@ static void process_text_file(const struct config_t *p_config)
     /* read input lines for processing */
     while ((line=fgets(linebuf, sizeof(linebuf)-1, config.infile)))
     {
-        success = icu_chain_assign_cstr(config.chain, line, &status);
+        WRBUF sw = wrbuf_alloc();
+        WRBUF cdata = wrbuf_alloc();
+        int success = icu_chain_assign_cstr(config.chain, line, &status);
         line_count++;
 
         while (success && icu_chain_next_token(config.chain, &status))
         {
-            WRBUF sw = wrbuf_alloc();
             if (U_FAILURE(status))
                 success = 0;
-            else {
+            else
+            {
                 const char *sortkey = icu_chain_token_sortkey(config.chain);
                 wrbuf_rewind(sw);
                 wrbuf_puts_escaped(sw, sortkey);
                 token_count++;
                 if (p_config->xmloutput)                    
                 {
-                    /* should XML encode this. Bug #1902 */
                     fprintf(config.outfile, 
-                            "<token id=\"%lu\" line=\"%lu\""
-                            " norm=\"%s\" display=\"%s\"",
-                            token_count,
-                            line_count,
-                            icu_chain_token_norm(config.chain),
-                            icu_chain_token_display(config.chain));
+                            "<token id=\"%lu\" line=\"%lu\"",
+                            token_count, line_count);
+
+                    wrbuf_rewind(cdata);
+                    wrbuf_xmlputs(cdata, icu_chain_token_norm(config.chain));
+                    fprintf(config.outfile, " norm=\"%s\"",
+                            wrbuf_cstr(cdata));
+
+                    wrbuf_rewind(cdata);
+                    wrbuf_xmlputs(cdata, icu_chain_token_display(config.chain));
+                    fprintf(config.outfile, " display=\"%s\"",
+                            wrbuf_cstr(cdata));
+                    
                     if (p_config->sortoutput)
                     {
+                        wrbuf_rewind(cdata);
+                        wrbuf_xmlputs(cdata, wrbuf_cstr(sw));
                         fprintf(config.outfile, " sortkey=\"%s\"",
-                                wrbuf_cstr(sw));
+                                wrbuf_cstr(cdata));
                     }
                     fprintf(config.outfile, "/>\n");
                 }
@@ -515,16 +527,16 @@ static void process_text_file(const struct config_t *p_config)
                     fprintf(config.outfile, "\n");
                 }
             }
-            wrbuf_destroy(sw);
         }
-        
+        wrbuf_destroy(sw);
+        wrbuf_destroy(cdata);
     }
 
     if (p_config->xmloutput)
-        fprintf(config.outfile, 
+        fprintf(config.outfile,
                 "</tokens>\n"
                 "</icu>\n");
-
+    
     icu_chain_destroy(config.chain);
     xmlFreeDoc(doc);
     if (line)
@@ -550,7 +562,7 @@ int main(int argc, char **argv)
 #else /* YAZ_HAVE_ICU */
 
     printf("ICU not available on your system.\n"
-           "Please install libicu36-dev and icu-doc or similar, "
+           "Please install libicu-dev and icu-doc or similar, "
            "re-configure and re-compile\n");