yaz-icu returns exit code 3 if it was built without ICU support.
[yaz-moved-to-github.git] / util / yaz-icu.c
index df78e91..f14b494 100644 (file)
@@ -1,8 +1,6 @@
-/*
- * Copyright (C) 1995-2007, Index Data ApS
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2008 Index Data
  * See the file LICENSE for details.
- *
- * $Id: yaz-icu.c,v 1.4 2007-10-25 08:32:51 marc Exp $
  */
 
 #if HAVE_CONFIG_H
 
 #include <yaz/options.h>
 
-
-#if HAVE_ICU
+#if YAZ_HAVE_ICU
 
 #include <unicode/ucnv.h>
 #include <unicode/ustring.h>
+#include <unicode/ucol.h> 
+#include <unicode/ubrk.h>
+#include <unicode/utrans.h>
 
-#include <yaz/icu_I18N.h>
+#include <yaz/icu.h>
+#include <yaz/wrbuf.h>
 
 /* commando line and config parameters */
 static struct config_t { 
     char conffile[1024];
     char print[1024];
     int xmloutput;
-    struct icu_chain * chain;
+    yaz_icu_chain_t chain;
     FILE * infile;
     FILE * outfile;
 } config;
@@ -51,14 +52,11 @@ void print_option_error(const struct config_t *p_config)
             "./yaz-icu -p t -x\n"
             "\n"
             "Example ICU chain XML configuration file:\n"
-            "<icu_chain id=\"en:word\" locale=\"en\">\n"
-            "  <normalize rule=\"[:Control:] Any-Remove\"/>\n"
+            "<icu_chain locale=\"en\">\n"
+            "  <transform rule=\"[:Control:] Any-Remove\"/>\n"
             "  <tokenize rule=\"l\"/>\n"
-            "  <normalize rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>\n"
-            "  <display/>\n"
+            "  <transform rule=\"[[:WhiteSpace:][:Punctuation:]] Remove\"/>\n"
             "  <casemap rule=\"l\"/>\n"
-            "  <index/>\n"
-            "  <sortkey/>\n"
             "</icu_chain>\n"
           );
     exit(1);
@@ -139,7 +137,8 @@ static void print_icu_converters(const struct config_t *p_config)
                 ucnv_getDefaultName());
     }
     
-    for(i=0;i<count;i++){
+    for(i=0;i<count;i++)
+    {
         if (p_config->xmloutput)
             fprintf(config.outfile, "<converter id=\"%s\"/>\n", 
                     ucnv_getAvailableName(i));
@@ -156,7 +155,7 @@ static void print_icu_converters(const struct config_t *p_config)
 static void print_icu_transliterators(const struct config_t *p_config)
 {
     int32_t buf_cap = 128;
-    char buf[buf_cap];
+    char buf[128];
     int32_t i;
     int32_t count = utrans_countAvailableIDs();
     
@@ -174,7 +173,8 @@ static void print_icu_transliterators(const struct config_t *p_config)
             fprintf(config.outfile, " %s", buf);
     }
     
-    if (p_config->xmloutput){
+    if (p_config->xmloutput)
+    {
         fprintf(config.outfile, "</transliterators>\n");
     }
     else
@@ -271,8 +271,8 @@ static void print_icu_xml_locales(const struct config_t *p_config)
 
     count = uloc_countAvailable() ;
 
-    if (p_config->xmloutput){
-    
+    if (p_config->xmloutput)
+    {
         fprintf(config.outfile, "<locales count=\"%d\" default=\"%s\" collations=\"%d\">\n", 
                 count, uloc_getDefault(), ucol_countAvailable());
     }
@@ -346,7 +346,8 @@ static void print_icu_xml_locales(const struct config_t *p_config)
                     &status);
 
 
-        if (p_config->xmloutput){
+        if (p_config->xmloutput)
+        {
             fprintf(config.outfile, "<locale id=\"%s\"", uloc_getAvailable(i)); 
             /* fprintf(config.outfile, " locale=\"%s\"", uloc_getAvailable(i)); */
             /* if (strlen(keyword_str)) */
@@ -370,7 +371,8 @@ static void print_icu_xml_locales(const struct config_t *p_config)
                 fprintf(config.outfile, "%s", localname_str);
             fprintf(config.outfile, "</locale>\n"); 
         }
-        else if (1 == p_config->xmloutput){
+        else if (1 == p_config->xmloutput)
+        {
             fprintf(config.outfile, "%s", uloc_getAvailable(i)); 
             fprintf(config.outfile, " | ");
             if (strlen(name_str))
@@ -388,9 +390,10 @@ static void print_icu_xml_locales(const struct config_t *p_config)
     else
         fprintf(config.outfile, "\n");
 
-    if(U_FAILURE(status)) {
+    if(U_FAILURE(status))
+    {
         fprintf(stderr, "ICU Error: %d %s\n", status, u_errorName(status));
-        exit(status);
+        exit(2);
     }
 }
 
@@ -428,7 +431,6 @@ static void process_text_file(const struct config_t *p_config)
  
     xmlDoc *doc = xmlParseFile(config.conffile);  
     xmlNode *xml_node = xmlDocGetRootElement(doc);
-    xmlChar *xml_locale = xmlGetProp(xml_node, (xmlChar *) "locale");
 
     long unsigned int token_count = 0;    
     long unsigned int line_count = 0;    
@@ -436,20 +438,14 @@ static void process_text_file(const struct config_t *p_config)
     UErrorCode status = U_ZERO_ERROR;
     int success = 0;
     
-    if (! xml_node) {   
+    if (! xml_node)
+    {   
         printf("Could not parse XML config file '%s' \n",
                 config.conffile);
         exit (1);
     }
 
-    if (!xml_locale || !strlen((const char *) xml_locale))
-        return;        
-    
-    config.chain = icu_chain_xml_config(xml_node, (uint8_t *) xml_locale, 0,
-                                        &status);
-
-    xmlFree(xml_locale);
-
+    config.chain = icu_chain_xml_config(xml_node, 1, &status);
 
     if (config.chain && U_SUCCESS(status))
         success = 1;
@@ -458,8 +454,6 @@ static void process_text_file(const struct config_t *p_config)
                 config.conffile);
         exit (1);
     }
-    
-
 
     if (p_config->xmloutput)
         fprintf(config.outfile,
@@ -473,26 +467,37 @@ static void process_text_file(const struct config_t *p_config)
         success = icu_chain_assign_cstr(config.chain, line, &status);
         line_count++;
 
-        while (success && icu_chain_next_token(config.chain, &status)){
+        while (success && icu_chain_next_token(config.chain, &status))
+        {
+            WRBUF sw = wrbuf_alloc();
             if (U_FAILURE(status))
                 success = 0;
             else {
+                const char *sortkey = icu_chain_token_sortkey(config.chain);
+                wrbuf_rewind(sw);
+                wrbuf_puts_escaped(sw, sortkey);
                 token_count++;
                 if (p_config->xmloutput)                    
+                {
+                    /* should XML encode this. Bug #1902 */
                     fprintf(config.outfile, 
-                            "<token id=\%lu\" line=\"%lu\""
-                            " norm=\"%s\" display=\"%s\"/>\n",
+                            "<token id=\"%lu\" line=\"%lu\""
+                            " norm=\"%s\" display=\"%s\" sortkey=\"%s\"/>\n",
                             token_count,
                             line_count,
-                            icu_chain_get_norm(config.chain),
-                            icu_chain_get_display(config.chain));
+                            icu_chain_token_norm(config.chain),
+                            icu_chain_token_display(config.chain),
+                            wrbuf_cstr(sw));
+                }
                 else
-                    fprintf(config.outfile, "%lu %lu '%s' '%s'\n",
+                    fprintf(config.outfile, "%lu %lu '%s' '%s' '%s'\n",
                             token_count,
                             line_count,
-                            icu_chain_get_norm(config.chain),
-                            icu_chain_get_display(config.chain));
+                            icu_chain_token_norm(config.chain),
+                            icu_chain_token_display(config.chain),
+                            wrbuf_cstr(sw));
             }
+            wrbuf_destroy(sw);
         }
         
     }
@@ -508,13 +513,13 @@ static void process_text_file(const struct config_t *p_config)
         free(line);
 }
 
-#endif /* HAVE_ICU */
+#endif /* YAZ_HAVE_ICU */
 
 
 int main(int argc, char **argv) 
 {
 
-#if HAVE_ICU
+#if YAZ_HAVE_ICU
 
     read_params(argc, argv, &config);
 
@@ -524,14 +529,15 @@ int main(int argc, char **argv)
     if (config.print && strlen(config.print))
         print_info(&config);
 
-#else /* HAVE_ICU */
+#else /* YAZ_HAVE_ICU */
 
     printf("ICU not available on your system.\n"
            "Please install libicu36-dev and icu-doc or similar, "
            "re-configure and re-compile\n");
 
 
-#endif /* HAVE_ICU */
+    exit(3);
+#endif /* YAZ_HAVE_ICU */
 
     return(0);
 }