Finished.
[idzebra-moved-to-github.git] / util / zebramap.c
index fd62d4a..4361fda 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zebramap.c,v 1.72 2007-11-15 08:53:26 adam Exp $
+/* $Id: zebramap.c,v 1.75 2007-12-13 18:08:26 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -230,8 +230,16 @@ static int parse_command(zebra_maps_t zms, int argc, char **argv,
     }
     else if (!yaz_matchstr(argv[0], "icuchain"))
     {
+        char full_path[1024];
+        if (!yaz_filepath_resolve(argv[1], zms->tabpath, zms->tabroot,
+                                  full_path))
+        {
+            yaz_log(YLOG_WARN, "%s:%d: Could not locate icuchain config '%s'",
+                    fname, lineno, argv[1]);
+            return -1;
+        }
 #if YAZ_HAVE_XML2
-        zm->doc = xmlParseFile(argv[1]);
+        zm->doc = xmlParseFile(full_path);
         if (!zm->doc)
         {
             yaz_log(YLOG_WARN, "%s:%d: Could not load icuchain config '%s'",
@@ -639,6 +647,82 @@ static int tokenize_simple(zebra_map_t zm,
     return 0;
  }
 
+
+int zebra_map_tokenize_next(zebra_map_t zm,
+                            const char **result_buf, size_t *result_len,
+                            const char **display_buf, size_t *display_len)
+{
+    /* Next non-empty token: returns 1 with the out-params set, 0 when the ICU chain has no more. */
+    assert(zm->use_chain);
+#if YAZ_HAVE_ICU
+    if (!zm->icu_chain)
+        return tokenize_simple(zm, result_buf, result_len); /* no ICU chain configured */
+    else
+    {
+        UErrorCode status = U_ZERO_ERROR; /* ICU convention: status starts cleared */
+        while (icu_chain_next_token(zm->icu_chain, &status))
+        {
+            assert(U_SUCCESS(status));
+            *result_buf = icu_chain_token_sortkey(zm->icu_chain);
+            assert(*result_buf);
+            /* the sort key is measured with strlen(), i.e. treated as a C string */
+            *result_len = strlen(*result_buf);
+            /* display_buf is optional; display_len is only written when display_buf is given */
+            if (display_buf)
+            {
+                *display_buf = icu_chain_token_display(zm->icu_chain);
+                if (display_len)
+                    *display_len = strlen(*display_buf);
+            }
+            if (zm->debug)
+            {
+                wrbuf_rewind(zm->print_str);
+                wrbuf_write_escaped(zm->print_str, *result_buf, *result_len);
+                yaz_log(YLOG_LOG, "output %s", wrbuf_cstr(zm->print_str));
+            }
+            /* tokens with an empty sort key are skipped: loop on to the next one */
+            if (**result_buf != '\0')
+                return 1;
+        }
+        assert(U_SUCCESS(status));
+    }
+    return 0;
+#else
+    return tokenize_simple(zm, result_buf, result_len);
+#endif
+}
+
+int zebra_map_tokenize_start(zebra_map_t zm,
+                             const char *buf, size_t len)
+{
+    /* Store buf[0..len) as the text to tokenize and reset simple_off; always returns 0. */
+    assert(zm->use_chain);
+    wrbuf_rewind(zm->input_str);
+    wrbuf_write(zm->input_str, buf, len);
+    zm->simple_off = 0;
+#if YAZ_HAVE_ICU
+    if (zm->icu_chain)
+    {
+        UErrorCode status = U_ZERO_ERROR; /* ICU convention: status starts cleared */
+        if (zm->debug)
+        {
+            wrbuf_rewind(zm->print_str);
+            wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str),
+                                wrbuf_len(zm->input_str));
+            /* print_str now holds the escaped copy of the input for the log line */
+            yaz_log(YLOG_LOG, "input %s",
+                    wrbuf_cstr(zm->print_str));
+        }
+        icu_chain_assign_cstr(zm->icu_chain,
+                              wrbuf_cstr(zm->input_str),
+                              &status);
+        assert(U_SUCCESS(status));
+    }
+#endif
+    return 0;
+}
+
+#if 0
 int zebra_map_tokenize(zebra_map_t zm,
                        const char *buf, size_t len,
                        const char **result_buf, size_t *result_len)
@@ -699,6 +783,7 @@ int zebra_map_tokenize(zebra_map_t zm,
     return tokenize_simple(zm, result_buf, result_len);
 #endif
 }
+#endif
 
 int zebra_maps_is_icu(zebra_map_t zm)
 {