ICU term lists are handled for search.
[idzebra-moved-to-github.git] / util / zebramap.c
index fd62d4a..d2cd37f 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zebramap.c,v 1.72 2007-11-15 08:53:26 adam Exp $
+/* $Id: zebramap.c,v 1.73 2007-12-07 14:09:10 adam Exp $
    Copyright (C) 1995-2007
    Index Data ApS
 
    Copyright (C) 1995-2007
    Index Data ApS
 
@@ -639,6 +639,75 @@ static int tokenize_simple(zebra_map_t zm,
     return 0;
  }
 
     return 0;
  }
 
+
+/**
+ * Produce the next token of the input previously handed to
+ * zebra_map_tokenize_start().
+ *
+ * When ICU support is compiled in and the map has an ICU chain, tokens are
+ * pulled from that chain and the sort key of the first non-empty token is
+ * reported. In every other case the call is forwarded to tokenize_simple().
+ *
+ * \param zm          map handle; zm->use_chain must be set (asserted)
+ * \param result_buf  receives a pointer to the token bytes
+ * \param result_len  receives the length of the token in bytes
+ * \return on the ICU path: 1 when a token was stored, 0 when the chain is
+ *         exhausted; otherwise the result of tokenize_simple()
+ */
+int zebra_map_tokenize_next(zebra_map_t zm,
+                            const char **result_buf, size_t *result_len)
+{
+    assert(zm->use_chain);
+
+#if YAZ_HAVE_ICU
+    if (!zm->icu_chain)
+        return tokenize_simple(zm, result_buf, result_len);
+    else
+    {
+        /* ICU convention: the caller clears the error code before it is
+           passed in, so the asserts below never read an indeterminate
+           value. */
+        UErrorCode status = U_ZERO_ERROR;
+
+        while (icu_chain_next_token(zm->icu_chain, &status))
+        {
+            assert(U_SUCCESS(status));
+            *result_buf = icu_chain_token_sortkey(zm->icu_chain);
+            assert(*result_buf);
+
+            *result_len = strlen(*result_buf);
+
+            if (zm->debug)
+            {
+                wrbuf_rewind(zm->print_str);
+                wrbuf_write_escaped(zm->print_str, *result_buf, *result_len);
+                yaz_log(YLOG_LOG, "output %s", wrbuf_cstr(zm->print_str));
+            }
+
+            /* Tokens with an empty sort key are skipped. */
+            if (**result_buf != '\0')
+                return 1;
+        }
+        assert(U_SUCCESS(status));
+    }
+    return 0;
+#else
+    return tokenize_simple(zm, result_buf, result_len);
+#endif
+}
+
+/**
+ * Begin tokenizing a new input buffer.
+ *
+ * The input is copied into zm->input_str and the simple tokenizer offset
+ * (zm->simple_off) is reset to 0. When ICU support is compiled in and the
+ * map has an ICU chain, the copied input is also assigned to that chain.
+ * Tokens are then fetched with zebra_map_tokenize_next().
+ *
+ * \param zm   map handle; zm->use_chain must be set (asserted)
+ * \param buf  input bytes
+ * \param len  number of bytes in buf
+ * \return always 0
+ */
+int zebra_map_tokenize_start(zebra_map_t zm,
+                             const char *buf, size_t len)
+{
+    assert(zm->use_chain);
+
+    wrbuf_rewind(zm->input_str);
+    wrbuf_write(zm->input_str, buf, len);
+    zm->simple_off = 0;
+#if YAZ_HAVE_ICU
+    if (zm->icu_chain)
+    {
+        /* ICU convention: the caller clears the error code before it is
+           passed in, so the assert below never reads an indeterminate
+           value. */
+        UErrorCode status = U_ZERO_ERROR;
+
+        if (zm->debug)
+        {
+            wrbuf_rewind(zm->print_str);
+            wrbuf_write_escaped(zm->print_str, wrbuf_buf(zm->input_str),
+                                wrbuf_len(zm->input_str));
+
+            yaz_log(YLOG_LOG, "input %s",
+                    wrbuf_cstr(zm->print_str));
+        }
+        icu_chain_assign_cstr(zm->icu_chain,
+                              wrbuf_cstr(zm->input_str),
+                              &status);
+        assert(U_SUCCESS(status));
+    }
+#endif
+    return 0;
+}
+
+#if 0
 int zebra_map_tokenize(zebra_map_t zm,
                        const char *buf, size_t len,
                        const char **result_buf, size_t *result_len)
 int zebra_map_tokenize(zebra_map_t zm,
                        const char *buf, size_t len,
                        const char **result_buf, size_t *result_len)
@@ -699,6 +768,7 @@ int zebra_map_tokenize(zebra_map_t zm,
     return tokenize_simple(zm, result_buf, result_len);
 #endif
 }
     return tokenize_simple(zm, result_buf, result_len);
 #endif
 }
+#endif
 
 int zebra_maps_is_icu(zebra_map_t zm)
 {
 
 int zebra_maps_is_icu(zebra_map_t zm)
 {