- wrbuf_rewind(zms->wrbuf_1);
- wrbuf_write(zms->wrbuf_1, input_str, input_len);
- if (!zm || !zm->replace_tokens)
- return zms->wrbuf_1;
-
-#if 0
- logf (LOG_LOG, "in:%.*s:", wrbuf_len(zms->wrbuf_1),
- wrbuf_buf(zms->wrbuf_1));
+#define SE_CHARS ";,.()-/?<> \r\n\t" /* bytes tokenize_simple() treats as token separators */
+
+/*
+ * Fallback tokenizer: pull the next token out of zm->input_str, resuming
+ * at zm->simple_off.
+ *
+ * A token is a maximal run of bytes that are not in SE_CHARS.  Leading
+ * separator bytes are skipped first.  Because strchr() also matches the
+ * terminating NUL of SE_CHARS, a 0 byte in the input counts as a
+ * separator as well.
+ *
+ * Bytes of the token in the range 33..126 are passed through tolower()
+ * and written back, so the input buffer itself is modified.
+ *
+ * zm->simple_off is always advanced to the position where scanning
+ * stopped, whether or not a token was found.
+ *
+ * Returns 1 and stores the token start in *result_buf and its length in
+ * *result_len when a non-empty token was found.  The token points into
+ * the input buffer and no terminator is written after it, so callers
+ * must use *result_len.  Returns 0, leaving both outputs untouched, when
+ * only separators (or nothing) remained.
+ */
+static int tokenize_simple(zebra_map_t zm,
+                           const char **result_buf, size_t *result_len)
+{
+    char *text = wrbuf_buf(zm->input_str);
+    size_t text_len = wrbuf_len(zm->input_str);
+    size_t pos = zm->simple_off;
+    size_t token_start;
+
+    /* step over any separators in front of the token */
+    for (; pos < text_len; pos++)
+    {
+        if (!strchr(SE_CHARS, text[pos]))
+            break;
+    }
+    token_start = pos;
+
+    /* consume the token itself, lower-casing as we go */
+    for (; pos < text_len; pos++)
+    {
+        if (strchr(SE_CHARS, text[pos]))
+            break;
+        if (text[pos] > 32 && text[pos] < 127)
+            text[pos] = tolower(text[pos]);
+    }
+
+    /* remember where to resume on the next call */
+    zm->simple_off = pos;
+
+    if (token_start == pos)
+        return 0;
+
+    *result_buf = text + token_start;
+    *result_len = pos - token_start;
+    return 1;
+}
+
+
+int zebra_map_tokenize_next(zebra_map_t zm,
+ const char **result_buf, size_t *result_len,
+ const char **display_buf, size_t *display_len)
+{
+ assert(zm->use_chain);
+
+#if YAZ_HAVE_ICU
+ if (!zm->icu_chain)
+ return tokenize_simple(zm, result_buf, result_len);
+ else
+ {
+ UErrorCode status;
+ while (icu_chain_next_token(zm->icu_chain, &status))
+ {
+ assert(U_SUCCESS(status));
+ *result_buf = icu_chain_token_sortkey(zm->icu_chain);
+ assert(*result_buf);
+
+ *result_len = strlen(*result_buf);
+
+ if (display_buf)
+ {
+ *display_buf = icu_chain_token_display(zm->icu_chain);
+ if (display_len)
+ *display_len = strlen(*display_buf);
+ }
+ if (zm->debug)
+ {
+ wrbuf_rewind(zm->print_str);
+ wrbuf_write_escaped(zm->print_str, *result_buf, *result_len);
+ yaz_log(YLOG_LOG, "output %s", wrbuf_cstr(zm->print_str));
+ }
+
+ if (**result_buf != '\0')
+ return 1;
+ }
+ assert(U_SUCCESS(status));
+ }
+ return 0;
+#else
+ return tokenize_simple(zm, result_buf, result_len);