Bounds check terms based on ICU norm
[idzebra-moved-to-github.git] / index / extract.c
index a4688e5..a93373c 100644 (file)
@@ -1798,6 +1798,11 @@ static void extract_add_icu(RecWord *p, zebra_map_t zm)
     zebra_map_tokenize_start(zm, p->term_buf, p->term_len);
     while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0))
     {
+        if (res_len > IT_MAX_WORD)
+        {
+            yaz_log(YLOG_LOG, "Truncating long term %ld", (long) res_len);
+            res_len = IT_MAX_WORD;
+        }
         extract_add_string(p, zm, res_buf, res_len);
         p->seqno++;
     }
@@ -1823,7 +1828,6 @@ static void extract_token_add(RecWord *p)
 {
     ZebraHandle zh = p->extractCtrl->handle;
     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, p->index_type);
-    WRBUF wrbuf;
 
     if (log_level_details)
     {
@@ -1832,11 +1836,6 @@ static void extract_token_add(RecWord *p)
                 p->index_type, p->index_name, 
                 p->seqno, p->term_len, p->term_buf);
     }
-    if ((wrbuf = zebra_replace(zm, 0, p->term_buf, p->term_len)))
-    {
-        p->term_buf = wrbuf_buf(wrbuf);
-        p->term_len = wrbuf_len(wrbuf);
-    }
     if (zebra_maps_is_icu(zm))
     {
         extract_add_icu(p, zm);