X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=a93373ce8f259656e1c6d37299c33ae9bb483729;hb=b3f079a571e404665620e8eb3bb1dd7db1ff6669;hp=a4688e52933986da810a8d85c9735a3c4bb8a35c;hpb=e42ae39f438cad849a0cea2cca2f9f67e1225dfd;p=idzebra-moved-to-github.git

diff --git a/index/extract.c b/index/extract.c
index a4688e5..a93373c 100644
--- a/index/extract.c
+++ b/index/extract.c
@@ -1798,6 +1798,11 @@ static void extract_add_icu(RecWord *p, zebra_map_t zm)
     zebra_map_tokenize_start(zm, p->term_buf, p->term_len);
     while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0))
     {
+        if (res_len > IT_MAX_WORD)
+        {
+            yaz_log(YLOG_LOG, "Truncating long term %ld", (long) res_len);
+            res_len = IT_MAX_WORD;
+        }
         extract_add_string(p, zm, res_buf, res_len);
         p->seqno++;
     }
@@ -1823,7 +1828,6 @@ static void extract_token_add(RecWord *p)
 {
     ZebraHandle zh = p->extractCtrl->handle;
     zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, p->index_type);
-    WRBUF wrbuf;
 
     if (log_level_details)
     {
@@ -1832,11 +1836,6 @@ static void extract_token_add(RecWord *p)
                 p->index_type, p->index_name,
                 p->seqno, p->term_len, p->term_buf);
     }
-    if ((wrbuf = zebra_replace(zm, 0, p->term_buf, p->term_len)))
-    {
-        p->term_buf = wrbuf_buf(wrbuf);
-        p->term_len = wrbuf_len(wrbuf);
-    }
     if (zebra_maps_is_icu(zm))
     {
         extract_add_icu(p, zm);