X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fextract.c;h=a93373ce8f259656e1c6d37299c33ae9bb483729;hp=d593ef8b06f005bfd48bfd6b75dc722bbcb1157b;hb=78f1b5bb001da9494f6a4af717182ff7a835c9cd;hpb=f1ab869126004c5cb02bdfac02c3a71d3eed826c diff --git a/index/extract.c b/index/extract.c index d593ef8..a93373c 100644 --- a/index/extract.c +++ b/index/extract.c @@ -149,69 +149,9 @@ static void snippet_add_complete_field(RecWord *p, int ord, zebra_map_t zm) { struct snip_rec_info *h = p->extractCtrl->handle; - - const char *b = p->term_buf; - char buf[IT_MAX_WORD+1]; - const char **map = 0; - int i = 0, remain = p->term_len; - const char *start = b; - const char *last = 0; - - if (remain > 0) - map = zebra_maps_input(zm, &b, remain, 1); - - while (remain > 0 && i < IT_MAX_WORD) - { - while (map && *map && **map == *CHR_SPACE) - { - remain = p->term_len - (b - p->term_buf); - - if (i == 0) - start = b; /* set to first non-ws area */ - if (remain > 0) - { - int first = i ? 0 : 1; /* first position */ - - map = zebra_maps_input(zm, &b, remain, first); - } - else - map = 0; - } - if (!map) - break; - - if (i && i < IT_MAX_WORD) - buf[i++] = *CHR_SPACE; - while (map && *map && **map != *CHR_SPACE) - { - const char *cp = *map; - - if (**map == *CHR_CUT) - { - i = 0; - } - else - { - if (i >= IT_MAX_WORD) - break; - while (i < IT_MAX_WORD && *cp) - buf[i++] = *(cp++); - } - last = b; - remain = p->term_len - (b - p->term_buf); - if (remain > 0) - { - map = zebra_maps_input(zm, &b, remain, 0); - } - else - map = 0; - } - } - if (!i) - return; - if (last && start != last && zebra_maps_is_index(zm)) + if (p->term_len && p->term_buf && zebra_maps_is_index(zm)) zebra_snippets_appendn(h->snippets, p->seqno, 0, ord, - start, last - start); + p->term_buf, p->term_len); p->seqno++; } @@ -230,8 +170,7 @@ static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) while (map) { - char buf[IT_MAX_WORD+1]; - int i, remain; + int remain; /* Skip spaces */ while (map && *map && **map == *CHR_SPACE) @@ -249,17 +188,10 @@ static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) { zebra_snippets_appendn(h->snippets, p->seqno, 1, ord, start, last - start); - } start = last; - - i = 0; while (map && *map && **map != *CHR_SPACE) { - const char *cp = *map; - - while (i < IT_MAX_WORD && *cp) - buf[i++] = *(cp++); remain = p->term_len - (b - p->term_buf); last = b; if (remain > 0) @@ -267,8 +199,8 @@ static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) else map = 0; } - if (!i) - return; + if (start == last) + return ; if (first) { @@ -345,7 +277,6 @@ void extract_snippet(ZebraHandle zh, zebra_snippets *sn, { struct recExtractCtrl extractCtrl; struct snip_rec_info info; - int r; extractCtrl.stream = stream; extractCtrl.first_record = 1; @@ -368,8 +299,7 @@ void extract_snippet(ZebraHandle zh, zebra_snippets *sn, extractCtrl.setStoreData = 0; - r = (*rt->extract)(recTypeClientData, &extractCtrl); - + (*rt->extract)(recTypeClientData, &extractCtrl); } static void searchRecordKey(ZebraHandle zh, @@ -635,7 +565,6 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, char gprefix[128]; char ext[128]; char ext_res[128]; - struct file_read_info *fi = 0; const char *original_record_type = 0; RecType recType; void *recTypeClientData; @@ -701,7 +630,6 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, if (sysno && (action == action_delete || action == action_a_delete)) { streamp = 0; - fi = 0; } else { @@ -1870,6 +1798,11 @@ static void extract_add_icu(RecWord *p, zebra_map_t zm) zebra_map_tokenize_start(zm, p->term_buf, p->term_len); while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0)) { + if (res_len > IT_MAX_WORD) + { + yaz_log(YLOG_LOG, "Truncating long term %ld", (long) res_len); + res_len = IT_MAX_WORD; + } extract_add_string(p, zm, res_buf, res_len); p->seqno++; } @@ -1895,7 +1828,6 @@ static void extract_token_add(RecWord *p) { ZebraHandle zh = p->extractCtrl->handle; zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, p->index_type); - WRBUF wrbuf; if (log_level_details) { @@ -1904,11 +1836,6 @@ static void extract_token_add(RecWord *p) p->index_type, p->index_name, p->seqno, p->term_len, p->term_buf); } - if ((wrbuf = zebra_replace(zm, 0, p->term_buf, p->term_len))) - { - p->term_buf = wrbuf_buf(wrbuf); - p->term_len = wrbuf_len(wrbuf); - } if (zebra_maps_is_icu(zm)) { extract_add_icu(p, zm);