X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fextract.c;h=a4688e52933986da810a8d85c9735a3c4bb8a35c;hp=f70d8249dd764fc3179fae1b742f52aa4ce1a730;hb=96e4c5479e111511f5df3531b6648931251b9e5d;hpb=3dc61ce9fd0ee13d348119d4269027afe7a67479 diff --git a/index/extract.c b/index/extract.c index f70d824..a4688e5 100644 --- a/index/extract.c +++ b/index/extract.c @@ -21,6 +21,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA \brief indexes records and extract tokens for indexing and sorting */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include @@ -146,69 +149,10 @@ static void snippet_add_complete_field(RecWord *p, int ord, zebra_map_t zm) { struct snip_rec_info *h = p->extractCtrl->handle; - - const char *b = p->term_buf; - char buf[IT_MAX_WORD+1]; - const char **map = 0; - int i = 0, remain = p->term_len; - const char *start = b; - const char *last = 0; - - if (remain > 0) - map = zebra_maps_input(zm, &b, remain, 1); - - while (remain > 0 && i < IT_MAX_WORD) - { - while (map && *map && **map == *CHR_SPACE) - { - remain = p->term_len - (b - p->term_buf); - - if (i == 0) - start = b; /* set to first non-ws area */ - if (remain > 0) - { - int first = i ? 0 : 1; /* first position */ - - map = zebra_maps_input(zm, &b, remain, first); - } - else - map = 0; - } - if (!map) - break; - - if (i && i < IT_MAX_WORD) - buf[i++] = *CHR_SPACE; - while (map && *map && **map != *CHR_SPACE) - { - const char *cp = *map; - - if (**map == *CHR_CUT) - { - i = 0; - } - else - { - if (i >= IT_MAX_WORD) - break; - while (i < IT_MAX_WORD && *cp) - buf[i++] = *(cp++); - } - last = b; - remain = p->term_len - (b - p->term_buf); - if (remain > 0) - { - map = zebra_maps_input(zm, &b, remain, 0); - } - else - map = 0; - } - } - if (!i) - return; - if (last && start != last && zebra_maps_is_index(zm)) + if (p->term_len && p->term_buf && zebra_maps_is_index(zm)) zebra_snippets_appendn(h->snippets, p->seqno, 0, ord, - start, last - start); + p->term_buf, p->term_len); + p->seqno++; } static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) @@ -226,8 +170,7 @@ static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) while (map) { - char buf[IT_MAX_WORD+1]; - int i, remain; + int remain; /* Skip spaces */ while (map && *map && **map == *CHR_SPACE) @@ -245,17 +188,10 @@ static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) { zebra_snippets_appendn(h->snippets, p->seqno, 1, ord, start, last - start); - } start = last; - - i = 0; while (map && *map && **map != *CHR_SPACE) { - const char *cp = *map; - - while (i < IT_MAX_WORD && *cp) - buf[i++] = *(cp++); remain = p->term_len - (b - p->term_buf); last = b; if (remain > 0) @@ -263,8 +199,8 @@ static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) else map = 0; } - if (!i) - return; + if (start == last) + return ; if (first) { @@ -341,7 +277,6 @@ void extract_snippet(ZebraHandle zh, zebra_snippets *sn, { struct recExtractCtrl extractCtrl; struct snip_rec_info info; - int r; extractCtrl.stream = stream; extractCtrl.first_record = 1; @@ -364,8 +299,7 @@ void extract_snippet(ZebraHandle zh, zebra_snippets *sn, extractCtrl.setStoreData = 0; - r = (*rt->extract)(recTypeClientData, &extractCtrl); - + (*rt->extract)(recTypeClientData, &extractCtrl); } static void searchRecordKey(ZebraHandle zh, @@ -631,7 +565,6 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, char gprefix[128]; char ext[128]; char ext_res[128]; - struct file_read_info *fi = 0; const char *original_record_type = 0; RecType recType; void *recTypeClientData; @@ -697,7 +630,6 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, if (sysno && (action == action_delete || action == action_a_delete)) { streamp = 0; - fi = 0; } else { @@ -1855,6 +1787,7 @@ static void extract_add_complete_field(RecWord *p, zebra_map_t zm) if (!i) return; extract_add_string(p, zm, buf, i); + p->seqno++; } static void extract_add_icu(RecWord *p, zebra_map_t zm)