X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fextract.c;h=5071fd74fbfae9a8c5f7a25d83fe43babda50017;hp=cbd4973d310e0906720258cf941bac5db617786a;hb=e2e073b5c947e996304ed7d577497af5e9a879ee;hpb=aeea139423b8eaf28a4de53b3d7b2ad1f22284e7 diff --git a/index/extract.c b/index/extract.c index cbd4973..5071fd7 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 1994-2011 Index Data + Copyright (C) Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -144,11 +144,71 @@ struct snip_rec_info { zebra_snippets *snippets; }; +static int parse_complete_field(RecWord *p, zebra_map_t zm, + char *buf) +{ + const char *b = p->term_buf; + const char **map = 0; + int i = 0, remain = p->term_len; + + if (remain > 0) + map = zebra_maps_input(zm, &b, remain, 1); + while (remain > 0 && i < IT_MAX_WORD) + { + while (map && *map && **map == *CHR_SPACE) + { + remain = p->term_len - (b - p->term_buf); + + if (remain > 0) + { + int first = i ? 0 : 1; /* first position */ + map = zebra_maps_input(zm, &b, remain, first); + } + else + map = 0; + } + if (!map) + break; + + if (i && i < IT_MAX_WORD) + buf[i++] = *CHR_SPACE; + while (map && *map && **map != *CHR_SPACE) + { + const char *cp = *map; + + if (**map == *CHR_CUT) + { + i = 0; + } + else + { + if (i >= IT_MAX_WORD) + break; + while (i < IT_MAX_WORD && *cp) + buf[i++] = *(cp++); + } + remain = p->term_len - (b - p->term_buf); + if (remain > 0) + { + map = zebra_maps_input(zm, &b, remain, 0); + } + else + map = 0; + } + } + return i; +} static void snippet_add_complete_field(RecWord *p, int ord, zebra_map_t zm) { struct snip_rec_info *h = p->extractCtrl->handle; + char buf[IT_MAX_WORD+1]; + int i = parse_complete_field(p, zm, buf); + + if (!i) + return; + if (p->term_len && p->term_buf && zebra_maps_is_index(zm)) zebra_snippets_appendn(h->snippets, p->seqno, 0, ord, p->term_buf, p->term_len); @@ -1276,7 +1336,6 @@ void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key, const char *str, size_t slen, NMEM nmem, int level) { char keystr[200]; /* room for zints to print */ - char *dst_term = 0; int ord = CAST_ZINT_TO_INT(key->mem[0]); const char *index_type; int i; @@ -1285,8 +1344,6 @@ void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key, zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, 0/* db */, &string_index); assert(index_type); - zebra_term_untrans_iconv(zh, nmem, index_type, - &dst_term, str); *keystr = '\0'; for (i = 0; i < key->len; i++) { @@ -1315,11 +1372,23 @@ void zebra_it_key_str_dump(ZebraHandle zh, struct it_key *key, } yaz_log(level, "%s%s %s %s", keystr, index_type, string_index, dst_buf); - } else - yaz_log(level, "%s%s %s \"%s\"", keystr, index_type, - string_index, dst_term); + { + char *dst_term = 0; + zebra_term_untrans_iconv(zh, nmem, index_type, &dst_term, str); + if (dst_term) + yaz_log(level, "%s%s %s \"%s\"", keystr, index_type, + string_index, dst_term); + else + { + WRBUF w = wrbuf_alloc(); + wrbuf_write_escaped(w, str, strlen(str)); + yaz_log(level, "%s%s %s %s", keystr, index_type, + string_index, wrbuf_cstr(w)); + wrbuf_destroy(w); + } + } } void extract_rec_keys_log(ZebraHandle zh, int is_insert, @@ -1733,57 +1802,8 @@ static void extract_add_incomplete_field(RecWord *p, zebra_map_t zm) static void extract_add_complete_field(RecWord *p, zebra_map_t zm) { - const char *b = p->term_buf; char buf[IT_MAX_WORD+1]; - const char **map = 0; - int i = 0, remain = p->term_len; - - if (remain > 0) - map = zebra_maps_input(zm, &b, remain, 1); - - while (remain > 0 && i < IT_MAX_WORD) - { - while (map && *map && **map == *CHR_SPACE) - { - remain = p->term_len - (b - p->term_buf); - - if (remain > 0) - { - int first = i ? 0 : 1; /* first position */ - map = zebra_maps_input(zm, &b, remain, first); - } - else - map = 0; - } - if (!map) - break; - - if (i && i < IT_MAX_WORD) - buf[i++] = *CHR_SPACE; - while (map && *map && **map != *CHR_SPACE) - { - const char *cp = *map; - - if (**map == *CHR_CUT) - { - i = 0; - } - else - { - if (i >= IT_MAX_WORD) - break; - while (i < IT_MAX_WORD && *cp) - buf[i++] = *(cp++); - } - remain = p->term_len - (b - p->term_buf); - if (remain > 0) - { - map = zebra_maps_input(zm, &b, remain, 0); - } - else - map = 0; - } - } + int i = parse_complete_field(p, zm, buf); if (!i) return; extract_add_string(p, zm, buf, i);