X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fextract.c;h=be000c2dd8ae2b11732e94979cbc380bf5d5cc0a;hp=cc54d67cc48106029ef404f63c5581f3b04ad7f5;hb=4097bac1dce126e79ce1550fad8b2034a311ff80;hpb=34b6f9669f88fef08ad911000f2b66fb2c3429b9 diff --git a/index/extract.c b/index/extract.c index cc54d67..be000c2 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.272 2007-12-10 17:06:08 adam Exp $ +/* $Id: extract.c,v 1.275 2007-12-20 11:15:42 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -265,6 +265,26 @@ static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) } +static void snippet_add_icu(RecWord *p, int ord, zebra_map_t zm) +{ + struct snip_rec_info *h = p->extractCtrl->handle; + + const char *res_buf = 0; + size_t res_len = 0; + + const char *display_buf = 0; + size_t display_len = 0; + + zebra_map_tokenize_start(zm, p->term_buf, p->term_len); + while (zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + { + zebra_snippets_appendn(h->snippets, p->seqno, 0, ord, + display_buf, display_len); + p->seqno++; + } +} + static void snippet_token_add(RecWord *p) { struct snip_rec_info *h = p->extractCtrl->handle; @@ -277,10 +297,15 @@ static void snippet_token_add(RecWord *p) int ch = zebraExplain_lookup_attr_str( zei, zinfo_index_category_index, p->index_type, p->index_name); - if (zebra_maps_is_complete(zm)) - snippet_add_complete_field(p, ch, zm); + if (zebra_maps_is_icu(zm)) + snippet_add_icu(p, ch, zm); else - snippet_add_incomplete_field(p, ch, zm); + { + if (zebra_maps_is_complete(zm)) + snippet_add_complete_field(p, ch, zm); + else + snippet_add_incomplete_field(p, ch, zm); + } } } @@ -560,7 +585,7 @@ ZEBRA_RES zebra_extract_records_stream(ZebraHandle zh, ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, - int deleteFlag) + enum zebra_recctrl_action_t action) { ZEBRA_RES r = ZEBRA_OK; int i, fd; @@ -630,7 +655,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, default: yaz_log(YLOG_WARN, "Bad filter version: %s", zh->m_record_type); } - if (sysno && deleteFlag) + if (sysno && (action == action_delete || action == action_a_delete)) { streamp = 0; fi = 0; @@ -658,8 +683,7 @@ ZEBRA_RES zebra_extract_file(ZebraHandle zh, zint *sysno, const char *fname, zebra_create_stream_fd(streamp, fd, 0); } r = zebra_extract_records_stream(zh, streamp, - deleteFlag ? - action_delete : action_update, + action, 0, /* tst_mode */ zh->m_record_type, sysno, @@ -944,14 +968,21 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, if (! *sysno) { - /* new record */ + /* new record AKA does not exist already */ if (action == action_delete) { - yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, - pr_fname, (zint) start_offset); + yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); yaz_log(YLOG_WARN, "cannot delete record above (seems new)"); return ZEBRA_FAIL; } + else if (action == action_a_delete) + { + if (show_progress) + yaz_log(YLOG_LOG, "delete %s %s " ZINT_FORMAT, recordType, + pr_fname, (zint) start_offset); + return ZEBRA_OK; + } else if (action == action_replace) { yaz_log(YLOG_LOG, "update %s %s " ZINT_FORMAT, recordType, @@ -1044,7 +1075,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, extract_flush_record_keys(zh, *sysno, 0, delkeys, recordAttr->staticrank); #endif - if (action == action_delete) + if (action == action_delete || action == action_a_delete) { /* record going to be deleted */ #if FLUSH2 @@ -1456,7 +1487,7 @@ void extract_flush_record_keys2(ZebraHandle zh, zint sysno, } -ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, +ZEBRA_RES zebra_rec_keys_to_snippets1(ZebraHandle zh, zebra_rec_keys_t reckeys, zebra_snippets *snippets) { @@ -1725,32 +1756,13 @@ static void extract_add_complete_field(RecWord *p, zebra_map_t zm) static void extract_add_icu(RecWord *p, zebra_map_t zm) { - struct it_key key; const char *res_buf = 0; size_t res_len = 0; - ZebraHandle zh = p->extractCtrl->handle; - - int cat = zinfo_index_category_index; - int ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, - p->index_type, p->index_name); - if (ch < 0) - ch = zebraExplain_add_attr_str(zh->reg->zei, cat, - p->index_type, p->index_name); + zebra_map_tokenize_start(zm, p->term_buf, p->term_len); while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0)) { - int i = 0; - key.mem[i++] = ch; - key.mem[i++] = p->record_id; - key.mem[i++] = p->section_id; - - if (zh->m_segment_indexing) - key.mem[i++] = p->segment; - key.mem[i++] = p->seqno; - key.len = i; - - zebra_rec_keys_write(zh->reg->keys, res_buf, res_len, &key); - + extract_add_string(p, zm, res_buf, res_len); p->seqno++; } } @@ -1760,8 +1772,8 @@ static void extract_add_icu(RecWord *p, zebra_map_t zm) \param p token data to be indexed Call sequence: - extract_token - zebra_add_{in}_complete + extract_token_add + extract_add_{in}_complete / extract_add_icu extract_add_string extract_add_index_string