X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=10c4adcc169578a0fd61c5ea51a5d6530518c4e7;hb=29031807a95fde753e12d606c7db2e31acc5f645;hp=167793a66a20c8e8aefce1e8a8c9b4ff742bbabc;hpb=99dfd244ebcc60d73eb50500f67207ae1aa591d8;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index 167793a..10c4adc 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.269 2007-11-08 21:21:58 adam Exp $ +/* $Id: extract.c,v 1.274 2007-12-17 08:44:07 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -265,6 +265,26 @@ static void snippet_add_incomplete_field(RecWord *p, int ord, zebra_map_t zm) } +static void snippet_add_icu(RecWord *p, int ord, zebra_map_t zm) +{ + struct snip_rec_info *h = p->extractCtrl->handle; + + const char *res_buf = 0; + size_t res_len = 0; + + const char *display_buf = 0; + size_t display_len = 0; + + zebra_map_tokenize_start(zm, p->term_buf, p->term_len); + while (zebra_map_tokenize_next(zm, &res_buf, &res_len, + &display_buf, &display_len)) + { + zebra_snippets_appendn(h->snippets, p->seqno, 0, ord, + display_buf, display_len); + p->seqno++; + } +} + static void snippet_token_add(RecWord *p) { struct snip_rec_info *h = p->extractCtrl->handle; @@ -277,10 +297,15 @@ static void snippet_token_add(RecWord *p) int ch = zebraExplain_lookup_attr_str( zei, zinfo_index_category_index, p->index_type, p->index_name); - if (zebra_maps_is_complete(zm)) - snippet_add_complete_field(p, ch, zm); + if (zebra_maps_is_icu(zm)) + snippet_add_icu(p, ch, zm); else - snippet_add_incomplete_field(p, ch, zm); + { + if (zebra_maps_is_complete(zm)) + snippet_add_complete_field(p, ch, zm); + else + snippet_add_incomplete_field(p, ch, zm); + } } } @@ -520,13 +545,29 @@ struct recordLogInfo { struct recordGroup *rGroup; }; -static void all_matches_add(struct recExtractCtrl *ctrl) +/** \brief add the always-matches index entry and map to real record ID + \param ctrl record control + \param record_id custom record ID + \param sysno system record ID + + This function serves two purposes.. It adds the always matches + entry and makes a pointer from the custom record ID (if defined) + back to the system record ID (sysno) + See zebra_recid_to_sysno . + */ +static void all_matches_add(struct recExtractCtrl *ctrl, zint record_id, + zint sysno) { RecWord word; extract_init(ctrl, &word); + word.record_id = record_id; + /* we use the seqno as placeholder for a way to get back to + record database from _ALLRECORDS.. This is used if a custom + RECORD was defined */ + word.seqno = sysno; word.index_name = "_ALLRECORDS"; word.index_type = "w"; - word.seqno = 1; + extract_add_index_string(&word, zinfo_index_category_alwaysmatches, "", 0); } @@ -874,8 +915,6 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, else end_offset = stream->tellf(stream); - all_matches_add(&extractCtrl); - if (extractCtrl.match_criteria[0]) match_criteria = extractCtrl.match_criteria; } @@ -919,6 +958,7 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, } } } + if (zebra_rec_keys_empty(zh->reg->keys)) { /* the extraction process returned no information - the record @@ -951,6 +991,15 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, *sysno = rec->sysno; + + if (stream) + { + all_matches_add(&extractCtrl, + zebra_rec_keys_get_custom_record_id(zh->reg->keys), + *sysno); + } + + recordAttr = rec_init_attr(zh->reg->zei, rec); if (extractCtrl.staticrank < 0) { @@ -992,6 +1041,13 @@ ZEBRA_RES zebra_extract_record_stream(ZebraHandle zh, rec = rec_get(zh->reg->records, *sysno); assert(rec); + + if (stream) + { + all_matches_add(&extractCtrl, + zebra_rec_keys_get_custom_record_id(zh->reg->keys), + *sysno); + } recordAttr = rec_init_attr(zh->reg->zei, rec); @@ -1425,7 +1481,7 @@ void extract_flush_record_keys2(ZebraHandle zh, zint sysno, } -ZEBRA_RES zebra_rec_keys_to_snippets(ZebraHandle zh, +ZEBRA_RES zebra_rec_keys_to_snippets1(ZebraHandle zh, zebra_rec_keys_t reckeys, zebra_snippets *snippets) { @@ -1694,32 +1750,14 @@ static void extract_add_complete_field(RecWord *p, zebra_map_t zm) static void extract_add_icu(RecWord *p, zebra_map_t zm) { - struct it_key key; const char *res_buf = 0; size_t res_len = 0; - ZebraHandle zh = p->extractCtrl->handle; - int r = zebra_map_tokenize(zm, p->term_buf, p->term_len, - &res_buf, &res_len); - int cat = zinfo_index_category_index; - int ch = zebraExplain_lookup_attr_str(zh->reg->zei, cat, p->index_type, p->index_name); - if (ch < 0) - ch = zebraExplain_add_attr_str(zh->reg->zei, cat, p->index_type, p->index_name); - while (r) - { - int i = 0; - key.mem[i++] = ch; - key.mem[i++] = p->record_id; - key.mem[i++] = p->section_id; - - if (zh->m_segment_indexing) - key.mem[i++] = p->segment; - key.mem[i++] = p->seqno; - key.len = i; - zebra_rec_keys_write(zh->reg->keys, res_buf, res_len, &key); - + zebra_map_tokenize_start(zm, p->term_buf, p->term_len); + while (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0)) + { + extract_add_string(p, zm, res_buf, res_len); p->seqno++; - r = zebra_map_tokenize(zm, 0, 0, &res_buf, &res_len); } } @@ -1728,8 +1766,8 @@ static void extract_add_icu(RecWord *p, zebra_map_t zm) \param p token data to be indexed Call sequence: - extract_token - zebra_add_{in}_complete + extract_token_add + extract_add_{in}_complete / extract_add_icu extract_add_string extract_add_index_string