X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fextract.c;h=e4973ab7fcd1fced79401de6cc9abb4c9cf4a29f;hb=deb0cef3d4d19dc6508b2fed71711b3fb1be26a2;hp=d4d9b691597d729bbbf7cd6727440551e5034999;hpb=6684933a7dbf61609c4c4a1db1ebb8d80169ad05;p=idzebra-moved-to-github.git diff --git a/index/extract.c b/index/extract.c index d4d9b69..e4973ab 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.223 2006-06-22 15:07:20 adam Exp $ +/* $Id: extract.c,v 1.227 2006-08-16 13:16:36 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #include @@ -112,15 +112,15 @@ static void extract_add_index_string (RecWord *p, static void extract_set_store_data_prepare(struct recExtractCtrl *p); -static void extract_init (struct recExtractCtrl *p, RecWord *w) +static void extract_init(struct recExtractCtrl *p, RecWord *w) { - w->zebra_maps = p->zebra_maps; w->seqno = 1; w->index_name = "any"; w->index_type = 'w'; w->extractCtrl = p; w->record_id = 0; w->section_id = 0; + w->segment = 0; } static void searchRecordKey(ZebraHandle zh, @@ -154,7 +154,7 @@ static void searchRecordKey(ZebraHandle zh, zint seqno; while (zebra_rec_keys_read(reckeys, &str, &slen, &key)) { - assert(key.len <= 4 && key.len > 2); + assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2); seqno = key.mem[key.len-1]; @@ -395,7 +395,6 @@ static void init_extractCtrl(ZebraHandle zh, struct recExtractCtrl *ctrl) else ctrl->seqno[i] = 0; } - ctrl->zebra_maps = zh->reg->zebra_maps; ctrl->flagShowRecords = !zh->m_flag_rw; } @@ -403,7 +402,7 @@ static void all_matches_add(struct recExtractCtrl *ctrl) { RecWord word; extract_init(ctrl, &word); - word.index_name = "allrecords"; + word.index_name = "_ALLRECORDS"; word.index_type = 'w'; word.seqno = 1; extract_add_index_string (&word, zinfo_index_category_alwaysmatches, @@ -1376,10 +1375,11 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, while(zebra_rec_keys_read(reckeys, &str, &slen, &key_in)) { int ch = 0; + int i, j = 0; struct it_key key_out; - zint *keyp = key_out.mem; - assert(key_in.len == 4); + assert(key_in.len >= 2); + assert(key_in.len <= IT_KEY_LEVEL_MAX); /* check for buffer overflow */ if (zh->reg->key_buf_used + 1024 > @@ -1391,6 +1391,9 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, (zh->reg->key_buf)[zh->reg->ptr_top - zh->reg->ptr_i] = (char*)zh->reg->key_buf + zh->reg->key_buf_used; + /* key_in.mem[0] ord/ch */ + /* key_in.mem[1] filter specified record ID */ + /* encode the ordinal value (field/use/attribute) .. */ ch = CAST_ZINT_TO_INT(key_in.mem[0]); zh->reg->key_buf_used += @@ -1413,19 +1416,17 @@ void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, (long) staticrank); staticrank = 0; } - *keyp++ = staticrank; - key_out.len = 4; + key_out.mem[j++] = staticrank; } - else - key_out.len = 3; if (key_in.mem[1]) /* filter specified record ID */ - *keyp++ = key_in.mem[1]; + key_out.mem[j++] = key_in.mem[1]; else - *keyp++ = sysno; - *keyp++ = key_in.mem[2]; /* section_id */ - *keyp++ = key_in.mem[3]; /* sequence .. */ - + key_out.mem[j++] = sysno; + for (i = 2; i < key_in.len; i++) + key_out.mem[j++] = key_in.mem[i]; + key_out.len = j; + memcpy((char*)zh->reg->key_buf + zh->reg->key_buf_used, &key_out, sizeof(key_out)); (zh->reg->key_buf_used) += sizeof(key_out); @@ -1580,7 +1581,7 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, zint seqno; int index_type; - assert(key.len <= 4 && key.len > 2); + assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2); seqno = key.mem[key.len-1]; ord = CAST_ZINT_TO_INT(key.mem[0]); @@ -1612,7 +1613,7 @@ void print_rec_keys(ZebraHandle zh, zebra_rec_keys_t reckeys) int index_type; int ord = CAST_ZINT_TO_INT(key.mem[0]); const char *db = 0; - assert(key.len <= 4 && key.len > 2); + assert(key.len <= IT_KEY_LEVEL_MAX && key.len > 2); zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, 0); @@ -1630,40 +1631,23 @@ static void extract_add_index_string(RecWord *p, zinfo_index_category_t cat, const char *str, int length) { struct it_key key; - ZebraHandle zh = p->extractCtrl->handle; ZebraExplainInfo zei = zh->reg->zei; - int ch; - - if (!p->index_name) - return; + int ch, i; ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name); if (ch < 0) ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name); - key.len = 4; - key.mem[0] = ch; - key.mem[1] = p->record_id; - key.mem[2] = p->section_id; - key.mem[3] = p->seqno; + i = 0; + key.mem[i++] = ch; + key.mem[i++] = p->record_id; + key.mem[i++] = p->section_id; -#if 0 - if (1) - { - char strz[80]; - int i; - - strz[0] = 0; - for (i = 0; iattrSet, p->attrUse, p->record_id, p->section_id, p->seqno, - strz); - } -#endif + if (zh->m_segment_indexing) + key.mem[i++] = p->segment; + key.mem[i++] = p->seqno; + key.len = i; zebra_rec_keys_write(zh->reg->keys, str, length, &key); } @@ -1671,54 +1655,36 @@ static void extract_add_index_string(RecWord *p, zinfo_index_category_t cat, static void extract_add_sort_string(RecWord *p, const char *str, int length) { struct it_key key; - ZebraHandle zh = p->extractCtrl->handle; ZebraExplainInfo zei = zh->reg->zei; int ch; zinfo_index_category_t cat = zinfo_index_category_sort; - - - if (!p->index_name) - return; ch = zebraExplain_lookup_attr_str(zei, cat, p->index_type, p->index_name); if (ch < 0) ch = zebraExplain_add_attr_str(zei, cat, p->index_type, p->index_name); - key.len = 4; + key.len = 2; key.mem[0] = ch; key.mem[1] = p->record_id; - key.mem[2] = p->section_id; - key.mem[3] = p->seqno; -#if 0 - if (1) - { - char strz[80]; - int i; - - strz[0] = 0; - for (i = 0; iattrSet, p->attrUse, p->record_id, p->section_id, p->seqno, - strz); - } -#endif zebra_rec_keys_write(zh->reg->sortKeys, str, length, &key); } -static void extract_add_string (RecWord *p, const char *string, int length) +static void extract_add_string(RecWord *p, const char *string, int length) { + ZebraHandle zh = p->extractCtrl->handle; assert (length > 0); - if (zebra_maps_is_sort (p->zebra_maps, p->index_type)) - extract_add_sort_string (p, string, length); + + if (!p->index_name) + return; + + if (zebra_maps_is_sort(zh->reg->zebra_maps, p->index_type)) + extract_add_sort_string(p, string, length); else { extract_add_index_string(p, zinfo_index_category_index, string, length); - if (zebra_maps_is_alwaysmatches(p->zebra_maps, p->index_type)) + if (zebra_maps_is_alwaysmatches(zh->reg->zebra_maps, p->index_type)) { RecWord word; memcpy(&word, p, sizeof(word)); @@ -1732,12 +1698,13 @@ static void extract_add_string (RecWord *p, const char *string, int length) static void extract_add_incomplete_field (RecWord *p) { + ZebraHandle zh = p->extractCtrl->handle; const char *b = p->term_buf; int remain = p->term_len; const char **map = 0; if (remain > 0) - map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, 0); + map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); while (map) { @@ -1749,7 +1716,7 @@ static void extract_add_incomplete_field (RecWord *p) { remain = p->term_len - (b - p->term_buf); if (remain > 0) - map = zebra_maps_input(p->zebra_maps, p->index_type, &b, + map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); else map = 0; @@ -1765,7 +1732,7 @@ static void extract_add_incomplete_field (RecWord *p) buf[i++] = *(cp++); remain = p->term_len - (b - p->term_buf); if (remain > 0) - map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, 0); + map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, 0); else map = 0; } @@ -1778,13 +1745,14 @@ static void extract_add_incomplete_field (RecWord *p) static void extract_add_complete_field (RecWord *p) { + ZebraHandle zh = p->extractCtrl->handle; const char *b = p->term_buf; char buf[IT_MAX_WORD+1]; const char **map = 0; int i = 0, remain = p->term_len; if (remain > 0) - map = zebra_maps_input (p->zebra_maps, p->index_type, &b, remain, 1); + map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b, remain, 1); while (remain > 0 && i < IT_MAX_WORD) { @@ -1795,7 +1763,7 @@ static void extract_add_complete_field (RecWord *p) if (remain > 0) { int first = i ? 0 : 1; /* first position */ - map = zebra_maps_input(p->zebra_maps, p->index_type, &b, remain, first); + map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, &b, remain, first); } else map = 0; @@ -1823,7 +1791,7 @@ static void extract_add_complete_field (RecWord *p) remain = p->term_len - (b - p->term_buf); if (remain > 0) { - map = zebra_maps_input (p->zebra_maps, p->index_type, &b, + map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b, remain, 0); } else @@ -1837,19 +1805,20 @@ static void extract_add_complete_field (RecWord *p) static void extract_token_add(RecWord *p) { + ZebraHandle zh = p->extractCtrl->handle; WRBUF wrbuf; if (log_level) yaz_log(log_level, "extract_token_add " "type=%c index=%s seqno=" ZINT_FORMAT " s=%.*s", p->index_type, p->index_name, p->seqno, p->term_len, p->term_buf); - if ((wrbuf = zebra_replace(p->zebra_maps, p->index_type, 0, + if ((wrbuf = zebra_replace(zh->reg->zebra_maps, p->index_type, 0, p->term_buf, p->term_len))) { p->term_buf = wrbuf_buf(wrbuf); p->term_len = wrbuf_len(wrbuf); } - if (zebra_maps_is_complete (p->zebra_maps, p->index_type)) + if (zebra_maps_is_complete (zh->reg->zebra_maps, p->index_type)) extract_add_complete_field (p); else extract_add_incomplete_field(p);