From 11de0965b256736f01e55b17a603eb9ebad5ec12 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 26 Aug 2005 10:13:30 +0000 Subject: [PATCH] Update snippets handling for new store keys structure. Also omit index_type / reg_type completely from it, because the index ordinal is sufficient. --- include/idzebra/snippet.h | 13 +++++-------- index/extract.c | 15 +++++++++------ index/retrieve.c | 4 ++-- index/zinfo.c | 17 ++++++++++++----- index/zinfo.h | 5 +++-- index/zrpn.c | 10 +++++----- index/zsets.c | 5 ++--- util/snippet.c | 34 ++++++++++++++-------------------- 8 files changed, 52 insertions(+), 51 deletions(-) diff --git a/include/idzebra/snippet.h b/include/idzebra/snippet.h index d64803a..4766927 100644 --- a/include/idzebra/snippet.h +++ b/include/idzebra/snippet.h @@ -1,4 +1,4 @@ -/* $Id: snippet.h,v 1.3 2005-08-18 19:20:37 adam Exp $ +/* $Id: snippet.h,v 1.4 2005-08-26 10:13:30 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -20,8 +20,8 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ -#ifndef SNIPPET_H -#define SNIPPET_H +#ifndef IDZEBRA_SNIPPET_H +#define IDZEBRA_SNIPPET_H #include @@ -29,7 +29,6 @@ YAZ_BEGIN_CDECL struct zebra_snippet_word { zint seqno; - int reg_type; int ord; char *term; int match; @@ -47,13 +46,11 @@ void zebra_snippets_destroy(zebra_snippets *l); YAZ_EXPORT void zebra_snippets_append(zebra_snippets *l, - zint seqno, int reg_type, - int ord, const char *term); + zint seqno, int ord, const char *term); YAZ_EXPORT void zebra_snippets_append_match(zebra_snippets *l, - zint seqno, int reg_type, - int ord, const char *term, + zint seqno, int ord, const char *term, int match); YAZ_EXPORT diff --git a/index/extract.c b/index/extract.c index 33fc1e3..a7fd69b 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.192 2005-08-22 09:04:17 adam Exp $ +/* $Id: extract.c,v 1.193 2005-08-26 10:13:30 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -1526,13 +1526,11 @@ void extract_add_it_key (ZebraHandle zh, ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct recKeys *reckeys, zebra_snippets *snippets) { - void *decode_handle = iscz1_start(); + void *decode_handle = iscz1_start(); int off = 0; int seqno = 0; NMEM nmem = nmem_create(); - yaz_log(YLOG_LOG, "zebra_rec_keys_snippets buf=%p sz=%d", reckeys->buf, - reckeys->buf_used); assert(reckeys->buf); while (off < reckeys->buf_used) { @@ -1541,14 +1539,19 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct recKeys *reckeys, char *dst = (char*) &key; char dst_buf[IT_MAX_WORD]; char *dst_term = dst_buf; + int index_type, ord; iscz1_decode(decode_handle, &dst, &src); assert(key.len <= 4 && key.len > 2); seqno = (int) key.mem[key.len-1]; + ord = key.mem[0]; - zebra_term_untrans_iconv(zh, nmem, src[0], &dst_term, src+1); - zebra_snippets_append(snippets, seqno, src[0], key.mem[0], dst_term); + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, + 0/* db */, 0/* set */, 0/* use */); + zebra_term_untrans_iconv(zh, nmem, index_type, + &dst_term, src); + zebra_snippets_append(snippets, seqno, ord, dst_term); while (*src++) ; off = src - reckeys->buf; diff --git a/index/retrieve.c b/index/retrieve.c index fcedc3b..08257c4 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.34 2005-08-19 14:05:29 marc Exp $ +/* $Id: retrieve.c,v 1.35 2005-08-26 10:13:31 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -216,7 +216,7 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, retrieveCtrl.hit_snippet = hit_snippet; retrieveCtrl.doc_snippet = zebra_snippets_create(); - if (0) + if (1) { /* snippets code */ struct recKeys reckeys; diff --git a/index/zinfo.c b/index/zinfo.c index 60ae415..e13b8c3 100644 --- a/index/zinfo.c +++ b/index/zinfo.c @@ -1,4 +1,4 @@ -/* $Id: zinfo.c,v 1.49 2005-08-18 12:50:18 adam Exp $ +/* $Id: zinfo.c,v 1.50 2005-08-26 10:13:31 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -1416,7 +1416,9 @@ int zebraExplain_trav_ord(ZebraExplainInfo zei, void *handle, } int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord, - const char **db, int *set, int *use) + int *index_type, + const char **db, + int *set, int *use) { struct zebDatabaseInfoB *zdb; for (zdb = zei->databaseInfo; zdb; zdb = zdb->next) @@ -1426,9 +1428,14 @@ int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord, if (zsui->info.which == ZEB_SU_SET_USE && zsui->info.ordinal == ord) { - *db = zdb->databaseName; - *set = zsui->info.u.su.set; - *use = zsui->info.u.su.use; + if (db) + *db = zdb->databaseName; + if (set) + *set = zsui->info.u.su.set; + if (use) + *use = zsui->info.u.su.use; + if (index_type) + *index_type = zsui->info.index_type; return 0; } } diff --git a/index/zinfo.h b/index/zinfo.h index e2b74b6..a1db67c 100644 --- a/index/zinfo.h +++ b/index/zinfo.h @@ -1,4 +1,4 @@ -/* $Id: zinfo.h,v 1.26 2005-08-18 12:50:18 adam Exp $ +/* $Id: zinfo.h,v 1.27 2005-08-26 10:13:31 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -68,7 +68,8 @@ void zebraExplain_loadAttsets (data1_handle dh, Res res); void zebraExplain_flush (ZebraExplainInfo zei, void *updateHandle); int zebraExplain_lookup_ord (ZebraExplainInfo zei, int ord, - const char **db, int *set, int *use); + int *index_type, const char **db, + int *set, int *use); int zebraExplain_trav_ord(ZebraExplainInfo zei, void *handle, int (*f)(void *handle, int ord)); diff --git a/index/zrpn.c b/index/zrpn.c index 135f68b..66e6990 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.204 2005-08-22 09:04:18 adam Exp $ +/* $Id: zrpn.c,v 1.205 2005-08-26 10:13:31 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -235,13 +235,13 @@ static void add_isam_p(const char *name, const char *info, const char *db; int set, use; char term_tmp[IT_MAX_WORD]; - int su_code = 0; - int len = key_SU_decode (&su_code, (const unsigned char *) name); + int ord = 0; + int len = key_SU_decode (&ord, (const unsigned char *) name); zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1); - yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp); + yaz_log(log_level_rpn, "grep: %d %c %s", ord, name[len], term_tmp); zebraExplain_lookup_ord (p->zh->reg->zei, - su_code, &db, &set, &use); + ord, 0 /* index_type */, &db, &set, &use); yaz_log(log_level_rpn, "grep: set=%d use=%d db=%s", set, use, db); resultSetAddTerm(p->zh, p->termset, name[len], db, diff --git a/index/zsets.c b/index/zsets.c index a4b596f..1b76097 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.94 2005-08-19 11:04:23 adam Exp $ +/* $Id: zsets.c,v 1.95 2005-08-26 10:13:31 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -1072,7 +1072,7 @@ ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, zint sysno, zebra_snippets *snippets) { ZebraSet sset = resultSetGet(zh, setname); - yaz_log(YLOG_LOG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT, + yaz_log(YLOG_DEBUG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT, setname, sysno); if (!sset) return ZEBRA_FAIL; @@ -1111,7 +1111,6 @@ ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, for (ol = termid->ol; ol; ol = ol->next) { zebra_snippets_append(snippets, key.mem[key.len-1], - termid->reg_type, ol->ord, termid->name); } } diff --git a/util/snippet.c b/util/snippet.c index a9c6853..7df67f2 100644 --- a/util/snippet.c +++ b/util/snippet.c @@ -1,4 +1,4 @@ -/* $Id: snippet.c,v 1.4 2005-06-07 15:12:39 adam Exp $ +/* $Id: snippet.c,v 1.5 2005-08-26 10:13:31 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -47,14 +47,14 @@ void zebra_snippets_destroy(zebra_snippets *l) } void zebra_snippets_append(zebra_snippets *l, - zint seqno, int reg_type, int ord, const char *term) + zint seqno, int ord, const char *term) { - zebra_snippets_append_match(l, seqno, reg_type, ord, term, 0); + zebra_snippets_append_match(l, seqno, ord, term, 0); } void zebra_snippets_append_match(zebra_snippets *l, - zint seqno, int reg_type, - int ord, const char *term, int match) + zint seqno, int ord, const char *term, + int match) { struct zebra_snippet_word *w = nmem_malloc(l->nmem, sizeof(*w)); @@ -66,7 +66,6 @@ void zebra_snippets_append_match(zebra_snippets *l, l->tail = w; w->seqno = seqno; - w->reg_type = reg_type; w->ord = ord; w->term = nmem_strdup(l->nmem, term); w->match = match; @@ -81,9 +80,8 @@ void zebra_snippets_log(zebra_snippets *l, int log_level) { zebra_snippet_word *w; for (w = l->front; w; w = w->next) - yaz_log(log_level, "term=%s%s seqno=" ZINT_FORMAT " reg_type=%c " - "ord=%d", - w->term, (w->match ? "*" : ""), w->seqno, w->reg_type, w->ord); + yaz_log(log_level, "term=%s%s seqno=" ZINT_FORMAT " ord=%d", + w->term, (w->match ? "*" : ""), w->seqno, w->ord); } zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, @@ -98,7 +96,6 @@ zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, while(1) { int window_start; - int reg_type; int first_seq_no_best_window = 0; int last_seq_no_best_window = 0; int number_best_window = 0; @@ -107,11 +104,9 @@ zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, for (hit_w = zebra_snippets_list(hit); hit_w; hit_w = hit_w->next) if (hit_w->ord > ord && - (min_ord == 0 || - (hit_w->ord < min_ord && hit_w->reg_type == reg_type))) + (min_ord == 0 || hit_w->ord < min_ord)) { min_ord = hit_w->ord; - reg_type = hit_w->reg_type; } if (min_ord == 0) break; @@ -126,7 +121,7 @@ zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, int seq_no_last = 0; while (look_w && look_w->seqno < hit_w->seqno + window_size) { - if (look_w->ord == ord && look_w->reg_type == reg_type) + if (look_w->ord == ord) { seq_no_last = look_w->seqno; number_this++; @@ -149,15 +144,15 @@ zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, window_start = (first_seq_no_best_window + last_seq_no_best_window - window_size) / 2; for (doc_w = zebra_snippets_list(doc); doc_w; doc_w = doc_w->next) - if (doc_w->ord == ord && doc_w->reg_type == reg_type + if (doc_w->ord == ord && doc_w->seqno >= window_start && doc_w->seqno < window_start + window_size) { int match = 0; - for (hit_w = zebra_snippets_list(hit); hit_w; hit_w = hit_w->next) + for (hit_w = zebra_snippets_list(hit); hit_w; + hit_w = hit_w->next) { - if (hit_w->ord == ord && hit_w->reg_type == reg_type && - hit_w->seqno == doc_w->seqno) + if (hit_w->ord == ord && hit_w->seqno == doc_w->seqno) { match = 1; @@ -165,8 +160,7 @@ zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, } } zebra_snippets_append_match(result, doc_w->seqno, - doc_w->reg_type, ord, - doc_w->term, match); + ord, doc_w->term, match); } } return result; -- 1.7.10.4