From 7a2d0f25682890bde5d8f2883d6020df2ed0b365 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 7 Jun 2005 14:53:38 +0000 Subject: [PATCH] Save register type for TERMIDs and snippets. --- include/idzebra/snippet.h | 9 ++++++--- include/rset.h | 6 ++++-- index/extract.c | 4 ++-- index/index.h | 4 ++-- index/trunc.c | 7 ++++--- index/zrpn.c | 16 ++++++++-------- index/zsets.c | 5 ++--- rset/rsbetween.c | 4 ++-- rset/rset.c | 7 +++++-- util/snippet.c | 41 +++++++++++++++++++++++++---------------- 10 files changed, 60 insertions(+), 43 deletions(-) diff --git a/include/idzebra/snippet.h b/include/idzebra/snippet.h index e778e3c..d4cfb1d 100644 --- a/include/idzebra/snippet.h +++ b/include/idzebra/snippet.h @@ -1,4 +1,4 @@ -/* $Id: snippet.h,v 1.1 2005-06-07 11:36:38 adam Exp $ +/* $Id: snippet.h,v 1.2 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -29,6 +29,7 @@ YAZ_BEGIN_CDECL struct zebra_snippet_word { zint seqno; + int reg_type; int ord; char *term; int match; @@ -41,9 +42,11 @@ typedef struct zebra_snippet_word zebra_snippet_word; zebra_snippets *zebra_snippets_create(); void zebra_snippets_destroy(zebra_snippets *l); void zebra_snippets_append(zebra_snippets *l, - zint seqno, int ord, const char *term); + zint seqno, int reg_type, + int ord, const char *term); void zebra_snippets_append_match(zebra_snippets *l, - zint seqno, int ord, const char *term, + zint seqno, int reg_type, + int ord, const char *term, int match); zebra_snippet_word *zebra_snippets_list(zebra_snippets *l); void zebra_snippets_log(zebra_snippets *l, int log_level); diff --git a/include/rset.h b/include/rset.h index b37becc..bf8f87b 100644 --- a/include/rset.h +++ b/include/rset.h @@ -1,4 +1,4 @@ -/* $Id: rset.h,v 1.55 2005-06-07 07:41:04 adam Exp $ +/* $Id: rset.h,v 1.56 2005-06-07 14:53:38 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -58,6 +58,7 @@ struct rset_term { This info is used to return encoded term back for search-result-1 . */ + int reg_type; /** register type */ RSET rset; /** the rset corresponding to this term */ void *rankpriv;/** private stuff for the ranking algorithm */ struct ord_list *ol; @@ -65,7 +66,8 @@ struct rset_term { typedef struct rset_term *TERMID; TERMID rset_term_create (const char *name, int length, const char *flags, - int type, NMEM nmem, struct ord_list *ol); + int type, NMEM nmem, struct ord_list *ol, + int reg_type); /** rsfd is a "file descriptor" for reading from a rset */ struct rsfd { /* the stuff common to all rsfd's. */ diff --git a/index/extract.c b/index/extract.c index ca6d012..acd63b2 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.184 2005-06-07 11:36:38 adam Exp $ +/* $Id: extract.c,v 1.185 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -1498,7 +1498,7 @@ ZEBRA_RES zebra_snippets_rec_keys(ZebraHandle zh, struct recKeys *reckeys, seqno = (int) key.mem[key.len-1]; zebra_term_untrans_iconv(zh, nmem, src[0], &dst_term, src+1); - zebra_snippets_append(snippets, seqno, key.mem[0], dst_term); + zebra_snippets_append(snippets, seqno, src[0], key.mem[0], dst_term); while (*src++) ; off = src - reckeys->buf; diff --git a/index/index.h b/index/index.h index 6d656ac..95371fa 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.141 2005-06-07 11:36:38 adam Exp $ +/* $Id: index.h,v 1.142 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -368,7 +368,7 @@ RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no, const char *term, int length_term, const char *flags, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, - struct ord_list *ol); + struct ord_list *ol, int reg_type); void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type, const char *db, int set, diff --git a/index/trunc.c b/index/trunc.c index 63b27e8..b416592 100644 --- a/index/trunc.c +++ b/index/trunc.c @@ -1,4 +1,4 @@ -/* $Id: trunc.c,v 1.59 2005-06-06 21:31:08 adam Exp $ +/* $Id: trunc.c,v 1.60 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -400,13 +400,14 @@ RSET rset_trunc(ZebraHandle zi, ISAM_P *isam_p, int no, const char *term, int length, const char *flags, int preserve_position, int term_type, NMEM rset_nmem, struct rset_key_control *kctrl, int scope, - struct ord_list *ol) + struct ord_list *ol, int reg_type) { TERMID termid; RSET result; int trunc_chunk; - termid = rset_term_create(term, length, flags, term_type, rset_nmem, ol); + termid = rset_term_create(term, length, flags, term_type, rset_nmem, ol, + reg_type); if (no < 1) return rsnull_create(rset_nmem, kctrl, termid); diff --git a/index/zrpn.c b/index/zrpn.c index 4b39270..1b74b9a 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.196 2005-06-07 07:41:05 adam Exp $ +/* $Id: zrpn.c,v 1.197 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -1018,7 +1018,7 @@ static ZEBRA_RES term_trunc(ZebraHandle zh, grep_info->isam_p_indx, term_dst, strlen(term_dst), rank_type, 1 /* preserve pos */, zapt->term->which, rset_nmem, - kc, kc->scope, ol); + kc, kc->scope, ol, reg_type); if (!*rset) return ZEBRA_FAIL; return ZEBRA_OK; @@ -1965,7 +1965,7 @@ static ZEBRA_RES rpn_search_APT_numeric(ZebraHandle zh, strlen(term_dst), rank_type, 0 /* preserve position */, zapt->term->which, rset_nmem, - kc, kc->scope, 0); + kc, kc->scope, 0, reg_type); if (!result_sets[num_result_sets]) break; num_result_sets++; @@ -2159,7 +2159,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream, rset = rset_trunc(zh, grep_info.isam_p_buf, grep_info.isam_p_indx, term, strlen(term), flags, 1, term_type,rset_nmem, - kc, kc->scope, 0); + kc, kc->scope, 0, reg_type); grep_info_delete(&grep_info); return rset; } @@ -2963,7 +2963,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0); + kc, kc->scope, 0, reg_id); } ptr[j0]++; /* move index for this set .. */ /* get result set for remaining scan terms */ @@ -2984,7 +2984,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which,rset_nmem, - kc, kc->scope, 0); + kc, kc->scope, 0, reg_id); rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */, 2, rsets); @@ -3054,7 +3054,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0); + kc, kc->scope, 0, reg_id); ptr[j0]++; @@ -3073,7 +3073,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0); + kc, kc->scope, 0, reg_id); rset = rsmulti_or_create(rset_nmem, kc, kc->scope, 0 /* termid */, 2, rsets); diff --git a/index/zsets.c b/index/zsets.c index b4650a1..b20041f 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.87 2005-06-07 11:36:38 adam Exp $ +/* $Id: zsets.c,v 1.88 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -1073,11 +1073,10 @@ ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, if (termid) { struct ord_list *ol; - key_logdump_txt(YLOG_LOG, &key, termid->name); for (ol = termid->ol; ol; ol = ol->next) { - yaz_log(YLOG_LOG, " ord=%d", ol->ord); zebra_snippets_append(snippets, key.mem[key.len-1], + termid->reg_type, ol->ord, termid->name); } } diff --git a/rset/rsbetween.c b/rset/rsbetween.c index e2411fd..d988a6b 100644 --- a/rset/rsbetween.c +++ b/rset/rsbetween.c @@ -1,4 +1,4 @@ -/* $Id: rsbetween.c,v 1.40 2005-06-06 21:31:08 adam Exp $ +/* $Id: rsbetween.c,v 1.41 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -96,7 +96,7 @@ static void checkterm(RSET rs, char *tag, NMEM nmem) { if (!rs->term) { - rs->term = rset_term_create(tag, strlen(tag), "", 0, nmem, 0); + rs->term = rset_term_create(tag, strlen(tag), "", 0, nmem, 0, 0); rs->term->rset = rs; } } diff --git a/rset/rset.c b/rset/rset.c index c7821d3..52f3033 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -1,4 +1,4 @@ -/* $Id: rset.c,v 1.49 2005-06-07 07:41:05 adam Exp $ +/* $Id: rset.c,v 1.50 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -307,9 +307,11 @@ struct ord_list *ord_list_dup(NMEM nmem, struct ord_list *list) \param type Term Type, Z_Term_general, Z_Term_characterString,.. \param nmem memory for term. \param ol ord list + \param reg_type register type */ TERMID rset_term_create(const char *name, int length, const char *flags, - int type, NMEM nmem, struct ord_list *ol) + int type, NMEM nmem, struct ord_list *ol, + int reg_type) { TERMID t; @@ -331,6 +333,7 @@ TERMID rset_term_create(const char *name, int length, const char *flags, else t->flags = nmem_strdup(nmem, flags); t->type = type; + t->reg_type = reg_type; t->rankpriv = 0; t->rset = 0; t->ol = ord_list_dup(nmem, ol); diff --git a/util/snippet.c b/util/snippet.c index bca55ef..a3045a3 100644 --- a/util/snippet.c +++ b/util/snippet.c @@ -1,4 +1,4 @@ -/* $Id: snippet.c,v 1.2 2005-06-07 13:10:52 adam Exp $ +/* $Id: snippet.c,v 1.3 2005-06-07 14:53:39 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -47,14 +47,14 @@ void zebra_snippets_destroy(zebra_snippets *l) } void zebra_snippets_append(zebra_snippets *l, - zint seqno, int ord, const char *term) + zint seqno, int reg_type, int ord, const char *term) { - zebra_snippets_append_match(l, seqno, ord, term, 0); + zebra_snippets_append_match(l, seqno, reg_type, ord, term, 0); } void zebra_snippets_append_match(zebra_snippets *l, - zint seqno, int ord, const char *term, - int match) + zint seqno, int reg_type, + int ord, const char *term, int match) { struct zebra_snippet_word *w = nmem_malloc(l->nmem, sizeof(*w)); @@ -66,6 +66,7 @@ void zebra_snippets_append_match(zebra_snippets *l, l->tail = w; w->seqno = seqno; + w->reg_type = reg_type; w->ord = ord; w->term = nmem_strdup(l->nmem, term); w->match = match; @@ -80,8 +81,9 @@ void zebra_snippets_log(zebra_snippets *l, int log_level) { zebra_snippet_word *w; for (w = l->front; w; w = w->next) - yaz_log(log_level, "term=%s%s seqno=" ZINT_FORMAT " ord=%d", - w->term, (w->match ? "*" : ""), w->seqno, w->ord); + yaz_log(log_level, "term=%s%s seqno=" ZINT_FORMAT " reg_type=%c " + "ord=%d", + w->term, (w->match ? "*" : ""), w->seqno, w->reg_type, w->ord); } zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, @@ -96,12 +98,17 @@ zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, while(1) { int window_start; + int reg_type; zebra_snippet_word *hit_w, *doc_w; int min_ord = 0; /* not set yet */ for (hit_w = zebra_snippets_list(hit); hit_w; hit_w = hit_w->next) if (hit_w->ord > ord && - (min_ord == 0 || hit_w->ord < min_ord)) + (min_ord == 0 || + (hit_w->ord < min_ord && hit_w->reg_type == reg_type))) + { min_ord = hit_w->ord; + reg_type = hit_w->reg_type; + } if (min_ord == 0) break; ord = min_ord; @@ -119,7 +126,7 @@ zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, int seq_no_last = 0; while (look_w && look_w->seqno < hit_w->seqno + window_size) { - if (look_w->ord == ord) + if (look_w->ord == ord && look_w->reg_type == reg_type) { seq_no_last = look_w->seqno; number_this++; @@ -134,29 +141,31 @@ zebra_snippets *zebra_snippets_window(zebra_snippets *doc, zebra_snippets *hit, } } } - yaz_log(YLOG_LOG, "ord=%d", ord); - yaz_log(YLOG_LOG, "first_seq_no_best_window=%d", first_seq_no_best_window); - yaz_log(YLOG_LOG, "last_seq_no_best_window=%d", last_seq_no_best_window); - yaz_log(YLOG_LOG, "number_best_window=%d", number_best_window); + yaz_log(YLOG_DEBUG, "ord=%d", ord); + yaz_log(YLOG_DEBUG, "first_seq_no_best_window=%d", first_seq_no_best_window); + yaz_log(YLOG_DEBUG, "last_seq_no_best_window=%d", last_seq_no_best_window); + yaz_log(YLOG_DEBUG, "number_best_window=%d", number_best_window); window_start = (first_seq_no_best_window + last_seq_no_best_window - window_size) / 2; for (doc_w = zebra_snippets_list(doc); doc_w; doc_w = doc_w->next) - if (doc_w->ord == ord + if (doc_w->ord == ord && doc_w->reg_type == reg_type && doc_w->seqno >= window_start && doc_w->seqno < window_start + window_size) { int match = 0; for (hit_w = zebra_snippets_list(hit); hit_w; hit_w = hit_w->next) { - if (hit_w->ord == ord && hit_w->seqno == doc_w->seqno) + if (hit_w->ord == ord && hit_w->reg_type == reg_type && + hit_w->seqno == doc_w->seqno) { match = 1; break; } } - zebra_snippets_append_match(result, doc_w->seqno, ord, + zebra_snippets_append_match(result, doc_w->seqno, + doc_w->reg_type, ord, doc_w->term, match); } } -- 1.7.10.4