From e8393fc8e78d777294f6eabf4029b90d566cf978 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Tue, 31 May 2005 13:01:36 +0000 Subject: [PATCH] Towards snippets/high-light of matched terms in records. Added zebra_get_hit_vector that gets hit terms for record with given ID in a result set. Added print_rec_keys which dumps indexed terms. --- index/extract.c | 43 +++++++++++++++++++++++++++++++++++++++++-- index/index.h | 9 ++++++++- index/retrieve.c | 12 ++++++++++-- index/zebraapi.c | 5 ++++- index/zrpn.c | 24 ++++++++++++------------ index/zsets.c | 53 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 6 files changed, 127 insertions(+), 19 deletions(-) diff --git a/index/extract.c b/index/extract.c index 8b15b89..02ee660 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.182 2005-04-29 10:33:53 adam Exp $ +/* $Id: extract.c,v 1.183 2005-05-31 13:01:36 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -116,7 +116,7 @@ static const char **searchRecordKey (ZebraHandle zh, int attrSet, attrUse; iscz1_decode(decode_handle, &dst, &src); - assert(key.len < 4 && key.len > 2); + assert(key.len <= 4 && key.len > 2); attrSet = (int) key.mem[0] >> 16; attrUse = (int) key.mem[0] & 65535; @@ -1473,6 +1473,45 @@ void extract_add_it_key (ZebraHandle zh, keys->buf_used = dst - keys->buf; } +void print_rec_keys(ZebraHandle zh, struct recKeys *reckeys) +{ + void *decode_handle = iscz1_start(); + int off = 0; + int seqno = 0; + NMEM nmem = nmem_create(); + + yaz_log(YLOG_LOG, "print_rec_keys buf=%p sz=%d", reckeys->buf, + reckeys->buf_used); + assert(reckeys->buf); + while (off < reckeys->buf_used) + { + const char *src = reckeys->buf + off; + struct it_key key; + char *dst = (char*) &key; + int attrSet, attrUse; + char dst_buf[IT_MAX_WORD]; + char *dst_term = dst_buf; + + iscz1_decode(decode_handle, &dst, &src); + assert(key.len <= 4 && key.len > 2); + + attrSet = (int) key.mem[0] >> 16; + attrUse = (int) key.mem[0] & 65535; + seqno = (int) key.mem[key.len-1]; + + zebra_term_untrans_iconv(zh, nmem, src[0], &dst_term, src+1); + + yaz_log(YLOG_LOG, "ord=" ZINT_FORMAT " seqno=%d term=%s", + key.mem[0], seqno, dst_term); + while (*src++) + ; + off = src - reckeys->buf; + nmem_reset(nmem); + } + nmem_destroy(nmem); + iscz1_stop(decode_handle); +} + void extract_add_index_string (RecWord *p, const char *str, int length) { struct it_key key; diff --git a/index/index.h b/index/index.h index 61fe3f4..33ab460 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.138 2005-05-11 12:39:36 adam Exp $ +/* $Id: index.h,v 1.139 2005-05-31 13:01:37 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -463,6 +463,8 @@ off_t zebra_record_int_tell (void *fh); int zebra_record_int_read (void *fh, char *buf, size_t count); void zebra_record_int_end (void *fh, off_t offset); +void print_rec_keys(ZebraHandle zh, struct recKeys *reckeys); + void extract_flushRecordKeys (ZebraHandle zh, SYSNO sysno, int cmd, struct recKeys *reckeys); void extract_flushSortKeys (ZebraHandle zh, SYSNO sysno, @@ -494,6 +496,11 @@ Dict dict_open_res (BFiles bfs, const char *name, int cache, int rw, void zebra_setError(ZebraHandle zh, int code, const char *addinfo); void zebra_setError_zint(ZebraHandle zh, int code, zint i); +void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type, + char **dst, const char *src); + +ZEBRA_RES zebra_get_hit_vector(ZebraHandle zh, const char *setname, zint sysno); + YAZ_END_CDECL #endif diff --git a/index/retrieve.c b/index/retrieve.c index b623437..acdbb79 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.29 2005-04-14 12:02:58 adam Exp $ +/* $Id: retrieve.c,v 1.30 2005-05-31 13:01:37 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -211,7 +211,15 @@ int zebra_record_fetch (ZebraHandle zh, SYSNO sysno, int score, ODR stream, retrieveCtrl.res = zh->res; retrieveCtrl.rec_buf = 0; retrieveCtrl.rec_len = -1; - + + if (1) + { + struct recKeys reckeys; + reckeys.buf = rec->info[recInfo_delKeys]; + reckeys.buf_used = rec->size[recInfo_delKeys]; + + print_rec_keys(zh, &reckeys); + } (*rt->retrieve)(clientData, &retrieveCtrl); *output_format = retrieveCtrl.output_format; *rec_bufp = (char *) retrieveCtrl.rec_buf; diff --git a/index/zebraapi.c b/index/zebraapi.c index 7c4fccd..f822138 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.170 2005-05-30 13:27:08 adam Exp $ +/* $Id: zebraapi.c,v 1.171 2005-05-31 13:01:37 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -944,6 +944,9 @@ ZEBRA_RES zebra_records_retrieve(ZebraHandle zh, ODR stream, { char *buf; int len; + + zebra_get_hit_vector(zh, setname, poset[i].sysno); + recs[i].errCode = zebra_record_fetch(zh, poset[i].sysno, poset[i].score, stream, input_format, comp, diff --git a/index/zrpn.c b/index/zrpn.c index 71c99d5..951f83f 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.192 2005-05-24 11:35:42 adam Exp $ +/* $Id: zrpn.c,v 1.193 2005-05-31 13:01:37 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -173,8 +173,8 @@ struct grep_info { ZebraSet termset; }; -static void term_untrans(ZebraHandle zh, int reg_type, - char *dst, const char *src) +void zebra_term_untrans(ZebraHandle zh, int reg_type, + char *dst, const char *src) { int len = 0; while (*src) @@ -238,7 +238,7 @@ static void add_isam_p(const char *name, const char *info, int su_code = 0; int len = key_SU_decode (&su_code, name); - term_untrans (p->zh, p->reg_type, term_tmp, name+len+1); + zebra_term_untrans (p->zh, p->reg_type, term_tmp, name+len+1); yaz_log(log_level_rpn, "grep: %d %c %s", su_code, name[len], term_tmp); zebraExplain_lookup_ord (p->zh->reg->zei, su_code, &db, &set, &use); @@ -2634,13 +2634,13 @@ static int scan_handle (char *name, const char *info, int pos, void *client) return 0; } -static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type, - char **dst, const char *src) +void zebra_term_untrans_iconv(ZebraHandle zh, NMEM stream, int reg_type, + char **dst, const char *src) { char term_src[IT_MAX_WORD]; char term_dst[IT_MAX_WORD]; - term_untrans (zh, reg_type, term_src, src); + zebra_term_untrans (zh, reg_type, term_src, src); if (zh->iconv_from_utf8 != 0) { @@ -2926,8 +2926,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (lo >= 0) { /* get result set for first term */ - scan_term_untrans(zh, stream->mem, reg_id, - &glist[lo].term, mterm); + zebra_term_untrans_iconv(zh, stream->mem, reg_id, + &glist[lo].term, mterm); rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), NULL, 0, zapt->term->which, rset_nmem, @@ -3015,13 +3015,13 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (j0 == -1) break; - scan_term_untrans (zh, stream->mem, reg_id, - &glist[lo].term, mterm); + zebra_term_untrans_iconv(zh, stream->mem, reg_id, + &glist[lo].term, mterm); rset = rset_trunc (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1, glist[lo].term, strlen(glist[lo].term), - NULL, 0, zapt->term->which,rset_nmem, + NULL, 0, zapt->term->which, rset_nmem, kc, kc->scope); ptr[j0]++; diff --git a/index/zsets.c b/index/zsets.c index d729f93..9726903 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.84 2005-05-31 07:29:10 adam Exp $ +/* $Id: zsets.c,v 1.85 2005-05-31 13:01:37 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -1032,3 +1032,54 @@ ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, } return ZEBRA_FAIL; } + +ZEBRA_RES zebra_get_hit_vector(ZebraHandle zh, const char *setname, + zint sysno) +{ + ZebraSet sset = resultSetGet(zh, setname); + yaz_log(YLOG_LOG, "zebra_get_hit_vector setname=%s zysno=" ZINT_FORMAT, + setname, sysno); + if (!sset) + return ZEBRA_FAIL; + else + { + struct rset_key_control *kc = zebra_key_control_create(zh); + NMEM nmem = nmem_create(); + struct it_key key; + RSET rsets[2], rset_comb; + RSET rset_temp = rstemp_create(nmem, kc, kc->scope, + res_get (zh->res, "setTmpDir"),0 ); + + TERMID termid; + RSFD rsfd = rset_open(rset_temp, RSETF_WRITE); + + key.mem[0] = sysno; + key.mem[1] = 0; + key.mem[2] = 0; + key.mem[3] = 0; + key.len = 2; + rset_write (rsfd, &key); + rset_close (rsfd); + + rsets[0] = rset_temp; + rsets[1] = rset_dup(sset->rset); + + rset_comb = rsmulti_and_create(nmem, kc, kc->scope, 2, rsets); + + rsfd = rset_open(rset_comb, RSETF_READ); + + while (rset_read(rsfd, &key, &termid)) + { + if (termid) + { + key_logdump_txt(YLOG_LOG, &key, termid->name); + yaz_log(YLOG_LOG, " type=%d", termid->type); + } + } + rset_close(rsfd); + + rset_delete(rset_comb); + } + return ZEBRA_OK; +} + -- 1.7.10.4