From: Adam Dickmeiss Date: Tue, 21 Aug 2007 13:27:04 +0000 (+0000) Subject: Scan now returns displayTerm which is extract from original record. X-Git-Tag: ZEBRA.2.0.16~19 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=5623222cf968d493c2bc9506c42b8344de60c4e5 Scan now returns displayTerm which is extract from original record. Goodbye to @'s - for scan. Bug #1411. --- diff --git a/include/idzebra/api.h b/include/idzebra/api.h index f38e657..4a75217 100644 --- a/include/idzebra/api.h +++ b/include/idzebra/api.h @@ -1,4 +1,4 @@ -/* $Id: api.h,v 1.51 2007-05-21 11:54:59 adam Exp $ +/* $Id: api.h,v 1.52 2007-08-21 13:27:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -65,6 +65,7 @@ typedef struct { typedef struct { zint occurrences; /* scan term occurrences */ char *term; /* scan term string */ + char *display_term; /* display scan term entry */ } ZebraScanEntry; /** \var ZebraHandle diff --git a/include/idzebra/snippet.h b/include/idzebra/snippet.h index 474b9a4..60a4ecf 100644 --- a/include/idzebra/snippet.h +++ b/include/idzebra/snippet.h @@ -1,4 +1,4 @@ -/* $Id: snippet.h,v 1.9 2007-08-21 11:06:46 adam Exp $ +/* $Id: snippet.h,v 1.10 2007-08-21 13:27:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -81,6 +81,10 @@ void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit, int before, int after); +YAZ_EXPORT +const struct zebra_snippet_word *zebra_snippets_lookup( + const zebra_snippets *doc, const zebra_snippets *hit); + YAZ_END_CDECL #endif diff --git a/index/extract.c b/index/extract.c index 64e39d3..d9ed681 100644 --- a/index/extract.c +++ b/index/extract.c @@ -1,4 +1,4 @@ -/* $Id: extract.c,v 1.259 2007-08-21 11:06:47 adam Exp $ +/* $Id: extract.c,v 1.260 2007-08-21 13:27:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -128,9 +128,75 @@ struct snip_rec_info { }; -static void snippet_add_complete_field(RecWord *p) +static void snippet_add_complete_field(RecWord *p, int ord) { + struct snip_rec_info *h = p->extractCtrl->handle; + ZebraHandle zh = h->zh; + + const char *b = p->term_buf; + char buf[IT_MAX_WORD+1]; + const char **map = 0; + int i = 0, remain = p->term_len; + const char *start = b; + const char *last = 0; + + if (remain > 0) + map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b, remain, 1); + + while (remain > 0 && i < IT_MAX_WORD) + { + while (map && *map && **map == *CHR_SPACE) + { + remain = p->term_len - (b - p->term_buf); + + if (i == 0) + start = b; /* set to first non-ws area */ + if (remain > 0) + { + int first = i ? 0 : 1; /* first position */ + + map = zebra_maps_input(zh->reg->zebra_maps, p->index_type, + &b, remain, first); + } + else + map = 0; + } + if (!map) + break; + + if (i && i < IT_MAX_WORD) + buf[i++] = *CHR_SPACE; + while (map && *map && **map != *CHR_SPACE) + { + const char *cp = *map; + if (**map == *CHR_CUT) + { + i = 0; + } + else + { + if (i >= IT_MAX_WORD) + break; + while (i < IT_MAX_WORD && *cp) + buf[i++] = *(cp++); + } + last = b; + remain = p->term_len - (b - p->term_buf); + if (remain > 0) + { + map = zebra_maps_input (zh->reg->zebra_maps, p->index_type, &b, + remain, 0); + } + else + map = 0; + } + } + if (!i) + return; + if (last && start != last) + zebra_snippets_appendn(h->snippets, p->seqno, 0, ord, + start, last - start); } static void snippet_add_incomplete_field(RecWord *p, int ord) @@ -220,7 +286,7 @@ static void snippet_token_add(RecWord *p) zei, zinfo_index_category_index, p->index_type, p->index_name); if (zebra_maps_is_complete (h->zh->reg->zebra_maps, p->index_type)) - snippet_add_complete_field (p); + snippet_add_complete_field (p, ch); else snippet_add_incomplete_field(p, ch); } diff --git a/index/index.h b/index/index.h index 8738d1a..8478bde 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.199 2007-08-21 11:06:47 adam Exp $ +/* $Id: index.h,v 1.200 2007-08-21 13:27:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -321,6 +321,9 @@ void extract_snippet(ZebraHandle zh, zebra_snippets *sn, struct ZebraRecStream *stream, RecType rt, void *recTypeClientData); +int zebra_get_rec_snippets(ZebraHandle zh, zint sysno, + zebra_snippets *snippets); + void zebra_index_merge(ZebraHandle zh); ZEBRA_RES zebra_buffer_extract_record(ZebraHandle zh, diff --git a/index/retrieve.c b/index/retrieve.c index d3ca5ad..8957c3c 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.71 2007-08-21 11:06:47 adam Exp $ +/* $Id: retrieve.c,v 1.72 2007-08-21 13:27:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -444,17 +444,11 @@ static void snippet_xml_record(ZebraHandle zh, WRBUF wrbuf, zebra_snippets *doc) wrbuf_printf(wrbuf, ""); } -int zebra_special_snippet_fetch(ZebraHandle zh, const char *setname, - zint sysno, ODR odr, - const char *elemsetname, - const Odr_oid *input_format, - const Odr_oid **output_format, - char **rec_bufp, int *rec_lenp) +int zebra_get_rec_snippets(ZebraHandle zh, zint sysno, + zebra_snippets *snippets) { int return_code = 0; - Record rec; - - rec = rec_get(zh->reg->records, sysno); + Record rec = rec_get(zh->reg->records, sysno); if (!rec) { yaz_log(YLOG_WARN, "rec_get fail on sysno=" ZINT_FORMAT, sysno); @@ -466,60 +460,73 @@ int zebra_special_snippet_fetch(ZebraHandle zh, const char *setname, void *recTypeClientData; RecType rt = recType_byName(zh->reg->recTypes, zh->res, file_type, &recTypeClientData); - zebra_snippets *hit_snippet = zebra_snippets_create(); - WRBUF wrbuf = wrbuf_alloc(); - zebra_snippets_hit_vector(zh, setname, sysno, hit_snippet); - if (!rt) return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; else { struct ZebraRecStream stream; - return_code = zebra_create_record_stream(zh, &rec, &stream); if (return_code == 0) { - zebra_snippets *rec_snippet = zebra_snippets_create(); - extract_snippet(zh, rec_snippet, &stream, + extract_snippet(zh, snippets, &stream, rt, recTypeClientData); -#if 0 - /* for debugging purposes */ - yaz_log(YLOG_LOG, "---------------------------"); - yaz_log(YLOG_LOG, "REC SNIPPET:"); - zebra_snippets_log(rec_snippet, YLOG_LOG, 1); - yaz_log(YLOG_LOG, "---------------------------"); - yaz_log(YLOG_LOG, "HIT SNIPPET:"); - zebra_snippets_log(hit_snippet, YLOG_LOG, 1); -#endif + stream.destroy(&stream); + } + } + rec_free(&rec); + } + return return_code; +} + +int zebra_special_snippet_fetch(ZebraHandle zh, const char *setname, + zint sysno, ODR odr, + const char *elemsetname, + const Odr_oid *input_format, + const Odr_oid **output_format, + char **rec_bufp, int *rec_lenp) +{ + zebra_snippets *rec_snippets = zebra_snippets_create(); + int return_code = zebra_get_rec_snippets(zh, sysno, rec_snippets); + + if (!return_code) + { + WRBUF wrbuf = wrbuf_alloc(); + zebra_snippets *hit_snippet = zebra_snippets_create(); - zebra_snippets_ring(rec_snippet, hit_snippet, 5, 5); + zebra_snippets_hit_vector(zh, setname, sysno, hit_snippet); #if 0 - yaz_log(YLOG_LOG, "---------------------------"); - yaz_log(YLOG_LOG, "RING SNIPPET:"); - zebra_snippets_log(rec_snippet, YLOG_LOG, 1); + /* for debugging purposes */ + yaz_log(YLOG_LOG, "---------------------------"); + yaz_log(YLOG_LOG, "REC SNIPPET:"); + zebra_snippets_log(rec_snippet, YLOG_LOG, 1); + yaz_log(YLOG_LOG, "---------------------------"); + yaz_log(YLOG_LOG, "HIT SNIPPET:"); + zebra_snippets_log(hit_snippet, YLOG_LOG, 1); #endif - - snippet_xml_record(zh, wrbuf, rec_snippet); - - *output_format = yaz_oid_recsyn_xml; - - - zebra_snippets_destroy(rec_snippet); - } - stream.destroy(&stream); - } + + zebra_snippets_ring(rec_snippets, hit_snippet, 5, 5); + +#if 0 + yaz_log(YLOG_LOG, "---------------------------"); + yaz_log(YLOG_LOG, "RING SNIPPET:"); + zebra_snippets_log(rec_snippets, YLOG_LOG, 1); +#endif + snippet_xml_record(zh, wrbuf, rec_snippets); + + *output_format = yaz_oid_recsyn_xml; + if (return_code == 0) { *rec_lenp = wrbuf_len(wrbuf); *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf)); } wrbuf_destroy(wrbuf); - rec_free(&rec); zebra_snippets_destroy(hit_snippet); } + zebra_snippets_destroy(rec_snippets); return return_code; } diff --git a/index/rpnscan.c b/index/rpnscan.c index 7800b11..4fbb8a5 100644 --- a/index/rpnscan.c +++ b/index/rpnscan.c @@ -1,4 +1,4 @@ -/* $Id: rpnscan.c,v 1.11 2007-05-09 07:07:18 adam Exp $ +/* $Id: rpnscan.c,v 1.12 2007-08-21 13:27:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -106,11 +106,44 @@ static void count_set(ZebraHandle zh, RSET rset, zint *count) *count = rset->hits_count; } +static void get_first_snippet_from_rset(RSET rset, zebra_snippets *snippets, + zint *sysno) +{ + struct it_key key; + RSFD rfd; + TERMID termid; + + yaz_log(YLOG_DEBUG, "get_first_snippet_from_rset"); + + rfd = rset_open(rset, RSETF_READ); + *sysno = 0; + while (rset_read(rfd, &key, &termid)) + { + if (key.mem[0] != *sysno) + { + if (*sysno) + break; + *sysno = key.mem[0]; + } + if (termid) + { + struct ord_list *ol; + for (ol = termid->ol; ol; ol = ol->next) + { + zebra_snippets_append(snippets, key.mem[key.len-1], 0, + ol->ord, termid->name); + } + } + } + rset_close (rfd); +} + struct scan2_info_entry { WRBUF term; char prefix[20]; ISAM_P isam_p; int pos_to_save; + int ord; }; static int scan_handle2(char *name, const char *info, int pos, void *client) @@ -153,11 +186,17 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, { if (ar[i].isam_p && strcmp(wrbuf_cstr(ar[i].term), term) == 0) { - RSET rset_t = rset_trunc( + struct ord_list *ol = ord_list_create(nmem); + RSET rset_t; + + ol = ord_list_append(nmem, ol, ar[i].ord); + + assert(ol); + rset_t = rset_trunc( zh, &ar[i].isam_p, 1, wrbuf_buf(ar[i].term), wrbuf_len(ar[i].term), - NULL, 0, zapt->term->which, nmem, - kc, kc->scope, 0, index_type, + NULL, 1, zapt->term->which, nmem, + kc, kc->scope, ol, index_type, 0 /* hits_limit */, 0 /* term_ref_id_str */); if (!rset) @@ -188,17 +227,42 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, } /* count it */ count_set(zh, rset, &count); - rset_delete(rset); - if (count > 0) + + if (pos != -1) { - if (pos != -1) + zint sysno; + int code = -1; + zebra_snippets *rec_snippets = zebra_snippets_create(); + zebra_snippets *hit_snippets = zebra_snippets_create(); + + glist[pos].term = 0; + glist[pos].display_term = 0; + + get_first_snippet_from_rset(rset, hit_snippets, &sysno); + if (sysno) + code = zebra_get_rec_snippets(zh, sysno, rec_snippets); + + if (code == 0) { + const struct zebra_snippet_word *w = + zebra_snippets_lookup(rec_snippets, hit_snippets); + if (w) + { + glist[pos].display_term = odr_strdup(stream, w->term); + } + } + if (!glist[pos].term) zebra_term_untrans_iconv(zh, stream->mem, index_type, &glist[pos].term, term); - glist[pos].occurrences = count; - } - return 1; + glist[pos].occurrences = count; + zebra_snippets_destroy(rec_snippets); + zebra_snippets_destroy(hit_snippets); } + rset_delete(rset); + if (count > 0) + return 1; + else + return 0; } return 0; } @@ -252,6 +316,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, wrbuf_rewind(ar[i].term); wrbuf_puts(ar[i].term, termz + prefix_len); ar[i].isam_p = 0; + ar[i].ord = ords[i]; } /** deal with terms before position .. */ /* the glist index starts at zero (unlike scan positions */ @@ -317,6 +382,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, wrbuf_rewind(ar[i].term); wrbuf_puts(ar[i].term, termz + prefix_len); ar[i].isam_p = 0; + ar[i].ord = ords[i]; } after_pos = 1; /* immediate term first.. */ diff --git a/index/zebrasrv.c b/index/zebrasrv.c index 25f2d27..939c7f0 100644 --- a/index/zebrasrv.c +++ b/index/zebrasrv.c @@ -1,4 +1,4 @@ -/* $Id: zebrasrv.c,v 1.16 2007-05-21 11:54:59 adam Exp $ +/* $Id: zebrasrv.c,v 1.17 2007-08-21 13:27:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -383,6 +383,7 @@ static int bend_scan (void *handle, bend_scan_rr *r) for (i = 0; i < r->num_entries; i++) { r->entries[i].term = entries[i].term; + r->entries[i].display_term = entries[i].display_term; r->entries[i].occurrences = CAST_ZINT_TO_INT(entries[i].occurrences); } diff --git a/util/snippet.c b/util/snippet.c index b299e5f..10e69c0 100644 --- a/util/snippet.c +++ b/util/snippet.c @@ -1,4 +1,4 @@ -/* $Id: snippet.c,v 1.13 2007-08-21 11:06:47 adam Exp $ +/* $Id: snippet.c,v 1.14 2007-08-21 13:27:04 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -209,6 +209,25 @@ static void zebra_snippets_clear(zebra_snippets *sn) } } +const struct zebra_snippet_word *zebra_snippets_lookup( + const zebra_snippets *doc, const zebra_snippets *hit) +{ + const zebra_snippet_word *hit_w; + for (hit_w = zebra_snippets_constlist(hit); hit_w; hit_w = hit_w->next) + { + const zebra_snippet_word *doc_w; + for (doc_w = zebra_snippets_constlist(doc); doc_w; doc_w = doc_w->next) + { + if (doc_w->ord == hit_w->ord && doc_w->seqno == hit_w->seqno + && !doc_w->ws) + { + return doc_w; + } + } + } + return 0; +} + void zebra_snippets_ring(zebra_snippets *doc, const zebra_snippets *hit, int before, int after) {