X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frpnscan.c;h=d484132de0917185f48f4a487fa9a281c41079d5;hb=e30daf1c89e3b781de360a9dcad4f83e72e16d28;hp=846c9792e7df3f2113bb6186d2e30a98e989db71;hpb=bc4308f57215143ed7b634db6ac86a77064d2839;p=idzebra-moved-to-github.git diff --git a/index/rpnscan.c b/index/rpnscan.c index 846c979..d484132 100644 --- a/index/rpnscan.c +++ b/index/rpnscan.c @@ -1,5 +1,5 @@ -/* $Id: rpnscan.c,v 1.4 2006-10-29 17:18:05 adam Exp $ - Copyright (C) 1995-2006 +/* $Id: rpnscan.c,v 1.13 2007-09-18 18:57:29 adam Exp $ + Copyright (C) 1995-2007 Index Data ApS This file is part of the Zebra server. @@ -37,6 +37,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include +#include #define RPN_MAX_ORDS 32 @@ -105,11 +106,49 @@ static void count_set(ZebraHandle zh, RSET rset, zint *count) *count = rset->hits_count; } +static void get_first_snippet_from_rset(ZebraHandle zh, + RSET rset, zebra_snippets *snippets, + zint *sysno) +{ + struct it_key key; + RSFD rfd; + TERMID termid; + size_t sysno_mem_index = 0; + + if (zh->m_staticrank) + sysno_mem_index = 1; + + yaz_log(YLOG_DEBUG, "get_first_snippet_from_rset"); + + rfd = rset_open(rset, RSETF_READ); + *sysno = 0; + while (rset_read(rfd, &key, &termid)) + { + if (key.mem[sysno_mem_index] != *sysno) + { + if (*sysno) + break; + *sysno = key.mem[sysno_mem_index]; + } + if (termid) + { + struct ord_list *ol; + for (ol = termid->ol; ol; ol = ol->next) + { + zebra_snippets_append(snippets, key.mem[key.len-1], 0, + ol->ord, termid->name); + } + } + } + rset_close (rfd); +} + struct scan2_info_entry { WRBUF term; char prefix[20]; ISAM_P isam_p; int pos_to_save; + int ord; }; static int scan_handle2(char *name, const char *info, int pos, void *client) @@ -150,13 +189,19 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, RSET rset = 0; for (i = 0; i < ord_no; i++) { - if (ar[i].isam_p && strcmp(wrbuf_buf(ar[i].term), term) == 0) + if (ar[i].isam_p && strcmp(wrbuf_cstr(ar[i].term), term) == 0) { - RSET rset_t = rset_trunc( + struct ord_list *ol = ord_list_create(nmem); + RSET rset_t; + + ol = ord_list_append(nmem, ol, ar[i].ord); + + assert(ol); + rset_t = rset_trunc( zh, &ar[i].isam_p, 1, wrbuf_buf(ar[i].term), wrbuf_len(ar[i].term), - NULL, 0, zapt->term->which, nmem, - kc, kc->scope, 0, index_type, + NULL, 1, zapt->term->which, nmem, + kc, kc->scope, ol, index_type, 0 /* hits_limit */, 0 /* term_ref_id_str */); if (!rset) @@ -187,17 +232,42 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, } /* count it */ count_set(zh, rset, &count); - rset_delete(rset); - if (count > 0) + + if (pos != -1) { - if (pos != -1) + zint sysno; + int code = -1; + zebra_snippets *rec_snippets = zebra_snippets_create(); + zebra_snippets *hit_snippets = zebra_snippets_create(); + + glist[pos].term = 0; + glist[pos].display_term = 0; + + get_first_snippet_from_rset(zh, rset, hit_snippets, &sysno); + if (sysno) + code = zebra_get_rec_snippets(zh, sysno, rec_snippets); + + if (code == 0) { + const struct zebra_snippet_word *w = + zebra_snippets_lookup(rec_snippets, hit_snippets); + if (w) + { + glist[pos].display_term = odr_strdup(stream, w->term); + } + } + if (!glist[pos].term) zebra_term_untrans_iconv(zh, stream->mem, index_type, &glist[pos].term, term); - glist[pos].occurrences = count; - } - return 1; + glist[pos].occurrences = count; + zebra_snippets_destroy(rec_snippets); + zebra_snippets_destroy(hit_snippets); } + rset_delete(rset); + if (count > 0) + return 1; + else + return 0; } return 0; } @@ -213,6 +283,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, struct scan2_info_entry *ar = nmem_malloc(nmem, sizeof(*ar) * ord_no); struct rpn_char_map_info rcmi; int i, dif; + int after_pos; int pos = 0; ZebraScanEntry *glist = (ZebraScanEntry *) @@ -242,10 +313,15 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == ZEBRA_FAIL) + { + for (i = 0; i < ord_no; i++) + wrbuf_destroy(ar[i].term); return ZEBRA_FAIL; + } wrbuf_rewind(ar[i].term); wrbuf_puts(ar[i].term, termz + prefix_len); ar[i].isam_p = 0; + ar[i].ord = ords[i]; } /** deal with terms before position .. */ /* the glist index starts at zero (unlike scan positions */ @@ -265,7 +341,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, ar[i].pos_to_save = -1; strcpy(termz, ar[i].prefix); - strcat(termz, wrbuf_buf(ar[i].term)); + strcat(termz, wrbuf_cstr(ar[i].term)); dict_scan(zh->reg->dict, termz, &before, &after, ar+i, scan_handle2); } @@ -274,8 +350,8 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, for (i = 0; i < ord_no; i++) { if (ar[i].isam_p - && (hi == 0 || strcmp(wrbuf_buf(ar[i].term), hi) > 0)) - hi = wrbuf_buf(ar[i].term); + && (hi == 0 || strcmp(wrbuf_cstr(ar[i].term), hi) > 0)) + hi = wrbuf_cstr(ar[i].term); } if (!hi) break; @@ -311,8 +387,10 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, wrbuf_rewind(ar[i].term); wrbuf_puts(ar[i].term, termz + prefix_len); ar[i].isam_p = 0; + ar[i].ord = ords[i]; } + after_pos = 1; /* immediate term first.. */ for (pos = *position-1; pos < *num_entries; ) { const char *lo = 0; @@ -324,22 +402,24 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, { char termz[IT_MAX_WORD+20]; int before = 0; - int after = (pos == *position-1) ? 1 : 2; + int after = after_pos; ar[i].pos_to_save = 1; strcpy(termz, ar[i].prefix); - strcat(termz, wrbuf_buf(ar[i].term)); + strcat(termz, wrbuf_cstr(ar[i].term)); dict_scan(zh->reg->dict, termz, &before, &after, ar+i, scan_handle2); } } + after_pos = 2; /* next round we grab following term */ + /* get minimum after scan */ for (i = 0; i < ord_no; i++) { if (ar[i].isam_p - && (lo == 0 || strcmp(wrbuf_buf(ar[i].term), lo) < 0)) - lo = wrbuf_buf(ar[i].term); + && (lo == 0 || strcmp(wrbuf_cstr(ar[i].term), lo) < 0)) + lo = wrbuf_cstr(ar[i].term); } if (!lo) break; @@ -360,6 +440,9 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, *list = glist; + for (i = 0; i < ord_no; i++) + wrbuf_destroy(ar[i].term); + return ZEBRA_OK; } @@ -376,7 +459,7 @@ struct scan_info { }; ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, - oid_value attributeset, + const Odr_oid *attributeset, int num_bases, char **basenames, int *position, int *num_entries, ZebraScanEntry **list, int *is_partial, RSET limit_set) @@ -396,8 +479,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, *list = 0; *is_partial = 0; - if (attributeset == VAL_NONE) - attributeset = VAL_BIB1; + if (!attributeset) + attributeset = yaz_oid_attset_bib_1; if (!limit_set) /* no limit set given already */ { @@ -415,7 +498,6 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (termset_value_numeric != -2) { - sprintf(resname, "%d", termset_value_numeric); termset_name = resname; } @@ -423,11 +505,19 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, termset_name = termset_value_string; limit_set = resultSetRef (zh, termset_name); + + if (!limit_set) + { + zebra_setError(zh, + YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, + termset_name); + return ZEBRA_FAIL; + } } } - yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d", - *position, *num_entries, attributeset); + yaz_log(YLOG_DEBUG, "position = %d, num = %d", + *position, *num_entries); if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type, rank_type, &complete_flag, &sort_flag))