X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frpnscan.c;h=670a30dbc6f92a488318fbec5438d1cf7eb3bd63;hb=bfe7ce5c9c47f3f4ad1ac76d4232b9807d5ee158;hp=8ea6850f086dbe13ac22e562267258f5b0bb2138;hpb=53f50a1b1dd002ef484a41f50f3598386335cae1;p=idzebra-moved-to-github.git diff --git a/index/rpnscan.c b/index/rpnscan.c index 8ea6850..670a30d 100644 --- a/index/rpnscan.c +++ b/index/rpnscan.c @@ -1,4 +1,4 @@ -/* $Id: rpnscan.c,v 1.9 2007-05-08 12:50:04 adam Exp $ +/* $Id: rpnscan.c,v 1.19 2007-11-01 14:56:07 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -41,11 +41,11 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define RPN_MAX_ORDS 32 -int log_scan = YLOG_LOG; +static int log_scan = YLOG_LOG; /* convert APT SCAN term to internal cmap */ static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, - char *termz, int reg_type) + char *termz, zebra_map_t zm) { char termz0[IT_MAX_WORD]; @@ -63,7 +63,7 @@ static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, while ((len = (cp_end - cp)) > 0) { - map = zebra_maps_input(zh->reg->zebra_maps, reg_type, &cp, len, 0); + map = zebra_maps_input(zm, &cp, len, 0); if (**map == *CHR_SPACE) space_map = *map; else @@ -81,7 +81,7 @@ static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_OK; } -static void count_set(ZebraHandle zh, RSET rset, zint *count) +static void count_set(ZebraHandle zh, RSET rset, zint *count, zint approx_limit) { zint psysno = 0; struct it_key key; @@ -89,7 +89,7 @@ static void count_set(ZebraHandle zh, RSET rset, zint *count) yaz_log(YLOG_DEBUG, "count_set"); - rset->hits_limit = zh->approx_limit; + rset->hits_limit = approx_limit; *count = 0; rfd = rset_open(rset, RSETF_READ); @@ -102,15 +102,53 @@ static void count_set(ZebraHandle zh, RSET rset, zint *count) break; } } - rset_close (rfd); + rset_close(rfd); *count = rset->hits_count; } +static void get_first_snippet_from_rset(ZebraHandle zh, + RSET rset, zebra_snippets *snippets, + zint *sysno) +{ + struct it_key key; + RSFD rfd; + TERMID termid; + size_t sysno_mem_index = 0; + + if (zh->m_staticrank) + sysno_mem_index = 1; + + yaz_log(YLOG_DEBUG, "get_first_snippet_from_rset"); + + rfd = rset_open(rset, RSETF_READ); + *sysno = 0; + while (rset_read(rfd, &key, &termid)) + { + if (key.mem[sysno_mem_index] != *sysno) + { + if (*sysno) + break; + *sysno = key.mem[sysno_mem_index]; + } + if (termid) + { + struct ord_list *ol; + for (ol = termid->ol; ol; ol = ol->next) + { + zebra_snippets_append(snippets, key.mem[key.len-1], 0, + ol->ord, termid->name); + } + } + } + rset_close(rfd); +} + struct scan2_info_entry { WRBUF term; char prefix[20]; ISAM_P isam_p; int pos_to_save; + int ord; }; static int scan_handle2(char *name, const char *info, int pos, void *client) @@ -122,7 +160,7 @@ static int scan_handle2(char *name, const char *info, int pos, void *client) return 0; len_prefix = strlen(scan_info->prefix); - if (memcmp (name, scan_info->prefix, len_prefix)) + if (memcmp(name, scan_info->prefix, len_prefix)) return 1; /* skip special terms such as first-in-field specials */ @@ -132,8 +170,8 @@ static int scan_handle2(char *name, const char *info, int pos, void *client) wrbuf_rewind(scan_info->term); wrbuf_puts(scan_info->term, name+len_prefix); - assert (*info == sizeof(ISAM_P)); - memcpy (&scan_info->isam_p, info+1, sizeof(ISAM_P)); + assert(*info == sizeof(ISAM_P)); + memcpy(&scan_info->isam_p, info+1, sizeof(ISAM_P)); return 0; } @@ -143,22 +181,37 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, Z_AttributesPlusTerm *zapt, RSET limit_set, const char *term, - int index_type, + const char *index_type, struct scan2_info_entry *ar, int ord_no, ZebraScanEntry *glist, int pos) { int i; RSET rset = 0; + zint approx_limit = zh->approx_limit; + AttrType global_hits_limit_attr; + int l; + attr_init_APT(&global_hits_limit_attr, zapt, 12); + + l = attr_find(&global_hits_limit_attr, NULL); + if (l != -1) + approx_limit = l; + for (i = 0; i < ord_no; i++) { if (ar[i].isam_p && strcmp(wrbuf_cstr(ar[i].term), term) == 0) { - RSET rset_t = rset_trunc( + struct ord_list *ol = ord_list_create(nmem); + RSET rset_t; + + ol = ord_list_append(nmem, ol, ar[i].ord); + + assert(ol); + rset_t = rset_trunc( zh, &ar[i].isam_p, 1, wrbuf_buf(ar[i].term), wrbuf_len(ar[i].term), - NULL, 0, zapt->term->which, nmem, - kc, kc->scope, 0, index_type, - 0 /* hits_limit */, + NULL, 1, zapt->term->which, nmem, + kc, kc->scope, ol, index_type, + 0 /* hits_limit_value */, 0 /* term_ref_id_str */); if (!rset) rset = rset_t; @@ -187,33 +240,61 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, rset = rset_create_and(nmem, kc, kc->scope, 2, rsets); } /* count it */ - count_set(zh, rset, &count); - rset_delete(rset); - if (count > 0) + count_set(zh, rset, &count, approx_limit); + + if (pos != -1) { - if (pos != -1) + zint sysno; + int code = -1; + zebra_snippets *rec_snippets = zebra_snippets_create(); + zebra_snippets *hit_snippets = zebra_snippets_create(); + + glist[pos].term = 0; + glist[pos].display_term = 0; + + get_first_snippet_from_rset(zh, rset, hit_snippets, &sysno); + if (sysno) + code = zebra_get_rec_snippets(zh, sysno, rec_snippets); + + if (code == 0) { + const struct zebra_snippet_word *w = + zebra_snippets_lookup(rec_snippets, hit_snippets); + if (w) + { + glist[pos].display_term = odr_strdup(stream, w->term); + } + } + if (!glist[pos].term) zebra_term_untrans_iconv(zh, stream->mem, index_type, &glist[pos].term, term); - glist[pos].occurrences = count; - } - return 1; + glist[pos].occurrences = count; + zebra_snippets_destroy(rec_snippets); + zebra_snippets_destroy(hit_snippets); } + rset_delete(rset); + if (count > 0) + return 1; + else + return 0; } return 0; } - -static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, + +static ZEBRA_RES rpn_scan_norm(ZebraHandle zh, ODR stream, NMEM nmem, struct rset_key_control *kc, Z_AttributesPlusTerm *zapt, int *position, int *num_entries, ZebraScanEntry **list, int *is_partial, RSET limit_set, - int index_type, int ord_no, int *ords) + const char *index_type, + int ord_no, int *ords) { struct scan2_info_entry *ar = nmem_malloc(nmem, sizeof(*ar) * ord_no); struct rpn_char_map_info rcmi; + zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type); int i, dif; + int after_pos; int pos = 0; ZebraScanEntry *glist = (ZebraScanEntry *) @@ -227,7 +308,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, *num_entries = 0; return ZEBRA_OK; } - rpn_char_map_prepare (zh->reg, index_type, &rcmi); + rpn_char_map_prepare(zh->reg, zm, &rcmi); for (i = 0; i < ord_no; i++) ar[i].term = wrbuf_alloc(); @@ -237,16 +318,21 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, char termz[IT_MAX_WORD+20]; int prefix_len = 0; - prefix_len = key_SU_encode (ords[i], termz); + prefix_len = key_SU_encode(ords[i], termz); termz[prefix_len] = 0; strcpy(ar[i].prefix, termz); - if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == + if (trans_scan_term(zh, zapt, termz+prefix_len, zm) == ZEBRA_FAIL) + { + for (i = 0; i < ord_no; i++) + wrbuf_destroy(ar[i].term); return ZEBRA_FAIL; + } wrbuf_rewind(ar[i].term); wrbuf_puts(ar[i].term, termz + prefix_len); ar[i].isam_p = 0; + ar[i].ord = ords[i]; } /** deal with terms before position .. */ /* the glist index starts at zero (unlike scan positions */ @@ -302,18 +388,20 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, char termz[IT_MAX_WORD+20]; int prefix_len = 0; - prefix_len = key_SU_encode (ords[i], termz); + prefix_len = key_SU_encode(ords[i], termz); termz[prefix_len] = 0; strcpy(ar[i].prefix, termz); - if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == + if (trans_scan_term(zh, zapt, termz+prefix_len, zm) == ZEBRA_FAIL) return ZEBRA_FAIL; wrbuf_rewind(ar[i].term); wrbuf_puts(ar[i].term, termz + prefix_len); ar[i].isam_p = 0; + ar[i].ord = ords[i]; } + after_pos = 1; /* immediate term first.. */ for (pos = *position-1; pos < *num_entries; ) { const char *lo = 0; @@ -325,7 +413,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, { char termz[IT_MAX_WORD+20]; int before = 0; - int after = (pos == *position-1) ? 1 : 2; + int after = after_pos; ar[i].pos_to_save = 1; @@ -335,6 +423,8 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, ar+i, scan_handle2); } } + after_pos = 2; /* next round we grab following term */ + /* get minimum after scan */ for (i = 0; i < ord_no; i++) { @@ -361,6 +451,9 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, *list = glist; + for (i = 0; i < ord_no; i++) + wrbuf_destroy(ar[i].term); + return ZEBRA_OK; } @@ -385,7 +478,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, int base_no; int ords[RPN_MAX_ORDS], ord_no = 0; - unsigned index_type; + const char *index_type; char *search_type = NULL; char rank_type[128]; int complete_flag; @@ -416,14 +509,21 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (termset_value_numeric != -2) { - sprintf(resname, "%d", termset_value_numeric); termset_name = resname; } else termset_name = termset_value_string; - limit_set = resultSetRef (zh, termset_name); + limit_set = resultSetRef(zh, termset_name); + + if (!limit_set) + { + zebra_setError(zh, + YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, + termset_name); + return ZEBRA_FAIL; + } } } @@ -447,7 +547,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, { int ord; - if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no])) + if (zebraExplain_curDatabase(zh->reg->zei, basenames[base_no])) { zebra_setError(zh, YAZ_BIB1_DATABASE_UNAVAILABLE, basenames[base_no]); @@ -472,9 +572,14 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, nmem = nmem_create(); kc = zebra_key_control_create(zh); - res = rpn_scan_ver2(zh, stream, nmem, kc, zapt, position, num_entries, + if (sort_flag) + res = rpn_facet(zh, stream, nmem, kc, zapt, position, num_entries, list, is_partial, limit_set, index_type, ord_no, ords); + else + res = rpn_scan_norm(zh, stream, nmem, kc, zapt, position, num_entries, + list, + is_partial, limit_set, index_type, ord_no, ords); nmem_destroy(nmem); (*kc->dec)(kc); return res;