X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Frpnscan.c;h=7800b11ad6530652a1a4352537421637a1d366ef;hb=a12477a52f995b809b00ac9e7b73a9b98cfbc540;hp=02bf5dabf8fa98d3386ab5eb82b75978fe94405d;hpb=918c8b1ec479083d82c390d5dceb4899654cb666;p=idzebra-moved-to-github.git diff --git a/index/rpnscan.c b/index/rpnscan.c index 02bf5da..7800b11 100644 --- a/index/rpnscan.c +++ b/index/rpnscan.c @@ -1,5 +1,5 @@ -/* $Id: rpnscan.c,v 1.3 2006-09-21 20:22:34 adam Exp $ - Copyright (C) 1995-2006 +/* $Id: rpnscan.c,v 1.11 2007-05-09 07:07:18 adam Exp $ + Copyright (C) 1995-2007 Index Data ApS This file is part of the Zebra server. @@ -37,6 +37,7 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #include #include #include +#include #define RPN_MAX_ORDS 32 @@ -150,7 +151,7 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, RSET rset = 0; for (i = 0; i < ord_no; i++) { - if (ar[i].isam_p && strcmp(wrbuf_buf(ar[i].term), term) == 0) + if (ar[i].isam_p && strcmp(wrbuf_cstr(ar[i].term), term) == 0) { RSET rset_t = rset_trunc( zh, &ar[i].isam_p, 1, @@ -213,6 +214,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, struct scan2_info_entry *ar = nmem_malloc(nmem, sizeof(*ar) * ord_no); struct rpn_char_map_info rcmi; int i, dif; + int after_pos; int pos = 0; ZebraScanEntry *glist = (ZebraScanEntry *) @@ -242,7 +244,11 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == ZEBRA_FAIL) + { + for (i = 0; i < ord_no; i++) + wrbuf_destroy(ar[i].term); return ZEBRA_FAIL; + } wrbuf_rewind(ar[i].term); wrbuf_puts(ar[i].term, termz + prefix_len); ar[i].isam_p = 0; @@ -265,7 +271,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, ar[i].pos_to_save = -1; strcpy(termz, ar[i].prefix); - strcat(termz, wrbuf_buf(ar[i].term)); + strcat(termz, wrbuf_cstr(ar[i].term)); dict_scan(zh->reg->dict, termz, &before, &after, ar+i, scan_handle2); } @@ -274,8 +280,8 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, for (i = 0; i < ord_no; i++) { if (ar[i].isam_p - && (hi == 0 || strcmp(wrbuf_buf(ar[i].term), hi) > 0)) - hi = wrbuf_buf(ar[i].term); + && (hi == 0 || strcmp(wrbuf_cstr(ar[i].term), hi) > 0)) + hi = wrbuf_cstr(ar[i].term); } if (!hi) break; @@ -313,6 +319,7 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, ar[i].isam_p = 0; } + after_pos = 1; /* immediate term first.. */ for (pos = *position-1; pos < *num_entries; ) { const char *lo = 0; @@ -324,22 +331,24 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, { char termz[IT_MAX_WORD+20]; int before = 0; - int after = (pos == *position-1) ? 1 : 2; + int after = after_pos; ar[i].pos_to_save = 1; strcpy(termz, ar[i].prefix); - strcat(termz, wrbuf_buf(ar[i].term)); + strcat(termz, wrbuf_cstr(ar[i].term)); dict_scan(zh->reg->dict, termz, &before, &after, ar+i, scan_handle2); } } + after_pos = 2; /* next round we grab following term */ + /* get minimum after scan */ for (i = 0; i < ord_no; i++) { if (ar[i].isam_p - && (lo == 0 || strcmp(wrbuf_buf(ar[i].term), lo) < 0)) - lo = wrbuf_buf(ar[i].term); + && (lo == 0 || strcmp(wrbuf_cstr(ar[i].term), lo) < 0)) + lo = wrbuf_cstr(ar[i].term); } if (!lo) break; @@ -360,6 +369,9 @@ static ZEBRA_RES rpn_scan_ver2(ZebraHandle zh, ODR stream, NMEM nmem, *list = glist; + for (i = 0; i < ord_no; i++) + wrbuf_destroy(ar[i].term); + return ZEBRA_OK; } @@ -375,285 +387,8 @@ struct scan_info { char prefix[20]; }; -static int scan_handle1(char *name, const char *info, int pos, void *client) -{ - int len_prefix, idx; - struct scan_info *scan_info = (struct scan_info *) client; - - len_prefix = strlen(scan_info->prefix); - if (memcmp (name, scan_info->prefix, len_prefix)) - return 1; - if (pos > 0) - idx = scan_info->after - pos + scan_info->before; - else - idx = - pos - 1; - - /* skip special terms such as first-in-field specials */ - if (name[len_prefix] < CHR_BASE_CHAR) - return 1; - - if (idx < 0) - return 0; - scan_info->list[idx].term = (char *) - odr_malloc(scan_info->odr, strlen(name + len_prefix)+1); - strcpy(scan_info->list[idx].term, name + len_prefix); - assert (*info == sizeof(ISAM_P)); - memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P)); - return 0; -} - -static ZEBRA_RES rpn_scan_ver1(ZebraHandle zh, ODR stream, NMEM rset_nmem, - struct rset_key_control *kc, - Z_AttributesPlusTerm *zapt, - int *position, int *num_entries, - ZebraScanEntry **list, - int *is_partial, RSET limit_set, - int index_type, int ord_no, int *ords) -{ - int pos = *position; - int num = *num_entries; - int before; - int after; - int i; - struct scan_info *scan_info_array; - char termz[IT_MAX_WORD+20]; - ZebraScanEntry *glist; - int ptr[RPN_MAX_ORDS]; - - before = pos-1; - if (before < 0) - before = 0; - after = 1+num-pos; - if (after < 0) - after = 0; - yaz_log(YLOG_DEBUG, "rpn_scan pos=%d num=%d before=%d " - "after=%d before+after=%d", - pos, num, before, after, before+after); - scan_info_array = (struct scan_info *) - odr_malloc(stream, ord_no * sizeof(*scan_info_array)); - for (i = 0; i < ord_no; i++) - { - int j, prefix_len = 0; - int before_tmp = before, after_tmp = after; - struct scan_info *scan_info = scan_info_array + i; - struct rpn_char_map_info rcmi; - - rpn_char_map_prepare (zh->reg, index_type, &rcmi); - - scan_info->before = before; - scan_info->after = after; - scan_info->odr = stream; - - scan_info->list = (struct scan1_info_entry *) - odr_malloc(stream, (before+after) * sizeof(*scan_info->list)); - for (j = 0; jlist[j].term = NULL; - - prefix_len += key_SU_encode (ords[i], termz + prefix_len); - termz[prefix_len] = 0; - strcpy(scan_info->prefix, termz); - - if (trans_scan_term(zh, zapt, termz+prefix_len, index_type) == - ZEBRA_FAIL) - return ZEBRA_FAIL; - - dict_scan(zh->reg->dict, termz, &before_tmp, &after_tmp, - scan_info, scan_handle1); - } - glist = (ZebraScanEntry *) - odr_malloc(stream, (before+after)*sizeof(*glist)); - - /* consider terms after main term */ - for (i = 0; i < ord_no; i++) - ptr[i] = before; - - *is_partial = 0; - for (i = 0; i= 0 && - (tst = scan_info_array[j].list[ptr[j]].term) && - (!mterm || strcmp (tst, mterm) < 0)) - { - j0 = j; - mterm = tst; - } - } - if (j0 == -1) - break; /* no value found, stop */ - - /* get result set for first one , but only if it's within bounds */ - if (lo >= 0) - { - /* get result set for first term */ - zebra_term_untrans_iconv(zh, stream->mem, index_type, - &glist[lo].term, mterm); - rset = rset_trunc(zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1, - glist[lo].term, strlen(glist[lo].term), - NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0, index_type, 0 /* hits_limit */, - 0 /* term_ref_id_str */); - } - ptr[j0]++; /* move index for this set .. */ - /* get result set for remaining scan terms */ - for (j = j0+1; j= 0 && - (tst = scan_info_array[j].list[ptr[j]].term) && - !strcmp (tst, mterm)) - { - if (lo >= 0) - { - RSET rsets[2]; - - rsets[0] = rset; - rsets[1] = - rset_trunc( - zh, &scan_info_array[j].list[ptr[j]].isam_p, 1, - glist[lo].term, - strlen(glist[lo].term), NULL, 0, - zapt->term->which,rset_nmem, - kc, kc->scope, 0, index_type, 0 /* hits_limit */, - 0 /* term_ref_id_str */ ); - rset = rset_create_or(rset_nmem, kc, - kc->scope, 0 /* termid */, - 2, rsets); - } - ptr[j]++; - } - } - if (lo >= 0) - { - zint count; - /* merge with limit_set if given */ - if (limit_set) - { - RSET rsets[2]; - rsets[0] = rset; - rsets[1] = rset_dup(limit_set); - - rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets); - } - /* count it */ - count_set(zh, rset, &count); - glist[lo].occurrences = count; - rset_delete(rset); - } - } - if (i < after) - { - *num_entries -= (after-i); - *is_partial = 1; - if (*num_entries < 0) - { - *num_entries = 0; - return ZEBRA_OK; - } - } - /* consider terms before main term */ - for (i = 0; i= 0 && - (tst = scan_info_array[j].list[before-1-ptr[j]].term) && - (!mterm || strcmp (tst, mterm) > 0)) - { - j0 = j; - mterm = tst; - } - } - if (j0 == -1) - break; - - zebra_term_untrans_iconv(zh, stream->mem, index_type, - &glist[lo].term, mterm); - - rset = rset_trunc - (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1, - glist[lo].term, strlen(glist[lo].term), - NULL, 0, zapt->term->which, rset_nmem, - kc, kc->scope, 0, index_type, 0 /* hits_limit */, - 0 /* term_ref_id_str */); - - ptr[j0]++; - - for (j = j0+1; j= 0 && - (tst = scan_info_array[j].list[before-1-ptr[j]].term) && - !strcmp (tst, mterm)) - { - RSET rsets[2]; - - rsets[0] = rset; - rsets[1] = rset_trunc( - zh, - &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1, - glist[lo].term, - strlen(glist[lo].term), NULL, 0, - zapt->term->which, rset_nmem, - kc, kc->scope, 0, index_type, 0 /* hits_limit */, - 0 /* term_ref_id_str */); - rset = rset_create_or(rset_nmem, kc, - kc->scope, 0 /* termid */, 2, rsets); - - ptr[j]++; - } - } - if (limit_set) - { - RSET rsets[2]; - rsets[0] = rset; - rsets[1] = rset_dup(limit_set); - - rset = rset_create_and(rset_nmem, kc, kc->scope, 2, rsets); - } - count_set(zh, rset, &count); - glist[lo].occurrences = count; - rset_delete (rset); - } - i = before-i; - if (i) - { - *is_partial = 1; - *position -= i; - *num_entries -= i; - if (*num_entries <= 0) - { - *num_entries = 0; - return ZEBRA_OK; - } - } - - *list = glist + i; /* list is set to first 'real' entry */ - - yaz_log(YLOG_DEBUG, "position = %d, num_entries = %d", - *position, *num_entries); - return ZEBRA_OK; -} - - ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, - oid_value attributeset, + const Odr_oid *attributeset, int num_bases, char **basenames, int *position, int *num_entries, ZebraScanEntry **list, int *is_partial, RSET limit_set) @@ -673,8 +408,8 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, *list = 0; *is_partial = 0; - if (attributeset == VAL_NONE) - attributeset = VAL_BIB1; + if (!attributeset) + attributeset = yaz_oid_attset_bib_1; if (!limit_set) /* no limit set given already */ { @@ -692,7 +427,6 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, if (termset_value_numeric != -2) { - sprintf(resname, "%d", termset_value_numeric); termset_name = resname; } @@ -700,11 +434,19 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, termset_name = termset_value_string; limit_set = resultSetRef (zh, termset_name); + + if (!limit_set) + { + zebra_setError(zh, + YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, + termset_name); + return ZEBRA_FAIL; + } } } - yaz_log(YLOG_DEBUG, "position = %d, num = %d set=%d", - *position, *num_entries, attributeset); + yaz_log(YLOG_DEBUG, "position = %d, num = %d", + *position, *num_entries); if (zebra_maps_attr(zh->reg->zebra_maps, zapt, &index_type, &search_type, rank_type, &complete_flag, &sort_flag))