X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fzsets.c;h=fd00a2695450e53aa372a26660567dee9a9705d0;hp=10ecb8c8fbcd2b5973c1188ace1b52b01b8d90bc;hb=c3ff843e467932c6027a8b3b2ebda7b44612447e;hpb=befd4e7c3fdae7fd6de9d38cb832d275d2d081aa diff --git a/index/zsets.c b/index/zsets.c index 10ecb8c..fd00a26 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,5 +1,5 @@ /* This file is part of the Zebra server. - Copyright (C) 1995-2008 Index Data + Copyright (C) Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -18,6 +18,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #ifdef WIN32 @@ -109,10 +112,11 @@ static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, sizeof(*sort_sequence->specs)); for (i = 0; inum_specs; i++) sort_sequence->specs[i] = 0; - + rpn_get_top_approx_limit(zh, rpn->RPNStructure, &sset->approx_limit); res = rpn_search_top(zh, rpn->RPNStructure, rpn->attributeSetId, + sset->approx_limit, nmem, rset_nmem, sort_sequence, sset->num_bases, sset->basenames, @@ -125,7 +129,8 @@ static ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem, for (i = 0; sort_sequence->specs[i]; i++) ; sort_sequence->num_specs = i; - rset->hits_limit = sset->approx_limit; + rset_set_hits_limit(rset, sset->approx_limit); + if (!i) { res = resultSetRank(zh, sset, rset, rset_nmem); @@ -158,10 +163,10 @@ ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, zebraSet->locked = 1; zebraSet->rpn = 0; zebraSet->nmem = m; - zebraSet->rset_nmem = nmem_create(); + zebraSet->rset_nmem = nmem_create(); zebraSet->num_bases = num_bases; - zebraSet->basenames = + zebraSet->basenames = nmem_malloc(zebraSet->nmem, num_bases * sizeof(*zebraSet->basenames)); for (i = 0; ibasenames[i] = nmem_strdup(zebraSet->nmem, basenames[i]); @@ -181,7 +186,7 @@ ZEBRA_RES resultSetAddRPN(ZebraHandle zh, NMEM m, Z_RPNQuery *rpn, } void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type, - const char *db, const char *index_name, + const char *db, const char *index_name, const char *term) { assert(zh); /* compiler shut up */ @@ -192,7 +197,7 @@ void resultSetAddTerm(ZebraHandle zh, ZebraSet s, int reg_type, int i; s->term_entries_max = 1000; s->term_entries = - nmem_malloc(s->nmem, s->term_entries_max * + nmem_malloc(s->nmem, s->term_entries_max * sizeof(*s->term_entries)); for (i = 0; i < s->term_entries_max; i++) s->term_entries[i].term = 0; @@ -215,7 +220,7 @@ ZebraSet resultSetAdd(ZebraHandle zh, const char *name, int ov) for (s = zh->sets; s; s = s->next) if (!strcmp(s->name, name)) break; - + if (!log_level_set) loglevels(); if (s) @@ -317,7 +322,7 @@ ZEBRA_RES resultSetGetBaseNames(ZebraHandle zh, const char *setname, void resultSetInvalidate(ZebraHandle zh) { ZebraSet s = zh->sets; - + yaz_log(log_level_resultsets, "invalidating result sets"); for (; s; s = s->next) { @@ -340,7 +345,7 @@ void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses) { ZebraSet * ss = &zh->sets; int i; - + if (statuses) for (i = 0; inext; - + xfree(s->sort_info->all_entries); xfree(s->sort_info->entries); xfree(s->sort_info); - + if (s->nmem) nmem_destroy(s->nmem); if (s->rset) @@ -386,7 +391,7 @@ void resultSetDestroy(ZebraHandle zh, int num, char **names,int *statuses) } ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh, - const char *name, + const char *name, zint start, int num) { zint pos_small[10]; @@ -399,18 +404,18 @@ ZebraMetaRecord *zebra_meta_records_create_range(ZebraHandle zh, if (num > 10) pos = xmalloc(sizeof(*pos) * num); - + for (i = 0; i 10) xfree(pos); return mr; } -ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, +ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, int num, zint *positions) { ZebraSet sset; @@ -460,7 +465,7 @@ ZebraMetaRecord *zebra_meta_records_create(ZebraHandle zh, const char *name, if (sort_info) { zint position; - + for (i = 0; inum_entries; while (num_i < num && positions[num_i] <= position) num_i++; - + if (sset->cache_rfd && num_i < num && positions[num_i] > sset->cache_position) { @@ -562,25 +567,53 @@ void resultSetInsertSort(ZebraHandle zh, ZebraSet sset, struct zset_sort_entry *new_entry = NULL; struct zset_sort_info *sort_info = sset->sort_info; int i, j; + WRBUF w = wrbuf_alloc(); zebra_sort_sysno(zh->reg->sort_index, sysno); for (i = 0; ireg->sort_index, criteria[i].ord[database_no]); - zebra_sort_read(zh->reg->sort_index, this_entry_buf); + wrbuf_rewind(w); + if (zebra_sort_read(zh->reg->sort_index, 0, w)) + { + /* consider each sort entry and take lowest/highest one + of the one as sorting key depending on whether sort is + ascending/descending */ + int off = 0; + while (off != wrbuf_len(w)) + { + size_t l = strlen(wrbuf_buf(w)+off); + assert(off < wrbuf_len(w)); + + if (l >= SORT_IDX_ENTRYSIZE) + l = SORT_IDX_ENTRYSIZE-1; + if ( (off == 0) + || (criteria[i].relation == 'A' + && strcmp(wrbuf_buf(w)+off, this_entry_buf) < 0) + || (criteria[i].relation == 'D' + && strcmp(wrbuf_buf(w)+off, this_entry_buf) > 0) + ) + { + memcpy(this_entry_buf, wrbuf_buf(w)+off, l); + this_entry_buf[l] = '\0'; + } + off += 1 + strlen(wrbuf_buf(w)+off); + } + } } else { yaz_log(log_level_sort, "criteria[i].ord is -1 so not reading from sort index"); } } + wrbuf_destroy(w); i = sort_info->num_entries; while (--i >= 0) { @@ -588,26 +621,20 @@ void resultSetInsertSort(ZebraHandle zh, ZebraSet sset, for (j = 0; j 0.0) rel = 1; else if (diff < 0.0) @@ -620,11 +647,11 @@ void resultSetInsertSort(ZebraHandle zh, ZebraSet sset, rel = memcmp(this_entry_buf, other_entry_buf, SORT_IDX_ENTRYSIZE); } - /* when the compare is equal, continue to next criteria, + /* when the compare is equal, continue to next criteria, else break out */ if (rel) break; - } + } if (!rel) break; if (criteria[j].relation == 'A') @@ -712,7 +739,7 @@ void resultSetInsertRank(ZebraHandle zh, struct zset_sort_info *sort_info, --j; else j = (sort_info->num_entries)++; - + new_entry = sort_info->entries[j]; while (j != i) { @@ -784,7 +811,7 @@ ZebraSet resultSetClone(ZebraHandle zh, const char *setname, nset->nmem = nmem_create(); nset->num_bases = rset->num_bases; - nset->basenames = + nset->basenames = nmem_malloc(nset->nmem, nset->num_bases * sizeof(*rset->basenames)); for (i = 0; inum_bases; i++) nset->basenames[i] = nmem_strdup(nset->nmem, rset->basenames[i]); @@ -858,7 +885,7 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, TERMID *terms; int numTerms = 0; size_t sysno_mem_index = 0; - + int numbases = zh->num_basenames; yaz_log(log_level_sort, "searching %d databases",numbases); @@ -881,13 +908,12 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, { Z_SortKeySpec *sks = sort_sequence->specs[i]; Z_SortKey *sk; - ZEBRA_RES res; - + sort_criteria[i].ord = (int *) nmem_malloc(nmem, sizeof(int)*numbases); sort_criteria[i].numerical = (int *) nmem_malloc(nmem, sizeof(int)*numbases); - + /* initialize ord and numerical for each database */ for (ib = 0; ib < numbases; ib++) { @@ -918,7 +944,7 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, { zebra_setError(zh, YAZ_BIB1_SORT_ILLEGAL_SORT, 0); return ZEBRA_FAIL; - } + } sk = sks->sortElement->u.generic; switch (sk->which) { @@ -928,7 +954,7 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, { zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]); sort_criteria[i].numerical[ib] = 0; - sort_criteria[i].ord[ib] = + sort_criteria[i].ord[ib] = zebraExplain_lookup_attr_str(zh->reg->zei, zinfo_index_category_sort, 0, sk->u.sortField); @@ -953,13 +979,12 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, for (ib = 0; ib < numbases; ib++) { zebraExplain_curDatabase(zh->reg->zei, zh->basenames[ib]); - res = zebra_sort_get_ord(zh, sk->u.sortAttributes, - &sort_criteria[i].ord[ib], - &sort_criteria[i].numerical[ib]); + if (zebra_sort_get_ord(zh, sk->u.sortAttributes, + &sort_criteria[i].ord[ib], + &sort_criteria[i].numerical[ib]) != + ZEBRA_OK && sks->which != Z_SortKeySpec_null) + return ZEBRA_FAIL; } - - if (sks->which != Z_SortKeySpec_null && res != ZEBRA_OK) - return ZEBRA_FAIL; break; } /* right now we look up the index type based on the first database @@ -1039,7 +1064,7 @@ ZEBRA_RES resultSetSortSingle(ZebraHandle zh, NMEM nmem, } yaz_log(log_level_sort, ZINT_FORMAT " keys, " ZINT_FORMAT " sysnos, sort", - kno, sset->hits); + kno, sset->hits); for (i = 0; i < numTerms; i++) yaz_log(log_level_sort, "term=\"%s\" type=%s count=" ZINT_FORMAT, terms[i]->name, terms[i]->flags, terms[i]->rset->hits_count); @@ -1110,7 +1135,7 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, kno++; if (log_level_searchhits) key_logdump_txt(log_level_searchhits, &key, termid->name); - if (this_sys != psysno) + if (this_sys != psysno) { /* new record .. */ if (!(rfd->counted_items & 255) && zh->break_handler_func) { @@ -1122,6 +1147,11 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, } if (rfd->counted_items > rset->hits_limit) stop_flag = 1; + if (stop_flag) + { + zebraSet->estimated_hit_count = 1; + break; + } if (psysno) { /* only if we did have a previous record */ score = (*rc->calc)(handle, psysno, pstaticrank, @@ -1130,12 +1160,6 @@ ZEBRA_RES resultSetRank(ZebraHandle zh, ZebraSet zebraSet, resultSetInsertRank(zh, sort_info, psysno, score, 'A'); count++; } - if (stop_flag) - { - zebraSet->estimated_hit_count = 1; - rset_set_hits_limit(rset, 0); - break; - } psysno = this_sys; if (zh->m_staticrank) pstaticrank = key.mem[0]; @@ -1262,7 +1286,7 @@ ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, TERMID *term_array = xmalloc(num_terms * sizeof(*term_array)); zint *hits_array = xmalloc(num_terms * sizeof(*hits_array)); int *approx_array = xmalloc(num_terms * sizeof(*approx_array)); - + trav_rset_for_termids(sset->rset, term_array, hits_array, approx_array); @@ -1280,14 +1304,14 @@ ZEBRA_RES zebra_result_set_term_info(ZebraHandle zh, const char *setname, { char *outbuf = termbuf; size_t ret; - + ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft, &outbuf, &outleft); if (ret == (size_t)(-1)) *termlen = 0; else { - yaz_iconv(zh->iconv_from_utf8, 0, 0, + yaz_iconv(zh->iconv_from_utf8, 0, 0, &outbuf, &outleft); *termlen = outbuf - termbuf; } @@ -1327,12 +1351,12 @@ ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, NMEM nmem = nmem_create(); struct it_key key; RSET rsets[2], rset_comb; - RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, + RSET rset_temp = rset_create_temp(nmem, kc, kc->scope, res_get(zh->res, "setTmpDir"),0 ); - + TERMID termid; RSFD rsfd = rset_open(rset_temp, RSETF_WRITE); - + key.mem[0] = sysno; key.mem[1] = 0; key.mem[2] = 0; @@ -1343,7 +1367,7 @@ ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, rsets[0] = rset_temp; rsets[1] = rset_dup(sset->rset); - + rset_comb = rset_create_and(nmem, kc, kc->scope, 2, rsets); rsfd = rset_open(rset_comb, RSETF_READ); @@ -1361,7 +1385,7 @@ ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, } } rset_close(rsfd); - + rset_delete(rset_comb); nmem_destroy(nmem); kc->dec(kc); @@ -1369,7 +1393,7 @@ ZEBRA_RES zebra_snippets_hit_vector(ZebraHandle zh, const char *setname, return ZEBRA_OK; } -static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, +static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, const char **basenames, int num_bases, zint recid, zint *sysnos, int *no_sysnos) @@ -1377,7 +1401,7 @@ static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, ZEBRA_RES res = ZEBRA_OK; int sysnos_offset = 0; int i; - + if (!zh->reg->isamb || !zh->m_segment_indexing) { if (sysnos_offset < *no_sysnos) @@ -1402,9 +1426,9 @@ static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, char ord_buf[32]; int ord_len = key_SU_encode(ord, ord_buf); char *info; - + ord_buf[ord_len] = '\0'; - + info = dict_lookup(zh->reg->dict, ord_buf); if (info) { @@ -1419,9 +1443,9 @@ static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, struct it_key key_until, key_found; int i = 0; int r; - + memcpy(&isam_p, info+1, sizeof(ISAM_P)); - + pt = isamb_pp_open(zh->reg->isamb, isam_p, 2); if (!pt) res = ZEBRA_FAIL; @@ -1433,12 +1457,12 @@ static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, key_until.mem[i++] = 0; /* segment */ key_until.mem[i++] = 0; key_until.len = i; - + r = isamb_pp_forward(pt, &key_found, &key_until); while (r && key_found.mem[0] == recid) { if (sysnos_offset < *no_sysnos) - sysnos[sysnos_offset++] = + sysnos[sysnos_offset++] = key_found.mem[key_found.len-1]; r = isamb_pp_read(pt, &key_found); } @@ -1454,7 +1478,7 @@ static ZEBRA_RES zebra_recid_to_sysno(ZebraHandle zh, return res; } -ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh, +ZEBRA_RES zebra_result_recid_to_sysno(ZebraHandle zh, const char *setname, zint recid, zint *sysnos, int *no_sysnos) @@ -1496,11 +1520,12 @@ void zebra_count_set(ZebraHandle zh, RSET rset, zint *count, rset_close(rfd); *count = rset->hits_count; } - + /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab