X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Frpnscan.c;h=f638f830b3f29c8922cedc6c1006bcae27a00827;hp=3402e77081a0864af7a67eade79bfe0b55adeea9;hb=693a0db94b4b3ac9aee7722572a6b81a86a12e13;hpb=cb0ae247ea096af96d5fdb453290747fbbf032fd diff --git a/index/rpnscan.c b/index/rpnscan.c index 3402e77..f638f83 100644 --- a/index/rpnscan.c +++ b/index/rpnscan.c @@ -1,8 +1,5 @@ -/* $Id: rpnscan.c,v 1.21 2007-11-05 11:20:39 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2011 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -20,6 +17,9 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA */ +#if HAVE_CONFIG_H +#include +#endif #include #include #ifdef WIN32 @@ -45,14 +45,28 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, char *termz, zebra_map_t zm) { - char termz0[IT_MAX_WORD]; + char term_utf8[IT_MAX_WORD]; - if (zapt_term_to_utf8(zh, zapt, termz0) == ZEBRA_FAIL) + if (zapt_term_to_utf8(zh, zapt, term_utf8) == ZEBRA_FAIL) return ZEBRA_FAIL; /* error */ + else if (zebra_maps_is_icu(zm)) + { + const char *res_buf; + size_t res_len; + zebra_map_tokenize_start(zm, term_utf8, strlen(term_utf8)); + + if (zebra_map_tokenize_next(zm, &res_buf, &res_len, 0, 0)) + { + memcpy(termz, res_buf, res_len); + termz[res_len] = '\0'; + } + else + termz[0] = '\0'; + } else { const char **map; - const char *cp = (const char *) termz0; + const char *cp = (const char *) term_utf8; const char *cp_end = cp + strlen(cp); const char *src; int i = 0; @@ -79,31 +93,6 @@ static ZEBRA_RES trans_scan_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt, return ZEBRA_OK; } -static void count_set(ZebraHandle zh, RSET rset, zint *count, zint approx_limit) -{ - zint psysno = 0; - struct it_key key; - RSFD rfd; - - yaz_log(YLOG_DEBUG, "count_set"); - - rset->hits_limit = approx_limit; - - *count = 0; - rfd = rset_open(rset, RSETF_READ); - while (rset_read(rfd, &key,0 /* never mind terms */)) - { - if (key.mem[0] != psysno) - { - psysno = key.mem[0]; - if (rfd->counted_items >= rset->hits_limit) - break; - } - } - rset_close(rfd); - *count = rset->hits_count; -} - static void get_first_snippet_from_rset(ZebraHandle zh, RSET rset, zebra_snippets *snippets, zint *sysno) @@ -160,11 +149,6 @@ static int scan_handle2(char *name, const char *info, int pos, void *client) len_prefix = strlen(scan_info->prefix); if (memcmp(name, scan_info->prefix, len_prefix)) return 1; - - /* skip special terms such as first-in-field specials */ - if (name[len_prefix] < CHR_BASE_CHAR) - return 1; - wrbuf_rewind(scan_info->term); wrbuf_puts(scan_info->term, name+len_prefix); @@ -198,29 +182,32 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, { if (ar[i].isam_p && strcmp(wrbuf_cstr(ar[i].term), term) == 0) { - struct ord_list *ol = ord_list_create(nmem); - RSET rset_t; - - ol = ord_list_append(nmem, ol, ar[i].ord); - - assert(ol); - rset_t = rset_trunc( + if (strcmp(term, FIRST_IN_FIELD_STR)) + { + struct ord_list *ol = ord_list_create(nmem); + RSET rset_t; + + ol = ord_list_append(nmem, ol, ar[i].ord); + + assert(ol); + rset_t = rset_trunc( zh, &ar[i].isam_p, 1, wrbuf_buf(ar[i].term), wrbuf_len(ar[i].term), NULL, 1, zapt->term->which, nmem, kc, kc->scope, ol, index_type, 0 /* hits_limit_value */, 0 /* term_ref_id_str */); - if (!rset) - rset = rset_t; - else - { - RSET rsets[2]; - - rsets[0] = rset; - rsets[1] = rset_t; - rset = rset_create_or(nmem, kc, kc->scope, 0 /* termid */, - 2, rsets); + if (!rset) + rset = rset_t; + else + { + RSET rsets[2]; + + rsets[0] = rset; + rsets[1] = rset_t; + rset = rset_create_or(nmem, kc, kc->scope, 0 /* termid */, + 2, rsets); + } } ar[i].isam_p = 0; } @@ -238,13 +225,11 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, rset = rset_create_and(nmem, kc, kc->scope, 2, rsets); } /* count it */ - count_set(zh, rset, &count, approx_limit); + zebra_count_set(zh, rset, &count, approx_limit); if (pos != -1) { zint sysno; - int code = -1; - zebra_snippets *rec_snippets = zebra_snippets_create(); zebra_snippets *hit_snippets = zebra_snippets_create(); glist[pos].term = 0; @@ -252,22 +237,38 @@ static int scan_save_set(ZebraHandle zh, ODR stream, NMEM nmem, get_first_snippet_from_rset(zh, rset, hit_snippets, &sysno); if (sysno) - code = zebra_get_rec_snippets(zh, sysno, rec_snippets); - - if (code == 0) { - const struct zebra_snippet_word *w = - zebra_snippets_lookup(rec_snippets, hit_snippets); - if (w) + zebra_snippets *rec_snippets = zebra_snippets_create(); + int code = zebra_get_rec_snippets(zh, sysno, rec_snippets); + if (code == 0) { - glist[pos].display_term = odr_strdup(stream, w->term); + const struct zebra_snippet_word *w = + zebra_snippets_lookup(rec_snippets, hit_snippets); + if (w) + { + glist[pos].display_term = odr_strdup(stream, w->term); + } + else + { + yaz_log(YLOG_WARN, "zebra_snippets_lookup failed for pos=%d", pos); + } } + zebra_snippets_destroy(rec_snippets); + } + if (zebra_term_untrans_iconv(zh, stream->mem, index_type, + &glist[pos].term, term)) + { + /* failed.. use display_term instead (which could be 0) */ + glist[pos].term = glist[pos].display_term; } + if (!glist[pos].term) - zebra_term_untrans_iconv(zh, stream->mem, index_type, - &glist[pos].term, term); + { + yaz_log(YLOG_WARN, "Could not generate scan term for pos=%d", + pos); + glist[pos].term = "None"; + } glist[pos].occurrences = count; - zebra_snippets_destroy(rec_snippets); zebra_snippets_destroy(hit_snippets); } rset_delete(rset); @@ -290,7 +291,7 @@ static ZEBRA_RES rpn_scan_norm(ZebraHandle zh, ODR stream, NMEM nmem, { struct scan2_info_entry *ar = nmem_malloc(nmem, sizeof(*ar) * ord_no); struct rpn_char_map_info rcmi; - zebra_map_t zm = zebra_map_get(zh->reg->zebra_maps, index_type); + zebra_map_t zm = zebra_map_get_or_add(zh->reg->zebra_maps, index_type); int i, dif; int after_pos; int pos = 0; @@ -375,7 +376,6 @@ static ZEBRA_RES rpn_scan_norm(ZebraHandle zh, ODR stream, NMEM nmem, { /* did not get all terms; adjust the real position and reduce number of entries */ - yaz_log(YLOG_LOG, "before terms dif=%d", dif); glist = glist + dif; *num_entries -= dif; *position -= dif; @@ -542,11 +542,6 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, zebra_setError(zh, YAZ_BIB1_TOO_MANY_DATABASES_SPECIFIED, 0); return ZEBRA_FAIL; } - if (sort_flag) - { - return rpn_facet(zh, stream, zapt, attributeset, position, num_entries, - list, is_partial, set_name); - } for (base_no = 0; base_no < num_bases; base_no++) { int ord; @@ -587,6 +582,7 @@ ZEBRA_RES rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab