X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=index%2Fretrieve.c;h=4ff858610229c1edd5d5f531485a26dbd9ac0e42;hp=3e50ae55ac941b3f196356f30fb76141b4098bd9;hb=cd02e9c5558d2f0db2ab83fcad810a6522fd2319;hpb=95a5868bb03e3445b1e234a969358b6eaa74a49c diff --git a/index/retrieve.c b/index/retrieve.c index 3e50ae5..4ff8586 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,8 +1,5 @@ -/* $Id: retrieve.c,v 1.79 2007-12-03 13:34:17 adam Exp $ - Copyright (C) 1995-2007 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1995-2008 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -43,13 +40,22 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA #define ZEBRA_XML_HEADER_STR "reg->zei, *rec); - if ((*rec)->size[recInfo_storeData] > 0) + if ((*rec)->size[recInfo_storeData] > 0 + || (*rec)->info[recInfo_filename] == 0) zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData], (*rec)->size[recInfo_storeData]); else @@ -75,11 +81,12 @@ static int zebra_create_record_stream(ZebraHandle zh, } return 0; } - + struct index_spec { const char *index_name; const char *index_type; + const char *extra; struct index_spec *next; }; @@ -103,6 +110,7 @@ struct index_spec *parse_index_spec(const char *elem, NMEM nmem, struct index_spec *spec = nmem_malloc(nmem, sizeof(*spec)); spec->index_type = 0; spec->next = 0; + spec->extra = 0; if (!first) first = spec; @@ -119,10 +127,19 @@ struct index_spec *parse_index_spec(const char *elem, NMEM nmem, cp++; cp0 = cp; - while (*cp != '\0' && *cp != ',') + while (*cp != '\0' && *cp != ',' && *cp != ':') cp++; spec->index_type = nmem_strdupn(nmem, cp0, cp - cp0); } + if (*cp == ':') /* extra arguments */ + { + cp++; + cp0 = cp; + + while (*cp != '\0' && *cp != ',' && *cp != ':') + cp++; + spec->extra = nmem_strdupn(nmem, cp0, cp - cp0); + } if (*cp != ',') break; } @@ -177,11 +194,11 @@ static int parse_zebra_elem(const char *elem, } -int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr, - const char *elemsetname, - const Odr_oid *input_format, - const Odr_oid **output_format, - char **rec_bufp, int *rec_lenp) +static int sort_fetch( + struct special_fetch_s *fi, const char *elemsetname, + const Odr_oid *input_format, + const Odr_oid **output_format, + WRBUF result, WRBUF addinfo) { const char *retrieval_index; size_t retrieval_index_len; @@ -190,17 +207,8 @@ int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr, char retrieval_index_cstr[256]; char retrieval_type_cstr[256]; int ord; + ZebraHandle zh = fi->zh; - /* only accept XML and SUTRS requests */ - if (oid_oidcmp(input_format, yaz_oid_recsyn_xml) - && oid_oidcmp(input_format, yaz_oid_recsyn_sutrs)) - { - yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", - elemsetname); - *output_format = 0; - return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST; - } - if (!parse_zebra_elem(elemsetname, &retrieval_index, &retrieval_index_len, &retrieval_type, &retrieval_type_len)) @@ -230,57 +238,76 @@ int zebra_special_sort_fetch(ZebraHandle zh, zint sysno, ODR odr, return -1; /* is not a sort index */ else { - char dst_buf[IT_MAX_WORD]; - char str[IT_MAX_WORD]; + WRBUF wrbuf_str = wrbuf_alloc(); const char *index_type; const char *db = 0; const char *string_index = 0; - WRBUF wrbuf = wrbuf_alloc(); + WRBUF wrbuf_result = result; + int off = 0; - zebra_sort_sysno(zh->reg->sort_index, sysno); - zebra_sort_type(zh->reg->sort_index, ord); - zebra_sort_read(zh->reg->sort_index, str); - - zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, &string_index); - - zebra_term_untrans(zh, index_type, dst_buf, str); - + zebraExplain_lookup_ord(zh->reg->zei, ord, &index_type, &db, + &string_index); if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) { *output_format = yaz_oid_recsyn_xml; - wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR + wrbuf_printf(wrbuf_result, ZEBRA_XML_HEADER_STR " sysno=\"" ZINT_FORMAT "\"" " set=\"zebra::index%s/\">\n", - sysno, elemsetname); - - wrbuf_printf(wrbuf, " ", index_type); - wrbuf_xmlputs(wrbuf, dst_buf); - wrbuf_printf(wrbuf, "\n"); - wrbuf_printf(wrbuf, "\n"); + fi->sysno, elemsetname); } else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs)) { *output_format = yaz_oid_recsyn_sutrs; + } + else + { + yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", + elemsetname); + *output_format = 0; + wrbuf_destroy(wrbuf_str); + return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST; + } + zebra_sort_type(zh->reg->sort_index, ord); + zebra_sort_sysno(zh->reg->sort_index, fi->sysno); + zebra_sort_read(zh->reg->sort_index, wrbuf_str); + + while (off != wrbuf_len(wrbuf_str)) + { + char dst_buf[IT_MAX_WORD]; + assert(off < wrbuf_len(wrbuf_str)); + zebra_term_untrans(zh, index_type, dst_buf, + wrbuf_buf(wrbuf_str)+off); - wrbuf_printf(wrbuf, "%s %s %s\n", string_index, index_type, - dst_buf); + if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) + { + wrbuf_printf(wrbuf_result, " ", index_type); + wrbuf_xmlputs(wrbuf_result, dst_buf); + wrbuf_printf(wrbuf_result, "\n"); + } + else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs)) + { + wrbuf_printf(wrbuf_result, "%s %s %s\n", string_index, index_type, + dst_buf); + } + off += strlen(wrbuf_buf(wrbuf_str)+off) + 1; + } + if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) + { + wrbuf_printf(wrbuf_result, "\n"); } - *rec_lenp = wrbuf_len(wrbuf); - *rec_bufp = odr_malloc(odr, *rec_lenp); - memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp); - wrbuf_destroy(wrbuf); + wrbuf_destroy(wrbuf_str); return 0; } } -int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, - Record rec, - const char *elemsetname, - const Odr_oid *input_format, - const Odr_oid **output_format, - char **rec_bufp, int *rec_lenp) +static int special_index_fetch( + struct special_fetch_s *fi, const char *elemsetname, + const Odr_oid *input_format, + const Odr_oid **output_format, + WRBUF result, WRBUF addinfo, + Record rec) { const char *retrieval_index; size_t retrieval_index_len; @@ -289,6 +316,7 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, zebra_rec_keys_t keys; int ret_code = 0; char retrieval_type_cstr[256]; + ZebraHandle zh = fi->zh; /* set output variables before processing possible error states */ /* *rec_lenp = 0; */ @@ -345,7 +373,7 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, size_t slen; const char *str; struct it_key key_in; - WRBUF wrbuf = wrbuf_alloc(); + WRBUF wrbuf = result; if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) { @@ -353,7 +381,7 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR " sysno=\"" ZINT_FORMAT "\"" " set=\"zebra::index%s/\">\n", - sysno, elemsetname); + fi->sysno, elemsetname); } else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs)) *output_format = input_format; @@ -383,36 +411,35 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, if (retrieval_type == 0 || !strcmp(retrieval_type_cstr, index_type)) { - zebra_term_untrans(zh, index_type, dst_buf, str); - if (strlen(dst_buf)) + if (zebra_term_untrans(zh, index_type, dst_buf, str)) + *dst_buf = '\0'; /* untrans failed */ + + if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) { - if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) - { - wrbuf_printf(wrbuf, " ", - key_in.mem[key_in.len -1]); + wrbuf_printf(wrbuf, " \n"); - } - else - { - wrbuf_printf(wrbuf, "%s ", string_index); - - wrbuf_printf(wrbuf, "%s", index_type); - - for (i = 1; i < key_in.len; i++) - wrbuf_printf(wrbuf, " " ZINT_FORMAT, + wrbuf_printf(wrbuf, " type=\"%s\"", index_type); + + wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">", + key_in.mem[key_in.len -1]); + wrbuf_xmlputs(wrbuf, dst_buf); + wrbuf_printf(wrbuf, "\n"); + } + else + { + wrbuf_printf(wrbuf, "%s ", string_index); + + wrbuf_printf(wrbuf, "%s", index_type); + + for (i = 1; i < key_in.len; i++) + wrbuf_printf(wrbuf, " " ZINT_FORMAT, key_in.mem[i]); - - wrbuf_printf(wrbuf, " %s", dst_buf); - wrbuf_printf(wrbuf, "\n"); - } + wrbuf_printf(wrbuf, " %s", dst_buf); + + wrbuf_printf(wrbuf, "\n"); + } } @@ -420,10 +447,6 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, } if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) wrbuf_printf(wrbuf, "\n"); - *rec_lenp = wrbuf_len(wrbuf); - *rec_bufp = odr_malloc(odr, *rec_lenp); - memcpy(*rec_bufp, wrbuf_buf(wrbuf), *rec_lenp); - wrbuf_destroy(wrbuf); } zebra_rec_keys_close(keys); return ret_code; @@ -540,28 +563,28 @@ int zebra_get_rec_snippets(ZebraHandle zh, zint sysno, return return_code; } -static int snippet_fetch(ZebraHandle zh, const char *setname, - zint sysno, ODR odr, - const char *elemsetname, - const Odr_oid *input_format, - const Odr_oid **output_format, - char **rec_bufp, int *rec_lenp) +static int snippet_fetch( + struct special_fetch_s *fi, const char *elemsetname, + const Odr_oid *input_format, + const Odr_oid **output_format, + WRBUF result, WRBUF addinfo) { + ZebraHandle zh = fi->zh; zebra_snippets *rec_snippets = zebra_snippets_create(); - int return_code = zebra_get_rec_snippets(zh, sysno, rec_snippets); + int return_code = zebra_get_rec_snippets(zh, fi->sysno, rec_snippets); if (!return_code) { - WRBUF wrbuf = wrbuf_alloc(); + WRBUF wrbuf = result; zebra_snippets *hit_snippet = zebra_snippets_create(); - zebra_snippets_hit_vector(zh, setname, sysno, hit_snippet); + zebra_snippets_hit_vector(zh, fi->setname, fi->sysno, hit_snippet); #if 0 /* for debugging purposes */ yaz_log(YLOG_LOG, "---------------------------"); yaz_log(YLOG_LOG, "REC SNIPPET:"); - zebra_snippets_log(rec_snippet, YLOG_LOG, 1); + zebra_snippets_log(rec_snippets, YLOG_LOG, 1); yaz_log(YLOG_LOG, "---------------------------"); yaz_log(YLOG_LOG, "HIT SNIPPET:"); zebra_snippets_log(hit_snippet, YLOG_LOG, 1); @@ -578,12 +601,6 @@ static int snippet_fetch(ZebraHandle zh, const char *setname, *output_format = yaz_oid_recsyn_xml; - if (return_code == 0) - { - *rec_lenp = wrbuf_len(wrbuf); - *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf)); - } - wrbuf_destroy(wrbuf); zebra_snippets_destroy(hit_snippet); } zebra_snippets_destroy(rec_snippets); @@ -596,7 +613,7 @@ struct term_collect { zint set_occur; }; -zint freq_term(ZebraHandle zh, int ord, const char *term, RSET rset_set) +static zint freq_term(ZebraHandle zh, int ord, const char *term, RSET rset_set) { struct rset_key_control *kc = zebra_key_control_create(zh); char ord_buf[IT_MAX_WORD]; @@ -629,9 +646,25 @@ zint freq_term(ZebraHandle zh, int ord, const char *term, RSET rset_set) return hits; } -void term_collect_freq(ZebraHandle zh, - struct term_collect *col, int no_terms_collect, - int ord, RSET rset) +static int term_qsort_handle(const void *a, const void *b) +{ + const struct term_collect *l = a; + const struct term_collect *r = b; + if (l->set_occur < r->set_occur) + return 1; + else if (l->set_occur > r->set_occur) + return -1; + else + { + const char *lterm = l->term ? l->term : ""; + const char *rterm = r->term ? r->term : ""; + return strcmp(lterm, rterm); + } +} + +static void term_collect_freq(ZebraHandle zh, + struct term_collect *col, int no_terms_collect, + int ord, RSET rset) { int i; for (i = 0; i < no_terms_collect; i++) @@ -639,11 +672,12 @@ void term_collect_freq(ZebraHandle zh, if (col[i].term) col[i].set_occur = freq_term(zh, ord, col[i].term, rset); } + qsort(col, no_terms_collect, sizeof(*col), term_qsort_handle); } -struct term_collect *term_collect_create(zebra_strmap_t sm, - int no_terms_collect, - NMEM nmem) +static struct term_collect *term_collect_create(zebra_strmap_t sm, + int no_terms_collect, + NMEM nmem) { const char *term; void *data_buf; @@ -662,14 +696,17 @@ struct term_collect *term_collect_create(zebra_strmap_t sm, it = zebra_strmap_it_create(sm); while ((term = zebra_strmap_it_next(it, &data_buf, &data_len))) { + /* invariant: + col[0] has lowest oc . col[no_terms_collect-1] has highest oc */ int oc = *(int*) data_buf; int j = 0; /* insertion may be slow but terms terms will be "infrequent" and - thus number of iterations should be small below */ + thus number of iterations should be small below + */ while (j < no_terms_collect && oc > col[j].oc) j++; - if (j) - { + if (j) + { /* oc <= col[j] and oc > col[j-1] */ --j; memmove(col, col+1, sizeof(*col) * j); col[j].term = term; @@ -680,32 +717,245 @@ struct term_collect *term_collect_create(zebra_strmap_t sm, return col; } -static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, - ODR odr, - const char *elemsetname, - const Odr_oid *input_format, - const Odr_oid **output_format, - char **rec_bufp, int *rec_lenp) +static int perform_facet_sort(ZebraHandle zh, int no_ord, int *ord_array, + zebra_strmap_t *map_array, + int num_recs, ZebraMetaRecord *poset) +{ + int rec_i; + WRBUF w = wrbuf_alloc(); + int ord_i; + + for (ord_i = 0; ord_i < no_ord; ord_i++) + { + for (rec_i = 0; rec_i < num_recs; rec_i++) + { + if (!poset[rec_i].sysno) + continue; + + zebra_sort_sysno(zh->reg->sort_index, poset[rec_i].sysno); + zebra_sort_type(zh->reg->sort_index, ord_array[ord_i]); + + wrbuf_rewind(w); + if (zebra_sort_read(zh->reg->sort_index, w)) + { + zebra_strmap_t sm = map_array[ord_i]; + int off = 0; + while (off != wrbuf_len(w)) + { + const char *str = wrbuf_buf(w) + off; + int *freq = zebra_strmap_lookup(sm, str, 0, 0); + if (freq) + (*freq)++; + else + { + int v = 1; + zebra_strmap_add(sm, str, &v, sizeof v); + } + off += strlen(str)+1; + } + } + } + } + wrbuf_destroy(w); + return 0; +} + + +static int perform_facet_index(ZebraHandle zh, + struct special_fetch_s *fi, + int no_ord, int *ord_array, + zebra_strmap_t *map_array, + int num_recs, ZebraMetaRecord *poset, + struct index_spec *spec_list) +{ + int max_chunks = 2; + int rec_i; + res_get_int(zh->res, "facetMaxChunks", &max_chunks); + + for (rec_i = 0; rec_i < num_recs; rec_i++) + { + int ret; + int j; + zint sysnos[MAX_SYSNOS_PER_RECORD]; + int no_sysnos = MAX_SYSNOS_PER_RECORD; + if (!poset[rec_i].sysno) + continue; + ret = zebra_result_recid_to_sysno(zh, fi->setname, + poset[rec_i].sysno, + sysnos, &no_sysnos); + assert(no_sysnos > 0); + yaz_log(YLOG_DEBUG, "Analyzing rec=%d ISAM sysno=" ZINT_FORMAT " chunks=%d", + rec_i, poset[rec_i].sysno, no_sysnos); + for (j = 0; j < no_sysnos && j < max_chunks; j++) + { + size_t slen; + const char *str; + struct it_key key_in; + Record rec = rec_get(zh->reg->records, sysnos[j]); + zebra_rec_keys_t keys = zebra_rec_keys_open(); + zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys], + rec->size[recInfo_delKeys], 0); + + yaz_log(YLOG_DEBUG, "rec %d " ZINT_FORMAT " %s", + j, sysnos[j], zebra_rec_keys_empty(keys) ? "empty" : "non-empty"); + if (zebra_rec_keys_rewind(keys)) + { + while (zebra_rec_keys_read(keys, &str, &slen, &key_in)) + { + int ord_i; + struct index_spec *spec; + for (spec = spec_list, ord_i = 0; ord_i < no_ord; + ord_i++, spec = spec->next) + { + int ord = CAST_ZINT_TO_INT(key_in.mem[0]); + if (ord == ord_array[ord_i] && + str[0] != FIRST_IN_FIELD_CHAR) + { + int *freq; + zebra_strmap_t sm = map_array[ord_i]; + + freq = zebra_strmap_lookup(sm, str, 0, 0); + if (freq) + (*freq)++; + else + { + int v = 1; + zebra_strmap_add(sm, str, &v, sizeof v); + } + } + } + } + } + zebra_rec_keys_close(keys); + rec_free(&rec); + } + } + return 0; +} + +static int perform_facet(ZebraHandle zh, + struct special_fetch_s *fi, + WRBUF result, + int num_recs, ZebraMetaRecord *poset, + struct index_spec *spec_list, + int no_ord, int *ord_array, + int use_xml, + zinfo_index_category_t cat) +{ + int i; + int ret = 0; + WRBUF wr = result; + struct index_spec *spec; + yaz_timing_t timing = yaz_timing_create(); + zebra_strmap_t *map_array + = nmem_malloc(fi->nmem, sizeof *map_array * no_ord); + for (i = 0; i < no_ord; i++) + map_array[i] = zebra_strmap_create(); + + if (cat == zinfo_index_category_sort) + perform_facet_sort(zh, no_ord, ord_array, map_array, + num_recs, poset); + else + perform_facet_index(zh, fi, no_ord, ord_array, map_array, + num_recs, poset, spec_list); + yaz_timing_stop(timing); + yaz_log(YLOG_LOG, "facet first phase real=%4.2f cat=%s", + yaz_timing_get_real(timing), + (cat == zinfo_index_category_sort) ? "sort" : "index"); + yaz_timing_start(timing); + for (spec = spec_list, i = 0; i < no_ord; i++, spec = spec->next) + { + int j; + NMEM nmem = nmem_create(); + struct term_collect *col; + int no_collect_terms = 20; + + if (spec->extra) + no_collect_terms = atoi(spec->extra); + if (no_collect_terms < 1) + no_collect_terms = 1; + col = term_collect_create(map_array[i], no_collect_terms, nmem); + term_collect_freq(zh, col, no_collect_terms, ord_array[i], + resultSetRef(zh, fi->setname)); + + if (use_xml) + wrbuf_printf(wr, " \n", + spec->index_type, spec->index_name); + else + wrbuf_printf(wr, "facet %s %s\n", + spec->index_type, spec->index_name); + for (j = 0; j < no_collect_terms; j++) + { + if (col[j].term) + { + char dst_buf[IT_MAX_WORD]; + zebra_term_untrans(zh, spec->index_type, dst_buf, col[j].term); + if (use_xml) + { + wrbuf_printf(wr, " "); + wrbuf_xmlputs(wr, dst_buf); + wrbuf_printf(wr, "\n"); + } + else + { + wrbuf_printf(wr, "term %d", col[j].oc); + if (col[j].set_occur) + wrbuf_printf(wr, " " ZINT_FORMAT, + col[j].set_occur); + wrbuf_printf(wr, ": %s\n", dst_buf); + } + } + } + if (use_xml) + wrbuf_puts(wr, " \n"); + nmem_destroy(nmem); + } + for (i = 0; i < no_ord; i++) + zebra_strmap_destroy(map_array[i]); + yaz_timing_stop(timing); + yaz_log(YLOG_LOG, "facet second phase real=%4.2f", + yaz_timing_get_real(timing)); + yaz_timing_destroy(&timing); + return ret; +} + +static int facet_fetch( + struct special_fetch_s *fi, const char *elemsetname, + const Odr_oid *input_format, + const Odr_oid **output_format, + WRBUF result, WRBUF addinfo) { zint *pos_array; int i; int num_recs = 10; /* number of records to analyze */ - int no_collect_terms = 20; /* number of term candidates */ ZebraMetaRecord *poset; ZEBRA_RES ret = ZEBRA_OK; int *ord_array; - WRBUF wr = wrbuf_alloc(); - + int use_xml = 0; int no_ord = 0; struct index_spec *spec, *spec_list; int error; + ZebraHandle zh = fi->zh; + /* whether sort or index based */ + zinfo_index_category_t cat = zinfo_index_category_sort; + res_get_int(zh->res, "facetNumRecs", &num_recs); - spec_list = parse_index_spec(elemsetname, odr_getmem(odr), &error); + /* see if XML is required for response */ + if (oid_oidcmp(input_format, yaz_oid_recsyn_xml) == 0) + use_xml = 1; + + spec_list = parse_index_spec(elemsetname, fi->nmem, &error); if (!spec_list || error) + { return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; - + } + for (spec = spec_list; spec; spec = spec->next) { if (!spec->index_type) @@ -713,200 +963,119 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, no_ord++; } - ord_array = odr_malloc(odr, sizeof(*ord_array) * no_ord); + /* try to see if all specs are sort based.. If not, try the + index based ones */ + ord_array = nmem_malloc(fi->nmem, sizeof(*ord_array) * no_ord); for (spec = spec_list, i = 0; spec; spec = spec->next, i++) { int ord = zebraExplain_lookup_attr_str(zh->reg->zei, - zinfo_index_category_index, + zinfo_index_category_sort, spec->index_type, spec->index_name); if (ord == -1) + break; + ord_array[i] = ord; + } + if (spec) + { + cat = zinfo_index_category_index; + for (spec = spec_list, i = 0; spec; spec = spec->next, i++) { - return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + int ord = zebraExplain_lookup_attr_str(zh->reg->zei, + zinfo_index_category_index, + spec->index_type, + spec->index_name); + if (ord == -1) + break; + ord_array[i] = ord; + } - ord_array[i] = ord; } + if (spec) + return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; - pos_array = (zint *) xmalloc(num_recs * sizeof(*pos_array)); + pos_array = (zint *) nmem_malloc(fi->nmem, num_recs * sizeof(*pos_array)); for (i = 0; i < num_recs; i++) pos_array[i] = i+1; - poset = zebra_meta_records_create(zh, setname, num_recs, pos_array); + poset = zebra_meta_records_create(zh, fi->setname, num_recs, pos_array); if (!poset) { - zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, - setname); - xfree(pos_array); - ret = ZEBRA_FAIL; + wrbuf_puts(addinfo, fi->setname); + return YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST; } else { - zebra_strmap_t *map_array - = odr_malloc(odr, sizeof *map_array * no_ord); - for (i = 0; i < no_ord; i++) - map_array[i] = zebra_strmap_create(); - - for (i = 0; i < num_recs; i++) + if (use_xml) { - int j; - zint sysnos[MAX_SYSNOS_PER_RECORD]; - int no_sysnos = MAX_SYSNOS_PER_RECORD; - if (!poset[i].sysno) - continue; - ret = zebra_result_recid_to_sysno(zh, setname, - poset[i].sysno, - sysnos, &no_sysnos); - assert(no_sysnos > 0); - for (j = 0; j < no_sysnos; j++) - { - size_t slen; - const char *str; - struct it_key key_in; - Record rec = rec_get(zh->reg->records, sysnos[j]); - zebra_rec_keys_t keys = zebra_rec_keys_open(); - zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys], - rec->size[recInfo_delKeys], 0); - - if (zebra_rec_keys_rewind(keys)) - { - while (zebra_rec_keys_read(keys, &str, &slen, &key_in)) - { - int i; - struct index_spec *spec; - for (spec = spec_list, i = 0; i < no_ord; - i++, spec = spec->next) - { - int ord = CAST_ZINT_TO_INT(key_in.mem[0]); - if (ord == ord_array[i] && - str[0] != FIRST_IN_FIELD_CHAR) - { - int *freq; - zebra_strmap_t sm = map_array[i]; - - freq = zebra_strmap_lookup(sm, str, 0, 0); - if (freq) - (*freq)++; - else - { - int v = 1; - zebra_strmap_add(sm, str, &v, sizeof v); - } - } - } - } - } - zebra_rec_keys_close(keys); - rec_free(&rec); - } + wrbuf_printf(result, ZEBRA_XML_HEADER_STR ">\n"); } - wrbuf_puts(wr, "\n"); - for (spec = spec_list, i = 0; i < no_ord; i++, spec = spec->next) - { - int j; - NMEM nmem = nmem_create(); - struct term_collect *col = term_collect_create(map_array[i], - no_collect_terms, - nmem); - term_collect_freq(zh, col, no_collect_terms, ord_array[i], - resultSetRef(zh, setname)); - - wrbuf_printf(wr, " \n", - spec->index_type, spec->index_name); - for (j = 0; j < no_collect_terms; j++) - { - if (col[j].term) - { - char dst_buf[IT_MAX_WORD]; - zebra_term_untrans(zh, spec->index_type, dst_buf, col[j].term); - wrbuf_printf(wr, " "); - wrbuf_xmlputs(wr, dst_buf); - wrbuf_printf(wr, "\n"); - } - } - wrbuf_puts(wr, " \n"); - nmem_destroy(nmem); - } - wrbuf_puts(wr, "\n"); - for (i = 0; i < no_ord; i++) - zebra_strmap_destroy(map_array[i]); + ret = perform_facet(zh, fi, result, num_recs, poset, + spec_list, no_ord, ord_array, use_xml, + cat); + if (use_xml) + wrbuf_puts(result, "\n"); } - - - *rec_bufp = odr_strdup(odr, wrbuf_cstr(wr)); - wrbuf_destroy(wr); - *rec_lenp = strlen(*rec_bufp); *output_format = yaz_oid_recsyn_xml; - - xfree(pos_array); zebra_meta_records_destroy(zh, poset, num_recs); return ret; } -int zebra_special_fetch(ZebraHandle zh, const char *setname, - zint sysno, int score, ODR odr, - const char *elemsetname, - const Odr_oid *input_format, - const Odr_oid **output_format, - char **rec_bufp, int *rec_lenp) + +static int zebra_special_fetch( + void *handle, const char *elemsetname, + const Odr_oid *input_format, + const Odr_oid **output_format, + WRBUF result, WRBUF addinfo) { - Record rec; + Record rec = 0; + struct special_fetch_s *fi = (struct special_fetch_s *) handle; + ZebraHandle zh = fi->zh; + zint sysno = fi->sysno; - /* set output variables before processing possible error states */ - /* *rec_lenp = 0; */ - + /* processing zebra::facet */ if (elemsetname && 0 == strncmp(elemsetname, "facet", 5)) { - return facet_fetch(zh, setname, odr, - elemsetname + 5, + return facet_fetch(fi, elemsetname + 5, input_format, output_format, - rec_bufp, rec_lenp); + result, addinfo); } if (elemsetname && 0 == strcmp(elemsetname, "snippet")) { - return snippet_fetch(zh, setname, sysno, odr, - elemsetname + 7, + return snippet_fetch(fi, elemsetname + 7, input_format, output_format, - rec_bufp, rec_lenp); + result, addinfo); } - /* processing zebra::meta::sysno elemset without fetching binary data */ + /* processing zebra::meta::sysno */ if (elemsetname && 0 == strcmp(elemsetname, "meta::sysno")) { int ret = 0; - WRBUF wrbuf = wrbuf_alloc(); if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs)) { - wrbuf_printf(wrbuf, ZINT_FORMAT, sysno); + wrbuf_printf(result, ZINT_FORMAT, fi->sysno); *output_format = input_format; } else if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) { - wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR + wrbuf_printf(result, ZEBRA_XML_HEADER_STR " sysno=\"" ZINT_FORMAT "\"/>\n", - sysno); + fi->sysno); *output_format = input_format; } - *rec_lenp = wrbuf_len(wrbuf); - if (*rec_lenp) - *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf)); else ret = YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST; - wrbuf_destroy(wrbuf); return ret; } /* processing special elementsetname zebra::index:: for sort elements */ if (elemsetname && 0 == strncmp(elemsetname, "index", 5)) { - int ret = zebra_special_sort_fetch(zh, sysno, odr, - elemsetname + 5, - input_format, output_format, - rec_bufp, rec_lenp); + int ret = sort_fetch( + fi, elemsetname + 5, + input_format, output_format, + result, addinfo); if (ret != -1) return ret; /* not a sort index so we continue to get the full record */ @@ -926,46 +1095,39 @@ int zebra_special_fetch(ZebraHandle zh, const char *setname, { struct ZebraRecStream stream; RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec); + char *b; + zebra_create_record_stream(zh, &rec, &stream); *output_format = input_format; - *rec_lenp = recordAttr->recordSize; - *rec_bufp = (char *) odr_malloc(odr, *rec_lenp); - stream.readf(&stream, *rec_bufp, *rec_lenp); + + b = nmem_malloc(fi->nmem, recordAttr->recordSize); + stream.readf(&stream, b, recordAttr->recordSize); + wrbuf_write(result, b, recordAttr->recordSize); + stream.destroy(&stream); rec_free(&rec); return 0; } - /* only accept XML and SUTRS requests from now */ - if (oid_oidcmp(input_format, yaz_oid_recsyn_xml) - && oid_oidcmp(input_format, yaz_oid_recsyn_sutrs)) - { - yaz_log(YLOG_WARN, "unsupported format for element set zebra::%s", - elemsetname); - return YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST; - } - - /* processing special elementsetnames zebra::meta:: */ if (elemsetname && 0 == strcmp(elemsetname, "meta")) { int ret = 0; - WRBUF wrbuf = wrbuf_alloc(); RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, rec); if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) { *output_format = input_format; - wrbuf_printf(wrbuf, ZEBRA_XML_HEADER_STR + wrbuf_printf(result, ZEBRA_XML_HEADER_STR " sysno=\"" ZINT_FORMAT "\"", sysno); - retrieve_puts_attr(wrbuf, "base", rec->info[recInfo_databaseName]); - retrieve_puts_attr(wrbuf, "file", rec->info[recInfo_filename]); - retrieve_puts_attr(wrbuf, "type", rec->info[recInfo_fileType]); - if (score >= 0) - retrieve_puts_attr_int(wrbuf, "score", score); + retrieve_puts_attr(result, "base", rec->info[recInfo_databaseName]); + retrieve_puts_attr(result, "file", rec->info[recInfo_filename]); + retrieve_puts_attr(result, "type", rec->info[recInfo_fileType]); + if (fi->score >= 0) + retrieve_puts_attr_int(result, "score", fi->score); - wrbuf_printf(wrbuf, + wrbuf_printf(result, " rank=\"" ZINT_FORMAT "\"" " size=\"%i\"" " set=\"zebra::%s\"/>\n", @@ -976,14 +1138,14 @@ int zebra_special_fetch(ZebraHandle zh, const char *setname, else if (!oid_oidcmp(input_format, yaz_oid_recsyn_sutrs)) { *output_format = input_format; - wrbuf_printf(wrbuf, "sysno " ZINT_FORMAT "\n", sysno); - retrieve_puts_str(wrbuf, "base", rec->info[recInfo_databaseName]); - retrieve_puts_str(wrbuf, "file", rec->info[recInfo_filename]); - retrieve_puts_str(wrbuf, "type", rec->info[recInfo_fileType]); - if (score >= 0) - retrieve_puts_int(wrbuf, "score", score); - - wrbuf_printf(wrbuf, + wrbuf_printf(result, "sysno " ZINT_FORMAT "\n", sysno); + retrieve_puts_str(result, "base", rec->info[recInfo_databaseName]); + retrieve_puts_str(result, "file", rec->info[recInfo_filename]); + retrieve_puts_str(result, "type", rec->info[recInfo_fileType]); + if (fi->score >= 0) + retrieve_puts_int(result, "score", fi->score); + + wrbuf_printf(result, "rank " ZINT_FORMAT "\n" "size %i\n" "set zebra::%s\n", @@ -991,13 +1153,9 @@ int zebra_special_fetch(ZebraHandle zh, const char *setname, recordAttr->recordSize, elemsetname); } - *rec_lenp = wrbuf_len(wrbuf); - if (*rec_lenp) - *rec_bufp = odr_strdup(odr, wrbuf_cstr(wrbuf)); else - ret = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; + ret = YAZ_BIB1_NO_SYNTAXES_AVAILABLE_FOR_THIS_REQUEST; - wrbuf_destroy(wrbuf); rec_free(&rec); return ret; } @@ -1005,11 +1163,10 @@ int zebra_special_fetch(ZebraHandle zh, const char *setname, /* processing special elementsetnames zebra::index:: */ if (elemsetname && 0 == strncmp(elemsetname, "index", 5)) { - int ret = zebra_special_index_fetch(zh, sysno, odr, rec, - elemsetname + 5, - input_format, output_format, - rec_bufp, rec_lenp); - + int ret = special_index_fetch( + fi, elemsetname + 5, + input_format, output_format, + result, addinfo, rec); rec_free(&rec); return ret; } @@ -1019,14 +1176,13 @@ int zebra_special_fetch(ZebraHandle zh, const char *setname, return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; } - int zebra_record_fetch(ZebraHandle zh, const char *setname, zint sysno, int score, ODR odr, const Odr_oid *input_format, Z_RecordComposition *comp, const Odr_oid **output_format, char **rec_bufp, int *rec_lenp, char **basenamep, - char **addinfo) + WRBUF addinfo_w) { Record rec; char *fname, *file_type, *basename; @@ -1038,6 +1194,7 @@ int zebra_record_fetch(ZebraHandle zh, const char *setname, zint sysnos[MAX_SYSNOS_PER_RECORD]; int no_sysnos = MAX_SYSNOS_PER_RECORD; ZEBRA_RES res; + struct special_fetch_s fetch_info; res = zebra_result_recid_to_sysno(zh, setname, sysno, sysnos, &no_sysnos); if (res != ZEBRA_OK) @@ -1045,16 +1202,29 @@ int zebra_record_fetch(ZebraHandle zh, const char *setname, sysno = sysnos[0]; *basenamep = 0; - *addinfo = 0; elemsetname = yaz_get_esn(comp); + fetch_info.zh = zh; + fetch_info.setname = setname; + fetch_info.sysno = sysno; + fetch_info.score = score; + fetch_info.nmem = odr->mem; + /* processing zebra special elementset names of form 'zebra:: */ if (elemsetname && 0 == strncmp(elemsetname, "zebra::", 7)) - return zebra_special_fetch(zh, setname, sysno, score, odr, - elemsetname + 7, + { + WRBUF result = wrbuf_alloc(); + int r = zebra_special_fetch(&fetch_info, elemsetname + 7, input_format, output_format, - rec_bufp, rec_lenp); - + result, addinfo_w); + if (r == 0) + { + *rec_bufp = odr_strdup(odr, wrbuf_cstr(result)); + *rec_lenp = wrbuf_len(result); + } + wrbuf_destroy(result); + return r; + } /* processing all other element set names */ rec = rec_get(zh->reg->records, sysno); @@ -1100,16 +1270,14 @@ int zebra_record_fetch(ZebraHandle zh, const char *setname, retrieveCtrl.res = zh->res; retrieveCtrl.rec_buf = 0; retrieveCtrl.rec_len = -1; + retrieveCtrl.handle = &fetch_info; + retrieveCtrl.special_fetch = zebra_special_fetch; if (!(rt = recType_byName(zh->reg->recTypes, zh->res, file_type, &clientData))) { - char addinfo_str[100]; - - sprintf(addinfo_str, "Could not handle record type %.40s", - file_type); - - *addinfo = odr_strdup(odr, addinfo_str); + wrbuf_printf(addinfo_w, "Could not handle record type %.40s", + file_type); return_code = YAZ_BIB1_SYSTEM_ERROR_IN_PRESENTING_RECORDS; } else @@ -1120,7 +1288,8 @@ int zebra_record_fetch(ZebraHandle zh, const char *setname, *output_format = retrieveCtrl.output_format; *rec_bufp = (char *) retrieveCtrl.rec_buf; *rec_lenp = retrieveCtrl.rec_len; - *addinfo = retrieveCtrl.addinfo; + if (retrieveCtrl.addinfo) + wrbuf_puts(addinfo_w, retrieveCtrl.addinfo); } stream.destroy(&stream);