X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fretrieve.c;h=7dc82b858e528c366b4a8e8a1e07a77f4083fe94;hb=131e8143a9b8da294d582f0793833679101a2672;hp=ea16718db4722c184a9d44ced259d751112d0518;hpb=a1f5729d06659c681dfd68d56c756478bbdcdf99;p=idzebra-moved-to-github.git diff --git a/index/retrieve.c b/index/retrieve.c index ea16718..7dc82b8 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.81 2007-12-05 09:29:52 adam Exp $ +/* $Id: retrieve.c,v 1.85 2008-03-05 09:21:48 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -49,7 +49,8 @@ static int zebra_create_record_stream(ZebraHandle zh, { RecordAttr *recordAttr = rec_init_attr(zh->reg->zei, *rec); - if ((*rec)->size[recInfo_storeData] > 0) + if ((*rec)->size[recInfo_storeData] > 0 + || (*rec)->info[recInfo_filename] == 0) zebra_create_stream_mem(stream, (*rec)->info[recInfo_storeData], (*rec)->size[recInfo_storeData]); else @@ -75,11 +76,12 @@ static int zebra_create_record_stream(ZebraHandle zh, } return 0; } - + struct index_spec { const char *index_name; const char *index_type; + const char *extra; struct index_spec *next; }; @@ -103,6 +105,7 @@ struct index_spec *parse_index_spec(const char *elem, NMEM nmem, struct index_spec *spec = nmem_malloc(nmem, sizeof(*spec)); spec->index_type = 0; spec->next = 0; + spec->extra = 0; if (!first) first = spec; @@ -119,10 +122,19 @@ struct index_spec *parse_index_spec(const char *elem, NMEM nmem, cp++; cp0 = cp; - while (*cp != '\0' && *cp != ',') + while (*cp != '\0' && *cp != ',' && *cp != ':') cp++; spec->index_type = nmem_strdupn(nmem, cp0, cp - cp0); } + if (*cp == ':') /* extra arguments */ + { + cp++; + cp0 = cp; + + while (*cp != '\0' && *cp != ',' && *cp != ':') + cp++; + spec->extra = nmem_strdupn(nmem, cp0, cp - cp0); + } if (*cp != ',') break; } @@ -383,36 +395,35 @@ int zebra_special_index_fetch(ZebraHandle zh, zint sysno, ODR odr, if (retrieval_type == 0 || !strcmp(retrieval_type_cstr, index_type)) { - zebra_term_untrans(zh, index_type, dst_buf, str); - if (strlen(dst_buf)) + if (zebra_term_untrans(zh, index_type, dst_buf, str)) + *dst_buf = '\0'; /* untrans failed */ + + if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) { - if (!oid_oidcmp(input_format, yaz_oid_recsyn_xml)) - { - wrbuf_printf(wrbuf, " ", - key_in.mem[key_in.len -1]); + wrbuf_printf(wrbuf, " \n"); - } - else - { - wrbuf_printf(wrbuf, "%s ", string_index); - - wrbuf_printf(wrbuf, "%s", index_type); - - for (i = 1; i < key_in.len; i++) - wrbuf_printf(wrbuf, " " ZINT_FORMAT, + wrbuf_printf(wrbuf, " type=\"%s\"", index_type); + + wrbuf_printf(wrbuf, " seq=\"" ZINT_FORMAT "\">", + key_in.mem[key_in.len -1]); + wrbuf_xmlputs(wrbuf, dst_buf); + wrbuf_printf(wrbuf, "\n"); + } + else + { + wrbuf_printf(wrbuf, "%s ", string_index); + + wrbuf_printf(wrbuf, "%s", index_type); + + for (i = 1; i < key_in.len; i++) + wrbuf_printf(wrbuf, " " ZINT_FORMAT, key_in.mem[i]); - - wrbuf_printf(wrbuf, " %s", dst_buf); - wrbuf_printf(wrbuf, "\n"); - } + wrbuf_printf(wrbuf, " %s", dst_buf); + + wrbuf_printf(wrbuf, "\n"); + } } @@ -710,17 +721,19 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, zint *pos_array; int i; int num_recs = 10; /* number of records to analyze */ - int no_collect_terms = 20; /* number of term candidates */ + int max_chunks = 2; ZebraMetaRecord *poset; ZEBRA_RES ret = ZEBRA_OK; int *ord_array; WRBUF wr = wrbuf_alloc(); int use_xml = 0; - int no_ord = 0; struct index_spec *spec, *spec_list; int error; + res_get_int(zh->res, "facetNumRecs", &num_recs); + res_get_int(zh->res, "facetMaxChunks", &max_chunks); + /* see if XML is required for response */ if (oid_oidcmp(input_format, yaz_oid_recsyn_xml) == 0) use_xml = 1; @@ -779,6 +792,7 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, } else { + yaz_timing_t timing = yaz_timing_create(); zebra_strmap_t *map_array = odr_malloc(odr, sizeof *map_array * no_ord); for (i = 0; i < no_ord; i++) @@ -795,7 +809,9 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, poset[i].sysno, sysnos, &no_sysnos); assert(no_sysnos > 0); - for (j = 0; j < no_sysnos; j++) + yaz_log(YLOG_LOG, "Analyzing rec=%d ISAM sysno=" ZINT_FORMAT " chunks=%d", + i, poset[i].sysno, no_sysnos); + for (j = 0; j < no_sysnos && j < max_chunks; j++) { size_t slen; const char *str; @@ -804,7 +820,9 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, zebra_rec_keys_t keys = zebra_rec_keys_open(); zebra_rec_keys_set_buf(keys, rec->info[recInfo_delKeys], rec->size[recInfo_delKeys], 0); - + + yaz_log(YLOG_LOG, "rec %d " ZINT_FORMAT " %s", + j, sysnos[j], zebra_rec_keys_empty(keys) ? "empty" : "non-empty"); if (zebra_rec_keys_rewind(keys)) { while (zebra_rec_keys_read(keys, &str, &slen, &key_in)) @@ -837,15 +855,24 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, rec_free(&rec); } } + yaz_timing_stop(timing); + yaz_log(YLOG_LOG, "facet first phase real=%4.2f", + yaz_timing_get_real(timing)); + yaz_timing_start(timing); if (use_xml) wrbuf_puts(wr, "\n"); for (spec = spec_list, i = 0; i < no_ord; i++, spec = spec->next) { int j; NMEM nmem = nmem_create(); - struct term_collect *col = term_collect_create(map_array[i], - no_collect_terms, - nmem); + struct term_collect *col; + int no_collect_terms = 20; + + if (spec->extra) + no_collect_terms = atoi(spec->extra); + if (no_collect_terms < 1) + no_collect_terms = 1; + col = term_collect_create(map_array[i], no_collect_terms, nmem); term_collect_freq(zh, col, no_collect_terms, ord_array[i], resultSetRef(zh, setname)); @@ -889,9 +916,11 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, wrbuf_puts(wr, "\n"); for (i = 0; i < no_ord; i++) zebra_strmap_destroy(map_array[i]); + yaz_timing_stop(timing); + yaz_log(YLOG_LOG, "facet second phase real=%4.2f", + yaz_timing_get_real(timing)); + yaz_timing_destroy(&timing); } - - *rec_bufp = odr_strdup(odr, wrbuf_cstr(wr)); wrbuf_destroy(wr); *rec_lenp = strlen(*rec_bufp);