X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fretrieve.c;h=ff3bced3d0cbdd74a44d7fe08be345a6648b3c59;hb=527dab66d9847bb7f8a931c558306a070064bf25;hp=d3928d9055f33ace326a0c9b570a3a582ce61da9;hpb=f82c996895d8dcd69e987660ebf9fdaafdba35b0;p=idzebra-moved-to-github.git diff --git a/index/retrieve.c b/index/retrieve.c index d3928d9..ff3bced 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.78 2007-12-03 13:04:04 adam Exp $ +/* $Id: retrieve.c,v 1.82 2007-12-05 09:55:57 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -80,6 +80,7 @@ static int zebra_create_record_stream(ZebraHandle zh, struct index_spec { const char *index_name; const char *index_type; + const char *extra; struct index_spec *next; }; @@ -103,6 +104,7 @@ struct index_spec *parse_index_spec(const char *elem, NMEM nmem, struct index_spec *spec = nmem_malloc(nmem, sizeof(*spec)); spec->index_type = 0; spec->next = 0; + spec->extra = 0; if (!first) first = spec; @@ -119,10 +121,19 @@ struct index_spec *parse_index_spec(const char *elem, NMEM nmem, cp++; cp0 = cp; - while (*cp != '\0' && *cp != ',') + while (*cp != '\0' && *cp != ',' && *cp != ':') cp++; spec->index_type = nmem_strdupn(nmem, cp0, cp - cp0); } + if (*cp == ':') /* extra arguments */ + { + cp++; + cp0 = cp; + + while (*cp != '\0' && *cp != ',' && *cp != ':') + cp++; + spec->extra = nmem_strdupn(nmem, cp0, cp - cp0); + } if (*cp != ',') break; } @@ -629,6 +640,22 @@ zint freq_term(ZebraHandle zh, int ord, const char *term, RSET rset_set) return hits; } +int term_qsort_handle(const void *a, const void *b) +{ + const struct term_collect *l = a; + const struct term_collect *r = b; + if (l->set_occur < r->set_occur) + return 1; + else if (l->set_occur > r->set_occur) + return -1; + else + { + const char *lterm = l->term ? l->term : ""; + const char *rterm = r->term ? r->term : ""; + return strcmp(lterm, rterm); + } +} + void term_collect_freq(ZebraHandle zh, struct term_collect *col, int no_terms_collect, int ord, RSET rset) @@ -639,6 +666,7 @@ void term_collect_freq(ZebraHandle zh, if (col[i].term) col[i].set_occur = freq_term(zh, ord, col[i].term, rset); } + qsort(col, no_terms_collect, sizeof(*col), term_qsort_handle); } struct term_collect *term_collect_create(zebra_strmap_t sm, @@ -662,14 +690,17 @@ struct term_collect *term_collect_create(zebra_strmap_t sm, it = zebra_strmap_it_create(sm); while ((term = zebra_strmap_it_next(it, &data_buf, &data_len))) { + /* invariant: + col[0] has lowest oc . col[no_terms_collect-1] has highest oc */ int oc = *(int*) data_buf; int j = 0; /* insertion may be slow but terms terms will be "infrequent" and - thus number of iterations should be small below */ + thus number of iterations should be small below + */ while (j < no_terms_collect && oc > col[j].oc) j++; - if (j) - { + if (j) + { /* oc <= col[j] and oc > col[j-1] */ --j; memmove(col, col+1, sizeof(*col) * j); col[j].term = term; @@ -690,26 +721,41 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, zint *pos_array; int i; int num_recs = 10; /* number of records to analyze */ - int no_collect_terms = 20; /* number of term candidates */ ZebraMetaRecord *poset; ZEBRA_RES ret = ZEBRA_OK; int *ord_array; WRBUF wr = wrbuf_alloc(); + int use_xml = 0; int no_ord = 0; struct index_spec *spec, *spec_list; int error; + /* see if XML is required for response */ + if (oid_oidcmp(input_format, yaz_oid_recsyn_xml) == 0) + use_xml = 1; spec_list = parse_index_spec(elemsetname, odr_getmem(odr), &error); if (!spec_list || error) - return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; - + { + zebra_setError( + zh, + YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_, + 0); + return ZEBRA_FAIL; + } + for (spec = spec_list; spec; spec = spec->next) { if (!spec->index_type) - return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + { + zebra_setError( + zh, + YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_, + 0); + return ZEBRA_FAIL; + } no_ord++; } @@ -723,12 +769,15 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, spec->index_name); if (ord == -1) { - return YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_; + zebra_setError( + zh, + YAZ_BIB1_SPECIFIED_ELEMENT_SET_NAME_NOT_VALID_FOR_SPECIFIED_, + 0); + return ZEBRA_FAIL; } ord_array[i] = ord; } - - pos_array = (zint *) xmalloc(num_recs * sizeof(*pos_array)); + pos_array = (zint *) odr_malloc(odr, num_recs * sizeof(*pos_array)); for (i = 0; i < num_recs; i++) pos_array[i] = i+1; poset = zebra_meta_records_create(zh, setname, num_recs, pos_array); @@ -736,7 +785,6 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, { zebra_setError(zh, YAZ_BIB1_SPECIFIED_RESULT_SET_DOES_NOT_EXIST, setname); - xfree(pos_array); ret = ZEBRA_FAIL; } else @@ -777,7 +825,8 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, i++, spec = spec->next) { int ord = CAST_ZINT_TO_INT(key_in.mem[0]); - if (ord == ord_array[i]) + if (ord == ord_array[i] && + str[0] != FIRST_IN_FIELD_CHAR) { int *freq; zebra_strmap_t sm = map_array[i]; @@ -798,38 +847,61 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, rec_free(&rec); } } - wrbuf_puts(wr, "\n"); + if (use_xml) + wrbuf_puts(wr, "\n"); for (spec = spec_list, i = 0; i < no_ord; i++, spec = spec->next) { int j; NMEM nmem = nmem_create(); - struct term_collect *col = term_collect_create(map_array[i], - no_collect_terms, - nmem); + struct term_collect *col; + int no_collect_terms = 20; + + if (spec->extra) + no_collect_terms = atoi(spec->extra); + if (no_collect_terms < 1) + no_collect_terms = 1; + col = term_collect_create(map_array[i], no_collect_terms, nmem); term_collect_freq(zh, col, no_collect_terms, ord_array[i], resultSetRef(zh, setname)); - wrbuf_printf(wr, " \n", - spec->index_type, spec->index_name); + if (use_xml) + wrbuf_printf(wr, " \n", + spec->index_type, spec->index_name); + else + wrbuf_printf(wr, "facet %s %s\n", + spec->index_type, spec->index_name); for (j = 0; j < no_collect_terms; j++) { if (col[j].term) { char dst_buf[IT_MAX_WORD]; zebra_term_untrans(zh, spec->index_type, dst_buf, col[j].term); - wrbuf_printf(wr, " "); - wrbuf_xmlputs(wr, dst_buf); - wrbuf_printf(wr, "\n"); + if (use_xml) + { + wrbuf_printf(wr, " "); + wrbuf_xmlputs(wr, dst_buf); + wrbuf_printf(wr, "\n"); + } + else + { + wrbuf_printf(wr, "term %d", col[j].oc); + if (col[j].set_occur) + wrbuf_printf(wr, " " ZINT_FORMAT, + col[j].set_occur); + wrbuf_printf(wr, ": %s\n", dst_buf); + } } } - wrbuf_puts(wr, " \n"); + if (use_xml) + wrbuf_puts(wr, " \n"); nmem_destroy(nmem); } - wrbuf_puts(wr, "\n"); + if (use_xml) + wrbuf_puts(wr, "\n"); for (i = 0; i < no_ord; i++) zebra_strmap_destroy(map_array[i]); } @@ -840,7 +912,6 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, *rec_lenp = strlen(*rec_bufp); *output_format = yaz_oid_recsyn_xml; - xfree(pos_array); zebra_meta_records_destroy(zh, poset, num_recs); return ret; }