X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fretrieve.c;h=ff3bced3d0cbdd74a44d7fe08be345a6648b3c59;hb=5e8a750d3038d737054cd56cb893e534d77bc109;hp=021a7ea70ba9a7365d74e09f7c10c3e6b15aef89;hpb=52faec54d6e3cc18105f36546df7b23faeb9c945;p=idzebra-moved-to-github.git diff --git a/index/retrieve.c b/index/retrieve.c index 021a7ea..ff3bced 100644 --- a/index/retrieve.c +++ b/index/retrieve.c @@ -1,4 +1,4 @@ -/* $Id: retrieve.c,v 1.80 2007-12-04 12:52:33 adam Exp $ +/* $Id: retrieve.c,v 1.82 2007-12-05 09:55:57 adam Exp $ Copyright (C) 1995-2007 Index Data ApS @@ -80,6 +80,7 @@ static int zebra_create_record_stream(ZebraHandle zh, struct index_spec { const char *index_name; const char *index_type; + const char *extra; struct index_spec *next; }; @@ -103,6 +104,7 @@ struct index_spec *parse_index_spec(const char *elem, NMEM nmem, struct index_spec *spec = nmem_malloc(nmem, sizeof(*spec)); spec->index_type = 0; spec->next = 0; + spec->extra = 0; if (!first) first = spec; @@ -119,10 +121,19 @@ struct index_spec *parse_index_spec(const char *elem, NMEM nmem, cp++; cp0 = cp; - while (*cp != '\0' && *cp != ',') + while (*cp != '\0' && *cp != ',' && *cp != ':') cp++; spec->index_type = nmem_strdupn(nmem, cp0, cp - cp0); } + if (*cp == ':') /* extra arguments */ + { + cp++; + cp0 = cp; + + while (*cp != '\0' && *cp != ',' && *cp != ':') + cp++; + spec->extra = nmem_strdupn(nmem, cp0, cp - cp0); + } if (*cp != ',') break; } @@ -629,6 +640,22 @@ zint freq_term(ZebraHandle zh, int ord, const char *term, RSET rset_set) return hits; } +int term_qsort_handle(const void *a, const void *b) +{ + const struct term_collect *l = a; + const struct term_collect *r = b; + if (l->set_occur < r->set_occur) + return 1; + else if (l->set_occur > r->set_occur) + return -1; + else + { + const char *lterm = l->term ? l->term : ""; + const char *rterm = r->term ? r->term : ""; + return strcmp(lterm, rterm); + } +} + void term_collect_freq(ZebraHandle zh, struct term_collect *col, int no_terms_collect, int ord, RSET rset) @@ -639,6 +666,7 @@ void term_collect_freq(ZebraHandle zh, if (col[i].term) col[i].set_occur = freq_term(zh, ord, col[i].term, rset); } + qsort(col, no_terms_collect, sizeof(*col), term_qsort_handle); } struct term_collect *term_collect_create(zebra_strmap_t sm, @@ -662,14 +690,17 @@ struct term_collect *term_collect_create(zebra_strmap_t sm, it = zebra_strmap_it_create(sm); while ((term = zebra_strmap_it_next(it, &data_buf, &data_len))) { + /* invariant: + col[0] has lowest oc . col[no_terms_collect-1] has highest oc */ int oc = *(int*) data_buf; int j = 0; /* insertion may be slow but terms terms will be "infrequent" and - thus number of iterations should be small below */ + thus number of iterations should be small below + */ while (j < no_terms_collect && oc > col[j].oc) j++; - if (j) - { + if (j) + { /* oc <= col[j] and oc > col[j-1] */ --j; memmove(col, col+1, sizeof(*col) * j); col[j].term = term; @@ -690,7 +721,6 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, zint *pos_array; int i; int num_recs = 10; /* number of records to analyze */ - int no_collect_terms = 20; /* number of term candidates */ ZebraMetaRecord *poset; ZEBRA_RES ret = ZEBRA_OK; int *ord_array; @@ -823,9 +853,14 @@ static ZEBRA_RES facet_fetch(ZebraHandle zh, const char *setname, { int j; NMEM nmem = nmem_create(); - struct term_collect *col = term_collect_create(map_array[i], - no_collect_terms, - nmem); + struct term_collect *col; + int no_collect_terms = 20; + + if (spec->extra) + no_collect_terms = atoi(spec->extra); + if (no_collect_terms < 1) + no_collect_terms = 1; + col = term_collect_create(map_array[i], no_collect_terms, nmem); term_collect_freq(zh, col, no_collect_terms, ord_array[i], resultSetRef(zh, setname));