X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=rset%2Frset.c;h=3da4bd6220f55f037e06aac3bd556cd26fb7b504;hp=5a3c4d2fbb93eb8f2cbcb778d2e3e85bf88f32a0;hb=3bb2fd863b1664f1ff71dadb540313f301ca234f;hpb=3c5f6226f97612c0d6ac40591f600587c5ffa858 diff --git a/rset/rset.c b/rset/rset.c index 5a3c4d2..3da4bd6 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -1,143 +1,441 @@ -/* - * Copyright (C) 1994-1999, Index Data - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: rset.c,v $ - * Revision 1.13 1999-02-02 14:51:33 adam - * Updated WIN32 code specific sections. Changed header. - * - * Revision 1.12 1998/04/26 10:56:57 adam - * Added include of string.h. - * - * Revision 1.11 1998/03/05 08:36:28 adam - * New result set model. - * - * Revision 1.10 1998/02/10 11:56:46 adam - * Implemented rset_dup. - * - * Revision 1.9 1996/10/29 13:55:21 adam - * Include of zebrautl.h instead of alexutil.h. - * - * Revision 1.8 1995/12/11 09:15:23 adam - * New set types: sand/sor/snot - ranked versions of and/or/not in - * ranked/semi-ranked result sets. - * Note: the snot not finished yet. - * New rset member: flag. - * Bug fix: r_delete in rsrel.c did free bad memory block. - * - * Revision 1.7 1995/10/12 12:41:56 adam - * Private info (buf) moved from struct rset_control to struct rset. - * Bug fixes in relevance. - * - * Revision 1.6 1995/09/08 14:52:41 adam - * Work on relevance feedback. - * - * Revision 1.5 1995/09/07 13:58:43 adam - * New parameter: result-set file descriptor (RSFD) to support multiple - * positions within the same result-set. - * Boolean operators: and, or, not implemented. - * - * Revision 1.4 1995/09/06 16:11:56 adam - * More work on boolean sets. - * - * Revision 1.3 1995/09/04 15:20:39 adam - * More work on temp sets. is_open member removed. - * - * Revision 1.2 1995/09/04 12:33:56 adam - * Various cleanup. YAZ util used instead. - * - * Revision 1.1 1994/11/04 13:21:28 quinn - * Working. 
- * - */ +/* This file is part of the Zebra server. + Copyright (C) 1994-2011 Index Data + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +*/ + +#if HAVE_CONFIG_H +#include +#endif #include #include -#include - +#include +#include +#include #include -RSET rset_create(const struct rset_control *sel, void *parms) +static int log_level = 0; +static int log_level_initialized = 0; + + +/** + \brief Common constuctor for RFDs + \param rs Result set handle. + + Creates an rfd. 
Either allocates a new one, in which case the priv + pointer is null, and will have to be filled in, or picks up one + from the freelist, in which case the priv is already allocated, + and presumably everything that hangs from it as well +*/ +RSFD rfd_create_base(RSET rs) { - RSET rnew; - int i; + RSFD rnew = rs->free_list; - logf (LOG_DEBUG, "rs_create(%s)", sel->desc); - rnew = xmalloc(sizeof(*rnew)); - rnew->control = sel; - rnew->flags = 0; - rnew->count = 1; - rnew->rset_terms = NULL; - rnew->no_rset_terms = 0; - rnew->buf = (*sel->f_create)(rnew, sel, parms); - logf (LOG_DEBUG, "no_rset_terms: %d", rnew->no_rset_terms); - for (i = 0; ino_rset_terms; i++) - logf (LOG_DEBUG, " %s", rnew->rset_terms[i]->name); + if (rnew) + { + rs->free_list = rnew->next; + assert(rnew->rset==rs); + yaz_log(log_level, "rfd_create_base (fl): rfd=%p rs=%p fl=%p priv=%p", + rnew, rs, rs->free_list, rnew->priv); + } + else + { + rnew = nmem_malloc(rs->nmem, sizeof(*rnew)); + rnew->counted_buf = nmem_malloc(rs->nmem, rs->keycontrol->key_size); + rnew->priv = 0; + rnew->rset = rs; + yaz_log(log_level, "rfd_create_base (new): rfd=%p rs=%p fl=%p priv=%p", + rnew, rs, rs->free_list, rnew->priv); + } + rnew->next = rs->use_list; + rs->use_list = rnew; + rnew->counted_items = 0; return rnew; } -void rset_delete (RSET rs) +static void rset_close_int(RSET rs, RSFD rfd) +{ + RSFD *pfd; + (*rs->control->f_close)(rfd); + + yaz_log(log_level, "rfd_delete_base: rfd=%p rs=%p priv=%p fl=%p", + rfd, rs, rfd->priv, rs->free_list); + for (pfd = &rs->use_list; *pfd; pfd = &(*pfd)->next) + if (*pfd == rfd) + { + *pfd = (*pfd)->next; + rfd->next = rs->free_list; + rs->free_list = rfd; + return; + } + yaz_log(YLOG_WARN, "rset_close handle not found. type=%s", + rs->control->desc); +} + +void rset_set_hits_limit(RSET rs, zint l) +{ + yaz_log(log_level, "rset_set_hits_limit %p l=" ZINT_FORMAT, rs, l); + rs->hits_limit = l; +} + +/** + \brief Closes a result set RFD handle + \param rfd the RFD handle. 
+*/ +void rset_close(RSFD rfd) { - (rs->count)--; - if (!rs->count) + RSET rs = rfd->rset; + + if (rs->hits_count == 0) + { + TERMID termid; + char buf[100]; + + while (rfd->counted_items <= rs->hits_limit + && rset_default_read(rfd, buf, &termid)) + ; + + rs->hits_count = rfd->counted_items; + yaz_log(log_level, "rset_close rset=%p hits_count=" ZINT_FORMAT + " hits_limit=" ZINT_FORMAT, + rs, rs->hits_count, rs->hits_limit); + rs->hits_approx = 0; + if (rs->hits_count > rs->hits_limit && rs->hits_limit > 0) + { + double cur, tot; + zint est; + rset_pos(rfd, &cur, &tot); + if (tot > 0) { + int i; + double ratio = cur/tot; + est = (zint)(0.5 + rs->hits_count / ratio); + yaz_log(log_level, "Estimating hits (%s) " + "%0.1f->" ZINT_FORMAT + "; %0.1f->" ZINT_FORMAT, + rs->control->desc, + cur, rs->hits_count, + tot, est); + i = 0; /* round to significant digits */ + while (est > rs->hits_round) { + est /= 10; + i++; + } + while (i--) + est *= 10; + rs->hits_count = est; + rs->hits_approx = 1; + } + } + yaz_log(log_level, "rset_close(%s) p=%p count=" ZINT_FORMAT, + rs->control->desc, rs, + rs->hits_count); + } + rset_close_int(rs, rfd); +} + +/** + \brief Common constuctor for RSETs + \param sel The interface control handle + \param nmem The memory handle for it. + \param kcontrol Key control info (decode, encode, comparison etc) + \param scope scope for set + \param term Information about term for it (NULL for none). + \param no_children number of child rsets (0 for none) + \param children child rsets (NULL for none). + + Creates an rfd. 
Either allocates a new one, in which case the priv
   pointer is null, and will have to be filled in, or picks up one
   from the freelist, in which case the priv is already allocated,
   and presumably everything that hangs from it as well

   NOTE(review): the description above reads as if it was copied from
   rfd_create_base. What this function does is: allocate a new RSET from
   nmem, set refcount to 1, leave priv at 0 with empty free/use lists and
   zeroed hit statistics (hits_round is 1000), call the inc method of
   kcontrol, and copy the array of child pointers into nmem. If a term is
   given, the term is pointed back at the new rset and the hits_limit of
   the term is taken over.
*/
RSET rset_create_base(const struct rset_control *sel,
                      NMEM nmem, struct rset_key_control *kcontrol,
                      int scope, TERMID term,
                      int no_children, RSET *children)
{
    RSET rset;
    assert(nmem);
    if (!log_level_initialized)
    {
        log_level = yaz_log_module_level("rset");
        log_level_initialized = 1;
    }

    rset = (RSET) nmem_malloc(nmem, sizeof(*rset));
    yaz_log(log_level, "rs_create(%s) rs=%p (nm=%p)", sel->desc, rset, nmem);
    yaz_log(log_level, " ref_id=%s",
            (term && term->ref_id ? term->ref_id : "null"));
    rset->nmem = nmem;
    rset->control = sel;
    rset->refcount = 1;
    rset->priv = 0;
    rset->free_list = NULL;
    rset->use_list = NULL;
    rset->hits_count = 0;
    rset->hits_limit = 0;
    rset->hits_round = 1000;
    /* rset_visit reads hits_approx even if rset_close never assigned it;
       the block comes from nmem_malloc, so set it explicitly */
    rset->hits_approx = 0;
    rset->keycontrol = kcontrol;

    (*kcontrol->inc)(kcontrol);
    rset->scope = scope;
    rset->term = term;
    if (term)
    {
        term->rset = rset;
        rset->hits_limit = term->hits_limit;
    }
    rset->no_children = no_children;
    rset->children = 0;
    if (no_children > 0)
    {
        /* size by element type (RSET), not by pointer-to-element */
        rset->children = (RSET *)
            nmem_malloc(rset->nmem, no_children * sizeof(*rset->children));
        memcpy(rset->children, children,
               no_children * sizeof(*rset->children));
    }
    return rset;
}

/**
   \brief Destructor RSETs
   \param rs Handle for result set.

   Destroys a result set and all its children.
   The f_delete method of control is called for the result set.
+*/ +void rset_delete(RSET rs) +{ + (rs->refcount)--; + yaz_log(log_level, "rs_delete(%s), rs=%p, refcount=%d", + rs->control->desc, rs, rs->refcount); + if (!rs->refcount) { - (*rs->control->f_delete)(rs); - xfree(rs); + int i; + if (rs->use_list) + yaz_log(YLOG_WARN, "rs_delete(%s) still has RFDs in use", + rs->control->desc); + for (i = 0; ino_children; i++) + rset_delete(rs->children[i]); + (*rs->control->f_delete)(rs); + (*rs->keycontrol->dec)(rs->keycontrol); } } +/** + \brief Test for last use of RFD + \param rfd RFD handle. + + Returns 1 if this RFD is the last reference to it; 0 otherwise. +*/ +int rfd_is_last(RSFD rfd) +{ + if (rfd->rset->use_list == rfd && rfd->next == 0) + return 1; + return 0; +} + +/** + \brief Duplicate an RSET + \param rs Handle for result set. + + Duplicates a result set by incrementing the reference count to it. +*/ RSET rset_dup (RSET rs) { - (rs->count)++; + (rs->refcount)++; + yaz_log(log_level, "rs_dup(%s), rs=%p, refcount=%d", + rs->control->desc, rs, rs->refcount); return rs; } -RSET_TERM *rset_terms(RSET rs, int *no) +/** + \brief Estimates hit count for result set. + \param rs Result Set. + + rset_count uses rset_pos to get the total and returns that. + This is ok for rsisamb/c/s, and for some other rsets, but in case of + booleans etc it will give bad estimate, as nothing has been read + from that rset +*/ +zint rset_count(RSET rs) +{ + double cur, tot; + RSFD rfd = rset_open(rs, 0); + rset_pos(rfd, &cur, &tot); + rset_close_int(rs, rfd); + return (zint) tot; +} + +/** + \brief is a getterms function for those that don't have any + \param ct result set handle + \param terms array of terms (0..maxterms-1) + \param maxterms length of terms array + \param curterm current size of terms array + + If there is a term associated with rset the term is appeneded; otherwise + the terms array is untouched but curterm is incremented anyway. 
+*/ +void rset_get_one_term(RSET ct, TERMID *terms, int maxterms, int *curterm) +{ + if (ct->term) + { + if (*curterm < maxterms) + terms[*curterm] = ct->term; + (*curterm)++; + } +} + +struct ord_list *ord_list_create(NMEM nmem) +{ + return 0; +} + +struct ord_list *ord_list_append(NMEM nmem, struct ord_list *list, + int ord) +{ + struct ord_list *n = nmem_malloc(nmem, sizeof(*n)); + n->ord = ord; + n->next = list; + return n; +} + +struct ord_list *ord_list_dup(NMEM nmem, struct ord_list *list) +{ + struct ord_list *n = ord_list_create(nmem); + for (; list; list = list->next) + n = ord_list_append(nmem, n, list->ord); + return n; +} + +void ord_list_print(struct ord_list *list) { - *no = rs->no_rset_terms; - return rs->rset_terms; + for (; list; list = list->next) + yaz_log(YLOG_LOG, "ord_list %d", list->ord); } +/** + \brief Creates a TERMID entry. + \param name Term/Name buffer with given length + \param length of term + \param flags for term + \param type Term Type, Z_Term_general, Z_Term_characterString,.. + \param nmem memory for term. 
+ \param ol ord list + \param reg_type register type + \param hits_limit limit before counting stops and gets approximate + \param ref_id supplied ID for term that can be used to identify this +*/ +TERMID rset_term_create(const char *name, int length, const char *flags, + int type, NMEM nmem, struct ord_list *ol, + int reg_type, + zint hits_limit, const char *ref_id) -RSET_TERM rset_term_create (const char *name, int length, const char *flags) { - RSET_TERM t = xmalloc (sizeof(*t)); + TERMID t; + yaz_log (log_level, "term_create '%s' %d f=%s type=%d nmem=%p", + name, length, flags, type, nmem); + t= (TERMID) nmem_malloc(nmem, sizeof(*t)); if (!name) - t->name = NULL; + t->name = NULL; else if (length == -1) - t->name = xstrdup (name); + t->name = nmem_strdup(nmem, name); else - { - t->name = xmalloc (length+1); - memcpy (t->name, name, length); - t->name[length] = '\0'; - } + t->name = nmem_strdupn(nmem, name, length); + if (!ref_id) + t->ref_id = 0; + else + t->ref_id = nmem_strdup(nmem, ref_id); if (!flags) - t->flags = NULL; + t->flags = NULL; else - t->flags = xstrdup (flags); - t->nn = 1; + t->flags = nmem_strdup(nmem, flags); + t->hits_limit = hits_limit; + t->type = type; + t->reg_type = reg_type; + t->rankpriv = 0; + t->rset = 0; + t->ol = ord_list_dup(nmem, ol); return t; } -void rset_term_destroy (RSET_TERM t) +int rset_default_read(RSFD rfd, void *buf, TERMID *term) { - xfree (t->name); - xfree (t->flags); - xfree (t); + RSET rset = rfd->rset; + int rc = (*rset->control->f_read)(rfd, buf, term); + if (rc > 0) + { + int got_scope; + if (rfd->counted_items == 0) + got_scope = rset->scope+1; + else + got_scope = rset->keycontrol->cmp(buf, rfd->counted_buf); + +#if 0 + key_logdump_txt(YLOG_LOG, buf, "rset_default_read"); + yaz_log(YLOG_LOG, "rset_scope=%d got_scope=%d", rset->scope, got_scope); +#endif + if (got_scope > rset->scope) + { + memcpy(rfd->counted_buf, buf, rset->keycontrol->key_size); + rfd->counted_items++; + } + } + return rc; } -RSET_TERM 
rset_term_dup (RSET_TERM t) +int rset_default_forward(RSFD rfd, void *buf, TERMID *term, + const void *untilbuf) { - RSET_TERM nt = xmalloc (sizeof(*nt)); - if (t->name) - nt->name = xstrdup (t->name); - else - nt->name = NULL; - if (t->flags) - nt->flags = xstrdup (t->flags); - else - nt->flags = NULL; - nt->nn = t->nn; - return nt; + RSET rset = rfd->rset; + int more; + + if (rset->control->f_forward && + rfd->counted_items >= rset->hits_limit) + { + assert (rset->control->f_forward != rset_default_forward); + return rset->control->f_forward(rfd, buf, term, untilbuf); + } + + while ((more = rset_read(rfd, buf, term)) > 0) + { + if ((rfd->rset->keycontrol->cmp)(untilbuf, buf) < rset->scope) + break; + } + if (log_level) + yaz_log(log_level, "rset_default_forward exiting rfd=%p scope=%d m=%d c=%d", + rfd, rset->scope, more, rset->scope); + + return more; +} + +void rset_visit(RSET rset, int level) +{ + int i; + yaz_log(YLOG_LOG, "%*s%c " ZINT_FORMAT, level, "", + rset->hits_approx ? '~' : '=', + rset->hits_count); + for (i = 0; ino_children; i++) + rset_visit(rset->children[i], level+1); } + +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +