From: Adam Dickmeiss Date: Thu, 5 Mar 1998 08:36:27 +0000 (+0000) Subject: New result set model. X-Git-Tag: ZEBRA.1.0~234 X-Git-Url: http://git.indexdata.com/?a=commitdiff_plain;ds=sidebyside;h=33b386a95c0aac273527d596ce1aafa0dc567b7b;p=idzebra-moved-to-github.git New result set model. --- diff --git a/include/rsbool.h b/include/rsbool.h index 35ac8bc..165a3f3 100644 --- a/include/rsbool.h +++ b/include/rsbool.h @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1997, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rsbool.h,v $ - * Revision 1.5 1997-09-05 15:30:02 adam + * Revision 1.6 1998-03-05 08:37:44 adam + * New result set model. + * + * Revision 1.5 1997/09/05 15:30:02 adam * Changed prototype for chr_map_input - added const. * Added support for C++, headers uses extern "C" for public definitions. * @@ -37,13 +40,9 @@ extern "C" { #endif -extern const rset_control *rset_kind_and; -extern const rset_control *rset_kind_or; -extern const rset_control *rset_kind_not; - -extern const rset_control *rset_kind_sand; -extern const rset_control *rset_kind_sor; -extern const rset_control *rset_kind_snot; +extern const struct rset_control *rset_kind_and; +extern const struct rset_control *rset_kind_or; +extern const struct rset_control *rset_kind_not; typedef struct rset_bool_parms { diff --git a/include/rset.h b/include/rset.h index fd89b66..cf36870 100644 --- a/include/rset.h +++ b/include/rset.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rset.h,v $ - * Revision 1.14 1998-02-10 11:56:46 adam + * Revision 1.15 1998-03-05 08:37:44 adam + * New result set model. + * + * Revision 1.14 1998/02/10 11:56:46 adam * Implemented rset_dup. 
* * Revision 1.13 1997/12/18 10:54:24 adam @@ -71,38 +74,46 @@ extern "C" { typedef void *RSFD; typedef struct rset *RSET; +typedef struct rset_term *RSET_TERM; -typedef struct rset_control +struct rset_control { char *desc; /* text description of set type (for debugging) */ - void *(*f_create)(const struct rset_control *sel, void *parms, - int *flags); + void *(*f_create)(RSET ct, const struct rset_control *sel, void *parms); RSFD (*f_open)(RSET ct, int wflag); void (*f_close)(RSFD rfd); void (*f_delete)(RSET ct); void (*f_rewind)(RSFD rfd); int (*f_count)(RSET ct); - int (*f_hits)(RSET ct, void *oi); - int (*f_read)(RSFD rfd, void *buf); + int (*f_read)(RSFD rfd, void *buf, int *term_index); int (*f_write)(RSFD rfd, const void *buf); - int (*f_score)(RSFD rfd, int *score); -} rset_control; +}; + +struct rset_term { + char *name; + int nn; + char *flags; +}; typedef struct rset { - const rset_control *control; + const struct rset_control *control; int flags; int count; void *buf; + RSET_TERM *rset_terms; + int no_rset_terms; } rset; +RSET_TERM rset_term_create (const char *name, int length, const char *flags); +void rset_term_destroy (RSET_TERM t); +RSET_TERM rset_term_dup (RSET_TERM t); + #define RSETF_READ 0 #define RSETF_WRITE 1 -#define RSETF_SORT_SYSNO 0 -#define RSETF_SORT_RANK 2 - -RSET rset_create(const rset_control *sel, void *parms); /* parameters? */ +RSET rset_create(const struct rset_control *sel, void *parms); +/* parameters? 
*/ /* int rset_open(RSET rs, int wflag); */ #define rset_open(rs, wflag) (*(rs)->control->f_open)((rs), (wflag)) @@ -120,27 +131,18 @@ RSET rset_dup (RSET rs); /* int rset_count(RSET rs); */ #define rset_count(rs) (*(rs)->control->f_count)(rs) -/* int rset_hits (RSET) */ -#define rset_hits(rs) (*(rs)->control->f_hits)((rs), 0) - /* int rset_read(RSET rs, void *buf); */ -#define rset_read(rs, fd, buf) (*(rs)->control->f_read)((fd), (buf)) +#define rset_read(rs, fd, buf, indx) (*(rs)->control->f_read)((fd), (buf), indx) /* int rset_write(RSET rs, const void *buf); */ #define rset_write(rs, fd, buf) (*(rs)->control->f_write)((fd), (buf)) -/* int rset_score(RSET rs, int *buf); */ -#define rset_score(rs, fd, score) (*(rs)->control->f_score)((fd), (score)) - - /* int rset_type (RSET) */ #define rset_type(rs) ((rs)->control->desc) #define RSET_FLAG_VOLATILE 1 -#define RSET_FLAG_RANKED 2 #define rset_is_volatile(rs) ((rs)->flags & RSET_FLAG_VOLATILE) -#define rset_is_ranked(rs) ((rs)->flags & RSET_FLAG_RANKED) #ifdef __cplusplus } diff --git a/include/rsisam.h b/include/rsisam.h index ee47552..0148fde 100644 --- a/include/rsisam.h +++ b/include/rsisam.h @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1997, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rsisam.h,v $ - * Revision 1.3 1997-09-05 15:30:04 adam + * Revision 1.4 1998-03-05 08:37:44 adam + * New result set model. + * + * Revision 1.3 1997/09/05 15:30:04 adam * Changed prototype for chr_map_input - added const. * Added support for C++, headers uses extern "C" for public definitions. 
* @@ -27,12 +30,13 @@ extern "C" { #endif -extern const rset_control *rset_kind_isam; +extern const struct rset_control *rset_kind_isam; typedef struct rset_isam_parms { ISAM is; ISAM_P pos; + RSET_TERM rset_term; } rset_isam_parms; #ifdef __cplusplus diff --git a/include/rsisamc.h b/include/rsisamc.h index 5b9d33b..29e1c8a 100644 --- a/include/rsisamc.h +++ b/include/rsisamc.h @@ -1,10 +1,13 @@ /* - * Copyright (C) 1996-1997, Index Data I/S + * Copyright (C) 1996-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rsisamc.h,v $ - * Revision 1.2 1997-09-05 15:30:04 adam + * Revision 1.3 1998-03-05 08:37:44 adam + * New result set model. + * + * Revision 1.2 1997/09/05 15:30:04 adam * Changed prototype for chr_map_input - added const. * Added support for C++, headers uses extern "C" for public definitions. * @@ -23,12 +26,13 @@ extern "C" { #endif -extern const rset_control *rset_kind_isamc; +extern const struct rset_control *rset_kind_isamc; typedef struct rset_isamc_parms { ISAMC is; ISAMC_P pos; + RSET_TERM rset_term; } rset_isamc_parms; #ifdef __cplusplus diff --git a/include/rsm_or.h b/include/rsm_or.h index 6007df4..c934b8a 100644 --- a/include/rsm_or.h +++ b/include/rsm_or.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rsm_or.h,v $ - * Revision 1.3 1997-09-05 15:30:04 adam + * Revision 1.4 1998-03-05 08:37:44 adam + * New result set model. + * + * Revision 1.3 1997/09/05 15:30:04 adam * Changed prototype for chr_map_input - added const. * Added support for C++, headers uses extern "C" for public definitions. 
* @@ -26,7 +29,7 @@ extern "C" { #endif -extern const rset_control *rset_kind_m_or; +extern const struct rset_control *rset_kind_m_or; typedef struct rset_m_or_parms { @@ -35,6 +38,7 @@ typedef struct rset_m_or_parms ISAMC isc; ISAM_P *isam_positions; + RSET_TERM rset_term; int no_isam_positions; int no_save_positions; diff --git a/include/rsnull.h b/include/rsnull.h index e4ac4c3..204903e 100644 --- a/include/rsnull.h +++ b/include/rsnull.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rsnull.h,v $ - * Revision 1.2 1997-09-05 15:30:05 adam + * Revision 1.3 1998-03-05 08:37:44 adam + * New result set model. + * + * Revision 1.2 1997/09/05 15:30:05 adam * Changed prototype for chr_map_input - added const. * Added support for C++, headers uses extern "C" for public definitions. * @@ -22,7 +25,14 @@ extern "C" { #endif -extern const rset_control *rset_kind_null; +typedef struct rset_null_parms +{ + int key_size; + char *temp_path; + RSET_TERM rset_term; +} rset_null_parms; + +extern const struct rset_control *rset_kind_null; #ifdef __cplusplus } diff --git a/include/rstemp.h b/include/rstemp.h index b8c41cf..5ac9e51 100644 --- a/include/rstemp.h +++ b/include/rstemp.h @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rstemp.h,v $ - * Revision 1.4 1997-09-17 12:19:11 adam + * Revision 1.5 1998-03-05 08:37:44 adam + * New result set model. + * + * Revision 1.4 1997/09/17 12:19:11 adam * Zebra version corresponds to YAZ version 1.4. * Changed Zebra server so that it doesn't depend on global common_resource. 
* @@ -29,12 +32,13 @@ extern "C" { #endif -extern const rset_control *rset_kind_temp; +extern const struct rset_control *rset_kind_temp; typedef struct rset_temp_parms { int key_size; char *temp_path; + RSET_TERM rset_term; } rset_temp_parms; #ifdef __cplusplus diff --git a/rset/Makefile b/rset/Makefile index 00f6abd..e4b5e2b 100644 --- a/rset/Makefile +++ b/rset/Makefile @@ -1,7 +1,7 @@ -# Copyright (C) 1994-1996, Index Data I/S +# Copyright (C) 1994-1998, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.14 1996-12-20 11:07:20 adam Exp $ +# $Id: Makefile,v 1.15 1998-03-05 08:36:27 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -13,8 +13,7 @@ INCLUDE=-I../include $(YAZINC) DEFS=$(INCLUDE) LIB=../lib/rset.a PROG= -PO=rset.o rstemp.o rsisam.o rsnull.o rsbool.o rssbool.o \ - rsrel.o rsisamc.o rsm_or.o +PO=rset.o rstemp.o rsisam.o rsnull.o rsbool.o rsisamc.o rsm_or.o CPP=$(CC) -E all: $(LIB) diff --git a/rset/rsbool.c b/rset/rsbool.c index 568a937..5ebab94 100644 --- a/rset/rsbool.c +++ b/rset/rsbool.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rsbool.c,v $ - * Revision 1.13 1997-12-18 10:54:24 adam + * Revision 1.14 1998-03-05 08:36:27 adam + * New result set model. + * + * Revision 1.13 1997/12/18 10:54:24 adam * New method result set method rs_hits that returns the number of * hits in result-set (if known). The ranked result set returns real * number of hits but only when not combined with other operands. 
@@ -63,21 +66,18 @@ #include #include -static void *r_create(const struct rset_control *sel, void *parms, - int *flags); +static void *r_create(RSET ct, const struct rset_control *sel, void *parms); static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); static int r_count (RSET ct); -static int r_hits (RSET ct, void *oi); -static int r_read_and (RSFD rfd, void *buf); -static int r_read_or (RSFD rfd, void *buf); -static int r_read_not (RSFD rfd, void *buf); +static int r_read_and (RSFD rfd, void *buf, int *term_index); +static int r_read_or (RSFD rfd, void *buf, int *term_index); +static int r_read_not (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); -static const rset_control control_and = +static const struct rset_control control_and = { "and", r_create, @@ -86,13 +86,11 @@ static const rset_control control_and = r_delete, r_rewind, r_count, - r_hits, r_read_and, r_write, - r_score }; -static const rset_control control_or = +static const struct rset_control control_or = { "or", r_create, @@ -101,13 +99,11 @@ static const rset_control control_or = r_delete, r_rewind, r_count, - r_hits, r_read_or, r_write, - r_score }; -static const rset_control control_not = +static const struct rset_control control_not = { "not", r_create, @@ -116,21 +112,20 @@ static const rset_control control_not = r_delete, r_rewind, r_count, - r_hits, r_read_not, r_write, - r_score }; -const rset_control *rset_kind_and = &control_and; -const rset_control *rset_kind_or = &control_or; -const rset_control *rset_kind_not = &control_not; +const struct rset_control *rset_kind_and = &control_and; +const struct rset_control *rset_kind_or = &control_or; +const struct rset_control *rset_kind_not = &control_not; struct rset_bool_info { int key_size; RSET rset_l; RSET rset_r; + int term_index_s; int (*cmp)(const void *p1, const void *p2); 
struct rset_bool_rfd *rfd_list; }; @@ -140,14 +135,15 @@ struct rset_bool_rfd { RSFD rfd_r; int more_l; int more_r; + int term_index_l; + int term_index_r; void *buf_l; void *buf_r; struct rset_bool_rfd *next; struct rset_bool_info *info; }; -static void *r_create (const struct rset_control *sel, void *parms, - int *flags) +static void *r_create (RSET ct, const struct rset_control *sel, void *parms) { rset_bool_parms *bool_parms = parms; struct rset_bool_info *info; @@ -157,9 +153,20 @@ static void *r_create (const struct rset_control *sel, void *parms, info->rset_l = bool_parms->rset_l; info->rset_r = bool_parms->rset_r; if (rset_is_volatile(info->rset_l) || rset_is_volatile(info->rset_r)) - *flags |= RSET_FLAG_VOLATILE; + ct->flags |= RSET_FLAG_VOLATILE; info->cmp = bool_parms->cmp; info->rfd_list = NULL; + + info->term_index_s = info->rset_l->no_rset_terms; + ct->no_rset_terms = + info->rset_l->no_rset_terms + info->rset_r->no_rset_terms; + ct->rset_terms = xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms); + + memcpy (ct->rset_terms, info->rset_l->rset_terms, + info->rset_l->no_rset_terms * sizeof(*ct->rset_terms)); + memcpy (ct->rset_terms + info->rset_l->no_rset_terms, + info->rset_r->rset_terms, + info->rset_r->no_rset_terms * sizeof(*ct->rset_terms)); return info; } @@ -180,10 +187,12 @@ static RSFD r_open (RSET ct, int flag) rfd->buf_l = xmalloc (info->key_size); rfd->buf_r = xmalloc (info->key_size); - rfd->rfd_l = rset_open (info->rset_l, RSETF_READ|RSETF_SORT_SYSNO); - rfd->rfd_r = rset_open (info->rset_r, RSETF_READ|RSETF_SORT_SYSNO); - rfd->more_l = rset_read (info->rset_l, rfd->rfd_l, rfd->buf_l); - rfd->more_r = rset_read (info->rset_r, rfd->rfd_r, rfd->buf_r); + rfd->rfd_l = rset_open (info->rset_l, RSETF_READ); + rfd->rfd_r = rset_open (info->rset_r, RSETF_READ); + rfd->more_l = rset_read (info->rset_l, rfd->rfd_l, rfd->buf_l, + &rfd->term_index_l); + rfd->more_r = rset_read (info->rset_r, rfd->rfd_r, rfd->buf_r, + &rfd->term_index_r); return 
rfd; } @@ -212,6 +221,7 @@ static void r_delete (RSET ct) struct rset_bool_info *info = ct->buf; assert (info->rfd_list == NULL); + xfree (ct->rset_terms); rset_delete (info->rset_l); rset_delete (info->rset_r); xfree (info); @@ -225,8 +235,8 @@ static void r_rewind (RSFD rfd) logf (LOG_DEBUG, "rsbool_rewind"); rset_rewind (info->rset_l, p->rfd_l); rset_rewind (info->rset_r, p->rfd_r); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); + p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, &p->term_index_l); + p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, &p->term_index_r); } static int r_count (RSET ct) @@ -234,12 +244,7 @@ static int r_count (RSET ct) return 0; } -static int r_hits (RSET ct, void *oi) -{ - return -1; -} - -static int r_read_and (RSFD rfd, void *buf) +static int r_read_and (RSFD rfd, void *buf, int *term_index) { struct rset_bool_rfd *p = rfd; struct rset_bool_info *info = p->info; @@ -252,31 +257,41 @@ static int r_read_and (RSFD rfd, void *buf) if (!cmp) { memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); + *term_index = p->term_index_l; + p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, + &p->term_index_l); + p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, + &p->term_index_r); return 1; } else if (cmp == 1) { memcpy (buf, p->buf_r, info->key_size); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); + + *term_index = p->term_index_r + info->term_index_s; + p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, + &p->term_index_r); return 1; } else if (cmp == -1) { memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); + *term_index = p->term_index_l; + p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, + &p->term_index_l); return 1; } else if (cmp > 1) - p->more_r = rset_read 
(info->rset_r, p->rfd_r, p->buf_r); + p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, + &p->term_index_r); else - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); + p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, + &p->term_index_l); } return 0; } -static int r_read_or (RSFD rfd, void *buf) +static int r_read_or (RSFD rfd, void *buf, int *term_index) { struct rset_bool_rfd *p = rfd; struct rset_bool_info *info = p->info; @@ -294,27 +309,34 @@ static int r_read_or (RSFD rfd, void *buf) if (!cmp) { memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); + *term_index = p->term_index_l; + p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, + &p->term_index_l); + p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, + &p->term_index_r); return 1; } else if (cmp > 0) { memcpy (buf, p->buf_r, info->key_size); - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); + *term_index = p->term_index_r + info->term_index_s; + p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, + &p->term_index_r); return 1; } else { memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); + *term_index = p->term_index_l; + p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, + &p->term_index_l); return 1; } } return 0; } -static int r_read_not (RSFD rfd, void *buf) +static int r_read_not (RSFD rfd, void *buf, int *term_index) { struct rset_bool_rfd *p = rfd; struct rset_bool_info *info = p->info; @@ -332,24 +354,29 @@ static int r_read_not (RSFD rfd, void *buf) if (cmp < -1) { memcpy (buf, p->buf_l, info->key_size); - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); + *term_index = p->term_index_l; + p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, + &p->term_index_l); return 1; } else if (cmp > 1) - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); + p->more_r = rset_read 
(info->rset_r, p->rfd_r, p->buf_r, + &p->term_index_r); else { memcpy (buf, p->buf_l, info->key_size); do { - p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l); + p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, + &p->term_index_l); if (!p->more_l) break; cmp = (*info->cmp)(p->buf_l, buf); } while (cmp >= -1 && cmp <= 1); do { - p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r); + p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, + &p->term_index_r); if (!p->more_r) break; cmp = (*info->cmp)(p->buf_r, buf); @@ -366,9 +393,3 @@ static int r_write (RSFD rfd, const void *buf) return -1; } -static int r_score (RSFD rfd, int *score) -{ - *score = -1; - return -1; -} - diff --git a/rset/rset.c b/rset/rset.c index e0d1d80..3ebde5b 100644 --- a/rset/rset.c +++ b/rset/rset.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rset.c,v $ - * Revision 1.10 1998-02-10 11:56:46 adam + * Revision 1.11 1998-03-05 08:36:28 adam + * New result set model. + * + * Revision 1.10 1998/02/10 11:56:46 adam * Implemented rset_dup. 
* * Revision 1.9 1996/10/29 13:55:21 adam @@ -48,16 +51,22 @@ #include -RSET rset_create(const rset_control *sel, void *parms) +RSET rset_create(const struct rset_control *sel, void *parms) { RSET rnew; + int i; logf (LOG_DEBUG, "rs_create(%s)", sel->desc); rnew = xmalloc(sizeof(*rnew)); rnew->control = sel; rnew->flags = 0; rnew->count = 1; - rnew->buf = (*sel->f_create)(sel, parms, &rnew->flags); + rnew->rset_terms = NULL; + rnew->no_rset_terms = 0; + rnew->buf = (*sel->f_create)(rnew, sel, parms); + logf (LOG_DEBUG, "no_rset_terms: %d", rnew->no_rset_terms); + for (i = 0; i < rnew->no_rset_terms; i++) + logf (LOG_DEBUG, " %s", rnew->rset_terms[i]->name); return rnew; } @@ -65,8 +74,10 @@ void rset_delete (RSET rs) { (rs->count)--; if (!rs->count) + { (*rs->control->f_delete)(rs); - xfree(rs); + xfree(rs); + } } RSET rset_dup (RSET rs) @@ -74,3 +85,52 @@ RSET rset_dup (RSET rs) (rs->count)++; return rs; } + +RSET_TERM *rset_terms(RSET rs, int *no) +{ + *no = rs->no_rset_terms; + return rs->rset_terms; +} + +RSET_TERM rset_term_create (const char *name, int length, const char *flags) +{ + RSET_TERM t = xmalloc (sizeof(*t)); + if (!name) + t->name = NULL; + else if (length == -1) + t->name = xstrdup (name); + else + { + t->name = xmalloc (length+1); + memcpy (t->name, name, length); + t->name[length] = '\0'; + } + if (!flags) + t->flags = NULL; + else + t->flags = xstrdup (flags); + t->nn = 1; + return t; +} + +void rset_term_destroy (RSET_TERM t) +{ + xfree (t->name); + xfree (t->flags); + xfree (t); +} + +RSET_TERM rset_term_dup (RSET_TERM t) +{ + RSET_TERM nt = xmalloc (sizeof(*nt)); + if (t->name) + nt->name = xstrdup (t->name); + else + nt->name = NULL; + if (t->flags) + nt->flags = xstrdup (t->flags); + else + nt->flags = NULL; + nt->nn = t->nn; + return nt; +} diff --git a/rset/rsisam.c b/rset/rsisam.c index b2ad821..3cc42c1 100644 --- a/rset/rsisam.c +++ b/rset/rsisam.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1995, Index Data I/S + * Copyright (C) 1994-1998,
Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rsisam.c,v $ - * Revision 1.17 1997-12-18 10:54:25 adam + * Revision 1.18 1998-03-05 08:36:28 adam + * New result set model. + * + * Revision 1.17 1997/12/18 10:54:25 adam * New method result set method rs_hits that returns the number of * hits in result-set (if known). The ranked result set returns real * number of hits but only when not combined with other operands. @@ -69,19 +72,16 @@ #include #include -static void *r_create(const struct rset_control *sel, void *parms, - int *flags); +static void *r_create(RSET ct, const struct rset_control *sel, void *parms); static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); static int r_count (RSET ct); -static int r_hits (RSET ct, void *oi); -static int r_read (RSFD rfd, void *buf); +static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); -static const rset_control control = +static const struct rset_control control = { "isam", r_create, @@ -90,13 +90,11 @@ static const rset_control control = r_delete, r_rewind, r_count, - r_hits, r_read, r_write, - r_score }; -const rset_control *rset_kind_isam = &control; +const struct rset_control *rset_kind_isam = &control; struct rset_ispt_info { ISPT pt; @@ -110,17 +108,20 @@ struct rset_isam_info { struct rset_ispt_info *ispt_list; }; -static void *r_create(const struct rset_control *sel, void *parms, - int *flags) +static void *r_create(RSET ct, const struct rset_control *sel, void *parms) { rset_isam_parms *pt = parms; struct rset_isam_info *info; - *flags |= RSET_FLAG_VOLATILE; + ct->flags |= RSET_FLAG_VOLATILE; info = xmalloc (sizeof(struct rset_isam_info)); info->is = pt->is; info->pos = pt->pos; info->ispt_list = NULL; + + ct->no_rset_terms = 1; + ct->rset_terms = xmalloc (sizeof(*ct->rset_terms)); + ct->rset_terms[0] = 
pt->rset_term; return info; } @@ -140,6 +141,9 @@ RSFD r_open (RSET ct, int flag) info->ispt_list = ptinfo; ptinfo->pt = is_position (info->is, info->pos); ptinfo->info = info; + + if (ct->rset_terms[0]->nn < 0) + ct->rset_terms[0]->nn = is_numkeys (ptinfo->pt); return ptinfo; } @@ -166,6 +170,8 @@ static void r_delete (RSET ct) logf (LOG_DEBUG, "rsisam_delete"); assert (info->ispt_list == NULL); + rset_term_destroy (ct->rset_terms[0]); + xfree (ct->rset_terms); xfree (info); } @@ -180,13 +186,9 @@ static int r_count (RSET ct) return 0; } -static int r_hits (RSET ct, void *oi) -{ - return -1; -} - -static int r_read (RSFD rfd, void *buf) +static int r_read (RSFD rfd, void *buf, int *term_index) { + *term_index = 0; return is_readkey( ((struct rset_ispt_info*) rfd)->pt, buf); } @@ -195,9 +197,3 @@ static int r_write (RSFD rfd, const void *buf) logf (LOG_FATAL, "ISAM set type is read-only"); return -1; } - -static int r_score (RSFD rfd, int *score) -{ - *score = -1; - return -1; -} diff --git a/rset/rsisamc.c b/rset/rsisamc.c index fb429ad..7fad179 100644 --- a/rset/rsisamc.c +++ b/rset/rsisamc.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1996, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rsisamc.c,v $ - * Revision 1.4 1997-12-18 10:54:25 adam + * Revision 1.5 1998-03-05 08:36:28 adam + * New result set model. + * + * Revision 1.4 1997/12/18 10:54:25 adam * New method result set method rs_hits that returns the number of * hits in result-set (if known). The ranked result set returns real * number of hits but only when not combined with other operands. 
@@ -25,19 +28,16 @@ #include #include -static void *r_create(const struct rset_control *sel, void *parms, - int *flags); +static void *r_create(RSET ct, const struct rset_control *sel, void *parms); static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); static int r_count (RSET ct); -static int r_hits (RSET ct, void *oi); -static int r_read (RSFD rfd, void *buf); +static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); -static const rset_control control = +static const struct rset_control control = { "isamc", r_create, @@ -46,13 +46,11 @@ static const rset_control control = r_delete, r_rewind, r_count, - r_hits, r_read, r_write, - r_score }; -const rset_control *rset_kind_isamc = &control; +const struct rset_control *rset_kind_isamc = &control; struct rset_pp_info { ISAMC_PP pt; @@ -66,17 +64,19 @@ struct rset_isamc_info { struct rset_pp_info *ispt_list; }; -static void *r_create(const struct rset_control *sel, void *parms, - int *flags) +static void *r_create(RSET ct, const struct rset_control *sel, void *parms) { rset_isamc_parms *pt = parms; struct rset_isamc_info *info; - *flags |= RSET_FLAG_VOLATILE; + ct->flags |= RSET_FLAG_VOLATILE; info = xmalloc (sizeof(*info)); info->is = pt->is; info->pos = pt->pos; info->ispt_list = NULL; + ct->no_rset_terms = 1; + ct->rset_terms = xmalloc (sizeof(*ct->rset_terms)); + ct->rset_terms[0] = pt->rset_term; return info; } @@ -96,6 +96,8 @@ RSFD r_open (RSET ct, int flag) info->ispt_list = ptinfo; ptinfo->pt = isc_pp_open (info->is, info->pos); ptinfo->info = info; + if (ct->rset_terms[0]->nn < 0) + ct->rset_terms[0]->nn = isc_pp_num (ptinfo->pt); return ptinfo; } @@ -122,6 +124,8 @@ static void r_delete (RSET ct) logf (LOG_DEBUG, "rsisamc_delete"); assert (info->ispt_list == NULL); + rset_term_destroy (ct->rset_terms[0]); + xfree (ct->rset_terms); 
xfree (info); } @@ -136,13 +140,9 @@ static int r_count (RSET ct) return 0; } -static int r_hits (RSET ct, void *oi) -{ - return -1; -} - -static int r_read (RSFD rfd, void *buf) +static int r_read (RSFD rfd, void *buf, int *term_index) { + *term_index = 0; return isc_pp_read( ((struct rset_pp_info*) rfd)->pt, buf); } @@ -151,9 +151,3 @@ static int r_write (RSFD rfd, const void *buf) logf (LOG_FATAL, "ISAMC set type is read-only"); return -1; } - -static int r_score (RSFD rfd, int *score) -{ - *score = -1; - return -1; -} diff --git a/rset/rsm_or.c b/rset/rsm_or.c index aa61ba3..72f09fa 100644 --- a/rset/rsm_or.c +++ b/rset/rsm_or.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1996, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rsm_or.c,v $ - * Revision 1.5 1997-12-18 10:54:25 adam + * Revision 1.6 1998-03-05 08:36:28 adam + * New result set model. + * + * Revision 1.5 1997/12/18 10:54:25 adam * New method result set method rs_hits that returns the number of * hits in result-set (if known). The ranked result set returns real * number of hits but only when not combined with other operands. 
@@ -33,19 +36,16 @@ #include #include -static void *r_create(const struct rset_control *sel, void *parms, - int *flags); +static void *r_create(RSET ct, const struct rset_control *sel, void *parms); static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); static int r_count (RSET ct); -static int r_hits (RSET ct, void *oi); -static int r_read (RSFD rfd, void *buf); +static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); -static const rset_control control = +static const struct rset_control control = { "multi-or", r_create, @@ -54,13 +54,11 @@ static const rset_control control = r_delete, r_rewind, r_count, - r_hits, r_read, r_write, - r_score }; -const rset_control *rset_kind_m_or = &control; +const struct rset_control *rset_kind_m_or = &control; struct rset_mor_info { int key_size; @@ -179,14 +177,12 @@ static void heap_close (struct trunc_info *ti) xfree (ti); } - -static void *r_create (const struct rset_control *sel, void *parms, - int *flags) +static void *r_create (RSET ct, const struct rset_control *sel, void *parms) { rset_m_or_parms *r_parms = parms; struct rset_mor_info *info; - *flags |= RSET_FLAG_VOLATILE; + ct->flags |= RSET_FLAG_VOLATILE; info = xmalloc (sizeof(*info)); info->key_size = r_parms->key_size; assert (info->key_size > 1); @@ -202,6 +198,9 @@ static void *r_create (const struct rset_control *sel, void *parms, sizeof(*info->isam_positions) * info->no_isam_positions); info->rfd_list = NULL; + ct->no_rset_terms = 1; + ct->rset_terms = xmalloc (sizeof(*ct->rset_terms)); + ct->rset_terms[0] = rset_term_dup (r_parms->rset_term); return info; } @@ -226,9 +225,13 @@ static RSFD r_open (RSET ct, int flag) rfd->ti = heap_init (info->no_isam_positions, info->key_size, info->cmp); + ct->rset_terms[0]->nn = 0; for (i = 0; i < info->no_isam_positions; i++) { rfd->ispt[i] = isc_pp_open
(info->isc, info->isam_positions[i]); + + ct->rset_terms[0]->nn += isc_pp_num (rfd->ispt[i]); + if (isc_pp_read (rfd->ispt[i], rfd->ti->tmpbuf)) heap_insert (rfd->ti, rfd->ti->tmpbuf, i); else @@ -268,9 +271,15 @@ static void r_close (RSFD rfd) static void r_delete (RSET ct) { struct rset_mor_info *info = ct->buf; + int i; assert (info->rfd_list == NULL); xfree (info->isam_positions); + + for (i = 0; i < ct->no_rset_terms; i++) + rset_term_destroy (ct->rset_terms[i]); + xfree (ct->rset_terms); + xfree (info); } @@ -283,18 +292,14 @@ static int r_count (RSET ct) return 0; } -static int r_hits (RSET ct, void *oi) -{ - return -1; -} - -static int r_read (RSFD rfd, void *buf) +static int r_read (RSFD rfd, void *buf, int *term_index) { struct trunc_info *ti = ((struct rset_mor_rfd *) rfd)->ti; int n = ti->indx[ti->ptr[1]]; if (!ti->heapnum) return 0; + *term_index = 0; memcpy (buf, ti->heap[ti->ptr[1]], ti->keysize); if (((struct rset_mor_rfd *) rfd)->position) { @@ -326,12 +331,6 @@ static int r_read (RSFD rfd, void *buf) return 1; } -static int r_score (RSFD rfd, int *score) -{ - *score = -1; - return -1; -} - static int r_write (RSFD rfd, const void *buf) { logf (LOG_FATAL, "mor set type is read-only"); diff --git a/rset/rsnull.c b/rset/rsnull.c index 5ba6a4f..578dc7c 100644 --- a/rset/rsnull.c +++ b/rset/rsnull.c @@ -4,7 +4,10 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: rsnull.c,v $ - * Revision 1.9 1997-12-18 10:54:25 adam + * Revision 1.10 1998-03-05 08:36:28 adam + * New result set model. + * + * Revision 1.9 1997/12/18 10:54:25 adam * New method result set method rs_hits that returns the number of * hits in result-set (if known). The ranked result set returns real * number of hits but only when not combined with other operands.
@@ -47,19 +50,16 @@ #include #include -static void *r_create(const struct rset_control *sel, void *parms, - int *flags); +static void *r_create(RSET ct, const struct rset_control *sel, void *parms); static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); static int r_count (RSET ct); -static int r_hits (RSET ct, void *oi); -static int r_read (RSFD rfd, void *buf); +static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); -static const rset_control control = +static const struct rset_control control = { "null", r_create, @@ -68,17 +68,24 @@ static const rset_control control = r_delete, r_rewind, r_count, - r_hits, r_read, r_write, - r_score }; -const rset_control *rset_kind_null = &control; +const struct rset_control *rset_kind_null = &control; -static void *r_create(const struct rset_control *sel, void *parms, - int *flags) +static void *r_create(RSET ct, const struct rset_control *sel, void *parms) { + rset_null_parms *null_parms = parms; + + ct->no_rset_terms = 1; + ct->rset_terms = xmalloc (sizeof(*ct->rset_terms)); + if (parms) + ct->rset_terms[0] = null_parms->rset_term; + else + ct->rset_terms[0] = rset_term_create ("term", -1, "rank-0"); + ct->rset_terms[0]->nn = 0; + return NULL; } @@ -98,6 +105,8 @@ static void r_close (RSFD rfd) static void r_delete (RSET ct) { + rset_term_destroy (ct->rset_terms[0]); + xfree (ct->rset_terms); } static void r_rewind (RSFD rfd) @@ -110,13 +119,9 @@ static int r_count (RSET ct) return 0; } -static int r_hits (RSET ct, void *oi) -{ - return 0; -} - -static int r_read (RSFD rfd, void *buf) +static int r_read (RSFD rfd, void *buf, int *term_index) { + *term_index = -1; return 0; } @@ -126,9 +131,3 @@ static int r_write (RSFD rfd, const void *buf) return -1; } -static int r_score (RSFD rfd, int *score) -{ - *score = -1; - return -1; -} - diff --git 
a/rset/rsrel.c b/rset/rsrel.c deleted file mode 100644 index a02fb23..0000000 --- a/rset/rsrel.c +++ /dev/null @@ -1,557 +0,0 @@ -/* - * Copyright (C) 1994-1998, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: rsrel.c,v $ - * Revision 1.23 1998-01-07 13:53:41 adam - * Queries using simple ranked operands returns right number of hits. - * - * Revision 1.22 1997/12/18 10:54:25 adam - * New method result set method rs_hits that returns the number of - * hits in result-set (if known). The ranked result set returns real - * number of hits but only when not combined with other operands. - * - * Revision 1.21 1997/11/18 10:05:08 adam - * Changed character map facility so that admin can specify character - * mapping files for each register type, w, p, etc. - * - * Revision 1.20 1997/10/31 12:37:55 adam - * Code calls xfree() instead of free(). - * - * Revision 1.19 1997/10/01 11:44:06 adam - * Small improvement of new ranking. - * - * Revision 1.18 1997/09/24 13:36:41 adam - * More work on new ranking algorithm. - * - * Revision 1.17 1997/09/22 12:39:07 adam - * Added get_pos method for the ranked result sets. - * - * Revision 1.16 1997/09/17 12:19:23 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.15 1997/09/09 13:38:16 adam - * Partial port to WIN95/NT. - * - * Revision 1.14 1996/11/08 11:15:58 adam - * Compressed isam fully supported. - * - * Revision 1.13 1996/10/29 13:55:26 adam - * Include of zebrautl.h instead of alexutil.h. - * - * Revision 1.12 1996/10/08 13:00:40 adam - * Bug fix: result sets with ranked operands in boolean operations weren't - * sorted. - * - * Revision 1.11 1996/10/07 16:05:29 quinn - * Work. - * - * Revision 1.9 1995/12/11 09:15:26 adam - * New set types: sand/sor/snot - ranked versions of and/or/not in - * ranked/semi-ranked result sets. - * Note: the snot not finished yet. 
- * New rset member: flag. - * Bug fix: r_delete in rsrel.c did free bad memory block. - * - * Revision 1.8 1995/12/05 11:25:45 adam - * Doesn't include math.h. - * - * Revision 1.7 1995/10/12 12:41:57 adam - * Private info (buf) moved from struct rset_control to struct rset. - * Bug fixes in relevance. - * - * Revision 1.6 1995/10/10 14:00:04 adam - * Function rset_open changed its wflag parameter to general flags. - * - * Revision 1.5 1995/10/06 14:38:06 adam - * New result set method: r_score. - * Local no (sysno) and score is transferred to retrieveCtrl. - * - * Revision 1.4 1995/09/14 07:48:56 adam - * Other score calculation. - * - * Revision 1.3 1995/09/11 15:23:40 adam - * More work on relevance search. - * - * Revision 1.2 1995/09/11 13:09:41 adam - * More work on relevance feedback. - * - * Revision 1.1 1995/09/08 14:52:42 adam - * Work on relevance feedback. - * - */ - -#include -#include -#include -#include - -#include -#include -#include -#include - -static void *r_create(const struct rset_control *sel, void *parms, - int *flags); -static RSFD r_open (RSET ct, int flag); -static void r_close (RSFD rfd); -static void r_delete (RSET ct); -static void r_rewind (RSFD rfd); -static int r_count (RSET ct); -static int r_hits (RSET ct, void *oi); -static int r_read (RSFD rfd, void *buf); -static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); - -static const rset_control control = -{ - "relevance", - r_create, - r_open, - r_close, - r_delete, - r_rewind, - r_count, - r_hits, - r_read, - r_write, - r_score -}; - -const rset_control *rset_kind_relevance = &control; - -struct rset_rel_info { - int key_size; - int max_rec; - int no_rec; - int hits; /* hits count */ - int (*cmp)(const void *p1, const void *p2); - int (*get_pos)(const void *p); - char *key_buf; /* key buffer */ - float *score_buf; /* score buffer */ - int *sort_idx; /* score sorted index */ - int *sysno_idx; /* sysno sorted index (ring buffer) */ - int 
no_isam_positions; - ISAM is; - ISAMC isc; - ISAM_P *isam_positions; - int no_terms; - int *term_no; - - int method; - struct rset_rel_rfd *rfd_list; -}; - -struct rset_rel_rfd { - int last_read_pos; - int position; - int flag; - struct rset_rel_rfd *next; - struct rset_rel_info *info; -}; - -static void add_rec (struct rset_rel_info *info, double score, void *key) -{ - int idx, i, j; - - (info->hits)++; - for (i = 0; i<info->no_rec; i++) - { - idx = info->sort_idx[i]; - if (score <= info->score_buf[idx]) - break; - } - if (info->no_rec < info->max_rec) - { /* there is room for this entry */ - for (j = info->no_rec; j > i; --j) - info->sort_idx[j] = info->sort_idx[j-1]; - idx = info->sort_idx[j] = info->no_rec; - ++(info->no_rec); - } - else if (i == 0) - return; /* score too low */ - else - { - idx = info->sort_idx[0]; /* remove this entry */ - - --i; - for (j = 0; j < i; ++j) /* make room */ - info->sort_idx[j] = info->sort_idx[j+1]; - info->sort_idx[j] = idx; /* allocate sort entry */ - } - memcpy (info->key_buf + idx*info->key_size, key, info->key_size); - info->score_buf[idx] = score; -} - - -static struct rset_rel_info *qsort_info; - -static int qcomp (const void *p1, const void *p2) -{ - int i1 = *(int*) p1; - int i2 = *(int*) p2; - - return qsort_info->cmp (qsort_info->key_buf + i1*qsort_info->key_size, - qsort_info->key_buf + i2*qsort_info->key_size); -} - -#define NEW_RANKING 0 - -#define SCORE_SHOW 0.0 /* base score for showing up */ -#define SCORE_COOC 0.3 /* component dependent on co-oc */ -#define SCORE_DYN (1-(SCORE_SHOW+SCORE_COOC)) /* dynamic component of score */ - -static void relevance (struct rset_rel_info *info) -{ - char **isam_buf; - char *isam_tmp_buf; - int *isam_r; - int *max_tf, *tf; - - int *pos_tf = NULL; - int score_sum = 0; - int no_occur = 0; - char *isam_prev_buf = NULL; - int fact1, fact2; - - ISPT *isam_pt = NULL; - ISAMC_PP *isamc_pp = NULL; - int i; - - logf (LOG_DEBUG, "relevance"); - isam_buf = xmalloc (info->no_isam_positions * 
sizeof(*isam_buf)); - isam_r = xmalloc (sizeof (*isam_r) * info->no_isam_positions); - if (info->is) - isam_pt = xmalloc (sizeof (*isam_pt) * info->no_isam_positions); - else if (info->isc) - isamc_pp = xmalloc (sizeof (*isamc_pp) * info->no_isam_positions); - else - { - logf (LOG_FATAL, "No isamc or isam in rs_rel"); - abort (); - } - isam_tmp_buf = xmalloc (info->key_size); - max_tf = xmalloc (sizeof (*max_tf) * info->no_terms); - tf = xmalloc (sizeof (*tf) * info->no_terms); - - for (i = 0; i<info->no_terms; i++) - max_tf[i] = 0; - for (i = 0; i < info->no_isam_positions; i++) - { - isam_buf[i] = xmalloc (info->key_size); - if (isam_pt) - { - isam_pt[i] = is_position (info->is, info->isam_positions[i]); - max_tf [info->term_no[i]] = is_numkeys (isam_pt[i]); - isam_r[i] = is_readkey (isam_pt[i], isam_buf[i]); - } - else if (isamc_pp) - { - isamc_pp[i] = isc_pp_open (info->isc, info->isam_positions[i]); - max_tf [info->term_no[i]] = isc_pp_num (isamc_pp[i]); - isam_r[i] = isc_pp_read (isamc_pp[i], isam_buf[i]); - } - logf (LOG_DEBUG, "max tf %d = %d", i, max_tf[i]); - } - switch (info->method) - { - case RSREL_METHOD_B: - while (1) - { - int r, min = -1; - int pos = 0; - for (i = 0; i<info->no_isam_positions; i++) - if (isam_r[i] && - (min < 0 || - (r = (*info->cmp)(isam_buf[i], isam_buf[min])) < 1)) - min = i; - if (!isam_prev_buf) - { - pos_tf = xmalloc (sizeof(*pos_tf) * info->no_isam_positions); - isam_prev_buf = xmalloc (info->key_size); - fact1 = 100000/info->no_isam_positions; - fact2 = 100000/ - (info->no_isam_positions*info->no_isam_positions); - - no_occur = score_sum = 0; - memcpy (isam_prev_buf, isam_buf[min], info->key_size); - for (i = 0; i<info->no_isam_positions; i++) - pos_tf[i] = -10; - } - else if (min < 0 || - (*info->cmp)(isam_buf[min], isam_prev_buf) > 1) - { - logf (LOG_LOG, "final occur = %d ratio=%d", - no_occur, score_sum / no_occur); - add_rec (info, score_sum / (10000.0*no_occur), isam_prev_buf); - if (min < 0) - break; - no_occur = score_sum = 0; - memcpy 
(isam_prev_buf, isam_buf[min], info->key_size); - for (i = 0; i<info->no_isam_positions; i++) - pos_tf[i] = -10; - } - pos = (*info->get_pos)(isam_buf[min]); - logf (LOG_LOG, "pos=%d", pos); - for (i = 0; i<info->no_isam_positions; i++) - { - int d = pos - pos_tf[i]; - - no_occur++; - if (pos_tf[i] < 0 && i != min) - continue; - if (d < 10) - d = 10; - if (i == min) - score_sum += fact2 / d; - else - score_sum += fact1 / d; - } - pos_tf[min] = pos; - logf (LOG_LOG, "score_sum = %d", score_sum); - i = min; - if (isam_pt) - isam_r[i] = is_readkey (isam_pt[i], isam_buf[i]); - else if (isamc_pp) - isam_r[i] = isc_pp_read (isamc_pp[i], isam_buf[i]); - } /* while */ - xfree (isam_prev_buf); - xfree (pos_tf); - break; - case RSREL_METHOD_A: - while (1) - { - int min = -1, i, r; - double score; - int co_oc, last_term; /* Number of co-occurrences */ - - last_term = -1; - /* find min with lowest sysno */ - for (i = 0; i<info->no_isam_positions; i++) - { - if (isam_r[i] && - (min < 0 - || (r = (*info->cmp)(isam_buf[i], isam_buf[min])) < 2)) - { - min = i; - co_oc = 1; - } - else if (!r && last_term != info->term_no[i]) - co_oc++; /* new occurrence */ - last_term = info->term_no[i]; - } - - if (min < 0) - break; - memcpy (isam_tmp_buf, isam_buf[min], info->key_size); - /* calculate for all with those sysno */ - for (i = 0; i < info->no_terms; i++) - tf[i] = 0; - for (i = 0; i<info->no_isam_positions; i++) - { - int r; - - if (isam_r[i]) - r = (*info->cmp)(isam_buf[i], isam_tmp_buf); - else - r = 2; - if (r <= 1 && r >= -1) - { - do - { - tf[info->term_no[i]]++; - if (isam_pt) - isam_r[i] = is_readkey (isam_pt[i], isam_buf[i]); - else if (isamc_pp) - isam_r[i] = isc_pp_read (isamc_pp[i], isam_buf[i]); - } while (isam_r[i] && - (*info->cmp)(isam_buf[i], isam_tmp_buf) <= 1); - } - } - /* calculate relevance value */ - score = 0.0; - for (i = 0; i<info->no_terms; i++) - if (tf[i]) - score += SCORE_SHOW + SCORE_COOC*co_oc/info->no_terms + - SCORE_DYN*tf[i]/max_tf[i]; - /* if value is in the top score, then save it - 
don't emit yet */ - add_rec (info, score/info->no_terms, isam_tmp_buf); - } /* while */ - break; - } /* switch */ - for (i = 0; i<info->no_rec; i++) - info->sysno_idx[i] = i; - qsort_info = info; - qsort (info->sysno_idx, info->no_rec, sizeof(*info->sysno_idx), qcomp); - for (i = 0; i<info->no_isam_positions; i++) - { - if (isam_pt) - is_pt_free (isam_pt[i]); - if (isamc_pp) - isc_pp_close (isamc_pp[i]); - xfree (isam_buf[i]); - } - xfree (max_tf); - xfree (isam_tmp_buf); - xfree (isam_buf); - xfree (isam_r); - xfree (isam_pt); - xfree (isamc_pp); - xfree(tf); -} - -static void *r_create (const struct rset_control *sel, void *v_parms, - int *flags) -{ - rset_relevance_parms *parms = v_parms; - struct rset_rel_info *info; - - *flags |= RSET_FLAG_RANKED; - info = xmalloc (sizeof(struct rset_rel_info)); - info->key_size = parms->key_size; - assert (info->key_size > 1); - info->max_rec = parms->max_rec; - assert (info->max_rec > 1); - info->cmp = parms->cmp; - info->get_pos = parms->get_pos; - - info->method = parms->method; - info->no_isam_positions = parms->no_isam_positions; - info->isam_positions = - xmalloc (sizeof(*info->isam_positions)*parms->no_isam_positions); - memcpy (info->isam_positions, parms->isam_positions, - sizeof(*info->isam_positions) * parms->no_isam_positions); - info->is = parms->is; - info->isc = parms->isc; - info->no_terms = parms->no_terms; - info->term_no = xmalloc (sizeof(*info->term_no)*parms->no_isam_positions); - memcpy (info->term_no, parms->term_no, - sizeof(*info->term_no)*parms->no_isam_positions); - - info->key_buf = xmalloc (info->key_size * info->max_rec); - info->score_buf = xmalloc (sizeof(*info->score_buf) * info->max_rec); - info->sort_idx = xmalloc (sizeof(*info->sort_idx) * info->max_rec); - info->sysno_idx = xmalloc (sizeof(*info->sysno_idx) * info->max_rec); - info->no_rec = 0; - info->hits = 0; - info->rfd_list = NULL; - - relevance (info); - return info; -} - -static RSFD r_open (RSET ct, int flag) -{ - struct rset_rel_rfd *rfd; - 
struct rset_rel_info *info = ct->buf; - - if (flag & RSETF_WRITE) - { - logf (LOG_FATAL, "relevance set type is read-only"); - return NULL; - } - rfd = xmalloc (sizeof(*rfd)); - rfd->flag = flag; - rfd->next = info->rfd_list; - rfd->info = info; - info->rfd_list = rfd; - r_rewind (rfd); - return rfd; -} - -static void r_close (RSFD rfd) -{ - struct rset_rel_info *info = ((struct rset_rel_rfd*)rfd)->info; - struct rset_rel_rfd **rfdp; - - for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next) - if (*rfdp == rfd) - { - *rfdp = (*rfdp)->next; - xfree (rfd); - return; - } - logf (LOG_FATAL, "r_close but no rfd match!"); - assert (0); -} - -static void r_delete (RSET ct) -{ - struct rset_rel_info *info = ct->buf; - - assert (info->rfd_list == NULL); - xfree (info->key_buf); - xfree (info->score_buf); - xfree (info->sort_idx); - xfree (info->sysno_idx); - xfree (info->isam_positions); - xfree (info->term_no); - xfree (info); -} - -static void r_rewind (RSFD rfd) -{ - struct rset_rel_rfd *p = rfd; - struct rset_rel_info *info = p->info; - - if (p->flag & RSETF_SORT_RANK) - p->position = info->no_rec; - else - p->position = 0; -} - -static int r_count (RSET ct) -{ - struct rset_rel_info *info = ct->buf; - - return info->no_rec; -} - -static int r_hits (RSET ct, void *oi) -{ - struct rset_rel_info *info = ct->buf; - - return info->hits; -} - -static int r_read (RSFD rfd, void *buf) -{ - struct rset_rel_rfd *p = rfd; - struct rset_rel_info *info = p->info; - - if (p->flag & RSETF_SORT_RANK) - { - if (p->position <= 0) - return 0; - --(p->position); - p->last_read_pos = info->sort_idx[p->position]; - } - else - { - if (p->position == info->no_rec) - return 0; - p->last_read_pos = info->sysno_idx[p->position]; - ++(p->position); - } - memcpy ((char*) buf, - info->key_buf + info->key_size * p->last_read_pos, - info->key_size); - return 1; -} - -static int r_score (RSFD rfd, int *score) -{ - struct rset_rel_rfd *p = rfd; - struct rset_rel_info *info = p->info; - - *score = 
(int) (1000*info->score_buf[p->last_read_pos]); - return 1; -} - -static int r_write (RSFD rfd, const void *buf) -{ - logf (LOG_FATAL, "relevance set type is read-only"); - return -1; -} diff --git a/rset/rssbool.c b/rset/rssbool.c deleted file mode 100644 index d34f3c4..0000000 --- a/rset/rssbool.c +++ /dev/null @@ -1,468 +0,0 @@ -/* - * Copyright (C) 1994-1996, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: rssbool.c,v $ - * Revision 1.6 1997-12-18 10:54:25 adam - * New method result set method rs_hits that returns the number of - * hits in result-set (if known). The ranked result set returns real - * number of hits but only when not combined with other operands. - * - * Revision 1.5 1997/09/09 13:38:16 adam - * Partial port to WIN95/NT. - * - * Revision 1.4 1996/10/29 13:55:27 adam - * Include of zebrautl.h instead of alexutil.h. - * - * Revision 1.3 1996/10/08 13:00:41 adam - * Bug fix: result sets with ranked operands in boolean operations weren't - * sorted. - * - * Revision 1.2 1996/05/15 18:35:17 adam - * Implemented snot operation. - * - * Revision 1.1 1995/12/11 09:15:27 adam - * New set types: sand/sor/snot - ranked versions of and/or/not in - * ranked/semi-ranked result sets. - * Note: the snot not finished yet. - * New rset member: flag. - * Bug fix: r_delete in rsrel.c did free bad memory block. 
- * - */ - -#include -#include -#include - -#include -#include - -static void *r_create_and(const struct rset_control *sel, void *parms, - int *flags); -static void *r_create_or(const struct rset_control *sel, void *parms, - int *flags); -static void *r_create_not(const struct rset_control *sel, void *parms, - int *flags); -static RSFD r_open (RSET ct, int flag); -static void r_close (RSFD rfd); -static void r_delete (RSET ct); -static void r_rewind (RSFD rfd); -static int r_count (RSET ct); -static int r_hits (RSET ct, void *oi); -static int r_read (RSFD rfd, void *buf); -static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); - -static const rset_control control_sand = -{ - "sand", - r_create_and, - r_open, - r_close, - r_delete, - r_rewind, - r_count, - r_hits, - r_read, - r_write, - r_score -}; - -static const rset_control control_sor = -{ - "sor", - r_create_or, - r_open, - r_close, - r_delete, - r_rewind, - r_count, - r_hits, - r_read, - r_write, - r_score -}; - -static const rset_control control_snot = -{ - "snot", - r_create_not, - r_open, - r_close, - r_delete, - r_rewind, - r_count, - r_hits, - r_read, - r_write, - r_score -}; - - -const rset_control *rset_kind_sand = &control_sand; -const rset_control *rset_kind_sor = &control_sor; -const rset_control *rset_kind_snot = &control_snot; - -struct rset_bool_info { - int key_size; - RSET rset_l; - RSET rset_r; - char *key_buf; - int *score_buf; - int *score_idx; - int key_no; - int key_max; - int (*cmp)(const void *p1, const void *p2); - struct rset_bool_rfd *rfd_list; -}; - -struct rset_bool_rfd { - struct rset_bool_rfd *next; - struct rset_bool_info *info; - int position; - int last_pos; - int flag; -}; - -static void *r_create_common (const struct rset_control *sel, - rset_bool_parms *bool_parms, int *flags); - -static struct rset_bool_info *qsort_info; - -static int qcomp (const void *p1, const void *p2) -{ - int i1 = *(int*) p1; - int i2 = *(int*) p2; - return 
qsort_info->score_buf[i2] - qsort_info->score_buf[i1]; -} - -static void key_add (struct rset_bool_info *info, - char *buf, int score) -{ - if (info->key_no == info->key_max) - return; - memcpy (info->key_buf + info->key_size * info->key_no, - buf, info->key_size); - info->score_buf[info->key_no] = score; - info->score_idx[info->key_no] = info->key_no; - (info->key_no)++; -} - -static void *r_create_and (const struct rset_control *sel, void *parms, - int *flags) -{ - int more_l, more_r; - RSFD fd_l, fd_r; - char *buf_l, *buf_r; - - struct rset_bool_info *info; - info = r_create_common (sel, parms, flags); - - buf_l = xmalloc (info->key_size); - buf_r = xmalloc (info->key_size); - fd_l = rset_open (info->rset_l, RSETF_SORT_SYSNO|RSETF_READ); - fd_r = rset_open (info->rset_r, RSETF_SORT_SYSNO|RSETF_READ); - - more_l = rset_read(info->rset_l, fd_l, buf_l); - more_r = rset_read(info->rset_r, fd_r, buf_r); - - while (more_l || more_r) - { - int cmp; - int score, score_l, score_r; - - if (more_l && more_r) - cmp = (*info->cmp)(buf_l, buf_r); - else if (more_r) - cmp = 2; - else - cmp = -2; - - if (cmp >= -1 && cmp <= 1) - { - rset_score (info->rset_l, fd_l, &score_l); - rset_score (info->rset_r, fd_r, &score_r); - if (score_l == -1) - score = score_r; - else if (score_r == -1) - score = score_l; - else - score = score_l > score_r ? 
score_r : score_l; - key_add (info, buf_l, score); - - more_l = rset_read (info->rset_l, fd_l, buf_l); - more_r = rset_read (info->rset_r, fd_r, buf_r); - } - else if (cmp > 1) - more_r = rset_read (info->rset_r, fd_r, buf_r); - else - more_l = rset_read (info->rset_l, fd_l, buf_l); - } - rset_close (info->rset_l, fd_l); - rset_close (info->rset_r, fd_r); - rset_delete (info->rset_l); - rset_delete (info->rset_r); - xfree (buf_l); - xfree (buf_r); - qsort_info = info; - qsort (info->score_idx, info->key_no, sizeof(*info->score_idx), qcomp); - return info; -} - -static void *r_create_or (const struct rset_control *sel, void *parms, - int *flags) -{ - int more_l, more_r; - RSFD fd_l, fd_r; - char *buf_l, *buf_r; - - struct rset_bool_info *info; - info = r_create_common (sel, parms, flags); - - buf_l = xmalloc (info->key_size); - buf_r = xmalloc (info->key_size); - fd_l = rset_open (info->rset_l, RSETF_SORT_SYSNO|RSETF_READ); - fd_r = rset_open (info->rset_r, RSETF_SORT_SYSNO|RSETF_READ); - - more_l = rset_read(info->rset_l, fd_l, buf_l); - more_r = rset_read(info->rset_r, fd_r, buf_r); - - while (more_l || more_r) - { - int cmp; - int score, score_l, score_r; - - if (more_l && more_r) - cmp = (*info->cmp)(buf_l, buf_r); - else if (more_r) - cmp = 2; - else - cmp = -2; - - if (cmp >= -1 && cmp <= 1) - { - rset_score (info->rset_l, fd_l, &score_l); - rset_score (info->rset_r, fd_r, &score_r); - if (score_l == -1) - score = score_r; - else if (score_r == -1) - score = score_l; - else - score = score_r > score_l ? 
score_r : score_l; - key_add (info, buf_l, score); - - more_l = rset_read (info->rset_l, fd_l, buf_l); - more_r = rset_read (info->rset_r, fd_r, buf_r); - } - else if (cmp > 1) - { - rset_score (info->rset_r, fd_r, &score_r); - if (score_r != -1) - key_add (info, buf_r, score_r / 2); - more_r = rset_read (info->rset_r, fd_r, buf_r); - } - else - { - rset_score (info->rset_l, fd_l, &score_l); - if (score_l != -1) - key_add (info, buf_l, score_l / 2); - more_l = rset_read (info->rset_l, fd_l, buf_l); - } - } - rset_close (info->rset_l, fd_l); - rset_close (info->rset_r, fd_r); - rset_delete (info->rset_l); - rset_delete (info->rset_r); - xfree (buf_l); - xfree (buf_r); - qsort_info = info; - qsort (info->score_idx, info->key_no, sizeof(*info->score_idx), qcomp); - return info; -} - -static void *r_create_not (const struct rset_control *sel, void *parms, - int *flags) -{ - char *buf_l, *buf_r; - int more_l, more_r; - RSFD fd_l, fd_r; - - struct rset_bool_info *info; - info = r_create_common (sel, parms, flags); - - buf_l = xmalloc (info->key_size); - buf_r = xmalloc (info->key_size); - - fd_l = rset_open (info->rset_l, RSETF_SORT_SYSNO|RSETF_READ); - fd_r = rset_open (info->rset_r, RSETF_SORT_SYSNO|RSETF_READ); - - more_l = rset_read(info->rset_l, fd_l, buf_l); - more_r = rset_read(info->rset_r, fd_r, buf_r); - - while (more_l || more_r) - { - int cmp; - int score; - - if (more_l && more_r) - cmp = (*info->cmp)(buf_l, buf_r); - else if (more_r) - cmp = 2; - else - cmp = -2; - - if (cmp >= -1 && cmp <= 1) - more_l = rset_read (info->rset_l, fd_l, buf_l); - else if (cmp > 1) - { - more_r = rset_read (info->rset_r, fd_r, buf_r); - } - else - { - rset_score (info->rset_l, fd_l, &score); - key_add (info, buf_l, score == -1 ? 
1 : score); - more_l = rset_read (info->rset_l, fd_l, buf_l); - } - } - rset_close (info->rset_l, fd_l); - rset_close (info->rset_r, fd_r); - - rset_delete (info->rset_l); - rset_delete (info->rset_r); - xfree (buf_l); - xfree (buf_r); - qsort_info = info; - qsort (info->score_idx, info->key_no, sizeof(*info->score_idx), qcomp); - return info; -} - -static void *r_create_common (const struct rset_control *sel, - rset_bool_parms *bool_parms, int *flags) -{ - struct rset_bool_info *info; - - info = xmalloc (sizeof(*info)); - info->key_size = bool_parms->key_size; - info->rset_l = bool_parms->rset_l; - info->rset_r = bool_parms->rset_r; - info->cmp = bool_parms->cmp; - info->rfd_list = NULL; - - if (rset_is_ranked(info->rset_l) || rset_is_ranked(info->rset_r)) - *flags |= RSET_FLAG_RANKED; - - info->key_max = rset_count(bool_parms->rset_l) - +rset_count(bool_parms->rset_r); - if (!info->key_max) - info->key_max = 1; - if (info->key_max > 1000) - info->key_max = 1000; - info->key_buf = xmalloc (info->key_size * info->key_max); - info->score_buf = xmalloc (info->key_max * sizeof(*info->score_buf)); - info->score_idx = xmalloc (info->key_max * sizeof(*info->score_idx)); - info->key_no = 0; - - return info; -} - -static RSFD r_open (RSET ct, int flag) -{ - struct rset_bool_info *info = ct->buf; - struct rset_bool_rfd *rfd; - - if (flag & RSETF_WRITE) - { - logf (LOG_FATAL, "sbool set type is read-only"); - return NULL; - } - rfd = xmalloc (sizeof(*rfd)); - rfd->next = info->rfd_list; - info->rfd_list = rfd; - rfd->info = info; - - rfd->position = 0; - rfd->last_pos = 0; - rfd->flag = flag; - - return rfd; -} - -static void r_close (RSFD rfd) -{ - struct rset_bool_info *info = ((struct rset_bool_rfd*)rfd)->info; - struct rset_bool_rfd **rfdp; - - for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next) - if (*rfdp == rfd) - { - *rfdp = (*rfdp)->next; - xfree (rfd); - return; - } - logf (LOG_FATAL, "r_close but no rfd match!"); - assert (0); -} - -static void r_delete 
(RSET ct) -{ - struct rset_bool_info *info = ct->buf; - - assert (info->rfd_list == NULL); - xfree (info->score_buf); - xfree (info->score_idx); - xfree (info->key_buf); - xfree (info); -} - -static void r_rewind (RSFD rfd) -{ - struct rset_bool_rfd *p = rfd; - - logf (LOG_DEBUG, "rsbool_rewind"); - p->position = p->last_pos = 0; -} - -static int r_count (RSET ct) -{ - struct rset_bool_info *info = ct->buf; - - return info->key_no; -} - -static int r_hits (RSET ct, void *oi) -{ - return -1; -} - -static int r_read (RSFD rfd, void *buf) -{ - struct rset_bool_rfd *p = rfd; - struct rset_bool_info *info = p->info; - - if (p->position >= info->key_no) - return 0; - if (p->flag & RSETF_SORT_RANK) - p->last_pos = info->score_idx[(p->position)++]; - else - p->last_pos = (p->position)++; - memcpy (buf, info->key_buf + info->key_size * p->last_pos, - info->key_size); - return 1; -} - -static int r_write (RSFD rfd, const void *buf) -{ - logf (LOG_FATAL, "sbool set type is read-only"); - return -1; -} - -static int r_score (RSFD rfd, int *score) -{ - struct rset_bool_rfd *p = rfd; - struct rset_bool_info *info = p->info; - - *score = info->score_buf[p->last_pos]; - return 1; -} - diff --git a/rset/rstemp.c b/rset/rstemp.c index f43e5e2..6110d2c 100644 --- a/rset/rstemp.c +++ b/rset/rstemp.c @@ -1,10 +1,13 @@ /* - * Copyright (C) 1994-1997, Index Data I/S + * Copyright (C) 1994-1998, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: rstemp.c,v $ - * Revision 1.23 1997-12-18 10:54:25 adam + * Revision 1.24 1998-03-05 08:36:28 adam + * New result set model. + * + * Revision 1.23 1997/12/18 10:54:25 adam * New method result set method rs_hits that returns the number of * hits in result-set (if known). The ranked result set returns real * number of hits but only when not combined with other operands. 
@@ -100,19 +103,16 @@ #include #include -static void *r_create(const struct rset_control *sel, void *parms, - int *flags); +static void *r_create(RSET ct, const struct rset_control *sel, void *parms); static RSFD r_open (RSET ct, int flag); static void r_close (RSFD rfd); static void r_delete (RSET ct); static void r_rewind (RSFD rfd); static int r_count (RSET ct); -static int r_hits (RSET ct, void *oi); -static int r_read (RSFD rfd, void *buf); +static int r_read (RSFD rfd, void *buf, int *term_index); static int r_write (RSFD rfd, const void *buf); -static int r_score (RSFD rfd, int *score); -static const rset_control control = +static const struct rset_control control = { "temp", r_create, @@ -121,13 +121,11 @@ static const rset_control control = r_delete, r_rewind, r_count, - r_hits, r_read, r_write, - r_score }; -const rset_control *rset_kind_temp = &control; +const struct rset_control *rset_kind_temp = &control; struct rset_temp_info { int fd; @@ -149,7 +147,7 @@ struct rset_temp_rfd { struct rset_temp_rfd *next; }; -static void *r_create(const struct rset_control *sel, void *parms, int *flags) +static void *r_create(RSET ct, const struct rset_control *sel, void *parms) { rset_temp_parms *temp_parms = parms; struct rset_temp_info *info; @@ -172,7 +170,9 @@ static void *r_create(const struct rset_control *sel, void *parms, int *flags) info->temp_path = xmalloc (strlen(temp_parms->temp_path)+1); strcpy (info->temp_path, temp_parms->temp_path); } - + ct->no_rset_terms = 1; + ct->rset_terms = xmalloc (sizeof(*ct->rset_terms)); + ct->rset_terms[0] = temp_parms->rset_term; return info; } @@ -276,6 +276,8 @@ static void r_delete (RSET ct) } if (info->temp_path) xfree (info->temp_path); + rset_term_destroy (ct->rset_terms[0]); + xfree (ct->rset_terms); xfree (info); } @@ -334,14 +336,7 @@ static int r_count (RSET ct) return info->pos_end / info->key_size; } -static int r_hits (RSET ct, void *oi) -{ - struct rset_temp_info *info = ct->buf; - - return info->hits; -} - 
-static int r_read (RSFD rfd, void *buf) +static int r_read (RSFD rfd, void *buf, int *term_index) { struct rset_temp_info *info = ((struct rset_temp_rfd*)rfd)->info; @@ -358,6 +353,7 @@ static int r_read (RSFD rfd, void *buf) memcpy (buf, info->buf_mem + (info->pos_cur - info->pos_buf), info->key_size); info->pos_cur = nc; + *term_index = 0; return 1; } @@ -382,9 +378,3 @@ static int r_write (RSFD rfd, const void *buf) info->pos_border = info->pos_end = nc; return 1; } - -static int r_score (RSFD rfd, int *score) -{ - *score = -1; - return -1; -}