From: Adam Dickmeiss Date: Tue, 4 Jul 2006 14:10:28 +0000 (+0000) Subject: Working on and operation which deals with segments (matches within fields) X-Git-Tag: ZEBRA.2.0.0~35 X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=commitdiff_plain;h=651974e51cda5f503b3cbecbe11370a329d6b7e4 Working on and operation which deals with segments (matches within fields) --- diff --git a/include/idzebra/recctrl.h b/include/idzebra/recctrl.h index 4c8a7cc..71c8deb 100644 --- a/include/idzebra/recctrl.h +++ b/include/idzebra/recctrl.h @@ -1,4 +1,4 @@ -/* $Id: recctrl.h,v 1.24 2006-06-13 20:03:33 adam Exp $ +/* $Id: recctrl.h,v 1.25 2006-07-04 14:10:29 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -169,6 +169,8 @@ RecType recType_byName(RecTypes rts, Res res, const char *name, void **clientDataP); +#define KEY_SEGMENT_SIZE 1024 + YAZ_END_CDECL #endif diff --git a/include/rset.h b/include/rset.h index ccc66ba..2fc5be4 100644 --- a/include/rset.h +++ b/include/rset.h @@ -1,4 +1,4 @@ -/* $Id: rset.h,v 1.59 2006-06-06 21:01:30 adam Exp $ +/* $Id: rset.h,v 1.60 2006-07-04 14:10:28 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -133,6 +133,7 @@ struct rset_key_control { int (*cmp)(const void *p1, const void *p2); void (*key_logdump_txt) (int logmask, const void *p, const char *txt); zint (*getseq)(const void *p); + zint (*get_segment)(const void *p); int (*filter_func)(const void *p, void *data); void *filter_data; void (*inc)(struct rset_key_control *kc); diff --git a/index/Makefile.am b/index/Makefile.am index 7972530..0c75b0d 100644 --- a/index/Makefile.am +++ b/index/Makefile.am @@ -1,4 +1,4 @@ -## $Id: Makefile.am,v 1.48 2006-07-03 14:27:09 adam Exp $ +## $Id: Makefile.am,v 1.49 2006-07-04 14:10:29 adam Exp $ aux_libs = \ ../rset/libidzebra-rset.la \ @@ -62,7 +62,7 @@ EXTRA_libidzebra_2_0_la_SOURCES = \ lib_LTLIBRARIES = $(zebralib) -libidzebra_2_0_la_DEPENDENCIES = $(STATIC_MODULE_OBJ) +libidzebra_2_0_la_DEPENDENCIES = $(STATIC_MODULE_OBJ) $(aux_libs) libidzebra_2_0_la_LIBADD = $(STATIC_MODULE_OBJ) $(aux_libs) $(STATIC_MODULE_LADD) libidzebra_2_0_la_LDFLAGS=-export-dynamic -version-info $(ZEBRALIBS_VERSION) libidzebra_2_0_la_SOURCES = \ diff --git a/index/index.h b/index/index.h index e2f5f3e..e3045f7 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.169 2006-06-23 11:21:38 adam Exp $ +/* $Id: index.h,v 1.170 2006-07-04 14:10:29 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -104,6 +104,7 @@ int key_compare (const void *p1, const void *p2); void key_init(struct it_key *k); char *key_print_it (const void *p, char *buf); zint key_get_seq (const void *p); +zint key_get_segment (const void *p); int key_compare_it (const void *p1, const void *p2); int key_qsort_compare (const void *p1, const void *p2); void key_logdump (int mask, const void *p); diff --git a/index/kcompare.c b/index/kcompare.c index 83f5af5..c4ca985 100644 --- a/index/kcompare.c +++ b/index/kcompare.c @@ -1,4 +1,4 @@ -/* $Id: kcompare.c,v 1.58 2006-05-10 08:13:21 adam Exp $ +/* $Id: kcompare.c,v 1.59 2006-07-04 14:10:30 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -118,6 +118,13 @@ zint key_get_seq(const void *p) return k.mem[k.len-1]; } +zint key_get_segment(const void *p) +{ + struct it_key k; + memcpy (&k, p, sizeof(k)); + return k.mem[k.len-1] / KEY_SEGMENT_SIZE; +} + int key_qsort_compare (const void *p1, const void *p2) { int r; diff --git a/index/kcontrol.c b/index/kcontrol.c index e5d2b6b..7cde52e 100644 --- a/index/kcontrol.c +++ b/index/kcontrol.c @@ -1,4 +1,4 @@ -/* $Id: kcontrol.c,v 1.2 2006-05-10 08:13:21 adam Exp $ +/* $Id: kcontrol.c,v 1.3 2006-07-04 14:10:30 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -53,8 +53,10 @@ static void my_dec(struct rset_key_control *kc) } } + struct rset_key_control *zebra_key_control_create(ZebraHandle zh) { + const char *res_val; struct rset_key_control *kc = xmalloc(sizeof(*kc)); struct context_control *cp = xmalloc(sizeof(*cp)); @@ -64,6 +66,12 @@ struct rset_key_control *zebra_key_control_create(ZebraHandle zh) kc->cmp = key_compare_it; kc->key_logdump_txt = key_logdump_txt; kc->getseq = key_get_seq; + res_val = zebra_get_resource(zh, "segment", 0); + kc->get_segment = 0; + if (res_val && atoi(res_val)) + { + kc->get_segment = key_get_segment; + } zebra_limit_for_rset(zh->m_limit, &kc->filter_func, &cp->filter_destroy, diff --git a/rset/rsmultiandor.c b/rset/rsmultiandor.c index 4812e2c..49bcea2 100644 --- a/rset/rsmultiandor.c +++ b/rset/rsmultiandor.c @@ -1,4 +1,4 @@ -/* $Id: rsmultiandor.c,v 1.22 2006-07-04 10:25:22 adam Exp $ +/* $Id: rsmultiandor.c,v 1.23 2006-07-04 14:10:31 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -120,6 +120,8 @@ struct rfd_private { zint hits; /* returned so far */ int eof; /* seen the end of it */ int tailcount; /* how many items are tailing */ + zint segment; + int skip; char *tailbits; }; @@ -472,22 +474,39 @@ static int r_read_and (RSFD rfd, void *buf, TERMID *term) { struct rfd_private *p = rfd->priv; RSET ct = rfd->rset; const struct rset_key_control *kctrl = ct->keycontrol; - int i, mintail; - int cmp; + int i; while (1) { if (p->tailcount) { /* we are tailing, find lowest tail and return it */ - mintail = 0; - while ((mintailno_children) && !p->tailbits[mintail]) - mintail++; /* first tail */ - for (i = mintail+1; ino_children; i++) + int mintail = -1; + int cmp; + + for (i = 0; ino_children; i++) { if (p->tailbits[i]) { - cmp=(*kctrl->cmp)(p->items[i].buf,p->items[mintail].buf); - if (cmp<0) + if (mintail >= 0) + cmp = (*kctrl->cmp) + (p->items[i].buf, p->items[mintail].buf); + else + cmp = -1; + if (cmp < 0) mintail = i; + + if (kctrl->get_segment) + { + /* segments enabled */ + + zint segment = kctrl->get_segment(p->items[i].buf); + /* store segment if not stored already */ + if (!p->segment && segment) + p->segment = segment; + + /* skip rest entirely if segments don't match */ + if (p->segment && segment && p->segment != segment) + p->skip = 1; + } } } /* return the lowest tail */ @@ -500,15 +519,19 @@ static int r_read_and (RSFD rfd, void *buf, TERMID *term) p->eof = 1; /* game over, once tails have been returned */ p->tailbits[mintail] = 0; (p->tailcount)--; - (p->hits)++; - return 1; } - /* still a tail? */ - cmp = (*kctrl->cmp)(p->items[mintail].buf,buf); - if (cmp >= rfd->rset->scope){ - p->tailbits[mintail] = 0; - (p->tailcount)--; + else + { + /* still a tail? */ + cmp = (*kctrl->cmp)(p->items[mintail].buf,buf); + if (cmp >= rfd->rset->scope) + { + p->tailbits[mintail] = 0; + (p->tailcount)--; + } } + if (p->skip) + continue; /* skip again.. eventually tailcount will be 0 */ (p->hits)++; return 1; } @@ -517,8 +540,9 @@ static int r_read_and (RSFD rfd, void *buf, TERMID *term) if (p->eof) return 0; /* nothing more to see */ i = 1; /* assume items[0] is highest up */ - while (ino_children) { - cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf); + while (i < ct->no_children) + { + int cmp = (*kctrl->cmp)(p->items[0].buf, p->items[i].buf); if (cmp <= -rfd->rset->scope) { /* [0] was behind, forward it */ if (!rset_forward(p->items[0].fd, p->items[0].buf, &p->items[0].term, p->items[i].buf)) @@ -526,7 +550,7 @@ static int r_read_and (RSFD rfd, void *buf, TERMID *term) p->eof = 1; /* game over */ return 0; } - i = 0; /* start frowarding from scratch */ + i = 0; /* start forwarding from scratch */ } else if (cmp>=rfd->rset->scope) { /* [0] was ahead, forward i */ @@ -543,9 +567,11 @@ static int r_read_and (RSFD rfd, void *buf, TERMID *term) /* if we get this far, all rsets are now within +- scope of [0] */ /* ergo, we have a hit. Mark them all as tailing, and let the */ /* upper 'if' return the hits in right order */ - for (i = 0; ino_children; i++) + for (i = 0; i < ct->no_children; i++) p->tailbits[i] = 1; p->tailcount = ct->no_children; + p->segment = 0; + p->skip = 0; } /* while 1 */ } diff --git a/test/api/safari.cfg b/test/api/safari.cfg index 9c571f2..557ab57 100644 --- a/test/api/safari.cfg +++ b/test/api/safari.cfg @@ -1,10 +1,12 @@ -# $Id: safari.cfg,v 1.3 2006-07-03 14:27:15 adam Exp $ +# $Id: safari.cfg,v 1.4 2006-07-04 14:10:32 adam Exp $ profilepath: ${srcdir:-.}/../../tab attset: bib1.att recordType: safari +segment: 1024 + isam: b # Set up modulePath diff --git a/test/api/safari1.c b/test/api/safari1.c index ae561ba..c014f85 100644 --- a/test/api/safari1.c +++ b/test/api/safari1.c @@ -1,4 +1,4 @@ -/* $Id: safari1.c,v 1.11 2006-05-10 08:13:35 adam Exp $ +/* $Id: safari1.c,v 1.12 2006-07-04 14:10:32 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -30,28 +30,42 @@ const char *myrec[] = /* chunk owner seq idx term */ "00024338 125060 1 any the\n" "00024338 125060 2 any art\n" - "00024338 125060 3 any mand\n", - + "00024338 125060 3 any mand\n" + , "5678\n" /* other record - same owner id */ "00024339 125060 1 any den\n" "00024339 125060 2 any gamle\n" - "00024339 125060 3 any mand\n", - + "00024339 125060 3 any mand\n" + , "5678\n" /* same record chunk id as before .. */ "00024339 125060 1 any the\n" "00024339 125060 2 any gamle\n" - "00024339 125060 3 any mand\n", - + "00024339 125060 3 any mand\n" + , "1000\n" /* separate record */ "00024339 125061 1 any the\n" "00024339 125061 2 any gamle\n" - "00024339 125061 3 any mand\n", - + "00024339 125061 3 any mand\n" + , "1001\n" /* separate record */ "00024340 125062 1 any the\n" "00024340 125062 1 any the\n" /* DUP KEY, bug #432 */ "00024340 125062 2 any old\n" - "00024340 125062 3 any mand\n", + "00024340 125062 3 any mand\n" + , + "1002\n" /* segment testing record */ + "00024341 125062 1 title a\n" + "00024341 125062 2 title b\n" + + "00024341 125062 1024 title b\n" + "00024341 125062 1025 title c\n" + "00024341 125062 1026 title d\n" + "00024341 125062 1027 title e\n" + "00024341 125062 1028 title f\n" + + "00024341 125062 2048 title g\n" + "00024341 125062 2049 title c\n" + , 0 }; @@ -107,6 +121,23 @@ static void tst(int argc, char **argv) zebra_set_limit(zh, 1, 0); YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=any mand", 3)); + /* test segments */ + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title a", 1)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title b", 1)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title c", 1)); + + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and a b", 1)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and a c", 1)); + + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and c d", 1)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and b f", 1)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and f g", 0)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and g f", 0)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and d g", 0)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and g c", 0)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and c g", 0)); + YAZ_CHECK(tl_query(zh, "@attr 4=3 @attr 1=title @and c c", 1)); + YAZ_CHECK(tl_close_down(zh, zs)); }