1 /* $Id: rsbetween.c,v 1.15.2.4 2004-12-17 13:43:09 heikki Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
24 /* rsbetween is (mostly) used for xml searches. It returns the hits of the
25 * "middle" rset, that are in between the "left" and "right" rsets. For
26 * example "Shakespeare" in between "<title>" and </title>. The thing is
27 * complicated by the inclusion of attributes (from their own rset). If attrs
28 * specified, they must match the "left" rset (start tag). "Hamlet" between
29 * "<title lang=eng>" and "</title>". (This assumes that the attributes are
30 * indexed to the same seqno as the tags).
39 #include <rsbetween.h>
42 #define RSBETWEEN_DEBUG 0
44 #define log_level LOG_DEBUG
47 static void *r_create_between(RSET ct, const struct rset_control *sel, void *parms);
48 static RSFD r_open_between (RSET ct, int flag);
49 static void r_close_between (RSFD rfd);
50 static void r_delete_between (RSET ct);
51 static void r_rewind_between (RSFD rfd);
52 static int r_forward_between(RSET ct, RSFD rfd, void *buf, int *term_index,
53 int (*cmpfunc)(const void *p1, const void *p2),
54 const void *untilbuf);
55 static int r_read (RSFD rfd, void *buf, int *term_index);
56 static int r_write_between (RSFD rfd, const void *buf);
58 static const struct rset_control control_between =
66 r_forward_between, /* rset_default_forward, */
73 const struct rset_control *rset_kind_between = &control_between;
80 struct rset_between_info {
87 int (*cmp)(const void *p1, const void *p2);
88 char *(*printer)(const void *p1, char *buf);
89 struct rset_between_rfd *rfd_list;
92 struct rset_between_rfd {
109 struct rset_between_rfd *next;
110 struct rset_between_info *info;
112 void *recbuf; /* a key that tells which record we are in */
113 void *startbuf; /* the start tag */
114 int startbufok; /* we have seen the first start tag */
115 void *attrbuf; /* the attr tag. If these two match, we have attr match */
116 int attrbufok; /* we have seen the first attr tag, can compare */
117 int depth; /* number of start-tags without end-tags */
118 int attrdepth; /* on what depth the attr matched */
119 int hits; /* number of hits returned so far */
120 /* Stuff for the multi-and read, also backported */
121 /* uses more_l/m/r/a etc from above */
128 static void log2 (struct rset_between_rfd *p, char *msg, int cmp_l, int cmp_r)
133 logf(LOG_DEBUG,"btw: %s l=%s(%d/%d) m=%s(%d) r=%s(%d/%d), lev=%d",
135 (*p->info->printer)(p->buf_l, buf_l), p->more_l, cmp_l,
136 (*p->info->printer)(p->buf_m, buf_m), p->more_m,
137 (*p->info->printer)(p->buf_r, buf_r), p->more_r, cmp_r,
142 static void *r_create_between (RSET ct, const struct rset_control *sel,
145 rset_between_parms *between_parms = (rset_between_parms *) parms;
146 struct rset_between_info *info;
148 info = (struct rset_between_info *) xmalloc (sizeof(*info));
149 info->key_size = between_parms->key_size;
150 info->rset_l = between_parms->rset_l;
151 info->rset_m = between_parms->rset_m;
152 info->rset_r = between_parms->rset_r;
153 info->rset_attr = between_parms->rset_attr;
154 if (rset_is_volatile(info->rset_l) ||
155 rset_is_volatile(info->rset_m) ||
156 rset_is_volatile(info->rset_r))
157 ct->flags |= RSET_FLAG_VOLATILE;
158 info->cmp = between_parms->cmp;
159 info->printer = between_parms->printer;
160 info->rfd_list = NULL;
162 info->term_index_s = info->rset_l->no_rset_terms;
166 info->rset_l->no_rset_terms +
167 info->rset_m->no_rset_terms +
168 info->rset_r->no_rset_terms;
169 ct->rset_terms = (RSET_TERM *)
170 xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms);
171 memcpy (ct->rset_terms, info->rset_l->rset_terms,
172 info->rset_l->no_rset_terms * sizeof(*ct->rset_terms));
173 memcpy (ct->rset_terms + info->rset_l->no_rset_terms,
174 info->rset_m->rset_terms,
175 info->rset_m->no_rset_terms * sizeof(*ct->rset_terms));
176 memcpy (ct->rset_terms + info->rset_l->no_rset_terms +
177 info->rset_m->no_rset_terms,
178 info->rset_r->rset_terms,
179 info->rset_r->no_rset_terms * sizeof(*ct->rset_terms));
184 info->rset_l->no_rset_terms +
185 info->rset_r->no_rset_terms;
186 ct->rset_terms = (RSET_TERM *)
187 xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms);
188 memcpy (ct->rset_terms, info->rset_l->rset_terms,
189 info->rset_l->no_rset_terms * sizeof(*ct->rset_terms));
190 memcpy (ct->rset_terms + info->rset_l->no_rset_terms,
191 info->rset_r->rset_terms,
192 info->rset_r->no_rset_terms * sizeof(*ct->rset_terms));
198 static RSFD r_open_between (RSET ct, int flag)
200 struct rset_between_info *info = (struct rset_between_info *) ct->buf;
201 struct rset_between_rfd *rfd;
203 if (flag & RSETF_WRITE)
205 logf (LOG_FATAL, "between set type is read-only");
208 rfd = (struct rset_between_rfd *) xmalloc (sizeof(*rfd));
209 rfd->next = info->rfd_list;
210 info->rfd_list = rfd;
213 rfd->buf_l = xmalloc (info->key_size);
214 rfd->buf_m = xmalloc (info->key_size);
215 rfd->buf_r = xmalloc (info->key_size);
216 rfd->buf_attr = xmalloc (info->key_size);
218 rfd->rfd_l = rset_open (info->rset_l, RSETF_READ);
219 rfd->rfd_m = rset_open (info->rset_m, RSETF_READ);
220 rfd->rfd_r = rset_open (info->rset_r, RSETF_READ);
222 rfd->more_l = rset_read (info->rset_l, rfd->rfd_l, rfd->buf_l,
224 rfd->more_m = rset_read (info->rset_m, rfd->rfd_m, rfd->buf_m,
226 rfd->more_r = rset_read (info->rset_r, rfd->rfd_r, rfd->buf_r,
231 rfd->rfd_attr = rset_open (info->rset_attr, RSETF_READ);
232 rfd->more_attr = rset_read (info->rset_attr, rfd->rfd_attr,
233 rfd->buf_attr, &dummy);
236 rfd->recbuf=xmalloc (info->key_size);
237 rfd->startbuf=xmalloc (info->key_size);
238 rfd->attrbuf=xmalloc (info->key_size);
243 rfd->hits=-1; /* signals uninitialized */
245 rfd->tailbits[WHICH_L]=0;
246 rfd->tailbits[WHICH_M]=0;
247 rfd->tailbits[WHICH_R]=0;
248 rfd->tailbits[WHICH_A]=0;
253 static void r_close_between (RSFD rfd)
255 struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
256 struct rset_between_rfd **rfdp;
258 for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next)
261 xfree ((*rfdp)->buf_l);
262 xfree ((*rfdp)->buf_m);
263 xfree ((*rfdp)->buf_r);
264 xfree ((*rfdp)->buf_attr);
265 xfree ((*rfdp)->recbuf);
266 xfree ((*rfdp)->startbuf);
267 xfree ((*rfdp)->attrbuf);
268 rset_close (info->rset_l, (*rfdp)->rfd_l);
269 rset_close (info->rset_m, (*rfdp)->rfd_m);
270 rset_close (info->rset_r, (*rfdp)->rfd_r);
272 rset_close (info->rset_attr, (*rfdp)->rfd_attr);
274 *rfdp = (*rfdp)->next;
278 logf (LOG_FATAL, "r_close_between but no rfd match!");
282 static void r_delete_between (RSET ct)
284 struct rset_between_info *info = (struct rset_between_info *) ct->buf;
286 assert (info->rfd_list == NULL);
287 xfree (ct->rset_terms);
288 rset_delete (info->rset_l);
289 rset_delete (info->rset_m);
290 rset_delete (info->rset_r);
292 rset_delete (info->rset_attr);
296 static void r_rewind_between (RSFD rfd)
298 struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
299 struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
302 logf (LOG_DEBUG, "rsbetween_rewind");
304 rset_rewind (info->rset_l, p->rfd_l);
305 rset_rewind (info->rset_m, p->rfd_m);
306 rset_rewind (info->rset_r, p->rfd_r);
307 p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, &p->term_index_l);
308 p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m, &p->term_index_m);
309 p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, &p->term_index_r);
313 rset_rewind (info->rset_attr, p->rfd_attr);
314 p->more_attr = rset_read (info->rset_attr, p->rfd_attr, p->buf_attr,
322 static int r_forward_between(RSET ct, RSFD rfd, void *buf, int *term_index,
323 int (*cmpfunc)(const void *p1, const void *p2),
324 const void *untilbuf)
326 struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
327 struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
330 log2( p, "fwd: before forward", 0,0);
332 /* It is enough to forward the m pointer here, the read will */
333 /* naturally forward the l, m, and attr pointers */
335 p->more_m=rset_forward(info->rset_m,p->rfd_m, p->buf_m,
336 &p->term_index_m, info->cmp,untilbuf);
338 log2( p, "fwd: after forward M", 0,0);
340 /* rc = r_read_between(rfd, buf, term_index);*/
341 rc = r_read(rfd, buf, term_index);
343 log2( p, "fwd: after forward", 0,0);
350 static int r_write_between (RSFD rfd, const void *buf)
352 logf (LOG_FATAL, "between set type is read-only");
358 /* Heikki's back-port of the cleaner 1.4 algorithm */
360 static void checkattr(struct rset_between_rfd *p)
362 struct rset_between_info *info =p->info;
365 return; /* already found one */
366 if (!info->rset_attr)
368 p->attrdepth=-1; /* matches always */
371 if ( p->startbufok && p->attrbufok )
372 { /* have buffers to compare */
373 cmp=(*info->cmp)(p->startbuf,p->attrbuf);
374 if (0==cmp) /* and the keys match */
376 p->attrdepth=p->depth;
378 yaz_log(log_level, "found attribute match at depth %d",p->attrdepth);
385 /* Implements a multi-and between the l,r, and m rfds. Returns all */
386 /* hits from those, provided that all three point to the same record */
387 /* See rsmultiandor.c in zebra 1.4 for details */
388 static int read_anded(struct rset_between_rfd *p,void *buf,
389 int *term, int *which) {
395 struct rset_between_info *info =p->info;
400 /* make the individual args into arrays, to match rsmultiandor */
401 rfds[WHICH_L]=p->rfd_l;
402 rfds[WHICH_M]=p->rfd_m;
403 rfds[WHICH_R]=p->rfd_r;
404 rfds[WHICH_A]=p->rfd_attr;
405 rsets[WHICH_L]=info->rset_l;
406 rsets[WHICH_M]=info->rset_m;
407 rsets[WHICH_R]=info->rset_r;
408 rsets[WHICH_A]=info->rset_attr;
409 bufs[WHICH_L]=p->buf_l;
410 bufs[WHICH_M]=p->buf_m;
411 bufs[WHICH_R]=p->buf_r;
412 bufs[WHICH_A]=p->buf_attr;
413 terms[WHICH_L]=&(p->term_index_l);
414 terms[WHICH_M]=&(p->term_index_m);
415 terms[WHICH_R]=&(p->term_index_r);
416 terms[WHICH_A]=&dummyterm;
424 { /* we are tailing */
426 while ((mintail<n) && !p->tailbits[mintail])
427 mintail++; /* first tail */
428 for (i=mintail+1;i<n;i++)
432 cmp=(*info->cmp)(bufs[i],bufs[mintail]);
437 /* return the lowest tail */
438 memcpy(buf, bufs[mintail], info->key_size);
440 *term=*terms[mintail];
441 /* and read that one further */
442 if (!rset_read(rsets[mintail],rfds[mintail],
443 bufs[mintail],terms[mintail]))
446 p->tailbits[mintail]=0;
451 cmp=(info->cmp)(bufs[mintail],buf);
454 p->tailbits[mintail]=0;
462 return 0; /* game over */
463 i=1; /* assume items[0] is highest up */
466 cmp=(*info->cmp)(bufs[0],bufs[i]);
468 { /* [0] was behid, forward it and start anew */
469 if (!rset_forward(rsets[0],rfds[0], bufs[0],
470 terms[0], info->cmp, bufs[i]))
475 i=0; /* all over again */
478 {/* [0] was ahead, forward i */
479 if (!rset_forward(rsets[i],rfds[i], bufs[i],
480 terms[i], info->cmp, bufs[0]))
489 /* now all are within the same records, set tails and let upper
498 static int r_read (RSFD rfd, void *buf, int *term)
500 struct rset_between_rfd *p=(struct rset_between_rfd *)rfd;
501 struct rset_between_info *info =p->info;
505 *term=0; /* just in case, should not be necessary */
507 yaz_log(log_level,"btw: == read: term=%p",term);
509 while ( read_anded(p,buf,&thisterm,&which) )
512 yaz_log(log_level,"btw: read loop term=%p d=%d ad=%d",
513 term, p->depth, p->attrdepth);
517 memcpy(p->recbuf,buf,info->key_size);
522 cmp=(*info->cmp)(buf,p->recbuf);
524 yaz_log(log_level, "btw: cmp=%d",cmp);
531 yaz_log(log_level,"btw: new record");
535 memcpy(p->recbuf,buf,info->key_size);
539 yaz_log(log_level,"btw: which: %d", which);
545 yaz_log(log_level,"btw: read start tag. d=%d",p->depth);
547 memcpy(p->startbuf,buf,info->key_size);
549 checkattr(rfd); /* in case we already saw the attr here */
551 else if (which==WHICH_R)
553 if (p->depth == p->attrdepth)
554 p->attrdepth=0; /* ending the tag with attr match */
557 yaz_log(log_level,"btw: read end tag. d=%d ad=%d",
558 p->depth, p->attrdepth);
561 else if (which==WHICH_A)
564 yaz_log(log_level,"btw: read attr");
566 memcpy(p->attrbuf,buf,info->key_size);
568 checkattr(rfd); /* in case the start tag came first */
571 { /* mut be a real hit */
572 if (p->depth && p->attrdepth)
576 yaz_log(log_level,"btw: got a hit h=%d d=%d ad=%d t=%d+%d",
577 p->hits,p->depth,p->attrdepth,
578 info->rset_m->no_rset_terms,thisterm);
580 *term= info->rset_m->no_rset_terms + thisterm;
581 return 1; /* everything else is in place already */
585 yaz_log(log_level, "btw: Ignoring hit. h=%d d=%d ad=%d",
586 p->hits,p->depth,p->attrdepth);