4222c3081a9908300f1640c0d3751ef8a8d687aa
[idzebra-moved-to-github.git] / rset / rsbetween.c
1 /*
2  * Copyright (C) 1994-2002, Index Data
3  * All rights reserved.
4  * Heikki Levanto
5  *
6  * $Id: rsbetween.c,v 1.6 2002-08-01 08:53:35 adam Exp $
7  */
8
9 #include <stdio.h>
10 #include <stdlib.h>
11 #include <string.h>
12 #include <assert.h>
13
14 #include <rsbetween.h>
15 #include <zebrautl.h>
16
17 static void *r_create_between(RSET ct, const struct rset_control *sel, void *parms);
18 static RSFD r_open_between (RSET ct, int flag);
19 static void r_close_between (RSFD rfd);
20 static void r_delete_between (RSET ct);
21 static void r_rewind_between (RSFD rfd);
22 static int r_count_between (RSET ct);
23 static int r_read_between (RSFD rfd, void *buf, int *term_index);
24 static int r_write_between (RSFD rfd, const void *buf);
25
26 static const struct rset_control control_between = 
27 {
28     "between",
29     r_create_between,
30     r_open_between,
31     r_close_between,
32     r_delete_between,
33     r_rewind_between,
34     r_count_between,
35     r_read_between,
36     r_write_between,
37 };
38
39
40 const struct rset_control *rset_kind_between = &control_between;
41
42 struct rset_between_info {
43     int key_size;
44     RSET rset_l;
45     RSET rset_m;
46     RSET rset_r;
47     RSET rset_attr;
48     int term_index_s;
49     int (*cmp)(const void *p1, const void *p2);
50     char *(*printer)(const void *p1, char *buf);
51     struct rset_between_rfd *rfd_list;
52 };
53
54 struct rset_between_rfd {
55     RSFD rfd_l;
56     RSFD rfd_m;
57     RSFD rfd_r;
58     RSFD rfd_attr;
59     int  more_l;
60     int  more_m;
61     int  more_r;
62     int  more_attr;
63     int term_index_l;
64     int term_index_m;
65     int term_index_r;
66     void *buf_l;
67     void *buf_m;
68     void *buf_r;
69     void *buf_attr;
70     int level;
71     struct rset_between_rfd *next;
72     struct rset_between_info *info;
73 };    
74
75 static void *r_create_between (RSET ct, const struct rset_control *sel,
76                                void *parms)
77 {
78     rset_between_parms *between_parms = (rset_between_parms *) parms;
79     struct rset_between_info *info;
80
81     info = (struct rset_between_info *) xmalloc (sizeof(*info));
82     info->key_size = between_parms->key_size;
83     info->rset_l = between_parms->rset_l;
84     info->rset_m = between_parms->rset_m;
85     info->rset_r = between_parms->rset_r;
86     info->rset_attr = between_parms->rset_attr;
87     if (rset_is_volatile(info->rset_l) || 
88         rset_is_volatile(info->rset_m) ||
89         rset_is_volatile(info->rset_r))
90         ct->flags |= RSET_FLAG_VOLATILE;
91     info->cmp = between_parms->cmp;
92     info->printer = between_parms->printer;
93     info->rfd_list = NULL;
94     
95     info->term_index_s = info->rset_l->no_rset_terms;
96     if (info->rset_m)
97     {
98         ct->no_rset_terms =
99             info->rset_l->no_rset_terms + 
100             info->rset_m->no_rset_terms + 
101             info->rset_r->no_rset_terms;
102         ct->rset_terms = (RSET_TERM *)
103             xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms);
104         memcpy (ct->rset_terms, info->rset_l->rset_terms,
105                 info->rset_l->no_rset_terms * sizeof(*ct->rset_terms));
106         memcpy (ct->rset_terms + info->rset_l->no_rset_terms,
107                 info->rset_m->rset_terms,
108                 info->rset_m->no_rset_terms * sizeof(*ct->rset_terms));
109         memcpy (ct->rset_terms + info->rset_l->no_rset_terms + 
110                 info->rset_m->no_rset_terms,
111                 info->rset_r->rset_terms,
112                 info->rset_r->no_rset_terms * sizeof(*ct->rset_terms));
113     }
114     else
115     {
116         ct->no_rset_terms =
117             info->rset_l->no_rset_terms + 
118             info->rset_r->no_rset_terms;
119         ct->rset_terms = (RSET_TERM *)
120             xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms);
121         memcpy (ct->rset_terms, info->rset_l->rset_terms,
122                 info->rset_l->no_rset_terms * sizeof(*ct->rset_terms));
123         memcpy (ct->rset_terms + info->rset_l->no_rset_terms,
124                 info->rset_r->rset_terms,
125                 info->rset_r->no_rset_terms * sizeof(*ct->rset_terms));
126     }
127
128     return info;
129 }
130
131 static RSFD r_open_between (RSET ct, int flag)
132 {
133     struct rset_between_info *info = (struct rset_between_info *) ct->buf;
134     struct rset_between_rfd *rfd;
135
136     if (flag & RSETF_WRITE)
137     {
138         logf (LOG_FATAL, "between set type is read-only");
139         return NULL;
140     }
141     rfd = (struct rset_between_rfd *) xmalloc (sizeof(*rfd));
142     rfd->next = info->rfd_list;
143     info->rfd_list = rfd;
144     rfd->info = info;
145
146     rfd->buf_l = xmalloc (info->key_size);
147     rfd->buf_m = xmalloc (info->key_size);
148     rfd->buf_r = xmalloc (info->key_size);
149     rfd->buf_attr = xmalloc (info->key_size);
150
151     rfd->rfd_l = rset_open (info->rset_l, RSETF_READ);
152     rfd->rfd_m = rset_open (info->rset_m, RSETF_READ);
153     rfd->rfd_r = rset_open (info->rset_r, RSETF_READ);
154     
155     rfd->more_l = rset_read (info->rset_l, rfd->rfd_l, rfd->buf_l,
156                              &rfd->term_index_l);
157     rfd->more_m = rset_read (info->rset_m, rfd->rfd_m, rfd->buf_m,
158                              &rfd->term_index_m);
159     rfd->more_r = rset_read (info->rset_r, rfd->rfd_r, rfd->buf_r,
160                              &rfd->term_index_r);
161     if (info->rset_attr)
162     {
163         int dummy;
164         rfd->rfd_attr = rset_open (info->rset_attr, RSETF_READ);
165         rfd->more_attr = rset_read (info->rset_attr, rfd->rfd_attr,
166                                     rfd->buf_attr, &dummy);
167     }
168     rfd->level=0;
169     return rfd;
170 }
171
172 static void r_close_between (RSFD rfd)
173 {
174     struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
175     struct rset_between_rfd **rfdp;
176     
177     for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next)
178         if (*rfdp == rfd)
179         {
180             xfree ((*rfdp)->buf_l);
181             xfree ((*rfdp)->buf_m);
182             xfree ((*rfdp)->buf_r);
183             xfree ((*rfdp)->buf_attr);
184             rset_close (info->rset_l, (*rfdp)->rfd_l);
185             rset_close (info->rset_m, (*rfdp)->rfd_m);
186             rset_close (info->rset_r, (*rfdp)->rfd_r);
187             if (info->rset_attr)
188                 rset_close (info->rset_attr, (*rfdp)->rfd_attr);
189             
190             *rfdp = (*rfdp)->next;
191             xfree (rfd);
192             return;
193         }
194     logf (LOG_FATAL, "r_close_between but no rfd match!");
195     assert (0);
196 }
197
198 static void r_delete_between (RSET ct)
199 {
200     struct rset_between_info *info = (struct rset_between_info *) ct->buf;
201
202     assert (info->rfd_list == NULL);
203     xfree (ct->rset_terms);
204     rset_delete (info->rset_l);
205     rset_delete (info->rset_m);
206     rset_delete (info->rset_r);
207     if (info->rset_attr)
208         rset_delete (info->rset_attr);
209     xfree (info);
210 }
211
212 static void r_rewind_between (RSFD rfd)
213 {
214     struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
215     struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
216
217     logf (LOG_DEBUG, "rsbetween_rewind");
218     rset_rewind (info->rset_l, p->rfd_l);
219     rset_rewind (info->rset_m, p->rfd_m);
220     rset_rewind (info->rset_r, p->rfd_r);
221     p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l, &p->term_index_l);
222     p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m, &p->term_index_m);
223     p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r, &p->term_index_r);
224     if (info->rset_attr)
225     {
226         int dummy;
227         rset_rewind (info->rset_attr, p->rfd_attr);
228         p->more_attr = rset_read (info->rset_attr, p->rfd_attr, p->buf_attr,
229                                   &dummy);
230     }
231     p->level=0;
232 }
233
234 static int r_count_between (RSET ct)
235 {
236     return 0;
237 }
238
239 static void logit( struct rset_between_info *info, char *prefix, void *l, void *m, void *r)
240 {
241     char buf_l[32];
242     char buf_m[32];
243     char buf_r[32];
244     logf(LOG_DEBUG,"btw: %s l=%s m=%s r=%s",
245       prefix, 
246       (*info->printer)(l, buf_l),
247       (*info->printer)(m, buf_m),
248       (*info->printer)(r, buf_r) );
249 }
250
251 static int r_read_between (RSFD rfd, void *buf, int *term_index)
252 {
253     struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
254     struct rset_between_info *info = p->info;
255     int cmp_l;
256     int cmp_r;
257     int attr_match;
258
259     while (p->more_m)
260     {
261         logit( info, "start of loop", p->buf_l, p->buf_m, p->buf_r);
262
263         /* forward L until past m, count levels, note rec boundaries */
264         if (p->more_l)
265             cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
266         else
267             cmp_l=2; /* past this record */
268         logf(LOG_DEBUG, "cmp_l=%d", cmp_l);
269
270         while (cmp_l < 0)   /* l before m */
271         {
272             if (cmp_l == -2)
273                 p->level=0; /* earlier record */
274             if (cmp_l == -1)
275             {
276                 p->level++; /* relevant start tag */
277
278                 if (!info->rset_attr)
279                     attr_match = 1;
280                 else
281                 {
282                     int cmp_attr;
283                     int dummy_term;
284                     attr_match = 0;
285                     while (p->more_attr)
286                     {
287                         cmp_attr = (*info->cmp)(p->buf_attr, p->buf_l);
288                         if (cmp_attr == 0)
289                         {
290                             attr_match = 1;
291                             break;
292                         }
293                         else if (cmp_attr > 0)
294                             break;
295                         p->more_attr = rset_read (info->rset_attr, p->rfd_attr,
296                                                   p->buf_attr, &dummy_term);
297                     }
298                 }
299             }
300             if (p->more_l)
301             {
302                 p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l,
303                                    &p->term_index_l);
304                 cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
305                 logit( info, "forwarded L", p->buf_l, p->buf_m, p->buf_r);
306                 logf(LOG_DEBUG, "  cmp_l=%d", cmp_l);
307             }
308             else
309                 cmp_l=2; 
310         } /* forward L */
311
312             
313         /* forward R until past m, count levels */
314         if (p->more_r)
315             cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
316         else
317             cmp_r=2; 
318         logf(LOG_DEBUG, "cmp_r=%d", cmp_r);
319         while (cmp_r < 0)   /* r before m */
320         {
321             /* -2, earlier record, doesn't matter */
322             if (cmp_r == -1)
323                 p->level--; /* relevant end tag */
324             if (p->more_r)
325             {
326                 p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r,
327                                    &p->term_index_r);
328                 cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
329                 logit( info, "forwarded R", p->buf_l, p->buf_m, p->buf_r);
330                 logf(LOG_DEBUG, "  cmp_r=%d", cmp_r);
331             }
332             else
333                 cmp_r=2; 
334         } /* forward R */
335         
336         if ( ( p->level <= 0 ) && ! p->more_l)
337             return 0; /* no more start tags, nothing more to find */
338         
339         if ( attr_match && p->level > 0)  /* within a tag pair (or deeper) */
340         {
341             memcpy (buf, p->buf_m, info->key_size);
342             *term_index = p->term_index_m;
343             logit( info, "Returning a hit (m)", p->buf_l, p->buf_m, p->buf_r);
344             p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m,
345                                    &p->term_index_m);
346             return 1;
347         }
348         else
349             if ( ! p->more_l )  /* not in data, no more starts */
350                 return 0;  /* ergo, nothing can be found. stop scanning */
351         
352         p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m,
353                                &p->term_index_m);
354     } /* while more_m */
355       
356     logf(LOG_DEBUG,"Exiting, no more stuff in m");
357     return 0;  /* no more data possible */
358
359
360 }  /* r_read */
361
362
363 static int r_write_between (RSFD rfd, const void *buf)
364 {
365     logf (LOG_FATAL, "between set type is read-only");
366     return -1;
367 }
368