Updated code to use new YAZ log functions/defines.
[idzebra-moved-to-github.git] / rset / rsprox.c
1 /* $Id: rsprox.c,v 1.5.2.3 2006-12-05 21:14:45 adam Exp $
2    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
3    Index Data Aps
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
20
21 */
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <assert.h>
27
28 #include <rsprox.h>
29 #include <zebrautl.h>
30
/* Debug switch for this result-set module; defaults to off unless the
   build already defined it. */
#if !defined(RSET_DEBUG)
#  define RSET_DEBUG 0
#endif
34
35 static void *r_create(RSET ct, const struct rset_control *sel, void *parms);
36 static RSFD r_open (RSET ct, int flag);
37 static void r_close (RSFD rfd);
38 static void r_delete (RSET ct);
39 static void r_rewind (RSFD rfd);
40 static int r_forward(RSET ct, RSFD rfd, void *buf, int *term_index,
41                      int (*cmpfunc)(const void *p1, const void *p2),
42                      const void *untilbuf);
43 static int r_read (RSFD rfd, void *buf, int *term_index);
44 static int r_write (RSFD rfd, const void *buf);
45
46 static const struct rset_control control_prox = 
47 {
48     "prox",
49     r_create,
50     r_open,
51     r_close,
52     r_delete,
53     r_rewind,
54     r_forward,
55     rset_default_pos,
56     r_read,
57     r_write,
58 };
59
60 const struct rset_control *rset_kind_prox = &control_prox;
61
62 struct rset_prox_info {
63     struct rset_prox_parms p;
64
65     struct rset_prox_rfd *rfd_list;
66 };
67
68 struct rset_prox_rfd {
69     RSFD *rfd;
70     char **buf;  /* lookahead key buffers */
71     char *more;  /* more in each lookahead? */
72     struct rset_prox_rfd *next;
73     struct rset_prox_info *info;
74     int  *countp;
75     char *pbuf;
76 };    
77
78 static void *r_create (RSET ct, const struct rset_control *sel, void *parms)
79 {
80     rset_prox_parms *prox_parms = (rset_prox_parms *) parms;
81     struct rset_prox_info *info;
82     int i;
83     char prox_term[512];
84     int length_prox_term = 0;
85     int min_nn = 10000000;
86     const char *flags = NULL;
87     int term_type = 0;
88
89
90     info = (struct rset_prox_info *) xmalloc (sizeof(*info));
91     memcpy(&info->p, prox_parms, sizeof(struct rset_prox_parms));
92     assert(info->p.rset_no >= 2);
93     info->p.rset = xmalloc(info->p.rset_no * sizeof(*info->p.rset));
94     memcpy(info->p.rset, prox_parms->rset,
95            info->p.rset_no * sizeof(*info->p.rset));
96     info->rfd_list = NULL;
97
98     for (i = 0; i<info->p.rset_no; i++)
99         if (rset_is_volatile(info->p.rset[i]))
100             ct->flags |= RSET_FLAG_VOLATILE;
101
102     *prox_term = '\0';
103     for (i = 0; i<info->p.rset_no; i++)
104     {
105         int j;
106         for (j = 0; j < info->p.rset[i]->no_rset_terms; j++)
107         {
108             const char *nflags = info->p.rset[i]->rset_terms[j]->flags;
109             char *term = info->p.rset[i]->rset_terms[j]->name;
110             int lterm = strlen(term);
111             if (lterm + length_prox_term < sizeof(prox_term)-1)
112             {
113                 if (length_prox_term)
114                     prox_term[length_prox_term++] = ' ';
115                 strcpy (prox_term + length_prox_term, term);
116                 length_prox_term += lterm;
117             }
118             if (min_nn > info->p.rset[i]->rset_terms[j]->nn)
119                 min_nn = info->p.rset[i]->rset_terms[j]->nn;
120             flags = nflags;
121             term_type = info->p.rset[i]->rset_terms[j]->type;
122         }
123     }
124
125     ct->no_rset_terms = 1;
126     ct->rset_terms = (RSET_TERM *)
127         xmalloc (sizeof (*ct->rset_terms) * ct->no_rset_terms);
128
129     ct->rset_terms[0] = rset_term_create (prox_term, length_prox_term,
130                                           flags, term_type);
131
132     return info;
133 }
134
135 static RSFD r_open (RSET ct, int flag)
136 {
137     struct rset_prox_info *info = (struct rset_prox_info *) ct->buf;
138     struct rset_prox_rfd *rfd;
139     int i, dummy;
140
141     if (flag & RSETF_WRITE)
142     {
143         yaz_log(YLOG_FATAL, "prox set type is read-only");
144         return NULL;
145     }
146     rfd = (struct rset_prox_rfd *) xmalloc (sizeof(*rfd));
147     yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p]", ct->control->desc, rfd);
148     rfd->next = info->rfd_list;
149     info->rfd_list = rfd;
150     rfd->info = info;
151
152     rfd->more = xmalloc (sizeof(*rfd->more) * info->p.rset_no);
153
154     rfd->buf = xmalloc(sizeof(*rfd->buf) * info->p.rset_no);
155     for (i = 0; i < info->p.rset_no; i++)
156         rfd->buf[i] = xmalloc (info->p.key_size);
157
158     rfd->rfd = xmalloc(sizeof(*rfd->rfd) * info->p.rset_no);
159     for (i = 0; i < info->p.rset_no; i++)
160         rfd->rfd[i] = rset_open (info->p.rset[i], RSETF_READ);
161
162     for (i = 0; i < info->p.rset_no; i++)
163         rfd->more[i] = rset_read (info->p.rset[i], rfd->rfd[i],
164                                   rfd->buf[i], &dummy);
165
166     rfd->countp = &ct->rset_terms[0]->count;
167     rfd->pbuf = xmalloc (info->p.key_size);
168     return rfd;
169 }
170
171 static void r_close (RSFD rfd)
172 {
173     struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info;
174     struct rset_prox_rfd **rfdp;
175     
176     for (rfdp = &info->rfd_list; *rfdp; rfdp = &(*rfdp)->next)
177         if (*rfdp == rfd)
178         {
179             int i;
180             for (i = 0; i<info->p.rset_no; i++)
181                 xfree ((*rfdp)->buf[i]);
182             xfree ((*rfdp)->buf);
183             xfree ((*rfdp)->more);
184
185             xfree ((*rfdp)->pbuf);
186
187             for (i = 0; i<info->p.rset_no; i++)
188                 rset_close (info->p.rset[i], (*rfdp)->rfd[i]);
189             xfree ((*rfdp)->rfd);
190
191             *rfdp = (*rfdp)->next;
192             xfree (rfd);
193             return;
194         }
195     yaz_log(YLOG_FATAL, "r_close but no rfd match!");
196     assert (0);
197 }
198
199 static void r_delete (RSET ct)
200 {
201     struct rset_prox_info *info = (struct rset_prox_info *) ct->buf;
202     int i;
203
204     assert (info->rfd_list == NULL);
205     rset_term_destroy(ct->rset_terms[0]);
206     xfree (ct->rset_terms);
207     for (i = 0; i<info->p.rset_no; i++)
208         rset_delete (info->p.rset[i]);
209     xfree (info->p.rset);
210     xfree (info);
211 }
212
213 static void r_rewind (RSFD rfd)
214 {
215     struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info;
216     struct rset_prox_rfd *p = (struct rset_prox_rfd *) rfd;
217     int dummy, i;
218
219     yaz_log(YLOG_DEBUG, "rsprox_rewind");
220
221     for (i = 0; i < info->p.rset_no; i++)
222     {
223         rset_rewind (info->p.rset[i], p->rfd[i]);
224         p->more[i] = rset_read (info->p.rset[i], p->rfd[i], p->buf[i], &dummy);
225     }
226 }
227
228 static int r_forward (RSET ct, RSFD rfd, void *buf, int *term_index,
229                       int (*cmpfunc)(const void *p1, const void *p2),
230                       const void *untilbuf)
231 {
232     /* Note: CT is not used. We _can_ pass NULL for it */
233     struct rset_prox_info *info = ((struct rset_prox_rfd*)rfd)->info;
234     struct rset_prox_rfd *p = (struct rset_prox_rfd *) rfd;
235     int cmp=0;
236     int i;
237     int dummy;
238
239     if (untilbuf)
240     {
241         /* it's enough to forward first one. Other will follow
242            automatically */
243         if ( p->more[0] && ((cmpfunc)(untilbuf, p->buf[0]) >= 2) )
244             p->more[0] = rset_forward(info->p.rset[0], p->rfd[0],
245                                       p->buf[0], &dummy, info->p.cmp,
246                                       untilbuf);
247     }
248     if (info->p.ordered && info->p.relation == 3 && info->p.exclusion == 0
249         && info->p.distance == 1)
250     {
251         while (p->more[0]) 
252         {
253             for (i = 1; i < info->p.rset_no; i++)
254             {
255                 if (!p->more[i]) 
256                 {
257                     p->more[0] = 0;    /* saves us a goto out of while loop. */
258                     break;
259                 }
260                 cmp = (*info->p.cmp) (p->buf[i], p->buf[i-1]);
261                 if (cmp > 1)
262                 {
263                     p->more[i-1] = rset_forward (info->p.rset[i-1],
264                                                  p->rfd[i-1],
265                                                  p->buf[i-1], &dummy,
266                                                  info->p.cmp,
267                                                  p->buf[i]);
268                     break;
269                 }
270                 else if (cmp == 1)
271                 {
272                     if ((*info->p.getseq)(p->buf[i-1]) +1 != 
273                         (*info->p.getseq)(p->buf[i]))
274                     {
275                         p->more[i-1] = rset_read (
276                             info->p.rset[i-1], p->rfd[i-1],
277                             p->buf[i-1], &dummy);
278                         break;
279                     }
280                 }
281                 else
282                 {
283                     p->more[i] = rset_forward (info->p.rset[i], p->rfd[i],
284                                                p->buf[i], &dummy,
285                                                info->p.cmp,
286                                                p->buf[i-1]);
287                     break;
288                 }
289             }
290             if (i == p->info->p.rset_no)
291             {
292                 memcpy (buf, p->buf[0], info->p.key_size);
293                 *term_index = 0;
294                 
295                 p->more[0] = rset_read (info->p.rset[0], p->rfd[0],
296                                         p->buf[0], &dummy);
297
298                 if (p->countp && (
299                         *p->countp == 0 || (*info->p.cmp)(buf, p->pbuf) > 1))
300                 {
301                     memcpy (p->pbuf, buf, info->p.key_size);
302                     (*p->countp)++;
303                 }
304                 return 1;
305             }
306         }
307     }
308     else if (info->p.rset_no == 2)
309     {
310         while (p->more[0] && p->more[1]) 
311         {
312             int cmp = (*info->p.cmp)(p->buf[0], p->buf[1]);
313             if (cmp < -1)
314                 p->more[0] = rset_forward (info->p.rset[0], p->rfd[0],
315                                            p->buf[0],
316                                            term_index, info->p.cmp, p->buf[0]);
317             else if (cmp > 1)
318                 p->more[1] = rset_forward (info->p.rset[1], p->rfd[1],
319                                            p->buf[1],
320                                            term_index, info->p.cmp, p->buf[1]);
321             else
322             {
323                 int seqno[500];
324                 int n = 0;
325                 
326                 seqno[n++] = (*info->p.getseq)(p->buf[0]);
327                 while ((p->more[0] = rset_read (info->p.rset[0], p->rfd[0],
328                                                 p->buf[0],
329                                                 term_index)) >= -1 &&
330                        p->more[0] <= -1)
331                     if (n < 500)
332                         seqno[n++] = (*info->p.getseq)(p->buf[0]);
333                 
334                 for (i = 0; i<n; i++)
335                 {
336                     int diff = (*info->p.getseq)(p->buf[1]) - seqno[i];
337                     int excl = info->p.exclusion;
338                     if (!info->p.ordered && diff < 0)
339                         diff = -diff;
340                     switch (info->p.relation)
341                     {
342                     case 1:      /* < */
343                         if (diff < info->p.distance && diff >= 0)
344                             excl = !excl;
345                         break;
346                     case 2:      /* <= */
347                         if (diff <= info->p.distance && diff >= 0)
348                             excl = !excl;
349                         break;
350                     case 3:      /* == */
351                         if (diff == info->p.distance && diff >= 0)
352                             excl = !excl;
353                         break;
354                     case 4:      /* >= */
355                         if (diff >= info->p.distance && diff >= 0)
356                             excl = !excl;
357                         break;
358                     case 5:      /* > */
359                         if (diff > info->p.distance && diff >= 0)
360                             excl = !excl;
361                         break;
362                     case 6:      /* != */
363                         if (diff != info->p.distance && diff >= 0)
364                             excl = !excl;
365                         break;
366                     }
367                     if (excl)
368                     {
369                         memcpy (buf, p->buf[1], info->p.key_size);
370                         *term_index = 0;
371                         
372                         p->more[1] = rset_read (info->p.rset[1],
373                                                 p->rfd[1], p->buf[1],
374                                                 term_index);
375                         return 1;
376                     }
377                 }
378                 p->more[1] = rset_read (info->p.rset[1], p->rfd[1],
379                                         p->buf[1],
380                                         term_index);
381             }
382         }
383     }
384     return 0;
385 }
386
387
388 static int r_read (RSFD rfd, void *buf, int *term_index)
389 {
390     return r_forward(0, rfd, buf, term_index, 0, 0);
391 }
392
393 static int r_write (RSFD rfd, const void *buf)
394 {
395     yaz_log(YLOG_FATAL, "prox set type is read-only");
396     return -1;
397 }
398