Version 2.0.59
[idzebra-moved-to-github.git] / rset / rsprox.c
1 /* This file is part of the Zebra server.
2    Copyright (C) Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <assert.h>
27
28 #include <idzebra/util.h>
29 #include <rset.h>
30
/* Default RSET_DEBUG to 0 unless the build has already defined it.
   NOTE(review): nothing in the visible part of this file reads the macro. */
#ifndef RSET_DEBUG
#define RSET_DEBUG 0
#endif
34
/* Forward declarations of the file-local callbacks.  They are needed
   because the `control' table that follows takes their addresses before
   the definitions appear further down in this file. */
static RSFD r_open(RSET ct, int flag);
static void r_close(RSFD rfd);
static void r_delete(RSET ct);
static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf);
static int r_read(RSFD rfd, void *buf, TERMID *term);
static void r_pos(RSFD rfd, double *current, double *total);
static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
42
/* Callback table for the proximity result set.  rset_create_prox() hands
   its address to rset_create_base().
   The initializer is positional: the entries must stay in the member
   order of struct rset_control, which is declared outside this file. */
static const struct rset_control control =
{
    "prox",          /* descriptive name of this set type */
    r_delete,
    r_get_terms,
    r_open,
    r_close,
    r_forward,
    r_pos,
    r_read,
    rset_no_write,   /* defined outside this file; by its name, a stub that
                        rejects writes (r_open also refuses RSETF_WRITE) */
};
55
/* Proximity parameters.  Filled in by rset_create_prox(), stored in the
   RSET's priv pointer and read by r_forward(). */
struct rset_prox_info {
    int ordered;    /* non-zero: operands must occur in the given order;
                       zero: r_forward's two-child branch compares the
                       absolute sequence difference */
    int exclusion;  /* non-zero inverts the relation test in r_forward's
                       two-child branch; the ordered fast path requires 0 */
    int relation;   /* comparison code used by r_forward:
                       1 <, 2 <=, 3 ==, 4 >=, 5 >, 6 != */
    int distance;   /* value the sequence-number difference is compared to */
};
62
/* Per-open-handle state, kept in RSFD->priv.  Every array has one slot
   per child result set (ct->no_children entries, allocated in r_open). */
struct rset_prox_rfd {
    RSFD *rfd;   /* read handle for each child, obtained with rset_open() */
    char **buf;  /* lookahead key buffers */
    char *more;  /* more in each lookahead? */
    TERMID *terms; /* lookahead terms */
    zint hits;   /* hits returned so far; r_pos reports it as `current' */
};
70
71
72 RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol,
73                       int scope,
74                       int rset_no, RSET *rset,
75                       int ordered, int exclusion,
76                       int relation, int distance)
77 {
78     RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, 0,
79                                  rset_no, rset);
80     struct rset_prox_info *info;
81     info = (struct rset_prox_info *) nmem_malloc(rnew->nmem,sizeof(*info));
82     info->ordered = ordered;
83     info->exclusion = exclusion;
84     info->relation = relation;
85     info->distance = distance;
86     rnew->priv = info;
87     return rnew;
88 }
89
90 static void r_delete(RSET ct)
91 {
92 }
93
94 static RSFD r_open(RSET ct, int flag)
95 {
96     RSFD rfd;
97     struct rset_prox_rfd *p;
98     int i;
99
100     if (flag & RSETF_WRITE)
101     {
102         yaz_log(YLOG_FATAL, "prox set type is read-only");
103         return NULL;
104     }
105     rfd = rfd_create_base(ct);
106     if (rfd->priv)
107         p = (struct rset_prox_rfd *)(rfd->priv);
108     else
109     {
110         p = (struct rset_prox_rfd *) nmem_malloc(ct->nmem,sizeof(*p));
111         rfd->priv = p;
112         p->more = nmem_malloc(ct->nmem,sizeof(*p->more) * ct->no_children);
113         p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * ct->no_children);
114         p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * ct->no_children);
115         for (i = 0; i < ct->no_children; i++)
116         {
117             p->buf[i] = nmem_malloc(ct->nmem,ct->keycontrol->key_size);
118             p->terms[i] = 0;
119         }
120         p->rfd = nmem_malloc(ct->nmem,sizeof(*p->rfd) * ct->no_children);
121     }
122     yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p] n=%d",
123             ct->control->desc, rfd, ct->no_children);
124
125     for (i = 0; i < ct->no_children; i++)
126     {
127         p->rfd[i] = rset_open(ct->children[i], RSETF_READ);
128         p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]);
129     }
130     p->hits = 0;
131     return rfd;
132 }
133
134 static void r_close(RSFD rfd)
135 {
136     RSET ct = rfd->rset;
137     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
138
139     int i;
140     for (i = 0; i < ct->no_children; i++)
141         rset_close(p->rfd[i]);
142 }
143
144 static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
145 {
146     RSET ct = rfd->rset;
147     struct rset_prox_info *info = (struct rset_prox_info *)(ct->priv);
148     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
149     const struct rset_key_control *kctrl = ct->keycontrol;
150     int cmp = 0;
151     int i;
152
153     if (untilbuf)
154     {
155         /* it is enough to forward first one. Other will follow. */
156         if (p->more[0] &&   /* was: cmp >=2 */
157             ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) )
158             p->more[0] = rset_forward(p->rfd[0], p->buf[0],
159                                       &p->terms[0], untilbuf);
160     }
161     if (info->ordered && info->relation <= 3 && info->exclusion == 0)
162     {
163         while (p->more[0])
164         {
165             for (i = 1; i < ct->no_children; i++)
166             {
167                 if (!p->more[i])
168                 {
169                     p->more[0] = 0; /* saves us a goto out of while loop. */
170                     break;
171                 }
172                 cmp = (*kctrl->cmp)(p->buf[i], p->buf[i-1]);
173                 if (cmp >= rfd->rset->scope)  /* not same record */
174                 {
175                     p->more[i-1] = rset_forward(p->rfd[i-1],
176                                                 p->buf[i-1],
177                                                 &p->terms[i-1],
178                                                 p->buf[i]);
179                     break;
180                 }
181                 else if (cmp > 0) /* within record and ordered */
182                 {
183                     zint diff = (*kctrl->getseq)(p->buf[i]) -
184                         (*kctrl->getseq)(p->buf[i-1]);
185                     if (info->relation == 3 && diff == info->distance)
186                         continue;
187                     else if (info->relation == 2 && diff <= info->distance)
188                         continue;
189                     else if (info->relation == 1 && diff < info->distance)
190                         continue;
191
192                     p->more[i-1] = rset_read(p->rfd[i-1], p->buf[i-1],
193                                              &p->terms[i-1]);
194                     break;
195                 }
196                 else  /* within record - wrong order */
197                 {
198                     p->more[i] = rset_forward(p->rfd[i], p->buf[i],
199                                               &p->terms[i], p->buf[i-1]);
200                     break;
201                 }
202             }
203             if (i == ct->no_children)
204             {
205                 i = ct->no_children-1;
206                 memcpy(buf, p->buf[i], kctrl->key_size);
207                 if (term)
208                     *term = p->terms[i];
209                 p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]);
210                 p->hits++;
211                 return 1;
212             }
213         }
214     }
215     else if (ct->no_children == 2)
216     {
217         while (p->more[0] && p->more[1])
218         {
219             int cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
220             if ( cmp <= - rfd->rset->scope) /* cmp<-1*/
221                 p->more[0] = rset_forward(p->rfd[0], p->buf[0],
222                                           &p->terms[0],p->buf[1]);
223             else if ( cmp >= rfd->rset->scope ) /* cmp>1 */
224                 p->more[1] = rset_forward(p->rfd[1], p->buf[1],
225                                           &p->terms[1],p->buf[0]);
226             else
227             {
228                 zint seqno[500]; /* FIXME - why 500 ?? */
229                 int n = 0;
230
231                 seqno[n++] = (*kctrl->getseq)(p->buf[0]);
232                 while ((p->more[0] = rset_read(p->rfd[0],
233                                                p->buf[0], &p->terms[0])))
234                 {
235                     cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
236                     if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
237                         break;
238                     if (n < 500)
239                         seqno[n++] = (*kctrl->getseq)(p->buf[0]);
240                 }
241                 /* set up return buffer.. (save buf[1]) */
242                 memcpy(buf, p->buf[1], kctrl->key_size);
243                 if (term)
244                     *term = p->terms[1];
245                 while (1)
246                 {
247                     for (i = 0; i < n; i++)
248                     {
249                         zint diff = (*kctrl->getseq)(p->buf[1]) - seqno[i];
250                         int excl = info->exclusion;
251                         if (!info->ordered && diff < 0)
252                             diff = -diff;
253                         switch (info->relation)
254                         {
255                         case 1:      /* < */
256                             if (diff < info->distance && diff >= 0)
257                                 excl = !excl;
258                             break;
259                         case 2:      /* <= */
260                             if (diff <= info->distance && diff >= 0)
261                                 excl = !excl;
262                             break;
263                         case 3:      /* == */
264                             if (diff == info->distance && diff >= 0)
265                                 excl = !excl;
266                             break;
267                         case 4:      /* >= */
268                             if (diff >= info->distance && diff >= 0)
269                                 excl = !excl;
270                             break;
271                         case 5:      /* > */
272                             if (diff > info->distance && diff >= 0)
273                                 excl = !excl;
274                             break;
275                         case 6:      /* != */
276                             if (diff != info->distance && diff >= 0)
277                                 excl = !excl;
278                             break;
279                         }
280                         if (excl)
281                         {
282                             p->more[1] = rset_read( p->rfd[1], p->buf[1],
283                                                     &p->terms[1]);
284                             p->hits++;
285                             return 1;
286                         }
287                     }
288                     p->more[1] = rset_read(p->rfd[1], p->buf[1], &p->terms[1]);
289                     if (!p->more[1])
290                         break;
291                     cmp = (*kctrl->cmp)(buf, p->buf[1]);
292                     if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
293                         break;
294                 }
295             }
296         }
297     }
298     return 0;
299 }
300
301
302 static int r_read(RSFD rfd, void *buf, TERMID *term)
303 {
304     return r_forward(rfd, buf, term, 0);
305 }
306
307 static void r_pos(RSFD rfd, double *current, double *total)
308 {
309     RSET ct = rfd->rset;
310     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
311     int i;
312     double ratio = 0.0;
313
314     for (i = 0; i < ct->no_children; i++)
315     {
316         double cur, tot;
317         rset_pos(p->rfd[i], &cur, &tot);
318         if (tot > 0.0)
319         {
320             double nratio = cur / tot;
321             if (ratio < nratio)
322                 ratio = nratio;
323         }
324     }
325     *current = (double) p->hits;
326     if (ratio > 0.0)
327         *total = *current/ratio;
328     else
329         *total = 0.0;
330
331     yaz_log(YLOG_DEBUG, "prox_pos: [%d] %0.1f/%0.1f= %0.4f ",
332             i, *current, *total, ratio);
333 }
334
335 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
336 {
337     int i;
338     for (i = 0; i < ct->no_children; i++)
339         rset_getterms(ct->children[i], terms, maxterms, curterm);
340 }
341
342 /*
343  * Local variables:
344  * c-basic-offset: 4
345  * c-file-style: "Stroustrup"
346  * indent-tabs-mode: nil
347  * End:
348  * vim: shiftwidth=4 tabstop=8 expandtab
349  */
350