6972828915a45ddefec6facb48b01837b3868e35
[idzebra-moved-to-github.git] / rset / rsprox.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2009 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <assert.h>
24
25 #include <idzebra/util.h>
26 #include <rset.h>
27
28 #ifndef RSET_DEBUG
29 #define RSET_DEBUG 0
30 #endif
31
32 static RSFD r_open (RSET ct, int flag);
33 static void r_close (RSFD rfd);
34 static void r_delete (RSET ct);
35 static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf);
36 static int r_read (RSFD rfd, void *buf, TERMID *term);
37 static int r_write (RSFD rfd, const void *buf);
38 static void r_pos (RSFD rfd, double *current, double *total);
39 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
40
41 static const struct rset_control control = 
42 {
43     "prox",
44     r_delete,
45     r_get_terms,
46     r_open,
47     r_close,
48     r_forward,
49     r_pos,
50     r_read,
51     r_write,
52 };
53
/* Per-set proximity settings, filled in once by rset_create_prox(). */
struct rset_prox_info {
    int ordered;    /* non-zero: a negative position difference never matches */
    int exclusion;  /* non-zero: inverts the outcome of the relation test */
    int relation;   /* 1 '<', 2 '<=', 3 '==', 4 '>=', 5 '>', 6 '!=' */
    int distance;   /* value the position difference is compared against */
};
60
61 struct rset_prox_rfd {
62     RSFD *rfd;
63     char **buf;  /* lookahead key buffers */
64     char *more;  /* more in each lookahead? */
65     TERMID *terms; /* lookahead terms */
66     zint hits;
67 };    
68
69
70 RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol,
71                       int scope,
72                       int rset_no, RSET *rset,
73                       int ordered, int exclusion,
74                       int relation, int distance)
75 {
76     RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, 0,
77                                  rset_no, rset);
78     struct rset_prox_info *info;
79     info = (struct rset_prox_info *) nmem_malloc(rnew->nmem,sizeof(*info));
80     info->ordered = ordered;
81     info->exclusion = exclusion;
82     info->relation = relation;
83     info->distance = distance;
84     rnew->priv = info;
85     return rnew;
86 }
87
88 static void r_delete (RSET ct)
89 {
90 }
91
92 static RSFD r_open (RSET ct, int flag)
93 {
94     RSFD rfd;
95     struct rset_prox_rfd *p;
96     int i;
97
98     if (flag & RSETF_WRITE)
99     {
100         yaz_log(YLOG_FATAL, "prox set type is read-only");
101         return NULL;
102     }
103     rfd = rfd_create_base(ct);
104     if (rfd->priv)
105         p = (struct rset_prox_rfd *)(rfd->priv);
106     else {
107         p = (struct rset_prox_rfd *) nmem_malloc(ct->nmem,sizeof(*p));
108         rfd->priv = p;
109         p->more = nmem_malloc (ct->nmem,sizeof(*p->more) * ct->no_children);
110         p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * ct->no_children);
111         p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * ct->no_children);
112         for (i = 0; i < ct->no_children; i++) 
113         {
114             p->buf[i] = nmem_malloc(ct->nmem,ct->keycontrol->key_size);
115             p->terms[i] = 0;
116         }
117         p->rfd = nmem_malloc(ct->nmem,sizeof(*p->rfd) * ct->no_children);
118     }
119     yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p] n=%d", 
120             ct->control->desc, rfd, ct->no_children);
121
122     for (i = 0; i < ct->no_children; i++) {
123         p->rfd[i] = rset_open (ct->children[i], RSETF_READ);
124         p->more[i] = rset_read (p->rfd[i], p->buf[i], &p->terms[i]);
125     }
126     p->hits = 0;
127     return rfd;
128 }
129
130 static void r_close (RSFD rfd)
131 {
132     RSET ct = rfd->rset;
133     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
134     
135     int i;
136     for (i = 0; i<ct->no_children; i++)
137         rset_close(p->rfd[i]);
138 }
139
140 static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
141 {
142     RSET ct = rfd->rset;
143     struct rset_prox_info *info = (struct rset_prox_info *)(ct->priv);
144     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
145     const struct rset_key_control *kctrl = ct->keycontrol;
146     int cmp = 0;
147     int i;
148
149     if (untilbuf)
150     {
151         /* it is enough to forward first one. Other will follow. */
152         if ( p->more[0] &&   /* was: cmp >=2 */
153            ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) ) 
154             p->more[0] = rset_forward(p->rfd[0], p->buf[0], 
155                                       &p->terms[0], untilbuf);
156     }
157     if (info->ordered && info->relation == 3 && info->exclusion == 0
158         && info->distance == 1)
159     {
160         while (p->more[0]) 
161         {
162             for (i = 1; i < ct->no_children; i++)
163             {
164                 if (!p->more[i]) 
165                 {
166                     p->more[0] = 0; /* saves us a goto out of while loop. */
167                     break;
168                 }
169                 cmp = (*kctrl->cmp) (p->buf[i], p->buf[i-1]);
170                 if (cmp >= rfd->rset->scope )  /* cmp>1 */
171                 {
172                     p->more[i-1] = rset_forward (p->rfd[i-1],
173                                                  p->buf[i-1],
174                                                  &p->terms[i-1],
175                                                  p->buf[i]);
176                     break;
177                 }
178                 else if ( cmp>0 ) /* cmp == 1*/
179                 {
180                     if ((*kctrl->getseq)(p->buf[i-1]) +1 != 
181                         (*kctrl->getseq)(p->buf[i]))
182                     { /* FIXME - We need more flexible multilevel stuff */
183                         p->more[i-1] = rset_read ( p->rfd[i-1], p->buf[i-1],
184                                                    &p->terms[i-1]);
185                         break;
186                     }
187                 }
188                 else
189                 {
190                     p->more[i] = rset_forward (p->rfd[i], 
191                                   p->buf[i], &p->terms[i], p->buf[i-1]);
192                     break;
193                 }
194             }
195             if (i == ct->no_children)
196             {
197                 i = ct->no_children-1;
198                 memcpy(buf, p->buf[i], kctrl->key_size);
199                 if (term)
200                     *term = p->terms[i];
201                 p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]);
202                 p->hits++;
203                 return 1;
204             }
205         }
206     }
207     else if (ct->no_children == 2)
208     {
209         while (p->more[0] && p->more[1]) 
210         {
211             int cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
212             if ( cmp <= - rfd->rset->scope) /* cmp<-1*/
213                 p->more[0] = rset_forward (p->rfd[0], p->buf[0], 
214                                            &p->terms[0],p->buf[1]);
215             else if ( cmp >= rfd->rset->scope ) /* cmp>1 */
216                 p->more[1] = rset_forward (p->rfd[1], p->buf[1], 
217                                            &p->terms[1],p->buf[0]);
218             else
219             {
220                 zint seqno[500]; /* FIXME - why 500 ?? */
221                 int n = 0;
222                 
223                 seqno[n++] = (*kctrl->getseq)(p->buf[0]);
224                 while ((p->more[0] = rset_read (p->rfd[0],
225                                         p->buf[0], &p->terms[0])))
226                 {
227                     cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
228                     if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
229                         break;
230                     if (n < 500)
231                         seqno[n++] = (*kctrl->getseq)(p->buf[0]);
232                 }
233                 for (i = 0; i<n; i++)
234                 {
235                     zint diff = (*kctrl->getseq)(p->buf[1]) - seqno[i];
236                     int excl = info->exclusion;
237                     if (!info->ordered && diff < 0)
238                         diff = -diff;
239                     switch (info->relation)
240                     {
241                     case 1:      /* < */
242                         if (diff < info->distance && diff >= 0)
243                             excl = !excl;
244                         break;
245                     case 2:      /* <= */
246                         if (diff <= info->distance && diff >= 0)
247                             excl = !excl;
248                         break;
249                     case 3:      /* == */
250                         if (diff == info->distance && diff >= 0)
251                             excl = !excl;
252                         break;
253                     case 4:      /* >= */
254                         if (diff >= info->distance && diff >= 0)
255                             excl = !excl;
256                         break;
257                     case 5:      /* > */
258                         if (diff > info->distance && diff >= 0)
259                             excl = !excl;
260                         break;
261                     case 6:      /* != */
262                         if (diff != info->distance && diff >= 0)
263                             excl = !excl;
264                         break;
265                     }
266                     if (excl)
267                     {
268                         memcpy (buf, p->buf[1], kctrl->key_size);
269                         if (term)
270                             *term = p->terms[1];
271                         p->more[1] = rset_read ( p->rfd[1], p->buf[1],
272                                                  &p->terms[1]);
273                         p->hits++;
274                         return 1;
275                     }
276                 }
277                 p->more[1] = rset_read (p->rfd[1], p->buf[1], &p->terms[1]);
278             }
279         }
280     }
281     return 0;
282 }
283
284
285 static int r_read (RSFD rfd, void *buf, TERMID *term)
286 {
287     return r_forward(rfd, buf, term, 0);
288 }
289
290 static int r_write (RSFD rfd, const void *buf)
291 {
292     yaz_log(YLOG_FATAL, "prox set type is read-only");
293     return -1;
294 }
295
296 static void r_pos (RSFD rfd, double *current, double *total)
297 {
298     RSET ct = rfd->rset;
299     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
300     int i;
301     double r = 0.0;
302     double cur, tot = -1.0;
303     double scur = 0.0, stot = 0.0;
304
305     yaz_log(YLOG_DEBUG, "rsprox_pos");
306
307     for (i = 0; i < ct->no_children; i++)
308     {
309         rset_pos(p->rfd[i],  &cur, &tot);
310         if (tot>0) {
311             scur += cur;
312             stot += tot;
313         }
314     }
315     if (tot <0) {  /* nothing found */
316         *current = -1;
317         *total = -1;
318     } else if (tot < 1) { /* most likely tot==0 */
319         *current = 0;
320         *total = 0;
321     } else {
322         r = scur/stot; 
323         *current = (double) p->hits;
324         *total=*current/r ; 
325     }
326     yaz_log(YLOG_DEBUG,"prox_pos: [%d] %0.1f/%0.1f= %0.4f ",
327                     i,*current, *total, r);
328 }
329
330 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
331 {
332     int i;
333     for (i = 0; i<ct->no_children; i++)
334         rset_getterms(ct->children[i], terms, maxterms, curterm);
335 }
336
337 /*
338  * Local variables:
339  * c-basic-offset: 4
340  * c-file-style: "Stroustrup"
341  * indent-tabs-mode: nil
342  * End:
343  * vim: shiftwidth=4 tabstop=8 expandtab
344  */
345