Report 'too many characters in search..'
[idzebra-moved-to-github.git] / rset / rsprox.c
1 /* This file is part of the Zebra server.
2    Copyright (C) 1994-2011 Index Data
3
4 Zebra is free software; you can redistribute it and/or modify it under
5 the terms of the GNU General Public License as published by the Free
6 Software Foundation; either version 2, or (at your option) any later
7 version.
8
9 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
10 WARRANTY; without even the implied warranty of MERCHANTABILITY or
11 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
12 for more details.
13
14 You should have received a copy of the GNU General Public License
15 along with this program; if not, write to the Free Software
16 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
17
18 */
19
20 #if HAVE_CONFIG_H
21 #include <config.h>
22 #endif
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <assert.h>
27
28 #include <idzebra/util.h>
29 #include <rset.h>
30
31 #ifndef RSET_DEBUG
32 #define RSET_DEBUG 0
33 #endif
34
35 static RSFD r_open(RSET ct, int flag);
36 static void r_close(RSFD rfd);
37 static void r_delete(RSET ct);
38 static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf);
39 static int r_read(RSFD rfd, void *buf, TERMID *term);
40 static int r_write(RSFD rfd, const void *buf);
41 static void r_pos(RSFD rfd, double *current, double *total);
42 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
43
44 static const struct rset_control control = 
45 {
46     "prox",
47     r_delete,
48     r_get_terms,
49     r_open,
50     r_close,
51     r_forward,
52     r_pos,
53     r_read,
54     r_write,
55 };
56
/* Parameters of one proximity operator; filled in by rset_create_prox
 * and only read afterwards. */
struct rset_prox_info {
    int ordered;    /* zero: a negative sequence-number difference is
                       replaced by its absolute value before testing */
    int exclusion;  /* non-zero: the outcome of the relation test is
                       inverted */
    int relation;   /* 1: <   2: <=   3: ==   4: >=   5: >   6: != */
    int distance;   /* value the sequence-number difference is
                       compared against */
};
63
64 struct rset_prox_rfd {
65     RSFD *rfd;
66     char **buf;  /* lookahead key buffers */
67     char *more;  /* more in each lookahead? */
68     TERMID *terms; /* lookahead terms */
69     zint hits;
70 };    
71
72
73 RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol,
74                       int scope,
75                       int rset_no, RSET *rset,
76                       int ordered, int exclusion,
77                       int relation, int distance)
78 {
79     RSET rnew = rset_create_base(&control, nmem, kcontrol, scope, 0,
80                                  rset_no, rset);
81     struct rset_prox_info *info;
82     info = (struct rset_prox_info *) nmem_malloc(rnew->nmem,sizeof(*info));
83     info->ordered = ordered;
84     info->exclusion = exclusion;
85     info->relation = relation;
86     info->distance = distance;
87     rnew->priv = info;
88     return rnew;
89 }
90
91 static void r_delete(RSET ct)
92 {
93 }
94
95 static RSFD r_open(RSET ct, int flag)
96 {
97     RSFD rfd;
98     struct rset_prox_rfd *p;
99     int i;
100
101     if (flag & RSETF_WRITE)
102     {
103         yaz_log(YLOG_FATAL, "prox set type is read-only");
104         return NULL;
105     }
106     rfd = rfd_create_base(ct);
107     if (rfd->priv)
108         p = (struct rset_prox_rfd *)(rfd->priv);
109     else
110     {
111         p = (struct rset_prox_rfd *) nmem_malloc(ct->nmem,sizeof(*p));
112         rfd->priv = p;
113         p->more = nmem_malloc(ct->nmem,sizeof(*p->more) * ct->no_children);
114         p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * ct->no_children);
115         p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * ct->no_children);
116         for (i = 0; i < ct->no_children; i++) 
117         {
118             p->buf[i] = nmem_malloc(ct->nmem,ct->keycontrol->key_size);
119             p->terms[i] = 0;
120         }
121         p->rfd = nmem_malloc(ct->nmem,sizeof(*p->rfd) * ct->no_children);
122     }
123     yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p] n=%d", 
124             ct->control->desc, rfd, ct->no_children);
125     
126     for (i = 0; i < ct->no_children; i++)
127     {
128         p->rfd[i] = rset_open(ct->children[i], RSETF_READ);
129         p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]);
130     }
131     p->hits = 0;
132     return rfd;
133 }
134
135 static void r_close(RSFD rfd)
136 {
137     RSET ct = rfd->rset;
138     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
139     
140     int i;
141     for (i = 0; i < ct->no_children; i++)
142         rset_close(p->rfd[i]);
143 }
144
145 static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
146 {
147     RSET ct = rfd->rset;
148     struct rset_prox_info *info = (struct rset_prox_info *)(ct->priv);
149     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
150     const struct rset_key_control *kctrl = ct->keycontrol;
151     int cmp = 0;
152     int i;
153
154     if (untilbuf)
155     {
156         /* it is enough to forward first one. Other will follow. */
157         if (p->more[0] &&   /* was: cmp >=2 */
158             ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) ) 
159             p->more[0] = rset_forward(p->rfd[0], p->buf[0], 
160                                       &p->terms[0], untilbuf);
161     }
162     if (info->ordered && info->relation <= 3 && info->exclusion == 0)
163     {
164         while (p->more[0]) 
165         {
166             for (i = 1; i < ct->no_children; i++)
167             {
168                 if (!p->more[i]) 
169                 {
170                     p->more[0] = 0; /* saves us a goto out of while loop. */
171                     break;
172                 }
173                 cmp = (*kctrl->cmp)(p->buf[i], p->buf[i-1]);
174                 if (cmp >= rfd->rset->scope)  /* not same record */
175                 {
176                     p->more[i-1] = rset_forward(p->rfd[i-1],
177                                                 p->buf[i-1],
178                                                 &p->terms[i-1],
179                                                 p->buf[i]);
180                     break;
181                 }
182                 else if (cmp > 0) /* within record and ordered */
183                 {
184                     zint diff = (*kctrl->getseq)(p->buf[i]) -
185                         (*kctrl->getseq)(p->buf[i-1]);
186                     if (info->relation == 3 && diff == info->distance)
187                         continue;
188                     else if (info->relation == 2 && diff <= info->distance)
189                         continue;
190                     else if (info->relation == 1 && diff < info->distance)
191                         continue;
192                     
193                     p->more[i-1] = rset_read(p->rfd[i-1], p->buf[i-1],
194                                              &p->terms[i-1]);
195                     break;
196                 }
197                 else  /* within record - wrong order */
198                 {
199                     p->more[i] = rset_forward(p->rfd[i], p->buf[i],
200                                               &p->terms[i], p->buf[i-1]);
201                     break;
202                 }
203             }
204             if (i == ct->no_children)
205             {
206                 i = ct->no_children-1;
207                 memcpy(buf, p->buf[i], kctrl->key_size);
208                 if (term)
209                     *term = p->terms[i];
210                 p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]);
211                 p->hits++;
212                 return 1;
213             }
214         }
215     }
216     else if (ct->no_children == 2)
217     {
218         while (p->more[0] && p->more[1]) 
219         {
220             int cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
221             if ( cmp <= - rfd->rset->scope) /* cmp<-1*/
222                 p->more[0] = rset_forward(p->rfd[0], p->buf[0], 
223                                           &p->terms[0],p->buf[1]);
224             else if ( cmp >= rfd->rset->scope ) /* cmp>1 */
225                 p->more[1] = rset_forward(p->rfd[1], p->buf[1], 
226                                           &p->terms[1],p->buf[0]);
227             else
228             {
229                 zint seqno[500]; /* FIXME - why 500 ?? */
230                 int n = 0;
231                 
232                 seqno[n++] = (*kctrl->getseq)(p->buf[0]);
233                 while ((p->more[0] = rset_read(p->rfd[0],
234                                                p->buf[0], &p->terms[0])))
235                 {
236                     cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
237                     if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
238                         break;
239                     if (n < 500)
240                         seqno[n++] = (*kctrl->getseq)(p->buf[0]);
241                 }
242                 /* set up return buffer.. (save buf[1]) */
243                 memcpy(buf, p->buf[1], kctrl->key_size);
244                 if (term)
245                     *term = p->terms[1];
246                 while (1)
247                 {
248                     for (i = 0; i < n; i++)
249                     {
250                         zint diff = (*kctrl->getseq)(p->buf[1]) - seqno[i];
251                         int excl = info->exclusion;
252                         if (!info->ordered && diff < 0)
253                             diff = -diff;
254                         switch (info->relation)
255                         {
256                         case 1:      /* < */
257                             if (diff < info->distance && diff >= 0)
258                                 excl = !excl;
259                             break;
260                         case 2:      /* <= */
261                             if (diff <= info->distance && diff >= 0)
262                                 excl = !excl;
263                             break;
264                         case 3:      /* == */
265                             if (diff == info->distance && diff >= 0)
266                                 excl = !excl;
267                             break;
268                         case 4:      /* >= */
269                             if (diff >= info->distance && diff >= 0)
270                                 excl = !excl;
271                             break;
272                         case 5:      /* > */
273                             if (diff > info->distance && diff >= 0)
274                                 excl = !excl;
275                             break;
276                         case 6:      /* != */
277                             if (diff != info->distance && diff >= 0)
278                                 excl = !excl;
279                             break;
280                         }
281                         if (excl)
282                         {
283                             p->more[1] = rset_read( p->rfd[1], p->buf[1],
284                                                     &p->terms[1]);
285                             p->hits++;
286                             return 1;
287                         }
288                     }
289                     p->more[1] = rset_read(p->rfd[1], p->buf[1], &p->terms[1]);
290                     if (!p->more[1])
291                         break;
292                     cmp = (*kctrl->cmp)(buf, p->buf[1]);
293                     if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
294                         break;
295                 }
296             }
297         }
298     }
299     return 0;
300 }
301
302
303 static int r_read(RSFD rfd, void *buf, TERMID *term)
304 {
305     return r_forward(rfd, buf, term, 0);
306 }
307
308 static int r_write(RSFD rfd, const void *buf)
309 {
310     yaz_log(YLOG_FATAL, "prox set type is read-only");
311     return -1;
312 }
313
314 static void r_pos(RSFD rfd, double *current, double *total)
315 {
316     RSET ct = rfd->rset;
317     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
318     int i;
319     double ratio = 0.0;
320     
321     for (i = 0; i < ct->no_children; i++)
322     {
323         double cur, tot;
324         rset_pos(p->rfd[i], &cur, &tot);
325         if (tot > 0.0)
326         {
327             double nratio = cur / tot;
328             if (ratio < nratio)
329                 ratio = nratio;
330         }
331     }
332     *current = (double) p->hits;
333     if (ratio > 0.0)
334         *total = *current/ratio;
335     else
336         *total = 0.0;
337     
338     yaz_log(YLOG_DEBUG, "prox_pos: [%d] %0.1f/%0.1f= %0.4f ",
339             i, *current, *total, ratio);
340 }
341
342 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
343 {
344     int i;
345     for (i = 0; i < ct->no_children; i++)
346         rset_getterms(ct->children[i], terms, maxterms, curterm);
347 }
348
349 /*
350  * Local variables:
351  * c-basic-offset: 4
352  * c-file-style: "Stroustrup"
353  * indent-tabs-mode: nil
354  * End:
355  * vim: shiftwidth=4 tabstop=8 expandtab
356  */
357