Moved AC_CHECK_HEADERS near top. Added unistd.h in check
[idzebra-moved-to-github.git] / index / trunc.c
1 /* $Id: trunc.c,v 1.51 2005-01-15 20:47:15 adam Exp $
2    Copyright (C) 1995-2005
3    Index Data ApS
4
5 This file is part of the Zebra server.
6
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
10 version.
11
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
15 for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra.  If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
20 02111-1307, USA.
21 */
22
23
24 #include <stdio.h>
25 #include <assert.h>
26
27 #include "index.h"
28 #include <rset.h>
29
30 struct trunc_info {
31     int  *ptr;
32     int  *indx;
33     char **heap;
34     int  heapnum;
35     int  (*cmp)(const void *p1, const void *p2);
36     int  keysize;
37     char *swapbuf;
38     char *tmpbuf;
39     char *buf;
40 };
41
42 static void heap_swap (struct trunc_info *ti, int i1, int i2)
43 {
44     int swap;
45
46     swap = ti->ptr[i1];
47     ti->ptr[i1] = ti->ptr[i2];
48     ti->ptr[i2] = swap;
49 }
50
51 static void heap_delete (struct trunc_info *ti)
52 {
53     int cur = 1, child = 2;
54
55     heap_swap (ti, 1, ti->heapnum--);
56     while (child <= ti->heapnum) {
57         if (child < ti->heapnum &&
58             (*ti->cmp)(ti->heap[ti->ptr[child]],
59                        ti->heap[ti->ptr[1+child]]) > 0)
60             child++;
61         if ((*ti->cmp)(ti->heap[ti->ptr[cur]],
62                        ti->heap[ti->ptr[child]]) > 0)
63         {
64             heap_swap (ti, cur, child);
65             cur = child;
66             child = 2*cur;
67         }
68         else
69             break;
70     }
71 }
72
73 static void heap_insert (struct trunc_info *ti, const char *buf, int indx)
74 {
75     int cur, parent;
76
77     cur = ++(ti->heapnum);
78     memcpy (ti->heap[ti->ptr[cur]], buf, ti->keysize);
79     ti->indx[ti->ptr[cur]] = indx;
80     parent = cur/2;
81     while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]],
82                                 ti->heap[ti->ptr[cur]]) > 0)
83     {
84         heap_swap (ti, cur, parent);
85         cur = parent;
86         parent = cur/2;
87     }
88 }
89
90 static struct trunc_info *heap_init (int size, int key_size,
91                                      int (*cmp)(const void *p1,
92                                                 const void *p2))
93 {
94     struct trunc_info *ti = (struct trunc_info *) xmalloc(sizeof(*ti));
95     int i;
96
97     ++size;
98     ti->heapnum = 0;
99     ti->keysize = key_size;
100     ti->cmp = cmp;
101     ti->indx = (int *) xmalloc(size * sizeof(*ti->indx));
102     ti->heap = (char **) xmalloc(size * sizeof(*ti->heap));
103     ti->ptr = (int *) xmalloc(size * sizeof(*ti->ptr));
104     ti->swapbuf = (char *) xmalloc(ti->keysize);
105     ti->tmpbuf = (char *) xmalloc(ti->keysize);
106     ti->buf = (char *) xmalloc(size * ti->keysize);
107     for (i = size; --i >= 0; )
108     {
109         ti->ptr[i] = i;
110         ti->heap[i] = ti->buf + ti->keysize * i;
111     }
112     return ti;
113 }
114
115 static void heap_close (struct trunc_info *ti)
116 {
117     xfree(ti->ptr);
118     xfree(ti->indx);
119     xfree(ti->heap);
120     xfree(ti->swapbuf);
121     xfree(ti->tmpbuf);
122     xfree(ti->buf);
123     xfree(ti);
124 }
125
126 static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
127                           const char *flags, ISAMS_P *isam_p, int from, int to,
128                           int merge_chunk, int preserve_position,
129                           int term_type, NMEM rset_nmem,
130                           const struct key_control *kctrl, int scope,
131                           TERMID termid)
132 {
133     RSET result; 
134     RSFD result_rsfd;
135     int nn = 0;
136
137     /*
138     rset_temp_parms parms;
139     parms.cmp = key_compare_it;
140     parms.key_size = sizeof(struct it_key);
141     parms.temp_path = res_get (zi->res, "setTmpDir");
142     result = rset_create (rset_kind_temp, &parms);
143     */
144     result = rstemp_create( rset_nmem,kctrl, scope,
145             res_get (zi->res, "setTmpDir"), termid);
146     result_rsfd = rset_open (result, RSETF_WRITE);
147
148     if (to - from > merge_chunk)
149     {
150         RSFD *rsfd;
151         RSET *rset;
152         int i, i_add = (to-from)/merge_chunk + 1;
153         struct trunc_info *ti;
154         int rscur = 0;
155         int rsmax = (to-from)/i_add + 1;
156         
157         rset = (RSET *) xmalloc(sizeof(*rset) * rsmax);
158         rsfd = (RSFD *) xmalloc(sizeof(*rsfd) * rsmax);
159         
160         for (i = from; i < to; i += i_add)
161         {
162             if (i_add <= to - i)
163                 rset[rscur] = rset_trunc_r (zi, term, length, flags,
164                                             isam_p, i, i+i_add,
165                                             merge_chunk, preserve_position,
166                                             term_type, rset_nmem, 
167                                             kctrl, scope,termid);
168             else
169                 rset[rscur] = rset_trunc_r (zi, term, length, flags,
170                                             isam_p, i, to,
171                                             merge_chunk, preserve_position,
172                                             term_type, rset_nmem, 
173                                             kctrl, scope,termid);
174             rscur++;
175         }
176         ti = heap_init (rscur, sizeof(struct it_key), key_compare_it);
177         for (i = rscur; --i >= 0; )
178         {
179             rsfd[i] = rset_open (rset[i], RSETF_READ);
180             if (rset_read(rsfd[i], ti->tmpbuf,0))
181                 heap_insert (ti, ti->tmpbuf, i);
182             else
183             {
184                 rset_close (rsfd[i]);
185                 rset_delete (rset[i]);
186             }
187         }
188         while (ti->heapnum)
189         {
190             int n = ti->indx[ti->ptr[1]];
191
192             rset_write (result_rsfd, ti->heap[ti->ptr[1]]);
193             nn++;
194
195             while (1)
196             {
197                 if (!rset_read (rsfd[n], ti->tmpbuf,0))
198                 {
199                     heap_delete (ti);
200                     rset_close (rsfd[n]);
201                     rset_delete (rset[n]);
202                     break;
203                 }
204                 if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
205                 {
206                     heap_delete (ti);
207                     heap_insert (ti, ti->tmpbuf, n);
208                     break;
209                 }
210             }
211         }
212         xfree(rset);
213         xfree(rsfd);
214         heap_close (ti);
215     }
216     else if (zi->reg->isamc)
217     {
218         ISAMC_PP *ispt;
219         int i;
220         struct trunc_info *ti;
221
222         ispt = (ISAMC_PP *) xmalloc(sizeof(*ispt) * (to-from));
223
224         ti = heap_init (to-from, sizeof(struct it_key),
225                         key_compare_it);
226         for (i = to-from; --i >= 0; )
227         {
228             ispt[i] = isc_pp_open (zi->reg->isamc, isam_p[from+i]);
229             if (isc_pp_read (ispt[i], ti->tmpbuf))
230                 heap_insert (ti, ti->tmpbuf, i);
231             else
232                 isc_pp_close (ispt[i]);
233         }
234         while (ti->heapnum)
235         {
236             int n = ti->indx[ti->ptr[1]];
237
238             rset_write (result_rsfd, ti->heap[ti->ptr[1]]);
239             nn++;
240             if (preserve_position)
241             {
242                 heap_delete (ti);
243                 if (isc_pp_read (ispt[n], ti->tmpbuf))
244                     heap_insert (ti, ti->tmpbuf, n);
245                 else
246                     isc_pp_close (ispt[n]);
247             }
248             else
249             {
250                 while (1)
251                 {
252                     if (!isc_pp_read (ispt[n], ti->tmpbuf))
253                     {
254                         heap_delete (ti);
255                         isc_pp_close (ispt[n]);
256                         break;
257                     }
258                     if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
259                     {
260                         heap_delete (ti);
261                         heap_insert (ti, ti->tmpbuf, n);
262                         break;
263                     }
264                 }
265             }
266         }
267         heap_close (ti);
268         xfree(ispt);
269     }
270     else if (zi->reg->isams)
271     {
272         ISAMS_PP *ispt;
273         int i;
274         struct trunc_info *ti;
275         int nn = 0;
276
277         ispt = (ISAMS_PP *) xmalloc(sizeof(*ispt) * (to-from));
278
279         ti = heap_init (to-from, sizeof(struct it_key),
280                         key_compare_it);
281         for (i = to-from; --i >= 0; )
282         {
283             ispt[i] = isams_pp_open (zi->reg->isams, isam_p[from+i]);
284             if (isams_pp_read (ispt[i], ti->tmpbuf))
285                 heap_insert (ti, ti->tmpbuf, i);
286             else
287                 isams_pp_close (ispt[i]);
288         }
289         while (ti->heapnum)
290         {
291             int n = ti->indx[ti->ptr[1]];
292
293             rset_write (result_rsfd, ti->heap[ti->ptr[1]]);
294             nn++;
295             while (1)
296             {
297                 if (!isams_pp_read (ispt[n], ti->tmpbuf))
298                 {
299                     heap_delete (ti);
300                     isams_pp_close (ispt[n]);
301                     break;
302                 }
303                 if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
304                 {
305                     heap_delete (ti);
306                     heap_insert (ti, ti->tmpbuf, n);
307                     break;
308                 }
309             }
310         }
311         heap_close (ti);
312         xfree(ispt);
313     }
314     else if (zi->reg->isamb)
315     {
316         ISAMB_PP *ispt;
317         int i;
318         struct trunc_info *ti;
319
320         ispt = (ISAMB_PP *) xmalloc(sizeof(*ispt) * (to-from));
321
322         ti = heap_init (to-from, sizeof(struct it_key),
323                         key_compare_it);
324         for (i = to-from; --i >= 0; )
325         {
326             if (isam_p[from+i]) {
327                 ispt[i] = isamb_pp_open (zi->reg->isamb, isam_p[from+i], scope);
328                 if (isamb_pp_read (ispt[i], ti->tmpbuf))
329                     heap_insert (ti, ti->tmpbuf, i);
330                 else
331                     isamb_pp_close (ispt[i]);
332             }
333         }
334         while (ti->heapnum)
335         {
336             int n = ti->indx[ti->ptr[1]];
337
338             rset_write (result_rsfd, ti->heap[ti->ptr[1]]);
339             nn++;
340
341             if (preserve_position)
342             {
343                 heap_delete (ti);
344                 if (isamb_pp_read (ispt[n], ti->tmpbuf))
345                     heap_insert (ti, ti->tmpbuf, n);
346                 else
347                     isamb_pp_close (ispt[n]);
348             }
349             else
350             {
351                 while (1)
352                 {
353                     if (!isamb_pp_read (ispt[n], ti->tmpbuf))
354                     {
355                         heap_delete (ti);
356                         isamb_pp_close (ispt[n]);
357                         break;
358                     }
359                     if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
360                     {
361                         heap_delete (ti);
362                         heap_insert (ti, ti->tmpbuf, n);
363                         break;
364                     }
365                 }
366             }
367         }
368         heap_close (ti);
369         xfree(ispt);
370     }
371     else
372         yaz_log (YLOG_WARN, "Unknown isam set in rset_trunc_r");
373
374     rset_close (result_rsfd);
375     return result;
376 }
377
378 static int isams_trunc_cmp (const void *p1, const void *p2)
379 {
380     ISAMS_P i1 = *(ISAMS_P*) p1;
381     ISAMS_P i2 = *(ISAMS_P*) p2;
382
383     if (i1 > i2)
384         return 1;
385     else if (i1 < i2)
386         return -1;
387     return 0;
388 }
389
390 static int isamc_trunc_cmp (const void *p1, const void *p2)
391 {
392     ISAMC_P i1 = *(ISAMC_P*) p1;
393     ISAMC_P i2 = *(ISAMC_P*) p2;
394     zint d;
395
396     d = (isc_type (i1) - isc_type (i2));
397     if (d == 0)
398         d = isc_block (i1) - isc_block (i2);
399     if (d > 0)
400         return 1;
401     else if (d < 0)
402         return -1;
403     return 0;
404 }
405
406 RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no,
407                  const char *term, int length, const char *flags,
408                  int preserve_position, int term_type, NMEM rset_nmem,
409                  const struct key_control *kctrl, int scope)
410 {
411     TERMID termid;
412     yaz_log (YLOG_DEBUG, "rset_trunc no=%d", no);
413     if (no < 1)
414         return rsnull_create (rset_nmem,kctrl);
415     termid = rset_term_create(term, length, flags, term_type,rset_nmem);
416     if (zi->reg->isams)
417     {
418         if (no == 1)
419             return rsisams_create(rset_nmem, kctrl, scope,
420                     zi->reg->isams, *isam_p, termid);
421         qsort (isam_p, no, sizeof(*isam_p), isams_trunc_cmp);
422     }
423     else if (zi->reg->isamc)
424     {
425         if (no == 1)
426             return rsisamc_create(rset_nmem, kctrl, scope,
427                     zi->reg->isamc, *isam_p, termid);
428         qsort (isam_p, no, sizeof(*isam_p), isamc_trunc_cmp);
429     }
430     else if (zi->reg->isamb)
431     {
432         if (no == 1)
433             return rsisamb_create(rset_nmem,kctrl, scope,
434                     zi->reg->isamb, *isam_p, termid);
435         else if (no <10000 ) /* FIXME - hardcoded number */
436         {
437             RSET r;
438             RSET *rsets = xmalloc(no*sizeof(RSET)); /* use nmem! */
439             int i;
440             for (i = 0; i<no; i++)
441                 rsets[i]=rsisamb_create(rset_nmem, kctrl, scope,
442                     zi->reg->isamb, isam_p[i], termid);
443             r = rsmulti_or_create( rset_nmem, kctrl, scope, no, rsets);
444             xfree(rsets);
445             return r;
446         } 
447         qsort (isam_p, no, sizeof(*isam_p), isamc_trunc_cmp);
448     }
449     else
450     {
451         yaz_log (YLOG_WARN, "Unknown isam set in rset_trunc");
452         return rsnull_create (rset_nmem, kctrl);
453     }
454     return rset_trunc_r (zi, term, length, flags, isam_p, 0, no, 100,
455                          preserve_position, term_type, rset_nmem,kctrl,scope,
456                          termid);
457 }
458