Replace key_compare_it with key_compare
[idzebra-moved-to-github.git] / index / trunc.c
index 391a64e..eb81fbb 100644 (file)
@@ -1,8 +1,5 @@
-/* $Id: trunc.c,v 1.43 2004-09-03 14:59:49 heikki Exp $
-   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
-   Index Data Aps
-
-This file is part of the Zebra server.
+/* This file is part of the Zebra server.
+   Copyright (C) 1994-2011 Index Data
 
 Zebra is free software; you can redistribute it and/or modify it under
 the terms of the GNU General Public License as published by the Free
@@ -15,12 +12,15 @@ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
 for more details.
 
 You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra.  If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
 */
 
 
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
 #include <stdio.h>
 #include <assert.h>
 
@@ -39,7 +39,7 @@ struct trunc_info {
     char *buf;
 };
 
-static void heap_swap (struct trunc_info *ti, int i1, int i2)
+static void heap_swap(struct trunc_info *ti, int i1, int i2)
 {
     int swap;
 
@@ -48,11 +48,11 @@ static void heap_swap (struct trunc_info *ti, int i1, int i2)
     ti->ptr[i2] = swap;
 }
 
-static void heap_delete (struct trunc_info *ti)
+static void heap_delete(struct trunc_info *ti)
 {
     int cur = 1, child = 2;
 
-    heap_swap (ti, 1, ti->heapnum--);
+    heap_swap(ti, 1, ti->heapnum--);
     while (child <= ti->heapnum) {
         if (child < ti->heapnum &&
             (*ti->cmp)(ti->heap[ti->ptr[child]],
@@ -61,7 +61,7 @@ static void heap_delete (struct trunc_info *ti)
         if ((*ti->cmp)(ti->heap[ti->ptr[cur]],
                        ti->heap[ti->ptr[child]]) > 0)
         {
-            heap_swap (ti, cur, child);
+            heap_swap(ti, cur, child);
             cur = child;
             child = 2*cur;
         }
@@ -70,40 +70,40 @@ static void heap_delete (struct trunc_info *ti)
     }
 }
 
-static void heap_insert (struct trunc_info *ti, const char *buf, int indx)
+static void heap_insert(struct trunc_info *ti, const char *buf, int indx)
 {
     int cur, parent;
 
     cur = ++(ti->heapnum);
-    memcpy (ti->heap[ti->ptr[cur]], buf, ti->keysize);
+    memcpy(ti->heap[ti->ptr[cur]], buf, ti->keysize);
     ti->indx[ti->ptr[cur]] = indx;
     parent = cur/2;
     while (parent && (*ti->cmp)(ti->heap[ti->ptr[parent]],
                                 ti->heap[ti->ptr[cur]]) > 0)
     {
-        heap_swap (ti, cur, parent);
+        heap_swap(ti, cur, parent);
         cur = parent;
         parent = cur/2;
     }
 }
 
-static struct trunc_info *heap_init (int size, int key_size,
-                                    int (*cmp)(const void *p1,
-                                               const void *p2))
+static struct trunc_info *heap_init(int size, int key_size,
+                                   int (*cmp)(const void *p1,
+                                              const void *p2))
 {
-    struct trunc_info *ti = (struct trunc_info *) xmalloc (sizeof(*ti));
+    struct trunc_info *ti = (struct trunc_info *) xmalloc(sizeof(*ti));
     int i;
 
     ++size;
     ti->heapnum = 0;
     ti->keysize = key_size;
     ti->cmp = cmp;
-    ti->indx = (int *) xmalloc (size * sizeof(*ti->indx));
-    ti->heap = (char **) xmalloc (size * sizeof(*ti->heap));
-    ti->ptr = (int *) xmalloc (size * sizeof(*ti->ptr));
-    ti->swapbuf = (char *) xmalloc (ti->keysize);
-    ti->tmpbuf = (char *) xmalloc (ti->keysize);
-    ti->buf = (char *) xmalloc (size * ti->keysize);
+    ti->indx = (int *) xmalloc(size * sizeof(*ti->indx));
+    ti->heap = (char **) xmalloc(size * sizeof(*ti->heap));
+    ti->ptr = (int *) xmalloc(size * sizeof(*ti->ptr));
+    ti->swapbuf = (char *) xmalloc(ti->keysize);
+    ti->tmpbuf = (char *) xmalloc(ti->keysize);
+    ti->buf = (char *) xmalloc(size * ti->keysize);
     for (i = size; --i >= 0; )
     {
         ti->ptr[i] = i;
@@ -112,37 +112,31 @@ static struct trunc_info *heap_init (int size, int key_size,
     return ti;
 }
 
-static void heap_close (struct trunc_info *ti)
+static void heap_close(struct trunc_info *ti)
 {
-    xfree (ti->ptr);
-    xfree (ti->indx);
-    xfree (ti->heap);
-    xfree (ti->swapbuf);
-    xfree (ti->tmpbuf);
-    xfree (ti->buf);
-    xfree (ti);
+    xfree(ti->ptr);
+    xfree(ti->indx);
+    xfree(ti->heap);
+    xfree(ti->swapbuf);
+    xfree(ti->tmpbuf);
+    xfree(ti->buf);
+    xfree(ti);
 }
 
-static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
-                          const char *flags, ISAMS_P *isam_p, int from, int to,
-                          int merge_chunk, int preserve_position,
-                          int term_type, NMEM rset_nmem,
-                          const struct key_control *kctrl)
+static RSET rset_trunc_r(ZebraHandle zi, const char *term, int length,
+                         const char *flags, ISAM_P *isam_p, int from, int to,
+                        int merge_chunk, int preserve_position,
+                        int term_type, NMEM rset_nmem,
+                        struct rset_key_control *kctrl, int scope,
+                        TERMID termid)
 {
-    RSET result; 
+    RSET result;
     RSFD result_rsfd;
     int nn = 0;
 
-    /*
-    rset_temp_parms parms;
-    parms.cmp = key_compare_it;
-    parms.key_size = sizeof(struct it_key);
-    parms.temp_path = res_get (zi->res, "setTmpDir");
-    result = rset_create (rset_kind_temp, &parms);
-    */
-    result=rstemp_create( rset_nmem,kctrl,
-            res_get (zi->res, "setTmpDir"));
-    result_rsfd = rset_open (result, RSETF_WRITE);
+    result = rset_create_temp(rset_nmem, kctrl, scope,
+                              res_get(zi->res, "setTmpDir"), termid);
+    result_rsfd = rset_open(result, RSETF_WRITE);
 
     if (to - from > merge_chunk)
     {
@@ -152,63 +146,69 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
         struct trunc_info *ti;
         int rscur = 0;
         int rsmax = (to-from)/i_add + 1;
-        
-        rset = (RSET *) xmalloc (sizeof(*rset) * rsmax);
-        rsfd = (RSFD *) xmalloc (sizeof(*rsfd) * rsmax);
-        
+       int cmp_border = preserve_position ? 0 : 1;
+       NMEM rset_nmem_sub = nmem_create(); /* all sub rsets not needed
+                                              after this */
+
+        rset = (RSET *) xmalloc(sizeof(*rset) * rsmax);
+        rsfd = (RSFD *) xmalloc(sizeof(*rsfd) * rsmax);
+
         for (i = from; i < to; i += i_add)
         {
             if (i_add <= to - i)
-                rset[rscur] = rset_trunc_r (zi, term, length, flags,
-                                           isam_p, i, i+i_add,
-                                            merge_chunk, preserve_position,
-                                            term_type, rset_nmem, kctrl);
+                rset[rscur] = rset_trunc_r(zi, term, length, flags,
+                                          isam_p, i, i+i_add,
+                                          merge_chunk, preserve_position,
+                                          term_type, rset_nmem_sub,
+                                          kctrl, scope, 0);
             else
-                rset[rscur] = rset_trunc_r (zi, term, length, flags,
-                                            isam_p, i, to,
-                                            merge_chunk, preserve_position,
-                                            term_type, rset_nmem, kctrl);
+                rset[rscur] = rset_trunc_r(zi, term, length, flags,
+                                          isam_p, i, to,
+                                          merge_chunk, preserve_position,
+                                          term_type, rset_nmem_sub,
+                                          kctrl, scope, 0);
             rscur++;
         }
-        ti = heap_init (rscur, sizeof(struct it_key), key_compare_it);
+        ti = heap_init (rscur, sizeof(struct it_key), key_compare);
         for (i = rscur; --i >= 0; )
         {
-            rsfd[i] = rset_open (rset[i], RSETF_READ);
-            if (rset_read(rsfd[i], ti->tmpbuf))
-                heap_insert (ti, ti->tmpbuf, i);
+            rsfd[i] = rset_open(rset[i], RSETF_READ);
+            if (rset_read(rsfd[i], ti->tmpbuf, 0))
+                heap_insert(ti, ti->tmpbuf, i);
             else
             {
-                rset_close (rsfd[i]);
-                rset_delete (rset[i]);
+                rset_close(rsfd[i]);
+                rset_delete(rset[i]);
             }
         }
         while (ti->heapnum)
         {
             int n = ti->indx[ti->ptr[1]];
 
-            rset_write (result_rsfd, ti->heap[ti->ptr[1]]);
+            rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
             nn++;
 
             while (1)
             {
-                if (!rset_read (rsfd[n], ti->tmpbuf))
+                if(!rset_read (rsfd[n], ti->tmpbuf,0))
                 {
-                    heap_delete (ti);
-                    rset_close (rsfd[n]);
-                    rset_delete (rset[n]);
+                    heap_delete(ti);
+                    rset_close(rsfd[n]);
+                    rset_delete(rset[n]);
                     break;
                 }
-                if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
+                if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > cmp_border)
                 {
-                    heap_delete (ti);
-                    heap_insert (ti, ti->tmpbuf, n);
+                    heap_delete(ti);
+                    heap_insert(ti, ti->tmpbuf, n);
                     break;
                 }
             }
         }
-        xfree (rset);
-        xfree (rsfd);
-        heap_close (ti);
+        xfree(rset);
+        xfree(rsfd);
+        heap_close(ti);
+       nmem_destroy(rset_nmem_sub);
     }
     else if (zi->reg->isamc)
     {
@@ -216,53 +216,53 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
         int i;
         struct trunc_info *ti;
 
-        ispt = (ISAMC_PP *) xmalloc (sizeof(*ispt) * (to-from));
+        ispt = (ISAMC_PP *) xmalloc(sizeof(*ispt) * (to-from));
 
-        ti = heap_init (to-from, sizeof(struct it_key),
-                        key_compare_it);
+        ti = heap_init(to-from, sizeof(struct it_key),
+                      key_compare);
         for (i = to-from; --i >= 0; )
         {
-            ispt[i] = isc_pp_open (zi->reg->isamc, isam_p[from+i]);
-            if (isc_pp_read (ispt[i], ti->tmpbuf))
-                heap_insert (ti, ti->tmpbuf, i);
+            ispt[i] = isamc_pp_open(zi->reg->isamc, isam_p[from+i]);
+            if (isamc_pp_read(ispt[i], ti->tmpbuf))
+                heap_insert(ti, ti->tmpbuf, i);
             else
-                isc_pp_close (ispt[i]);
+                isamc_pp_close(ispt[i]);
         }
         while (ti->heapnum)
         {
             int n = ti->indx[ti->ptr[1]];
 
-            rset_write (result_rsfd, ti->heap[ti->ptr[1]]);
+            rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
             nn++;
             if (preserve_position)
             {
-                heap_delete (ti);
-                if (isc_pp_read (ispt[n], ti->tmpbuf))
-                    heap_insert (ti, ti->tmpbuf, n);
+                heap_delete(ti);
+                if (isamc_pp_read(ispt[n], ti->tmpbuf))
+                    heap_insert(ti, ti->tmpbuf, n);
                 else
-                    isc_pp_close (ispt[n]);
+                    isamc_pp_close(ispt[n]);
             }
             else
             {
                 while (1)
                 {
-                    if (!isc_pp_read (ispt[n], ti->tmpbuf))
+                    if (!isamc_pp_read(ispt[n], ti->tmpbuf))
                     {
-                        heap_delete (ti);
-                        isc_pp_close (ispt[n]);
+                        heap_delete(ti);
+                        isamc_pp_close(ispt[n]);
                         break;
                     }
                     if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
                     {
-                        heap_delete (ti);
-                        heap_insert (ti, ti->tmpbuf, n);
+                        heap_delete(ti);
+                        heap_insert(ti, ti->tmpbuf, n);
                         break;
                     }
                 }
             }
         }
-        heap_close (ti);
-        xfree (ispt);
+        heap_close(ti);
+        xfree(ispt);
     }
     else if (zi->reg->isams)
     {
@@ -271,42 +271,42 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
         struct trunc_info *ti;
         int nn = 0;
 
-        ispt = (ISAMS_PP *) xmalloc (sizeof(*ispt) * (to-from));
+        ispt = (ISAMS_PP *) xmalloc(sizeof(*ispt) * (to-from));
 
-        ti = heap_init (to-from, sizeof(struct it_key),
-                        key_compare_it);
+        ti = heap_init(to-from, sizeof(struct it_key),
+                      key_compare);
         for (i = to-from; --i >= 0; )
         {
-            ispt[i] = isams_pp_open (zi->reg->isams, isam_p[from+i]);
-            if (isams_pp_read (ispt[i], ti->tmpbuf))
-                heap_insert (ti, ti->tmpbuf, i);
+            ispt[i] = isams_pp_open(zi->reg->isams, isam_p[from+i]);
+            if (isams_pp_read(ispt[i], ti->tmpbuf))
+                heap_insert(ti, ti->tmpbuf, i);
             else
-                isams_pp_close (ispt[i]);
+                isams_pp_close(ispt[i]);
         }
         while (ti->heapnum)
         {
             int n = ti->indx[ti->ptr[1]];
 
-            rset_write (result_rsfd, ti->heap[ti->ptr[1]]);
+            rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
             nn++;
             while (1)
             {
-                if (!isams_pp_read (ispt[n], ti->tmpbuf))
+                if (!isams_pp_read(ispt[n], ti->tmpbuf))
                 {
-                    heap_delete (ti);
-                    isams_pp_close (ispt[n]);
+                    heap_delete(ti);
+                    isams_pp_close(ispt[n]);
                     break;
                 }
                 if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
                 {
-                    heap_delete (ti);
-                    heap_insert (ti, ti->tmpbuf, n);
+                    heap_delete(ti);
+                    heap_insert(ti, ti->tmpbuf, n);
                     break;
                 }
             }
         }
-        heap_close (ti);
-        xfree (ispt);
+        heap_close(ti);
+        xfree(ispt);
     }
     else if (zi->reg->isamb)
     {
@@ -314,68 +314,68 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
         int i;
         struct trunc_info *ti;
 
-        ispt = (ISAMB_PP *) xmalloc (sizeof(*ispt) * (to-from));
+        ispt = (ISAMB_PP *) xmalloc(sizeof(*ispt) * (to-from));
 
-        ti = heap_init (to-from, sizeof(struct it_key),
-                        key_compare_it);
+        ti = heap_init(to-from, sizeof(struct it_key),
+                      key_compare);
         for (i = to-from; --i >= 0; )
         {
            if (isam_p[from+i]) {
-                ispt[i] = isamb_pp_open (zi->reg->isamb, isam_p[from+i]);
-                if (isamb_pp_read (ispt[i], ti->tmpbuf))
-                    heap_insert (ti, ti->tmpbuf, i);
+                ispt[i] = isamb_pp_open(zi->reg->isamb, isam_p[from+i], scope);
+                if (isamb_pp_read(ispt[i], ti->tmpbuf))
+                    heap_insert(ti, ti->tmpbuf, i);
                 else
-                    isamb_pp_close (ispt[i]);
+                    isamb_pp_close(ispt[i]);
            }
         }
         while (ti->heapnum)
         {
             int n = ti->indx[ti->ptr[1]];
 
-            rset_write (result_rsfd, ti->heap[ti->ptr[1]]);
+            rset_write(result_rsfd, ti->heap[ti->ptr[1]]);
             nn++;
 
             if (preserve_position)
             {
-                heap_delete (ti);
-                if (isamb_pp_read (ispt[n], ti->tmpbuf))
-                    heap_insert (ti, ti->tmpbuf, n);
+                heap_delete(ti);
+                if (isamb_pp_read(ispt[n], ti->tmpbuf))
+                    heap_insert(ti, ti->tmpbuf, n);
                 else
-                    isamb_pp_close (ispt[n]);
+                    isamb_pp_close(ispt[n]);
             }
             else
             {
                 while (1)
                 {
-                    if (!isamb_pp_read (ispt[n], ti->tmpbuf))
+                    if (!isamb_pp_read(ispt[n], ti->tmpbuf))
                     {
-                        heap_delete (ti);
-                        isamb_pp_close (ispt[n]);
+                        heap_delete(ti);
+                        isamb_pp_close(ispt[n]);
                         break;
                     }
                     if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
                     {
-                        heap_delete (ti);
-                        heap_insert (ti, ti->tmpbuf, n);
+                        heap_delete(ti);
+                        heap_insert(ti, ti->tmpbuf, n);
                         break;
                     }
                 }
             }
         }
-        heap_close (ti);
-        xfree (ispt);
+        heap_close(ti);
+        xfree(ispt);
     }
     else
-        logf (LOG_WARN, "Unknown isam set in rset_trunc_r");
+        yaz_log(YLOG_WARN, "Unknown isam set in rset_trunc_r");
 
-    rset_close (result_rsfd);
+    rset_close(result_rsfd);
     return result;
 }
 
-static int isams_trunc_cmp (const void *p1, const void *p2)
+static int isams_trunc_cmp(const void *p1, const void *p2)
 {
-    ISAMS_P i1 = *(ISAMS_P*) p1;
-    ISAMS_P i2 = *(ISAMS_P*) p2;
+    ISAM_P i1 = *(ISAM_P*) p1;
+    ISAM_P i2 = *(ISAM_P*) p2;
 
     if (i1 > i2)
         return 1;
@@ -384,15 +384,15 @@ static int isams_trunc_cmp (const void *p1, const void *p2)
     return 0;
 }
 
-static int isamc_trunc_cmp (const void *p1, const void *p2)
+static int isamc_trunc_cmp(const void *p1, const void *p2)
 {
-    ISAMC_P i1 = *(ISAMC_P*) p1;
-    ISAMC_P i2 = *(ISAMC_P*) p2;
+    ISAM_P i1 = *(ISAM_P*) p1;
+    ISAM_P i2 = *(ISAM_P*) p2;
     zint d;
 
-    d = (isc_type (i1) - isc_type (i2));
+    d = (isamc_type(i1) - isamc_type(i2));
     if (d == 0)
-        d = isc_block (i1) - isc_block (i2);
+        d = isamc_block(i1) - isamc_block(i2);
     if (d > 0)
        return 1;
     else if (d < 0)
@@ -400,52 +400,57 @@ static int isamc_trunc_cmp (const void *p1, const void *p2)
     return 0;
 }
 
-RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no,
-                const char *term, int length, const char *flags,
-                 int preserve_position, int term_type, NMEM rset_nmem,
-                 const struct key_control *kctrl)
+RSET rset_trunc(ZebraHandle zh, ISAM_P *isam_p, int no,
+               const char *term, int length, const char *flags,
+               int preserve_position, int term_type, NMEM rset_nmem,
+               struct rset_key_control *kctrl, int scope,
+               struct ord_list *ol, const char *index_type,
+               zint hits_limit, const char *term_ref_id)
 {
-    logf (LOG_DEBUG, "rset_trunc no=%d", no);
+    TERMID termid;
+    RSET result;
+    int trunc_chunk;
+    int trunc_limit = atoi(res_get_def(zh->res, "trunclimit", "10000"));
+
+    termid = rset_term_create(term, length, flags, term_type, rset_nmem, ol,
+                             *index_type, hits_limit, term_ref_id);
+
     if (no < 1)
-       return rsnull_create (rset_nmem,kctrl);
-    if (zi->reg->isams)
-    {
-        if (no == 1)
-            return rsisams_create(rset_nmem, kctrl,
-                    zi->reg->isams, *isam_p);
-        qsort (isam_p, no, sizeof(*isam_p), isams_trunc_cmp);
-    }
-    else if (zi->reg->isamc)
+       return rset_create_null(rset_nmem, kctrl, termid);
+    else if (no == 1)
+        return zebra_create_rset_isam(zh, rset_nmem, kctrl,
+                                      scope, *isam_p, termid);
+    else if (zh->reg->isamb && no > 1 && no < trunc_limit)
     {
-        if (no == 1)
-            return rsisamc_create(rset_nmem, kctrl,
-                    zi->reg->isamc, *isam_p);
-        qsort (isam_p, no, sizeof(*isam_p), isamc_trunc_cmp);
-    }
-    else if (zi->reg->isamb)
-    {
-        if (no == 1)
-            return rsisamb_create(rset_nmem,kctrl,
-                    zi->reg->isamb, *isam_p);
-        else if (no <10000 ) /* FIXME - hardcoded number */
-        {
-            RSET r;
-            RSET *rsets=xmalloc(no*sizeof(RSET)); /* use nmem! */
-            int i;
-            for (i=0;i<no;i++)
-                rsets[i]=rsisamb_create(rset_nmem, kctrl,
-                    zi->reg->isamb, isam_p[i] );
-            r=rsmultior_create( rset_nmem, kctrl, no, rsets);
-            xfree(rsets);
-        } 
-        qsort (isam_p, no, sizeof(*isam_p), isamc_trunc_cmp);
+        RSET r;
+        RSET *rsets = xmalloc(no*sizeof(RSET)); /* use nmem! */
+        int i;
+        for (i = 0; i<no; i++)
+            rsets[i] = rsisamb_create(rset_nmem, kctrl, scope,
+                                      zh->reg->isamb, isam_p[i],
+                                      0 /* termid */);
+        r = rset_create_or(rset_nmem, kctrl, scope,
+                           termid, no, rsets);
+        xfree(rsets);
+        return r;
     }
+    if (zh->reg->isamc)
+        qsort(isam_p, no, sizeof(*isam_p), isamc_trunc_cmp);
     else
-    {
-        logf (LOG_WARN, "Unknown isam set in rset_trunc");
-       return rsnull_create (rset_nmem, kctrl);
-    }
-    return rset_trunc_r (zi, term, length, flags, isam_p, 0, no, 100,
-                         preserve_position, term_type, rset_nmem,kctrl);
+        qsort(isam_p, no, sizeof(*isam_p), isams_trunc_cmp);
+    trunc_chunk = atoi(res_get_def(zh->res, "truncchunk", "20"));
+    result = rset_trunc_r(zh, term, length, flags, isam_p, 0, no, trunc_chunk,
+                         preserve_position, term_type, rset_nmem, kctrl,
+                         scope, termid);
+    return result;
 }
 
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+