Added utility function init_extractCtrl to initialize sequence
[idzebra-moved-to-github.git] / index / zsets.c
index 88ed57d..1f8c6a7 100644 (file)
@@ -1,6 +1,6 @@
-/* $Id: zsets.c,v 1.74 2004-12-10 11:56:22 heikki Exp $
-   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
-   Index Data Aps
+/* $Id: zsets.c,v 1.81 2005-04-25 11:54:08 adam Exp $
+   Copyright (C) 1995-2005
+   Index Data ApS
 
 This file is part of the Zebra server.
 
@@ -56,6 +56,10 @@ struct zebra_set {
     int term_entries_max;
     struct zebra_set *next;
     int locked;
+
+    zint cache_position;  /* last position */
+    RSFD cache_rfd;       /* rfd (NULL if not existing) */
+    zint cache_psysno;    /* sysno for last position */
 };
 
 struct zset_sort_entry {
@@ -85,15 +89,73 @@ static void loglevels()
     log_level_searchhits = yaz_log_module_level("searchhits");
     log_level_searchterms = yaz_log_module_level("searchterms");
     log_level_resultsets = yaz_log_module_level("resultsets");
+    log_level_set = 1;
 }
 
-ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m,
-                          Z_RPNQuery *rpn, int num_bases,
-                          char **basenames, 
-                          const char *setname)
+/* Run RPN query rpn for result set sset, then rank or sort the hits.
+ * Resets zh->errCode, zh->errString and zh->hits.  If rpn_search_top()
+ * fails, sset->rset is set to 0 and its status is returned; otherwise
+ * sset->rset receives the new RSET and ZEBRA_OK is returned. */
+ZEBRA_RES resultSetSearch(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
+                         Z_RPNQuery *rpn, ZebraSet sset)
+{
+    RSET rset = 0;
+    oident *attrset;
+    Z_SortKeySpecList *sort_sequence;
+    int sort_status, i;
+    ZEBRA_RES res;
+
+    zh->errCode = 0;
+    zh->errString = NULL;
+    zh->hits = 0;
+
+    sort_sequence = (Z_SortKeySpecList *)
+        nmem_malloc(nmem, sizeof(*sort_sequence));
+    sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
+    sort_sequence->specs = (Z_SortKeySpec **)
+        nmem_malloc(nmem, sort_sequence->num_specs *
+                     sizeof(*sort_sequence->specs));
+    for (i = 0; i<sort_sequence->num_specs; i++)
+        sort_sequence->specs[i] = 0;
+
+    attrset = oid_getentbyoid (rpn->attributeSetId);
+    res = rpn_search_top(zh, rpn->RPNStructure, attrset->value,
+                        nmem, rset_nmem, sort_sequence,
+                        sset->num_bases, sset->basenames, &rset);
+    if (res != ZEBRA_OK)
+    {
+        sset->rset = 0;
+        return res;
+    }
+    if (zh->errCode)
+        yaz_log(YLOG_DEBUG, "search error: %d", zh->errCode);
+
+    /* Count leading non-null specs; bound by num_specs since a fully
+       populated array has no terminating 0 to stop the scan. */
+    for (i = 0; i < sort_sequence->num_specs && sort_sequence->specs[i]; i++)
+        ;
+    sort_sequence->num_specs = i;
+    if (!i)
+        resultSetRank (zh, sset, rset, rset_nmem);
+    else
+    {
+        yaz_log(YLOG_DEBUG, "resultSetSortSingle in rpn_search");
+        resultSetSortSingle (zh, nmem, sset, rset, sort_sequence, &sort_status);
+        if (zh->errCode)
+            yaz_log(YLOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
+    }
+    sset->rset = rset;
+    return ZEBRA_OK;
+}
+
+
+ZEBRA_RES resultSetAddRPN (ZebraHandle zh, NMEM m, Z_RPNQuery *rpn,
+                          int num_bases, char **basenames,
+                          const char *setname)
 {
     ZebraSet zebraSet;
     int i;
+    ZEBRA_RES res;
 
     zh->errCode = 0;
     zh->errString = NULL;
@@ -101,11 +163,11 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m,
 
     zebraSet = resultSetAdd (zh, setname, 1);
     if (!zebraSet)
-        return 0;
+        return ZEBRA_FAIL;
     zebraSet->locked = 1;
     zebraSet->rpn = 0;
     zebraSet->nmem = m;
-    zebraSet->rset_nmem=nmem_create(); 
+    zebraSet->rset_nmem = nmem_create(); 
 
     zebraSet->num_bases = num_bases;
     zebraSet->basenames = 
@@ -113,16 +175,15 @@ ZebraSet resultSetAddRPN (ZebraHandle zh, NMEM m,
     for (i = 0; i<num_bases; i++)
         zebraSet->basenames[i] = nmem_strdup (zebraSet->nmem, basenames[i]);
 
-
-    zebraSet->rset = rpn_search (zh, zebraSet->nmem, zebraSet->rset_nmem,
-                                 rpn, zebraSet->num_bases,
-                                 zebraSet->basenames, zebraSet->name,
-                                 zebraSet);
+    res = resultSetSearch(zh, zebraSet->nmem, zebraSet->rset_nmem,
+                         rpn, zebraSet);
     zh->hits = zebraSet->hits;
     if (zebraSet->rset)
         zebraSet->rpn = rpn;
     zebraSet->locked = 0;
-    return zebraSet;
+    if (!zebraSet->rset)
+       return ZEBRA_FAIL;
+    return res;
 }
 
 void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type,
@@ -170,7 +231,11 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov)
         if (!ov || s->locked)
             return NULL;
         if (s->rset)
+       {
+           if (s->cache_rfd)
+               rset_close(s->cache_rfd);
             rset_delete (s->rset);
+       }
         if (s->rset_nmem)
             nmem_destroy (s->rset_nmem);
         if (s->nmem)
@@ -206,9 +271,11 @@ ZebraSet resultSetAdd (ZebraHandle zh, const char *name, int ov)
     s->term_entries = 0;
     s->hits = 0;
     s->rset = 0;
-    s->rset_nmem=0;
+    s->rset_nmem = 0;
     s->nmem = 0;
     s->rpn = 0;
+    s->cache_position = 0;
+    s->cache_rfd = 0;
     return s;
 }
 
@@ -225,9 +292,7 @@ ZebraSet resultSetGet (ZebraHandle zh, const char *name)
                 yaz_log(log_level_resultsets, "research %s", name);
                 if (!s->rset_nmem)
                     s->rset_nmem=nmem_create();
-                s->rset =
-                    rpn_search (zh, nmem, s->rset_nmem, s->rpn, s->num_bases,
-                                s->basenames, s->name, s);
+               resultSetSearch(zh, nmem, s->rset_nmem, s->rpn, s);
                 nmem_destroy (nmem);
             }
             return s;
@@ -243,8 +308,14 @@ void resultSetInvalidate (ZebraHandle zh)
     for (; s; s = s->next)
     {
         if (s->rset)
+       {
+           if (s->cache_rfd)
+               rset_close(s->cache_rfd);
             rset_delete (s->rset);
+       }
         s->rset = 0;
+       s->cache_rfd = 0;
+       s->cache_position = 0;
         if (s->rset_nmem)
             nmem_destroy(s->rset_nmem);
         s->rset_nmem=0;
@@ -287,7 +358,11 @@ void resultSetDestroy (ZebraHandle zh, int num, char **names,int *statuses)
             if (s->nmem)
                 nmem_destroy (s->nmem);
             if (s->rset)
+           {
+               if (s->cache_rfd)
+                   rset_close(s->cache_rfd);
                 rset_delete (s->rset);
+           }
             if (s->rset_nmem)
                 nmem_destroy(s->rset_nmem);
             xfree (s->name);
@@ -368,14 +443,15 @@ ZebraMetaRecord *zebra_meta_records_create (ZebraHandle zh, const char *name,
         sort_info = sset->sort_info;
         if (sort_info)
         {
-            int position;
+            zint position;
             
             for (i = 0; i<num; i++)
             {
                 position = positions[i];
                 if (position > 0 && position <= sort_info->num_entries)
                 {
-                    yaz_log(log_level_sorting, "got pos=%d (sorted)", position);
+                    yaz_log(log_level_sorting, "got pos=" ZINT_FORMAT
+                           " (sorted)", position);
                     sr[i].sysno = sort_info->entries[position-1]->sysno;
                     sr[i].score = sort_info->entries[position-1]->score;
                 }
@@ -389,7 +465,7 @@ ZebraMetaRecord *zebra_meta_records_create (ZebraHandle zh, const char *name,
         }
         if (i < num) /* nope, get the rest, unsorted - sorry */
         {
-            int position = 0;
+            zint position = 0;
             int num_i = 0;
             zint psysno = 0;
             RSFD rfd;
@@ -397,9 +473,22 @@ ZebraMetaRecord *zebra_meta_records_create (ZebraHandle zh, const char *name,
             
             if (sort_info)
                 position = sort_info->num_entries;
-            while (num_i < num && positions[num_i] < position)
+            while (num_i < num && positions[num_i] <= position)
                 num_i++;
-            rfd = rset_open (rset, RSETF_READ);
+           
+           if (sset->cache_rfd &&
+               num_i < num && positions[num_i] > sset->cache_position)
+           {
+               position = sset->cache_position;
+               rfd = sset->cache_rfd;
+               psysno = sset->cache_psysno;
+           }
+           else
+           {
+               if (sset->cache_rfd)
+                   rset_close(sset->cache_rfd);
+               rfd = rset_open (rset, RSETF_READ);
+           }
             while (num_i < num && rset_read (rfd, &key, 0))
             {
                 zint this_sys = key.mem[0];
@@ -420,13 +509,15 @@ ZebraMetaRecord *zebra_meta_records_create (ZebraHandle zh, const char *name,
                     if (position == positions[num_i])
                     {
                         sr[num_i].sysno = psysno;
-                        yaz_log(log_level_sorting, "got pos=%d (unsorted)", position);
+                        yaz_log(log_level_sorting, "got pos=" ZINT_FORMAT " (unsorted)", position);
                         sr[num_i].score = -1;
                         num_i++;
                     }
                 }
             }
-            rset_close (rfd);
+           sset->cache_position = position;
+           sset->cache_psysno = psysno;
+           sset->cache_rfd = rfd;
         }
     }
     return sr;
@@ -788,7 +879,7 @@ void resultSetRank (ZebraHandle zh, ZebraSet zebraSet, RSET rset, NMEM nmem)
                 (zebraSet->hits)++;
                 psysno = this_sys;
             }
-            (*rc->add) (handle, seqno, termid);
+            (*rc->add) (handle, CAST_ZINT_TO_INT(seqno), termid);
             
             if ((est==-2) && (zebraSet->hits==esthits))
             { /* time to estimate the hits */