Re-established rank-1. Gets same order of results, but slightly different
[idzebra-moved-to-github.git] / index / zrpn.c
index 07b39b4..c185b95 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zrpn.c,v 1.149 2004-09-01 15:01:32 heikki Exp $
+/* $Id: zrpn.c,v 1.158 2004-10-26 15:32:11 heikki Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
    Index Data Aps
 
@@ -37,14 +37,16 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <rset.h>
 
 
-static const struct key_control it_ctrl={ 
+static const struct key_control it_ctrl = { 
     sizeof(struct it_key),
+    2, /* we have sysnos and seqnos in this key, nothing more */
     key_compare_it, 
     key_logdump_txt,   /* FIXME  - clean up these functions */
     key_get_seq,
 };
 
-const struct key_control *key_it_ctrl=&it_ctrl;
+
+const struct key_control *key_it_ctrl = &it_ctrl;
 
 struct rpn_char_map_info {
     ZebraMaps zm;
@@ -62,7 +64,7 @@ typedef struct {
 static const char **rpn_char_map_handler (void *vp, const char **from, int len)
 {
     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
-    const char **out = zebra_maps_input (p->zm, p->reg_type, from, len);
+    const char **out = zebra_maps_input (p->zm, p->reg_type, from, len, 0);
 #if 0
     if (out && *out)
     {
@@ -259,7 +261,7 @@ static int grep_handle (char *name, const char *info, void *p)
 }
 
 static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
-                     const char *ct1, const char *ct2)
+                     const char *ct1, const char *ct2, int first)
 {
     const char *s1, *s0 = *src;
     const char **map;
@@ -272,7 +274,7 @@ static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
         if (ct2 && strchr (ct2, *s0))
             break;
         s1 = s0;
-        map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1));
+        map = zebra_maps_input (zebra_maps, reg_type, &s1, strlen(s1), first);
         if (**map != *CHR_SPACE)
             break;
         s0 = s1;
@@ -296,13 +298,13 @@ static int term_100 (ZebraMaps zebra_maps, int reg_type,
     const char *space_start = 0;
     const char *space_end = 0;
 
-    if (!term_pre (zebra_maps, reg_type, src, NULL, NULL))
+    if (!term_pre (zebra_maps, reg_type, src, NULL, NULL, !space_split))
         return 0;
     s0 = *src;
     while (*s0)
     {
         s1 = s0;
-        map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
+        map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
         if (space_split)
         {
             if (**map == *CHR_SPACE)
@@ -354,7 +356,7 @@ static int term_101 (ZebraMaps zebra_maps, int reg_type,
     int i = 0;
     int j = 0;
 
-    if (!term_pre (zebra_maps, reg_type, src, "#", "#"))
+    if (!term_pre (zebra_maps, reg_type, src, "#", "#", !space_split))
         return 0;
     s0 = *src;
     while (*s0)
@@ -368,7 +370,7 @@ static int term_101 (ZebraMaps zebra_maps, int reg_type,
         else
         {
             s1 = s0;
-            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
+            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
             if (space_split && **map == *CHR_SPACE)
                 break;
             while (s1 < s0)
@@ -396,7 +398,7 @@ static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
     const char *s0, *s1;
     const char **map;
 
-    if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "("))
+    if (!term_pre (zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
         return 0;
     s0 = *src;
     if (errors && *s0 == '+' && s0[1] && s0[2] == '+' && s0[3] &&
@@ -417,7 +419,7 @@ static int term_103 (ZebraMaps zebra_maps, int reg_type, const char **src,
         else
         {
             s1 = s0;
-            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
+            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
             if (**map == *CHR_SPACE)
                 break;
             while (s1 < s0)
@@ -454,7 +456,7 @@ static int term_104 (ZebraMaps zebra_maps, int reg_type,
     int i = 0;
     int j = 0;
 
-    if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#"))
+    if (!term_pre (zebra_maps, reg_type, src, "?*#", "?*#", !space_split))
         return 0;
     s0 = *src;
     while (*s0)
@@ -497,7 +499,7 @@ static int term_104 (ZebraMaps zebra_maps, int reg_type,
         }
         {
             s1 = s0;
-            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
+            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
             if (space_split && **map == *CHR_SPACE)
                 break;
             while (s1 < s0)
@@ -525,7 +527,7 @@ static int term_105 (ZebraMaps zebra_maps, int reg_type,
     int i = 0;
     int j = 0;
 
-    if (!term_pre (zebra_maps, reg_type, src, "*!", "*!"))
+    if (!term_pre (zebra_maps, reg_type, src, "*!", "*!", !space_split))
         return 0;
     s0 = *src;
     while (*s0)
@@ -543,7 +545,7 @@ static int term_105 (ZebraMaps zebra_maps, int reg_type,
         }
         {
             s1 = s0;
-            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0));
+            map = zebra_maps_input (zebra_maps, reg_type, &s0, strlen(s0), 0);
             if (space_split && **map == *CHR_SPACE)
                 break;
             while (s1 < s0)
@@ -896,7 +898,8 @@ static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     return rset_trunc (zh, grep_info->isam_p_buf,
                        grep_info->isam_p_indx, term_dst,
                        strlen(term_dst), rank_type, 1 /* preserve pos */,
-                       zapt->term->which, rset_nmem,key_it_ctrl);
+                       zapt->term->which, rset_nmem,
+                       key_it_ctrl,key_it_ctrl->scope);
 }
 
 
@@ -1051,7 +1054,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                                   attributeSet,
                                   reg_type, space_split, term_dst))
                 return 0;
-            logf (LOG_LOG, "dict_lookup_grep: %s", term_dict+prefix_len);
+            logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
             r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
                                   grep_info, &max_pos, 0, grep_handle);
             if (r)
@@ -1242,7 +1245,7 @@ static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             
         while ((len = (cp_end - cp)) > 0)
         {
-            map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
+            map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len, 0);
             if (**map == *CHR_SPACE)
                 space_map = *map;
             else
@@ -1401,7 +1404,7 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh,
     else if (rset_no == 1)
         return (rset[0]);
     else
-        result = rsprox_create( rset_nmem, key_it_ctrl,
+        result = rsprox_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
                        rset_no, rset,
                        1 /* ordered */, 0 /* exclusion */,
                        3 /* relation */, 1 /* distance */);
@@ -1445,7 +1448,7 @@ static RSET rpn_search_APT_or_list (ZebraHandle zh,
     grep_info_delete (&grep_info);
     if (rset_no == 0)
         return rsnull_create (rset_nmem,key_it_ctrl);  
-    return rsmultior_create(rset_nmem, key_it_ctrl,
+    return rsmultior_create(rset_nmem, key_it_ctrl,key_it_ctrl->scope,
                             rset_no, rset);
 }
 
@@ -1461,8 +1464,8 @@ static RSET rpn_search_APT_and_list (ZebraHandle zh,
                                      NMEM rset_nmem)
 {
     char term_dst[IT_MAX_WORD+1];
-    RSET rset[60], result;
-    int i, rset_no = 0;
+    RSET rset[60]; /* FIXME - bug 160 - should be dynamic somehow */
+    int rset_no = 0;
     struct grep_info grep_info;
     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
     const char *termp = termz;
@@ -1487,12 +1490,9 @@ static RSET rpn_search_APT_and_list (ZebraHandle zh,
     grep_info_delete (&grep_info);
     if (rset_no == 0)
         return rsnull_create (rset_nmem,key_it_ctrl); 
-    result = rset[0];
-    /* FIXME - Use a proper rsmultiand */
-    for (i = 1; i<rset_no; i++)
-        result= rsbool_create_and(rset_nmem,key_it_ctrl,
-                result, rset[i] );
-    return result;
+
+    return rsmultiand_create( rset_nmem, key_it_ctrl, key_it_ctrl->scope,
+                              rset_no, rset);
 }
 
 static int numeric_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
@@ -1691,10 +1691,11 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh,
 {
     char term_dst[IT_MAX_WORD+1];
     const char *termp = termz;
-    RSET rset[60], result;
-    int i, r, rset_no = 0;
+    RSET rset[60]; /* FIXME - hard-coded magic number */
+    int  r, rset_no = 0;
     struct grep_info grep_info;
 
+    logf (LOG_DEBUG, "APT_numeric t='%s'",termz);
     if (grep_info_prepare (zh, zapt, &grep_info, reg_type, stream))
         return 0;
     while (1)
@@ -1712,7 +1713,8 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh,
                                     grep_info.isam_p_indx, term_dst,
                                     strlen(term_dst), rank_type,
                                     0 /* preserve position */,
-                                    zapt->term->which, rset_nmem, key_it_ctrl);
+                                    zapt->term->which, rset_nmem, 
+                                    key_it_ctrl,key_it_ctrl->scope);
         assert (rset[rset_no]);
         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
             break;
@@ -1720,14 +1722,10 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh,
     grep_info_delete (&grep_info);
     if (rset_no == 0)
         return rsnull_create (rset_nmem,key_it_ctrl);
-    result = rset[0];
-    for (i = 1; i<rset_no; i++)
-    {
-        /* FIXME - Use a proper multi-and */
-        result= rsbool_create_and(rset_nmem,key_it_ctrl,
-                result, rset[i] );
-    }
-    return result;
+    if (rset_no == 1)
+        return rset[0];
+    return rsmultiand_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
+               rset_no, rset);
 }
 
 static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
@@ -1740,31 +1738,16 @@ static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     RSFD rsfd;
     struct it_key key;
     int sys;
-    /*
-    rset_temp_parms parms;
-
-    parms.cmp = key_compare_it;
-    parms.key_size = sizeof (struct it_key);
-    parms.temp_path = res_get (zh->res, "setTmpDir");
-    result = rset_create (rset_kind_temp, &parms);
-    */
-    result = rstemp_create( rset_nmem,key_it_ctrl,
+    result = rstemp_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
                      res_get (zh->res, "setTmpDir") );
     rsfd = rset_open (result, RSETF_WRITE);
 
     sys = atoi(termz);
     if (sys <= 0)
         sys = 1;
-#if IT_KEY_NEW
     key.mem[0] = sys;
     key.mem[1] = 1;
     key.len = 2;
-#else
-    key.sysno = sys;
-    key.seqno = 1;
-    if (key.sysno <= 0)
-        key.sysno = 1;
-#endif
     rset_write (rsfd, &key);
     rset_close (rsfd);
     return result;
@@ -1927,7 +1910,8 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
              grep_info.isam_p_indx);
     rset = rset_trunc (zh, grep_info.isam_p_buf,
                        grep_info.isam_p_indx, term, strlen(term),
-                       flags, 1, term_type,rset_nmem,key_it_ctrl);
+                       flags, 1, term_type,rset_nmem,
+                       key_it_ctrl, key_it_ctrl->scope);
     grep_info_delete (&grep_info);
     return rset;
 }
@@ -2074,7 +2058,7 @@ static RSET rpn_search_xpath (ZebraHandle zh,
                 parms.printer = key_print_it;
                 rset = rset_create (rset_kind_between, &parms);
                 */
-                rset=rsbetween_create( rset_nmem,key_it_ctrl,
+                rset=rsbetween_create( rset_nmem,key_it_ctrl,key_it_ctrl->scope,
                         rset_start_tag, rset, rset_end_tag, rset_attr);
             }
             first_path = 0;
@@ -2179,35 +2163,37 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
     if (zs->which == Z_RPNStructure_complex)
     {
         Z_Operator *zop = zs->u.complex->roperator;
-        RSET rset_l;
-        RSET rset_r;
+        RSET rsets[2]; /* l and r argument */
 
-        rset_l = rpn_search_structure (zh, zs->u.complex->s1,
+        rsets[0]=rpn_search_structure (zh, zs->u.complex->s1,
                                        attributeSet, stream, rset_nmem,
                                        sort_sequence,
                                        num_bases, basenames);
-        if (rset_l == NULL)
+        if (rsets[0] == NULL)
             return NULL;
-        rset_r = rpn_search_structure (zh, zs->u.complex->s2,
+        rsets[1]=rpn_search_structure (zh, zs->u.complex->s2,
                                        attributeSet, stream, rset_nmem,
                                        sort_sequence,
                                        num_bases, basenames);
-        if (rset_r == NULL)
+        if (rsets[1] == NULL)
         {
-            rset_delete (rset_l);
+            rset_delete (rsets[0]);
             return NULL;
         }
 
         switch (zop->which)
         {
         case Z_Operator_and:
-            r = rsbool_create_and(rset_nmem,key_it_ctrl, rset_l,rset_r );
+            r=rsmultiand_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
+                        2, rsets);
             break;
         case Z_Operator_or:
-            r = rsbool_create_or(rset_nmem,key_it_ctrl, rset_l,rset_r );
+            r=rsmultior_create(rset_nmem, key_it_ctrl, key_it_ctrl->scope,
+                        2, rsets);
             break;
         case Z_Operator_and_not:
-            r = rsbool_create_not(rset_nmem,key_it_ctrl, rset_l,rset_r );
+            r = rsbool_create_not(rset_nmem,key_it_ctrl, key_it_ctrl->scope,
+                    rsets[0],rsets[1]);
             break;
         case Z_Operator_prox:
             if (zop->u.prox->which != Z_ProximityOperator_known)
@@ -2226,12 +2212,8 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
             else
             {
                 /* new / old prox */
-                RSET twosets[2];
-                
-                twosets[0] = rset_l;
-                twosets[1] = rset_r;
-                r=rsprox_create(rset_nmem,key_it_ctrl,
-                         2, twosets, 
+                r=rsprox_create(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
+                         2, rsets, 
                          *zop->u.prox->ordered,
                          (!zop->u.prox->exclusion ? 
                               0 : *zop->u.prox->exclusion),
@@ -2300,7 +2282,7 @@ RSET rpn_search (ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
 
     sort_sequence = (Z_SortKeySpecList *)
         nmem_malloc (nmem, sizeof(*sort_sequence));
-    sort_sequence->num_specs = 10;
+    sort_sequence->num_specs = 10; /* FIXME - Hard-coded number */
     sort_sequence->specs = (Z_SortKeySpec **)
         nmem_malloc (nmem, sort_sequence->num_specs *
                      sizeof(*sort_sequence->specs));
@@ -2322,7 +2304,7 @@ RSET rpn_search (ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
         ;
     sort_sequence->num_specs = i;
     if (!i)
-        resultSetRank (zh, sset, rset);
+        resultSetRank (zh, sset, rset, rset_nmem);
     else
     {
         logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
@@ -2410,21 +2392,13 @@ static void count_set (RSET r, int *count)
 
     *count = 0;
     rfd = rset_open (r, RSETF_READ);
-    while (rset_read (rfd, &key))
+    while (rset_read (rfd, &key,0 /* never mind terms */))
     {
-#if IT_KEY_NEW
         if (key.mem[0] != psysno)
         {
             psysno = key.mem[0];
             (*count)++;
         }
-#else
-        if (key.sysno != psysno)
-        {
-            psysno = key.sysno;
-            (*count)++;
-        }
-#endif
         kno++;
     }
     rset_close (rfd);
@@ -2635,8 +2609,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
                            &glist[i+before].term, mterm);
         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
                            glist[i+before].term, strlen(glist[i+before].term),
-                           NULL, 0, zapt->term->which, rset_nmem, key_it_ctrl);
-
+                           NULL, 0, zapt->term->which, rset_nmem, 
+                           key_it_ctrl,key_it_ctrl->scope);
         ptr[j0]++;
         for (j = j0+1; j<ord_no; j++)
         {
@@ -2650,16 +2624,17 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
                    rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
                                glist[i+before].term,
                                strlen(glist[i+before].term), NULL, 0,
-                               zapt->term->which,rset_nmem,key_it_ctrl);
+                               zapt->term->which,rset_nmem,
+                               key_it_ctrl, key_it_ctrl->scope);
                 rset = rsbool_create_or(rset_nmem,key_it_ctrl,
-                            rset, rset2);
+                               key_it_ctrl->scope, rset, rset2);
                 /* FIXME - Use a proper multi-or */
 
                 ptr[j]++;
             }
         }
         if (limit_set)
-            rset = rsbool_create_and(rset_nmem,key_it_ctrl,
+            rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
                             rset, rset_dup(limit_set));
         count_set (rset, &glist[i+before].occurrences);
         rset_delete (rset);
@@ -2700,7 +2675,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         rset = rset_trunc
                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
                 glist[before-1-i].term, strlen(glist[before-1-i].term),
-                NULL, 0, zapt->term->which,rset_nmem,key_it_ctrl);
+                NULL, 0, zapt->term->which,rset_nmem,
+                key_it_ctrl,key_it_ctrl->scope);
 
         ptr[j0]++;
 
@@ -2716,15 +2692,16 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
                          &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
                                     glist[before-1-i].term,
                                     strlen(glist[before-1-i].term), NULL, 0,
-                                    zapt->term->which, rset_nmem,key_it_ctrl);
+                                    zapt->term->which, rset_nmem,
+                                    key_it_ctrl, key_it_ctrl->scope);
                 rset = rsbool_create_and(rset_nmem,key_it_ctrl,
-                            rset, rset2);
+                            key_it_ctrl->scope, rset, rset2);
                 /* FIXME - multi-and ?? */
                 ptr[j]++;
             }
         }
         if (limit_set)
-            rset = rsbool_create_and(rset_nmem,key_it_ctrl,
+            rset = rsbool_create_and(rset_nmem,key_it_ctrl,key_it_ctrl->scope,
                             rset, rset_dup(limit_set));
         count_set (rset, &glist[before-1-i].occurrences);
         rset_delete (rset);