Fixed proximity handling.
[idzebra-moved-to-github.git] / index / zrpn.c
index 14e17ea..03e0f5e 100644 (file)
@@ -4,7 +4,22 @@
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: zrpn.c,v $
- * Revision 1.96  1999-09-23 10:05:05  adam
+ * Revision 1.101  2000-03-02 14:35:03  adam
+ * Fixed proximity handling.
+ *
+ * Revision 1.100  1999/12/28 15:48:12  adam
+ * Minor Fix.
+ *
+ * Revision 1.99  1999/12/23 09:03:32  adam
+ * Changed behaviour of trunc=105 so that * is regular .* and ! is regular .
+ *
+ * Revision 1.98  1999/11/30 13:48:04  adam
+ * Improved installation. Updated for inclusion of YAZ header files.
+ *
+ * Revision 1.97  1999/10/14 14:33:50  adam
+ * Added truncation 5=106.
+ *
+ * Revision 1.96  1999/09/23 10:05:05  adam
  * Implemented structure=105 searching.
  *
  * Revision 1.95  1999/09/07 07:19:21  adam
@@ -445,7 +460,7 @@ struct grep_info {
 #ifdef TERM_COUNT        
     int *term_no;        
 #endif        
-    ISAM_P *isam_p_buf;
+    ISAMS_P *isam_p_buf;
     int isam_p_size;        
     int isam_p_indx;
     ZebraHandle zh;
@@ -472,12 +487,12 @@ static void add_isam_p (const char *name, const char *info,
 {
     if (p->isam_p_indx == p->isam_p_size)
     {
-        ISAM_P *new_isam_p_buf;
+        ISAMS_P *new_isam_p_buf;
 #ifdef TERM_COUNT        
         int *new_term_no;        
 #endif
         p->isam_p_size = 2*p->isam_p_size + 100;
-        new_isam_p_buf = (ISAM_P *) xmalloc (sizeof(*new_isam_p_buf) *
+        new_isam_p_buf = (ISAMS_P *) xmalloc (sizeof(*new_isam_p_buf) *
                                             p->isam_p_size);
         if (p->isam_p_buf)
         {
@@ -717,10 +732,10 @@ static int term_104 (ZebraMaps zebra_maps, int reg_type,
     return i;
 }
 
-/* term_105: handle term, where trunc=Process # and ! and right trunc */
+/* term_105/106: handle term, where trunc=Process * and !, plus right trunc when right_truncate is set */
 static int term_105 (ZebraMaps zebra_maps, int reg_type,
                     const char **src, char *dst, int space_split,
-                    char *dst_term)
+                    char *dst_term, int right_truncate)
 {
     const char *s0, *s1;
     const char **map;
@@ -735,7 +750,7 @@ static int term_105 (ZebraMaps zebra_maps, int reg_type,
         if (*s0 == '*')
         {
             dst[i++] = '.';
-            dst[i++] = '+';
+            dst[i++] = '*';
            dst_term[j++] = *s0++;
         }
         else if (*s0 == '!')
@@ -757,8 +772,11 @@ static int term_105 (ZebraMaps zebra_maps, int reg_type,
             }
         }
     }
-    dst[i++] = '.';
-    dst[i++] = '*';
+    if (right_truncate)
+    {
+        dst[i++] = '.';
+        dst[i++] = '*';
+    }
     dst[i] = '\0';
     
     dst_term[j++] = '\0';
@@ -1253,7 +1271,18 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
        case 105:        /* process * and ! in term */
            term_dict[j++] = '(';
            if (!term_105 (zh->zebra_maps, reg_type,
-                          &termp, term_dict + j, space_split, term_dst))
+                          &termp, term_dict + j, space_split, term_dst, 1))
+               return 0;
+           strcat (term_dict, ")");
+           r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
+                                 &max_pos, 0, grep_handle);
+           if (r)
+               logf (LOG_WARN, "dict_lookup_grep err, trunc=*/!: %d", r);
+           break;
+       case 106:        /* process * and ! in term */
+           term_dict[j++] = '(';
+           if (!term_105 (zh->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst, 0))
                return 0;
            strcat (term_dict, ")");
            r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
@@ -1311,99 +1340,8 @@ static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     termz[i] = '\0';
 }
 
-static RSET rpn_proximity (ZebraHandle zh, RSET rset1, RSET rset2,
-                          int ordered,
-                           int exclusion, int relation, int distance)
-{
-    int i;
-    RSFD rsfd1, rsfd2;
-    int  more1, more2;
-    struct it_key buf1, buf2;
-    RSFD rsfd_result;
-    RSET result;
-    rset_temp_parms parms;
-    int term_index;
-    
-    rsfd1 = rset_open (rset1, RSETF_READ);
-    more1 = rset_read (rset1, rsfd1, &buf1, &term_index);
-    
-    rsfd2 = rset_open (rset2, RSETF_READ);
-    more2 = rset_read (rset2, rsfd2, &buf2, &term_index);
-
-    parms.key_size = sizeof (struct it_key);
-    parms.temp_path = res_get (zh->res, "setTmpDir");
-    result = rset_create (rset_kind_temp, &parms);
-    rsfd_result = rset_open (result, RSETF_WRITE);
-   
-    logf (LOG_DEBUG, "rpn_proximity  excl=%d ord=%d rel=%d dis=%d",
-          exclusion, ordered, relation, distance);
-    while (more1 && more2)
-    {
-        int cmp = key_compare_it (&buf1, &buf2);
-        if (cmp < -1)
-            more1 = rset_read (rset1, rsfd1, &buf1, &term_index);
-        else if (cmp > 1)
-            more2 = rset_read (rset2, rsfd2, &buf2, &term_index);
-        else
-        {
-            int sysno = buf1.sysno;
-            int seqno[500];
-            int n = 0;
-
-            seqno[n++] = buf1.seqno;
-            while ((more1 = rset_read (rset1, rsfd1, &buf1, &term_index)) &&
-                   sysno == buf1.sysno)
-                if (n < 500)
-                    seqno[n++] = buf1.seqno;
-            do
-            {
-                for (i = 0; i<n; i++)
-                {
-                    int diff = buf2.seqno - seqno[i];
-                    int excl = exclusion;
-                    if (!ordered && diff < 0)
-                        diff = -diff;
-                    switch (relation)
-                    {
-                    case 1:      /* < */
-                        if (diff < distance)
-                            excl = !excl;
-                        break;
-                    case 2:      /* <= */
-                        if (diff <= distance)
-                            excl = !excl;
-                        break;
-                    case 3:      /* == */
-                        if (diff == distance)
-                            excl = !excl;
-                        break;
-                    case 4:      /* >= */
-                        if (diff >= distance)
-                            excl = !excl;
-                        break;
-                    case 5:      /* > */
-                        if (diff > distance)
-                            excl = !excl;
-                        break;
-                    case 6:      /* != */
-                        if (diff != distance)
-                            excl = !excl;
-                        break;
-                    }
-                    if (excl)
-                        rset_write (result, rsfd_result, &buf2);
-                }
-            } while ((more2 = rset_read (rset2, rsfd2, &buf2, &term_index)) &&
-                      sysno == buf2.sysno);
-        }
-    }
-    rset_close (result, rsfd_result);
-    rset_close (rset1, rsfd1);
-    rset_close (rset2, rsfd2);
-    return result;
-}
-
-static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no)
+static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no,
+                     int ordered, int exclusion, int relation, int distance)
 {
     int i;
     RSFD *rsfd;
@@ -1443,6 +1381,11 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no)
     }
     for (i = 0; i<rset_no; i++)
     {
+       buf[i] = 0;
+       rsfd[i] = 0;
+    }
+    for (i = 0; i<rset_no; i++)
+    {
        buf[i] = (struct it_key *) xmalloc (sizeof(**buf));
        rsfd[i] = rset_open (rset[i], RSETF_READ);
         if (!(more[i] = rset_read (rset[i], rsfd[i], buf[i], &term_index)))
@@ -1450,21 +1393,17 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no)
     }
     if (i != rset_no)
     {
+       /* at least one is empty ... return null set */
        rset_null_parms parms;
-
-       while (i >= 0)
-       {
-           rset_close (rset[i], rsfd[i]);
-           xfree (buf[i]);
-           --i;
-       }
+       
        parms.rset_term = rset_term_create (prox_term, length_prox_term,
                                            flags);
        parms.rset_term->nn = 0;
        result = rset_create (rset_kind_null, &parms);
     }
-    else
+    else if (ordered && relation == 3 && exclusion == 0 && distance == 1)
     {
+       /* special proximity case = phrase search ... */
        rset_temp_parms parms;
        RSFD rsfd_result;
 
@@ -1516,14 +1455,106 @@ static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no)
                more[0] = rset_read (*rset, *rsfd, *buf, &term_index);
            }
        }
-       
-       for (i = 0; i<rset_no; i++)
+       rset_close (result, rsfd_result);
+    }
+    else if (rset_no == 2)
+    {
+       /* generic proximity case (two input sets only) ... */
+       rset_temp_parms parms;
+       RSFD rsfd_result;
+
+       logf (LOG_LOG, "generic prox, dist = %d, relation = %d, ordered =%d, exclusion=%d",
+             distance, relation, ordered, exclusion);
+       parms.rset_term = rset_term_create (prox_term, length_prox_term,
+                                           flags);
+       parms.rset_term->nn = min_nn;
+       parms.key_size = sizeof (struct it_key);
+       parms.temp_path = res_get (zh->res, "setTmpDir");
+       result = rset_create (rset_kind_temp, &parms);
+       rsfd_result = rset_open (result, RSETF_WRITE);
+
+       while (more[0] && more[1]) 
        {
-           rset_close (rset[i], rsfd[i]);
-           xfree (buf[i]);
+           int cmp = key_compare_it (buf[0], buf[1]);
+           if (cmp < -1)
+               more[0] = rset_read (rset[0], rsfd[0], buf[0], &term_index);
+           else if (cmp > 1)
+               more[1] = rset_read (rset[1], rsfd[1], buf[1], &term_index);
+           else
+           {
+               int sysno = buf[0]->sysno;
+               int seqno[500];
+               int n = 0;
+               
+               seqno[n++] = buf[0]->seqno;
+               while ((more[0] = rset_read (rset[0], rsfd[0], buf[0],
+                                            &term_index)) &&
+                      sysno == buf[0]->sysno)
+                   if (n < 500)
+                       seqno[n++] = buf[0]->seqno;
+               do
+               {
+                   for (i = 0; i<n; i++)
+                   {
+                       int diff = buf[1]->seqno - seqno[i];
+                       int excl = exclusion;
+                       if (!ordered && diff < 0)
+                           diff = -diff;
+                       switch (relation)
+                       {
+                       case 1:      /* < */
+                           if (diff < distance && diff >= 0)
+                               excl = !excl;
+                           break;
+                       case 2:      /* <= */
+                           if (diff <= distance && diff >= 0)
+                               excl = !excl;
+                           break;
+                       case 3:      /* == */
+                           if (diff == distance && diff >= 0)
+                               excl = !excl;
+                           break;
+                       case 4:      /* >= */
+                           if (diff >= distance && diff >= 0)
+                               excl = !excl;
+                           break;
+                       case 5:      /* > */
+                           if (diff > distance && diff >= 0)
+                               excl = !excl;
+                           break;
+                       case 6:      /* != */
+                           if (diff != distance && diff >= 0)
+                               excl = !excl;
+                           break;
+                       }
+                       if (excl)
+                       {
+                           rset_write (result, rsfd_result, buf[1]);
+                           break;
+                       }
+                   }
+               } while ((more[1] = rset_read (rset[1], rsfd[1], buf[1],
+                                              &term_index)) &&
+                        sysno == buf[1]->sysno);
+           }
        }
        rset_close (result, rsfd_result);
     }
+    else
+    {
+       rset_null_parms parms;
+       
+       parms.rset_term = rset_term_create (prox_term, length_prox_term,
+                                           flags);
+       parms.rset_term->nn = 0;
+       result = rset_create (rset_kind_null, &parms);
+    }
+    for (i = 0; i<rset_no; i++)
+    {
+       if (rset[i])
+           rset_close (rset[i], rsfd[i]);
+       xfree (buf[i]);
+    }
     xfree (buf);
     xfree (more);
     xfree (rsfd);
@@ -1629,7 +1660,7 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh,
     }
     else if (rset_no == 1)
         return (rset[0]);
-    result = rpn_prox (zh, rset, rset_no);
+    result = rpn_prox (zh, rset, rset_no, 1, 0, 3, 1);
     for (i = 0; i<rset_no; i++)
         rset_delete (rset[i]);
     return result;
@@ -2240,12 +2271,22 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
                 return NULL;
             }
 #endif
-            r = rpn_proximity (zh, bool_parms.rset_l, bool_parms.rset_r,
-                               *zop->u.prox->ordered,
-                               (!zop->u.prox->exclusion ? 0 :
-                                         *zop->u.prox->exclusion),
-                               *zop->u.prox->relationType,
-                               *zop->u.prox->distance);
+           else
+           {
+               RSET rsets[2];
+
+               rsets[0] = bool_parms.rset_l;
+               rsets[1] = bool_parms.rset_r;
+               
+               r = rpn_prox (zh, rsets, 2, 
+                             *zop->u.prox->ordered,
+                             (!zop->u.prox->exclusion ? 0 :
+                              *zop->u.prox->exclusion),
+                             *zop->u.prox->relationType,
+                             *zop->u.prox->distance);
+               rset_delete (rsets[0]);
+               rset_delete (rsets[1]);
+           }
             break;
         default:
             zh->errCode = 110;
@@ -2337,7 +2378,7 @@ RSET rpn_search (ZebraHandle zh, NMEM nmem,
 
 struct scan_info_entry {
     char *term;
-    ISAM_P isam_p;
+    ISAMS_P isam_p;
 };
 
 struct scan_info {
@@ -2361,8 +2402,8 @@ static int scan_handle (char *name, const char *info, int pos, void *client)
     scan_info->list[idx].term = (char *)
        odr_malloc (scan_info->odr, strlen(name + len_prefix)+1);
     strcpy (scan_info->list[idx].term, name + len_prefix);
-    assert (*info == sizeof(ISAM_P));
-    memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
+    assert (*info == sizeof(ISAMS_P));
+    memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMS_P));
     return 0;
 }