Minor changes in search API.
[idzebra-moved-to-github.git] / index / zrpn.c
index a5617dc..710a699 100644 (file)
@@ -1,10 +1,32 @@
 /*
- * Copyright (C) 1995-1998, Index Data I/S 
+ * Copyright (C) 1995-1998, Index Data
  * All rights reserved.
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: zrpn.c,v $
- * Revision 1.80  1998-06-23 15:33:34  adam
+ * Revision 1.86  1998-09-22 10:48:20  adam
+ * Minor changes in search API.
+ *
+ * Revision 1.85  1998/09/22 10:03:43  adam
+ * Changed result sets to be persistent in the sense that they can
+ * be re-searched if needed.
+ * Fixed memory leak in rsm_or.
+ *
+ * Revision 1.84  1998/09/18 12:41:00  adam
+ * Fixed bug with numerical relations.
+ *
+ * Revision 1.83  1998/09/02 13:53:19  adam
+ * Extra parameter decode added to search routines to implement
+ * persistent queries.
+ *
+ * Revision 1.82  1998/06/26 11:16:40  quinn
+ * Added support (un-optimised) for left and left/right truncation
+ *
+ * Revision 1.81  1998/06/24 12:16:14  adam
+ * Support for relations on text operands. Open range support in
+ * DFA module (i.e. [-j], [g-]).
+ *
+ * Revision 1.80  1998/06/23 15:33:34  adam
  * Added feature to specify sort criteria in query (type 7 specifies
  * sort flags).
  *
@@ -640,7 +662,7 @@ static void gen_regular_rel (char *dst, int val, int islt)
         }
         else
         {
-            strcpy (dst, "((-");
+            strcpy (dst, "(-(");
             islt = 0;
         }
         val = -val;
@@ -720,20 +742,33 @@ static void gen_regular_rel (char *dst, int val, int islt)
     strcat (dst, "))");
 }
 
+void string_rel_add_char (char **term_p, const char *src, int *indx)
+{
+    if (src[*indx] == '\\')
+       *(*term_p)++ = src[(*indx)++];
+    *(*term_p)++ = src[(*indx)++];
+}
+
+/*
+ *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
+ *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
+ *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
+ *              ([^-a].*|a[^-b].*|ab[c-].*)
+ *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
+ *              ([^a-].*|a[^b-].*|ab[^c-].*)
+ *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
+ *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
+ */
 static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                                const char **term_sub,
-                                char *term_dict,
-                                oid_value attributeSet,
-                                struct grep_info *grep_info,
-                                int *max_pos,
-                                int reg_type,
-                                char *term_dst)
+                           const char **term_sub, char *term_dict,
+                           oid_value attributeSet,
+                           int reg_type, int space_split, char *term_dst)
 {
     AttrType relation;
     int relation_value;
-    int term_value;
-    int r;
+    int i;
     char *term_tmp = term_dict + strlen(term_dict);
+    char term_component[256];
 
     attr_init (&relation, zapt, 2);
     relation_value = attr_find (&relation, NULL);
@@ -742,52 +777,141 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     switch (relation_value)
     {
     case 1:
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
-                      term_dst))
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+                      space_split, term_dst))
             return 0;
-        term_value = atoi (term_tmp);
         logf (LOG_DEBUG, "Relation <");
-        gen_regular_rel (term_tmp, term_value-1, 1);
+       
+       *term_tmp++ = '(';
+       for (i = 0; term_component[i]; )
+       {
+           int j = 0;
+
+           if (i)
+               *term_tmp++ = '|';
+           while (j < i)
+               string_rel_add_char (&term_tmp, term_component, &j);
+
+           *term_tmp++ = '[';
+
+           *term_tmp++ = '^';
+           string_rel_add_char (&term_tmp, term_component, &i);
+           *term_tmp++ = '-';
+
+           *term_tmp++ = ']';
+           *term_tmp++ = '.';
+           *term_tmp++ = '*';
+       }
+       *term_tmp++ = ')';
+       *term_tmp = '\0';
         break;
     case 2:
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
-                      term_dst))
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+                      space_split, term_dst))
             return 0;
-        term_value = atoi (term_tmp);
         logf (LOG_DEBUG, "Relation <=");
-        gen_regular_rel (term_tmp, term_value, 1);
-        break;
-    case 4:
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
-                      term_dst))
-            return 0;
-        term_value = atoi (term_tmp);
-        logf (LOG_DEBUG, "Relation >=");
-        gen_regular_rel (term_tmp, term_value, 0);
+
+       *term_tmp++ = '(';
+       for (i = 0; term_component[i]; )
+       {
+           int j = 0;
+
+           while (j < i)
+               string_rel_add_char (&term_tmp, term_component, &j);
+           *term_tmp++ = '[';
+
+           *term_tmp++ = '^';
+           string_rel_add_char (&term_tmp, term_component, &i);
+           *term_tmp++ = '-';
+
+           *term_tmp++ = ']';
+           *term_tmp++ = '.';
+           *term_tmp++ = '*';
+
+           *term_tmp++ = '|';
+       }
+       for (i = 0; term_component[i]; )
+           string_rel_add_char (&term_tmp, term_component, &i);
+       *term_tmp++ = ')';
+       *term_tmp = '\0';
         break;
     case 5:
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
-                      term_dst))
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+                      space_split, term_dst))
             return 0;
-        term_value = atoi (term_tmp);
         logf (LOG_DEBUG, "Relation >");
-        gen_regular_rel (term_tmp, term_value+1, 0);
+
+       *term_tmp++ = '(';
+       for (i = 0; term_component[i];)
+       {
+           int j = 0;
+
+           while (j < i)
+               string_rel_add_char (&term_tmp, term_component, &j);
+           *term_tmp++ = '[';
+           
+           *term_tmp++ = '^';
+           *term_tmp++ = '-';
+           string_rel_add_char (&term_tmp, term_component, &i);
+
+           *term_tmp++ = ']';
+           *term_tmp++ = '.';
+           *term_tmp++ = '*';
+
+           *term_tmp++ = '|';
+       }
+       for (i = 0; term_component[i];)
+           string_rel_add_char (&term_tmp, term_component, &i);
+       *term_tmp++ = '.';
+       *term_tmp++ = '+';
+       *term_tmp++ = ')';
+       *term_tmp = '\0';
+        break;
+    case 4:
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+                      space_split, term_dst))
+            return 0;
+        logf (LOG_DEBUG, "Relation >=");
+
+       *term_tmp++ = '(';
+       for (i = 0; term_component[i];)
+       {
+           int j = 0;
+
+           if (i)
+               *term_tmp++ = '|';
+           while (j < i)
+               string_rel_add_char (&term_tmp, term_component, &j);
+           *term_tmp++ = '[';
+
+           if (term_component[i+1])
+           {
+               *term_tmp++ = '^';
+               *term_tmp++ = '-';
+               string_rel_add_char (&term_tmp, term_component, &i);
+           }
+           else
+           {
+               string_rel_add_char (&term_tmp, term_component, &i);
+               *term_tmp++ = '-';
+           }
+           *term_tmp++ = ']';
+           *term_tmp++ = '.';
+           *term_tmp++ = '*';
+       }
+       *term_tmp++ = ')';
+       *term_tmp = '\0';
         break;
     case 3:
     default:
         logf (LOG_DEBUG, "Relation =");
-       *term_tmp = '(';
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp+1, 1,
-                      term_dst))
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+                      space_split, term_dst))
             return 0;
+       strcat (term_tmp, "(");
+       strcat (term_tmp, term_component);
        strcat (term_tmp, ")");
     }
-    logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
-    r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, max_pos,
-                          0, grep_handle);
-    if (r)
-        logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
-    logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
     return 1;
 }
 
@@ -798,7 +922,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                        int num_bases, char **basenames,
                        char *term_dst)
 {
-    char term_dict[2*IT_MAX_WORD+2];
+    char term_dict[2*IT_MAX_WORD+4000];
     int j, r, base_no;
     AttrType truncation;
     int truncation_value;
@@ -882,20 +1006,14 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
        case -1:         /* not specified */
        case 100:        /* do not truncate */
            if (!string_relation (zh, zapt, &termp, term_dict,
-                                 attributeSet, grep_info, &max_pos,
-                                 reg_type, term_dst))
-               return 0;
-#if 0
-           term_dict[j++] = '(';   
-           if (!term_100 (zh->zebra_maps, reg_type,
-                          &termp, term_dict + j, space_split, term_dst))
+                                 attributeSet,
+                                 reg_type, space_split, term_dst))
                return 0;
-           strcat (term_dict, ")");
-           r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
-                                 &max_pos, 0, grep_handle);
+           logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
+           r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos,
+                                 0, grep_handle);
            if (r)
-               logf (LOG_WARN, "dict_lookup_grep err, trunc=none:%d", r);
-#endif
+               logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
            break;
        case 1:          /* right truncation */
            term_dict[j++] = '(';
@@ -906,8 +1024,24 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
            dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
                              &max_pos, 0, grep_handle);
            break;
-       case 2:          /* left truncation */
+       case 2:          /* left truncation */
+           term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
+           if (!term_100 (zh->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst))
+               return 0;
+           strcat (term_dict, ")");
+           dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
+                             &max_pos, 0, grep_handle);
+           break;
        case 3:          /* left&right truncation */
+           term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
+           if (!term_100 (zh->zebra_maps, reg_type,
+                          &termp, term_dict + j, space_split, term_dst))
+               return 0;
+           strcat (term_dict, ".*)");
+           dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
+                             &max_pos, 0, grep_handle);
+           break;
            zh->errCode = 120;
            return -1;
        case 101:        /* process # in term */
@@ -1629,7 +1763,7 @@ static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                          oid_value attributeSet, ODR stream,
+                          oid_value attributeSet, NMEM stream,
                           Z_SortKeySpecList *sort_sequence,
                           const char *rank_type)
 {
@@ -1654,8 +1788,8 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     if (!sort_sequence->specs)
     {
        sort_sequence->num_specs = 10;
-       sort_sequence->specs = odr_malloc (stream, sort_sequence->num_specs *
-                                          sizeof(*sort_sequence->specs));
+       sort_sequence->specs = nmem_malloc (stream, sort_sequence->num_specs *
+                                           sizeof(*sort_sequence->specs));
        for (i = 0; i<sort_sequence->num_specs; i++)
            sort_sequence->specs[i] = 0;
     }
@@ -1672,29 +1806,29 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     if (!oid_ent_to_oid (&oe, oid))
        return 0;
 
-    sks = odr_malloc (stream, sizeof(*sks));
-    sks->sortElement = odr_malloc (stream, sizeof(*sks->sortElement));
+    sks = nmem_malloc (stream, sizeof(*sks));
+    sks->sortElement = nmem_malloc (stream, sizeof(*sks->sortElement));
     sks->sortElement->which = Z_SortElement_generic;
-    sk = sks->sortElement->u.generic = odr_malloc (stream, sizeof(*sk));
+    sk = sks->sortElement->u.generic = nmem_malloc (stream, sizeof(*sk));
     sk->which = Z_SortKey_sortAttributes;
-    sk->u.sortAttributes = odr_malloc (stream, sizeof(*sk->u.sortAttributes));
+    sk->u.sortAttributes = nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
 
     sk->u.sortAttributes->id = oid;
     sk->u.sortAttributes->list =
-       odr_malloc (stream, sizeof(*sk->u.sortAttributes->list));
+       nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
     sk->u.sortAttributes->list->num_attributes = 1;
     sk->u.sortAttributes->list->attributes =
-       odr_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
+       nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
     ae = *sk->u.sortAttributes->list->attributes =
-       odr_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
+       nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
     ae->attributeSet = 0;
-    ae->attributeType =        odr_malloc (stream, sizeof(*ae->attributeType));
+    ae->attributeType =        nmem_malloc (stream, sizeof(*ae->attributeType));
     *ae->attributeType = 1;
     ae->which = Z_AttributeValue_numeric;
-    ae->value.numeric = odr_malloc (stream, sizeof(*ae->value.numeric));
+    ae->value.numeric = nmem_malloc (stream, sizeof(*ae->value.numeric));
     *ae->value.numeric = use_value;
 
-    sks->sortRelation = odr_malloc (stream, sizeof(*sks->sortRelation));
+    sks->sortRelation = nmem_malloc (stream, sizeof(*sks->sortRelation));
     if (sort_relation_value == 1)
        *sks->sortRelation = Z_SortRelation_ascending;
     else if (sort_relation_value == 2)
@@ -1702,7 +1836,7 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     else 
        *sks->sortRelation = Z_SortRelation_ascending;
 
-    sks->caseSensitivity = odr_malloc (stream, sizeof(*sks->caseSensitivity));
+    sks->caseSensitivity = nmem_malloc (stream, sizeof(*sks->caseSensitivity));
     *sks->caseSensitivity = 0;
 
     sks->missingValueAction = 0;
@@ -1715,7 +1849,7 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
 
 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                            oid_value attributeSet, ODR stream,
+                            oid_value attributeSet, NMEM stream,
                            Z_SortKeySpecList *sort_sequence,
                             int num_bases, char **basenames)
 {
@@ -1779,7 +1913,7 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
-                                  oid_value attributeSet, ODR stream,
+                                  oid_value attributeSet, NMEM stream,
                                  Z_SortKeySpecList *sort_sequence,
                                   int num_bases, char **basenames)
 {
@@ -1836,7 +1970,7 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
 #ifdef ASN_COMPILED
             if (*zop->u.prox->u.known != Z_ProxUnit_word)
             {
-                char *val = odr_malloc (stream, 16);
+                char *val = nmem_malloc (stream, 16);
                 zh->errCode = 132;
                 zh->errString = val;
                 sprintf (val, "%d", *zop->u.prox->u.known);
@@ -1845,7 +1979,7 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
 #else
             if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word)
             {
-                char *val = odr_malloc (stream, 16);
+                char *val = nmem_malloc (stream, 16);
                 zh->errCode = 132;
                 zh->errString = val;
                 sprintf (val, "%d", *zop->u.prox->proximityUnitCode);
@@ -1894,9 +2028,11 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
     return r;
 }
 
-void rpn_search (ZebraHandle zh, ODR stream,
+
+RSET rpn_search (ZebraHandle zh, NMEM nmem,
                 Z_RPNQuery *rpn, int num_bases, char **basenames, 
-                const char *setname)
+                const char *setname,
+                ZebraSet sset)
 {
     RSET rset;
     oident *attrset;
@@ -1904,37 +2040,43 @@ void rpn_search (ZebraHandle zh, ODR stream,
     Z_SortKeySpecList *sort_sequence;
     int sort_status, i;
 
-    zlog_rpn (rpn);
-
     zh->errCode = 0;
     zh->errString = NULL;
     zh->hits = 0;
 
-    sort_sequence = odr_malloc (stream, sizeof(*sort_sequence));
+    sort_sequence = nmem_malloc (nmem, sizeof(*sort_sequence));
     sort_sequence->num_specs = 10;
-    sort_sequence->specs = odr_malloc (stream, sort_sequence->num_specs *
+    sort_sequence->specs = nmem_malloc (nmem, sort_sequence->num_specs *
                                       sizeof(*sort_sequence->specs));
     for (i = 0; i<sort_sequence->num_specs; i++)
        sort_sequence->specs[i] = 0;
     
     attrset = oid_getentbyoid (rpn->attributeSetId);
     attributeSet = attrset->value;
-    rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet, stream,
-                                sort_sequence,
-                                 num_bases, basenames);
+    rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
+                                nmem, sort_sequence, num_bases, basenames);
     if (!rset)
-       return;
+       return 0;
 
-    resultSetAdd (zh, setname, 1, rset, &zh->hits);
     if (zh->errCode)
         logf (LOG_DEBUG, "search error: %d", zh->errCode);
-
+    
     for (i = 0; sort_sequence->specs[i]; i++)
        ;
     sort_sequence->num_specs = i;
-    if (i)
-       resultSetSort (zh, stream, 1, &setname, setname, sort_sequence,
-                      &sort_status);
+    if (!i)
+       resultSetRank (zh, sset, rset);
+    else
+    {
+       logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
+       resultSetSortSingle (zh, nmem, sset, rset,
+                            sort_sequence, &sort_status);
+       if (zh->errCode)
+       {
+           logf (LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
+       }
+    }
+    return rset;
 }
 
 struct scan_info_entry {
@@ -1969,14 +2111,14 @@ static int scan_handle (char *name, const char *info, int pos, void *client)
     return 0;
 }
 
-static void scan_term_untrans (ZebraHandle zh, ODR stream, int reg_type,
+static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
                               char **dst, const char *src)
 {
     char term_dst[1024];
     
     term_untrans (zh, reg_type, term_dst, src);
     
-    *dst = odr_malloc (stream, strlen(term_dst)+1);
+    *dst = nmem_malloc (stream, strlen(term_dst)+1);
     strcpy (*dst, term_dst);
 }
 
@@ -2144,7 +2286,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         }
         if (j0 == -1)
             break;
-        scan_term_untrans (zh, stream, reg_id,
+        scan_term_untrans (zh, stream->mem, reg_id,
                           &glist[i+before].term, mterm);
         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
                           glist[i+before].term, strlen(glist[i+before].term),
@@ -2208,7 +2350,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         if (j0 == -1)
             break;
 
-        scan_term_untrans (zh, stream, reg_id,
+        scan_term_untrans (zh, stream->mem, reg_id,
                           &glist[before-1-i].term, mterm);
 
         rset = rset_trunc