X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=index%2Fzrpn.c;h=710a6999de9ca763f56adcb0cd1a98ddc532be55;hb=e5b0bb88921b593c89dd6ae9077c6566bc8a6a91;hp=a5617dccbfe3612968e066244c1ebab5c30b6acb;hpb=5d51844baabbb37b83afdd9b10db0a6c53cd1905;p=idzebra-moved-to-github.git

diff --git a/index/zrpn.c b/index/zrpn.c
index a5617dc..710a699 100644
--- a/index/zrpn.c
+++ b/index/zrpn.c
@@ -1,10 +1,32 @@
 /*
- * Copyright (C) 1995-1998, Index Data I/S 
+ * Copyright (C) 1995-1998, Index Data
  * All rights reserved.
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: zrpn.c,v $
- * Revision 1.80  1998-06-23 15:33:34  adam
+ * Revision 1.86  1998-09-22 10:48:20  adam
+ * Minor changes in search API.
+ *
+ * Revision 1.85  1998/09/22 10:03:43  adam
+ * Changed result sets to be persistent in the sense that they can
+ * be re-searched if needed.
+ * Fixed memory leak in rsm_or.
+ *
+ * Revision 1.84  1998/09/18 12:41:00  adam
+ * Fixed bug with numerical relations.
+ *
+ * Revision 1.83  1998/09/02 13:53:19  adam
+ * Extra parameter decode added to search routines to implement
+ * persistent queries.
+ *
+ * Revision 1.82  1998/06/26 11:16:40  quinn
+ * Added support (un-optimised) for left and left/right truncation
+ *
+ * Revision 1.81  1998/06/24 12:16:14  adam
+ * Support for relations on text operands. Open range support in
+ * DFA module (i.e. [-j], [g-]).
+ *
+ * Revision 1.80  1998/06/23 15:33:34  adam
  * Added feature to specify sort criteria in query (type 7 specifies
  * sort flags).
  *
@@ -640,7 +662,7 @@ static void gen_regular_rel (char *dst, int val, int islt)
         }
         else
         {
-            strcpy (dst, "((-");
+            strcpy (dst, "(-(");
             islt = 0;
         }
         val = -val;
@@ -720,20 +742,33 @@ static void gen_regular_rel (char *dst, int val, int islt)
     strcat (dst, "))");
 }
 
+void string_rel_add_char (char **term_p, const char *src, int *indx)
+{   /* Copy one character, or a backslash plus the character after it, from src[*indx] to *term_p; both cursors advance. */
+    if (src[*indx] == '\\')   /* backslash: copy it here, the character after it is copied below */
+	*(*term_p)++ = src[(*indx)++];
+    *(*term_p)++ = src[(*indx)++];   /* no terminator is written and no bounds are checked here */
+}
+
+/*
+ *   >  abc     ([b-].*|a[c-].*|ab[d-].*|abc.+)
+ *              ([^-a].*|a[^-b].*|ab[^-c].*|abc.+)
+ *   >= abc     ([b-].*|a[c-].*|ab[c-].*)
+ *              ([^-a].*|a[^-b].*|ab[c-].*)
+ *   <  abc     ([-0].*|a[-a].*|ab[-b].*)
+ *              ([^a-].*|a[^b-].*|ab[^c-].*)
+ *   <= abc     ([-0].*|a[-a].*|ab[-b].*|abc)
+ *              ([^a-].*|a[^b-].*|ab[^c-].*|abc)
+ */
 static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-				 const char **term_sub,
-				 char *term_dict,
-				 oid_value attributeSet,
-				 struct grep_info *grep_info,
-				 int *max_pos,
-				 int reg_type,
-				 char *term_dst)
+			    const char **term_sub, char *term_dict,
+			    oid_value attributeSet,
+			    int reg_type, int space_split, char *term_dst)
 {
     AttrType relation;
     int relation_value;
-    int term_value;
-    int r;
+    int i;
     char *term_tmp = term_dict + strlen(term_dict);
+    char term_component[256];
 
     attr_init (&relation, zapt, 2);
     relation_value = attr_find (&relation, NULL);
@@ -742,52 +777,141 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     switch (relation_value)
     {
     case 1:
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
-		       term_dst))
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+		       space_split, term_dst))
             return 0;
-        term_value = atoi (term_tmp);
         logf (LOG_DEBUG, "Relation <");
-        gen_regular_rel (term_tmp, term_value-1, 1);
+	
+	*term_tmp++ = '(';
+	for (i = 0; term_component[i]; )
+	{
+	    int j = 0;
+
+	    if (i)
+		*term_tmp++ = '|';
+	    while (j < i)
+		string_rel_add_char (&term_tmp, term_component, &j);
+
+	    *term_tmp++ = '[';
+
+	    *term_tmp++ = '^';
+	    string_rel_add_char (&term_tmp, term_component, &i);
+	    *term_tmp++ = '-';
+
+	    *term_tmp++ = ']';
+	    *term_tmp++ = '.';
+	    *term_tmp++ = '*';
+	}
+	*term_tmp++ = ')';
+	*term_tmp = '\0';
         break;
     case 2:
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
-		       term_dst))
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+		       space_split, term_dst))
             return 0;
-        term_value = atoi (term_tmp);
         logf (LOG_DEBUG, "Relation <=");
-        gen_regular_rel (term_tmp, term_value, 1);
-        break;
-    case 4:
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
-		       term_dst))
-            return 0;
-        term_value = atoi (term_tmp);
-        logf (LOG_DEBUG, "Relation >=");
-        gen_regular_rel (term_tmp, term_value, 0);
+
+	*term_tmp++ = '(';
+	for (i = 0; term_component[i]; )
+	{
+	    int j = 0;
+
+	    while (j < i)
+		string_rel_add_char (&term_tmp, term_component, &j);
+	    *term_tmp++ = '[';
+
+	    *term_tmp++ = '^';
+	    string_rel_add_char (&term_tmp, term_component, &i);
+	    *term_tmp++ = '-';
+
+	    *term_tmp++ = ']';
+	    *term_tmp++ = '.';
+	    *term_tmp++ = '*';
+
+	    *term_tmp++ = '|';
+	}
+	for (i = 0; term_component[i]; )
+	    string_rel_add_char (&term_tmp, term_component, &i);
+	*term_tmp++ = ')';
+	*term_tmp = '\0';
         break;
     case 5:
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp, 1,
-		       term_dst))
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+		       space_split, term_dst))
             return 0;
-        term_value = atoi (term_tmp);
         logf (LOG_DEBUG, "Relation >");
-        gen_regular_rel (term_tmp, term_value+1, 0);
+
+	*term_tmp++ = '(';
+	for (i = 0; term_component[i];)
+	{
+	    int j = 0;
+
+	    while (j < i)
+		string_rel_add_char (&term_tmp, term_component, &j);
+	    *term_tmp++ = '[';
+	    
+	    *term_tmp++ = '^';
+	    *term_tmp++ = '-';
+	    string_rel_add_char (&term_tmp, term_component, &i);
+
+	    *term_tmp++ = ']';
+	    *term_tmp++ = '.';
+	    *term_tmp++ = '*';
+
+	    *term_tmp++ = '|';
+	}
+	for (i = 0; term_component[i];)
+	    string_rel_add_char (&term_tmp, term_component, &i);
+	*term_tmp++ = '.';
+	*term_tmp++ = '+';
+	*term_tmp++ = ')';
+	*term_tmp = '\0';
+        break;
+    case 4:
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+		       space_split, term_dst))
+            return 0;
+        logf (LOG_DEBUG, "Relation >=");
+
+	*term_tmp++ = '(';
+	for (i = 0; term_component[i];)
+	{
+	    int j = 0;
+
+	    if (i)
+		*term_tmp++ = '|';
+	    while (j < i)
+		string_rel_add_char (&term_tmp, term_component, &j);
+	    *term_tmp++ = '[';
+
+	    if (term_component[i+1])
+	    {
+		*term_tmp++ = '^';
+		*term_tmp++ = '-';
+		string_rel_add_char (&term_tmp, term_component, &i);
+	    }
+	    else
+	    {
+		string_rel_add_char (&term_tmp, term_component, &i);
+		*term_tmp++ = '-';
+	    }
+	    *term_tmp++ = ']';
+	    *term_tmp++ = '.';
+	    *term_tmp++ = '*';
+	}
+	*term_tmp++ = ')';
+	*term_tmp = '\0';
         break;
     case 3:
     default:
         logf (LOG_DEBUG, "Relation =");
-	*term_tmp = '(';
-        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_tmp+1, 1,
-		       term_dst))
+        if (!term_100 (zh->zebra_maps, reg_type, term_sub, term_component,
+		       space_split, term_dst))
             return 0;
+	strcat (term_tmp, "(");
+	strcat (term_tmp, term_component);
 	strcat (term_tmp, ")");
     }
-    logf (LOG_DEBUG, "dict_lookup_grep: %s", term_tmp);
-    r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, max_pos,
-                          0, grep_handle);
-    if (r)
-        logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
-    logf (LOG_DEBUG, "%d positions", grep_info->isam_p_indx);
     return 1;
 }
 
@@ -798,7 +922,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 			int num_bases, char **basenames,
 			char *term_dst)
 {
-    char term_dict[2*IT_MAX_WORD+2];
+    char term_dict[2*IT_MAX_WORD+4000];
     int j, r, base_no;
     AttrType truncation;
     int truncation_value;
@@ -882,20 +1006,14 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 	case -1:         /* not specified */
 	case 100:        /* do not truncate */
 	    if (!string_relation (zh, zapt, &termp, term_dict,
-				  attributeSet, grep_info, &max_pos,
-				  reg_type, term_dst))
-		return 0;
-#if 0
-	    term_dict[j++] = '(';   
-	    if (!term_100 (zh->zebra_maps, reg_type,
-			   &termp, term_dict + j, space_split, term_dst))
+				  attributeSet,
+				  reg_type, space_split, term_dst))
 		return 0;
-	    strcat (term_dict, ")");
-	    r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
-				  &max_pos, 0, grep_handle);
+	    logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
+	    r = dict_lookup_grep (zh->dict, term_dict, 0, grep_info, &max_pos,
+				  0, grep_handle);
 	    if (r)
-		logf (LOG_WARN, "dict_lookup_grep err, trunc=none:%d", r);
-#endif
+		logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
 	    break;
 	case 1:          /* right truncation */
 	    term_dict[j++] = '(';
@@ -906,8 +1024,24 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 	    dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
 			      &max_pos, 0, grep_handle);
 	    break;
-	case 2:          /* left truncation */
+	case 2:          /* left truncation */
+	    term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
+	    if (!term_100 (zh->zebra_maps, reg_type,
+			   &termp, term_dict + j, space_split, term_dst))
+		return 0;
+	    strcat (term_dict, ")");
+	    dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
+			      &max_pos, 0, grep_handle);
+	    break;
 	case 3:          /* left&right truncation */
+	    term_dict[j++] = '('; term_dict[j++] = '.'; term_dict[j++] = '*';
+	    if (!term_100 (zh->zebra_maps, reg_type,
+			   &termp, term_dict + j, space_split, term_dst))
+		return 0;
+	    strcat (term_dict, ".*)");
+	    dict_lookup_grep (zh->dict, term_dict, 0, grep_info,
+			      &max_pos, 0, grep_handle);
+	    break;
 	    zh->errCode = 120;
 	    return -1;
 	case 101:        /* process # in term */
@@ -1629,7 +1763,7 @@ static RSET rpn_search_APT_local (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-			   oid_value attributeSet, ODR stream,
+			   oid_value attributeSet, NMEM stream,
 			   Z_SortKeySpecList *sort_sequence,
 			   const char *rank_type)
 {
@@ -1654,8 +1788,8 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     if (!sort_sequence->specs)
     {
 	sort_sequence->num_specs = 10;
-	sort_sequence->specs = odr_malloc (stream, sort_sequence->num_specs *
-					   sizeof(*sort_sequence->specs));
+	sort_sequence->specs = nmem_malloc (stream, sort_sequence->num_specs *
+					    sizeof(*sort_sequence->specs));
 	for (i = 0; i<sort_sequence->num_specs; i++)
 	    sort_sequence->specs[i] = 0;
     }
@@ -1672,29 +1806,29 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     if (!oid_ent_to_oid (&oe, oid))
 	return 0;
 
-    sks = odr_malloc (stream, sizeof(*sks));
-    sks->sortElement = odr_malloc (stream, sizeof(*sks->sortElement));
+    sks = nmem_malloc (stream, sizeof(*sks));
+    sks->sortElement = nmem_malloc (stream, sizeof(*sks->sortElement));
     sks->sortElement->which = Z_SortElement_generic;
-    sk = sks->sortElement->u.generic = odr_malloc (stream, sizeof(*sk));
+    sk = sks->sortElement->u.generic = nmem_malloc (stream, sizeof(*sk));
     sk->which = Z_SortKey_sortAttributes;
-    sk->u.sortAttributes = odr_malloc (stream, sizeof(*sk->u.sortAttributes));
+    sk->u.sortAttributes = nmem_malloc (stream, sizeof(*sk->u.sortAttributes));
 
     sk->u.sortAttributes->id = oid;
     sk->u.sortAttributes->list =
-	odr_malloc (stream, sizeof(*sk->u.sortAttributes->list));
+	nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list));
     sk->u.sortAttributes->list->num_attributes = 1;
     sk->u.sortAttributes->list->attributes =
-	odr_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
+	nmem_malloc (stream, sizeof(*sk->u.sortAttributes->list->attributes));
     ae = *sk->u.sortAttributes->list->attributes =
-	odr_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
+	nmem_malloc (stream, sizeof(**sk->u.sortAttributes->list->attributes));
     ae->attributeSet = 0;
-    ae->attributeType =	odr_malloc (stream, sizeof(*ae->attributeType));
+    ae->attributeType =	nmem_malloc (stream, sizeof(*ae->attributeType));
     *ae->attributeType = 1;
     ae->which = Z_AttributeValue_numeric;
-    ae->value.numeric = odr_malloc (stream, sizeof(*ae->value.numeric));
+    ae->value.numeric = nmem_malloc (stream, sizeof(*ae->value.numeric));
     *ae->value.numeric = use_value;
 
-    sks->sortRelation = odr_malloc (stream, sizeof(*sks->sortRelation));
+    sks->sortRelation = nmem_malloc (stream, sizeof(*sks->sortRelation));
     if (sort_relation_value == 1)
 	*sks->sortRelation = Z_SortRelation_ascending;
     else if (sort_relation_value == 2)
@@ -1702,7 +1836,7 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     else 
 	*sks->sortRelation = Z_SortRelation_ascending;
 
-    sks->caseSensitivity = odr_malloc (stream, sizeof(*sks->caseSensitivity));
+    sks->caseSensitivity = nmem_malloc (stream, sizeof(*sks->caseSensitivity));
     *sks->caseSensitivity = 0;
 
     sks->missingValueAction = 0;
@@ -1715,7 +1849,7 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
 
 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                            oid_value attributeSet, ODR stream,
+                            oid_value attributeSet, NMEM stream,
 			    Z_SortKeySpecList *sort_sequence,
                             int num_bases, char **basenames)
 {
@@ -1779,7 +1913,7 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
-                                  oid_value attributeSet, ODR stream,
+                                  oid_value attributeSet, NMEM stream,
 				  Z_SortKeySpecList *sort_sequence,
                                   int num_bases, char **basenames)
 {
@@ -1836,7 +1970,7 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
 #ifdef ASN_COMPILED
             if (*zop->u.prox->u.known != Z_ProxUnit_word)
             {
-                char *val = odr_malloc (stream, 16);
+                char *val = nmem_malloc (stream, 16);
                 zh->errCode = 132;
                 zh->errString = val;
                 sprintf (val, "%d", *zop->u.prox->u.known);
@@ -1845,7 +1979,7 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
 #else
             if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word)
             {
-                char *val = odr_malloc (stream, 16);
+                char *val = nmem_malloc (stream, 16);
                 zh->errCode = 132;
                 zh->errString = val;
                 sprintf (val, "%d", *zop->u.prox->proximityUnitCode);
@@ -1894,9 +2028,11 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
     return r;
 }
 
-void rpn_search (ZebraHandle zh, ODR stream,
+
+RSET rpn_search (ZebraHandle zh, NMEM nmem,
 		 Z_RPNQuery *rpn, int num_bases, char **basenames, 
-		 const char *setname)
+		 const char *setname,
+		 ZebraSet sset)
 {
     RSET rset;
     oident *attrset;
@@ -1904,37 +2040,43 @@ void rpn_search (ZebraHandle zh, ODR stream,
     Z_SortKeySpecList *sort_sequence;
     int sort_status, i;
 
-    zlog_rpn (rpn);
-
     zh->errCode = 0;
     zh->errString = NULL;
     zh->hits = 0;
 
-    sort_sequence = odr_malloc (stream, sizeof(*sort_sequence));
+    sort_sequence = nmem_malloc (nmem, sizeof(*sort_sequence));
     sort_sequence->num_specs = 10;
-    sort_sequence->specs = odr_malloc (stream, sort_sequence->num_specs *
+    sort_sequence->specs = nmem_malloc (nmem, sort_sequence->num_specs *
 				       sizeof(*sort_sequence->specs));
     for (i = 0; i<sort_sequence->num_specs; i++)
 	sort_sequence->specs[i] = 0;
     
     attrset = oid_getentbyoid (rpn->attributeSetId);
     attributeSet = attrset->value;
-    rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet, stream,
-				 sort_sequence,
-                                 num_bases, basenames);
+    rset = rpn_search_structure (zh, rpn->RPNStructure, attributeSet,
+				 nmem, sort_sequence, num_bases, basenames);
     if (!rset)
-	return;
+	return 0;
 
-    resultSetAdd (zh, setname, 1, rset, &zh->hits);
     if (zh->errCode)
         logf (LOG_DEBUG, "search error: %d", zh->errCode);
-
+    
     for (i = 0; sort_sequence->specs[i]; i++)
 	;
     sort_sequence->num_specs = i;
-    if (i)
-	resultSetSort (zh, stream, 1, &setname, setname, sort_sequence,
-		       &sort_status);
+    if (!i)
+	resultSetRank (zh, sset, rset);
+    else
+    {
+	logf (LOG_DEBUG, "resultSetSortSingle in rpn_search");
+	resultSetSortSingle (zh, nmem, sset, rset,
+			     sort_sequence, &sort_status);
+	if (zh->errCode)
+	{
+	    logf (LOG_DEBUG, "resultSetSortSingle status = %d", zh->errCode);
+	}
+    }
+    return rset;
 }
 
 struct scan_info_entry {
@@ -1969,14 +2111,14 @@ static int scan_handle (char *name, const char *info, int pos, void *client)
     return 0;
 }
 
-static void scan_term_untrans (ZebraHandle zh, ODR stream, int reg_type,
+static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
 			       char **dst, const char *src)
 {
     char term_dst[1024];
     
     term_untrans (zh, reg_type, term_dst, src);
     
-    *dst = odr_malloc (stream, strlen(term_dst)+1);
+    *dst = nmem_malloc (stream, strlen(term_dst)+1);   /* *dst gets a copy of the term_untrans output, allocated from stream; +1 leaves room for the NUL that strcpy copies */
     strcpy (*dst, term_dst);
 }
 
@@ -2144,7 +2286,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         }
         if (j0 == -1)
             break;
-        scan_term_untrans (zh, stream, reg_id,
+        scan_term_untrans (zh, stream->mem, reg_id,
 			   &glist[i+before].term, mterm);
         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
 			   glist[i+before].term, strlen(glist[i+before].term),
@@ -2208,7 +2350,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         if (j0 == -1)
             break;
 
-        scan_term_untrans (zh, stream, reg_id,
+        scan_term_untrans (zh, stream->mem, reg_id,
 			   &glist[before-1-i].term, mterm);
 
         rset = rset_trunc