Moved zebrautl.h to idzebra/util.h.
[idzebra-moved-to-github.git] / index / zrpn.c
index 476990c..dfb7ab9 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zrpn.c,v 1.169 2005-02-25 10:08:44 adam Exp $
+/* $Id: zrpn.c,v 1.171 2005-03-11 17:56:34 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -206,7 +206,7 @@ static void term_untrans(ZebraHandle zh, int reg_type,
 }
 
 static void add_isam_p(const char *name, const char *info,
-                        struct grep_info *p)
+                      struct grep_info *p)
 {
     if (!log_level_set)
     {
@@ -295,6 +295,33 @@ static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
     return *s0;
 }
 
+
+/* esc_str: dump in_buf[0..in_size) into out_buf as "HH:c  " items: hex
+   byte value, then the byte itself, or '?' if not printable ASCII.
+   Appends ".." and stops once fewer than 20 bytes remain in out_buf. */
+static void esc_str(char *out_buf, int out_size,
+                   const char *in_buf, int in_size)
+{
+    int k, len = 0; /* len: chars written so far (no strlen rescans) */
+    assert(out_buf);
+    assert(in_buf);
+    assert(out_size > 20);
+    if (out_size > 0)
+        *out_buf = '\0';
+    /* asserts vanish under NDEBUG: never write into a too-short buffer */
+    for (k = 0; out_size > 20 && k < in_size; k++)
+    {
+        int c = in_buf[k] & 0xff;
+        len += sprintf(out_buf + len, "%02X:%c  ", c,
+                       (c < 32 || c > 126) ? '?' : c);
+        if (len > out_size - 20)
+        {
+            strcpy(out_buf + len, "..");
+            break;
+        }
+    }
+}
+
 #define REGEX_CHARS " []()|.*+?!"
 
 /* term_100: handle term, where trunc = none(no operators at all) */
@@ -302,7 +329,7 @@ static int term_100(ZebraMaps zebra_maps, int reg_type,
                      const char **src, char *dst, int space_split,
                      char *dst_term)
 {
-    const char *s0, *s1;
+    const char *s0;
     const char **map;
     int i = 0;
     int j = 0;
@@ -315,8 +342,10 @@ static int term_100(ZebraMaps zebra_maps, int reg_type,
     s0 = *src;
     while (*s0)
     {
-        s1 = s0;
-        map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
+        const char *s1 = s0;
+       int q_map_match = 0;
+        map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
+                               &q_map_match);
         if (space_split)
         {
             if (**map == *CHR_SPACE)
@@ -343,14 +372,26 @@ static int term_100(ZebraMaps zebra_maps, int reg_type,
                 space_start = space_end = 0;
             }
         }
-        /* add non-space char */
-        while (s1 < s0)
-        {
-            if (strchr(REGEX_CHARS, *s1))
-                dst[i++] = '\\';
-            dst_term[j++] = *s1;
-            dst[i++] = *s1++;
-        }
+       /* add non-space char */
+       memcpy(dst_term+j, s1, s0 - s1);
+       j += (s0 - s1);
+       if (!q_map_match)
+       {
+           while (s1 < s0)
+           {
+               if (strchr(REGEX_CHARS, *s1))
+                   dst[i++] = '\\';
+               dst[i++] = *s1++;
+           }
+       }
+       else
+       {
+           char tmpbuf[80];
+           esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+           
+           strcpy(dst + i, map[0]);
+           i += strlen(map[0]);
+       }
     }
     dst[i] = '\0';
     dst_term[j] = '\0';
@@ -363,7 +404,7 @@ static int term_101(ZebraMaps zebra_maps, int reg_type,
                      const char **src, char *dst, int space_split,
                      char *dst_term)
 {
-    const char *s0, *s1;
+    const char *s0;
     const char **map;
     int i = 0;
     int j = 0;
@@ -381,17 +422,33 @@ static int term_101(ZebraMaps zebra_maps, int reg_type,
         }
         else
         {
-            s1 = s0;
-            map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
+           const char *s1 = s0;
+           int q_map_match = 0;
+           map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
+                                   &q_map_match);
             if (space_split && **map == *CHR_SPACE)
                 break;
-            while (s1 < s0)
-            {
-                if (strchr(REGEX_CHARS, *s1))
-                    dst[i++] = '\\';
-                dst_term[j++] = *s1;
-                dst[i++] = *s1++;
-            }
+
+           /* add non-space char */
+           memcpy(dst_term+j, s1, s0 - s1);
+           j += (s0 - s1);
+           if (!q_map_match)
+           {
+               while (s1 < s0)
+               {
+                   if (strchr(REGEX_CHARS, *s1))
+                       dst[i++] = '\\';
+                   dst[i++] = *s1++;
+               }
+           }
+           else
+           {
+               char tmpbuf[80];
+               esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+               
+               strcpy(dst + i, map[0]);
+               i += strlen(map[0]);
+           }
         }
     }
     dst[i] = '\0';
@@ -407,7 +464,7 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
 {
     int i = 0;
     int j = 0;
-    const char *s0, *s1;
+    const char *s0;
     const char **map;
 
     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
@@ -430,22 +487,39 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
         }
         else
         {
-            s1 = s0;
-            map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
-            if (**map == *CHR_SPACE)
+           const char *s1 = s0;
+           int q_map_match = 0;
+           map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
+                                   &q_map_match);
+            if (space_split && **map == *CHR_SPACE)
                 break;
-            while (s1 < s0)
-            {
-                if (strchr(REGEX_CHARS, *s1))
-                    dst[i++] = '\\';
-                dst_term[j++] = *s1;
-                dst[i++] = *s1++;
-            }
+
+           /* add non-space char */
+           memcpy(dst_term+j, s1, s0 - s1);
+           j += (s0 - s1);
+           if (!q_map_match)
+           {
+               while (s1 < s0)
+               {
+                   if (strchr(REGEX_CHARS, *s1))
+                       dst[i++] = '\\';
+                   dst[i++] = *s1++;
+               }
+           }
+           else
+           {
+               char tmpbuf[80];
+               esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+               
+               strcpy(dst + i, map[0]);
+               i += strlen(map[0]);
+           }
         }
     }
     dst[i] = '\0';
     dst_term[j] = '\0';
     *src = s0;
+    
     return i;
 }
 
@@ -958,6 +1032,7 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         use_value = 1016;
     for (base_no = 0; base_no < num_bases; base_no++)
     {
+       int ord = -1;
        int attr_ok = 0;
        int regex_range = 0;
        int init_pos = 0;
@@ -976,23 +1051,48 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         }
         if (xpath_use > 0 && use_value == -2) 
         {
-            use_value = xpath_use;
+           /* xpath mode and we have a string attribute */
             attp.local_attributes = &id_xpath_attr;
             attp.attset_ordinal = VAL_IDXPATH;
             id_xpath_attr.next = 0;
+
+            use_value = xpath_use;  /* xpath_use as use-attribute now */
             id_xpath_attr.local = use_value;
         }
-        else if (curAttributeSet == VAL_IDXPATH)
+        else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
         {
+           /* X-Path attribute, use numeric value directly */
             attp.local_attributes = &id_xpath_attr;
             attp.attset_ordinal = VAL_IDXPATH;
             id_xpath_attr.next = 0;
             id_xpath_attr.local = use_value;
         }
-        else
+       else if (use_string &&
+                (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+                                                    use_string)) >= 0)
+       {
+           /* we have a match for a raw string attribute */
+            char ord_buf[32];
+            int i, ord_len;
+
+            if (prefix_len)
+                term_dict[prefix_len++] = '|';
+            else
+                term_dict[prefix_len++] = '(';
+            
+            ord_len = key_SU_encode (ord, ord_buf);
+            for (i = 0; i<ord_len; i++)
+            {
+                term_dict[prefix_len++] = 1;
+                term_dict[prefix_len++] = ord_buf[i];
+            }
+            attp.local_attributes = 0;  /* no more attributes */
+       }
+        else 
         {
+           /* lookup in the .att files . Allow string as well */
             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
-                                            use_string)))
+                                     use_string)))
             {
                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
                       curAttributeSet, use_value, r);
@@ -1021,29 +1121,29 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                 continue;
             }
         }
-        for (local_attr = attp.local_attributes; local_attr;
-             local_attr = local_attr->next)
-        {
-            int ord;
-            char ord_buf[32];
-            int i, ord_len;
-            
-            ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
-                                         local_attr->local);
-            if (ord < 0)
-                continue;
-            if (prefix_len)
-                term_dict[prefix_len++] = '|';
-            else
-                term_dict[prefix_len++] = '(';
-            
-            ord_len = key_SU_encode (ord, ord_buf);
-            for (i = 0; i<ord_len; i++)
-            {
-                term_dict[prefix_len++] = 1;
-                term_dict[prefix_len++] = ord_buf[i];
-            }
-        }
+       for (local_attr = attp.local_attributes; local_attr;
+            local_attr = local_attr->next)
+       {
+           char ord_buf[32];
+           int i, ord_len;
+           
+           ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+                                             attp.attset_ordinal,
+                                             local_attr->local);
+           if (ord < 0)
+               continue;
+           if (prefix_len)
+               term_dict[prefix_len++] = '|';
+           else
+               term_dict[prefix_len++] = '(';
+           
+           ord_len = key_SU_encode (ord, ord_buf);
+           for (i = 0; i<ord_len; i++)
+           {
+               term_dict[prefix_len++] = 1;
+               term_dict[prefix_len++] = ord_buf[i];
+           }
+       }
         if (!prefix_len)
         {
 #if 1
@@ -1147,6 +1247,12 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         }
        if (attr_ok)
        {
+           char buf[80];
+           const char *input = term_dict + prefix_len;
+           esc_str(buf, sizeof(buf), input, strlen(input));
+       }
+       if (attr_ok)
+       {
            yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
            r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
                                 grep_info, &max_pos, init_pos,
@@ -1617,8 +1723,9 @@ static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             char ord_buf[32];
             int i, ord_len;
 
-            ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
-                                          local_attr->local);
+            ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+                                             attp.attset_ordinal,
+                                             local_attr->local);
             if (ord < 0)
                 continue;
             if (prefix_len)
@@ -1858,7 +1965,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
     char term_dict[2048];
     char ord_buf[32];
     int prefix_len = 0;
-    int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
+    int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
     int ord_len, i, r, max_pos;
     int term_type = Z_Term_characterString;
     const char *flags = "void";
@@ -1888,7 +1995,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
     grep_info.isam_p_indx = 0;
     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
                           &grep_info, &max_pos, 0, grep_handle);
-    yaz_log (YLOG_LOG, "%s %d positions", term,
+    yaz_log (YLOG_DEBUG, "%s %d positions", term,
              grep_info.isam_p_indx);
     rset = rset_trunc(zh, grep_info.isam_p_buf,
                        grep_info.isam_p_indx, term, strlen(term),
@@ -2499,8 +2606,9 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         {
             int ord;
 
-            ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
-                                         local_attr->local);
+            ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+                                             attp.attset_ordinal,
+                                             local_attr->local);
             if (ord > 0)
                 ords[ord_no++] = ord;
         }