Get rid of typedefs ISAMS_P, ISAMC_P, ISAMB_P and use ISAM_P instead.
[idzebra-moved-to-github.git] / index / zrpn.c
index 476990c..caccbcd 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zrpn.c,v 1.169 2005-02-25 10:08:44 adam Exp $
+/* $Id: zrpn.c,v 1.173 2005-04-13 13:03:47 adam Exp $
    Copyright (C) 1995-2005
    Index Data ApS
 
@@ -180,7 +180,7 @@ struct grep_info {
 #ifdef TERM_COUNT        
     int *term_no;        
 #endif        
-    ISAMC_P *isam_p_buf;
+    ISAM_P *isam_p_buf;
     int isam_p_size;        
     int isam_p_indx;
     ZebraHandle zh;
@@ -206,7 +206,7 @@ static void term_untrans(ZebraHandle zh, int reg_type,
 }
 
 static void add_isam_p(const char *name, const char *info,
-                        struct grep_info *p)
+                      struct grep_info *p)
 {
     if (!log_level_set)
     {
@@ -215,13 +215,13 @@ static void add_isam_p(const char *name, const char *info,
     }
     if (p->isam_p_indx == p->isam_p_size)
     {
-        ISAMC_P *new_isam_p_buf;
+        ISAM_P *new_isam_p_buf;
 #ifdef TERM_COUNT        
         int *new_term_no;        
 #endif
         p->isam_p_size = 2*p->isam_p_size + 100;
-        new_isam_p_buf = (ISAMC_P *) xmalloc(sizeof(*new_isam_p_buf) *
-                                             p->isam_p_size);
+        new_isam_p_buf = (ISAM_P *) xmalloc(sizeof(*new_isam_p_buf) *
+                                           p->isam_p_size);
         if (p->isam_p_buf)
         {
             memcpy(new_isam_p_buf, p->isam_p_buf,
@@ -295,6 +295,33 @@ static int term_pre(ZebraMaps zebra_maps, int reg_type, const char **src,
     return *s0;
 }
 
+
+/* esc_str: dump in_buf[0..in_size-1] into out_buf as "HH:c  " groups (hex
+   value, then the byte, or '?' outside 32..126). When more than out_size-20
+   chars have been written, ".." is appended and the rest is dropped. */
+static void esc_str(char *out_buf, int out_size,
+                   const char *in_buf, int in_size)
+{
+    int k, len = 0;  /* len: number of chars written to out_buf so far */
+
+    assert(out_buf);
+    assert(in_buf);
+    assert(out_size > 20);
+    *out_buf = '\0';
+    for (k = 0; k<in_size; k++)
+    {
+       int c = in_buf[k] & 0xff;
+       int pc = (c < 32 || c > 126) ? '?' : c;
+
+       len += sprintf(out_buf + len, "%02X:%c  ", c, pc);
+       if (len > out_size-20)  /* both operands are int */
+       {
+           strcpy(out_buf + len, "..");
+           break;
+       }
+    }
+}
+
 #define REGEX_CHARS " []()|.*+?!"
 
 /* term_100: handle term, where trunc = none(no operators at all) */
@@ -302,7 +329,7 @@ static int term_100(ZebraMaps zebra_maps, int reg_type,
                      const char **src, char *dst, int space_split,
                      char *dst_term)
 {
-    const char *s0, *s1;
+    const char *s0;
     const char **map;
     int i = 0;
     int j = 0;
@@ -315,8 +342,10 @@ static int term_100(ZebraMaps zebra_maps, int reg_type,
     s0 = *src;
     while (*s0)
     {
-        s1 = s0;
-        map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
+        const char *s1 = s0;
+       int q_map_match = 0;
+        map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
+                               &q_map_match);
         if (space_split)
         {
             if (**map == *CHR_SPACE)
@@ -343,14 +372,26 @@ static int term_100(ZebraMaps zebra_maps, int reg_type,
                 space_start = space_end = 0;
             }
         }
-        /* add non-space char */
-        while (s1 < s0)
-        {
-            if (strchr(REGEX_CHARS, *s1))
-                dst[i++] = '\\';
-            dst_term[j++] = *s1;
-            dst[i++] = *s1++;
-        }
+       /* add non-space char */
+       memcpy(dst_term+j, s1, s0 - s1);
+       j += (s0 - s1);
+       if (!q_map_match)
+       {
+           while (s1 < s0)
+           {
+               if (strchr(REGEX_CHARS, *s1))
+                   dst[i++] = '\\';
+               dst[i++] = *s1++;
+           }
+       }
+       else
+       {
+           char tmpbuf[80];
+           esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+           
+           strcpy(dst + i, map[0]);
+           i += strlen(map[0]);
+       }
     }
     dst[i] = '\0';
     dst_term[j] = '\0';
@@ -363,7 +404,7 @@ static int term_101(ZebraMaps zebra_maps, int reg_type,
                      const char **src, char *dst, int space_split,
                      char *dst_term)
 {
-    const char *s0, *s1;
+    const char *s0;
     const char **map;
     int i = 0;
     int j = 0;
@@ -381,17 +422,33 @@ static int term_101(ZebraMaps zebra_maps, int reg_type,
         }
         else
         {
-            s1 = s0;
-            map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
+           const char *s1 = s0;
+           int q_map_match = 0;
+           map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
+                                   &q_map_match);
             if (space_split && **map == *CHR_SPACE)
                 break;
-            while (s1 < s0)
-            {
-                if (strchr(REGEX_CHARS, *s1))
-                    dst[i++] = '\\';
-                dst_term[j++] = *s1;
-                dst[i++] = *s1++;
-            }
+
+           /* add non-space char */
+           memcpy(dst_term+j, s1, s0 - s1);
+           j += (s0 - s1);
+           if (!q_map_match)
+           {
+               while (s1 < s0)
+               {
+                   if (strchr(REGEX_CHARS, *s1))
+                       dst[i++] = '\\';
+                   dst[i++] = *s1++;
+               }
+           }
+           else
+           {
+               char tmpbuf[80];
+               esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+               
+               strcpy(dst + i, map[0]);
+               i += strlen(map[0]);
+           }
         }
     }
     dst[i] = '\0';
@@ -407,7 +464,7 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
 {
     int i = 0;
     int j = 0;
-    const char *s0, *s1;
+    const char *s0;
     const char **map;
 
     if (!term_pre(zebra_maps, reg_type, src, "^\\()[].*+?|", "(", !space_split))
@@ -430,22 +487,39 @@ static int term_103(ZebraMaps zebra_maps, int reg_type, const char **src,
         }
         else
         {
-            s1 = s0;
-            map = zebra_maps_input(zebra_maps, reg_type, &s0, strlen(s0), 0);
-            if (**map == *CHR_SPACE)
+           const char *s1 = s0;
+           int q_map_match = 0;
+           map = zebra_maps_search(zebra_maps, reg_type, &s0, strlen(s0), 
+                                   &q_map_match);
+            if (space_split && **map == *CHR_SPACE)
                 break;
-            while (s1 < s0)
-            {
-                if (strchr(REGEX_CHARS, *s1))
-                    dst[i++] = '\\';
-                dst_term[j++] = *s1;
-                dst[i++] = *s1++;
-            }
+
+           /* add non-space char */
+           memcpy(dst_term+j, s1, s0 - s1);
+           j += (s0 - s1);
+           if (!q_map_match)
+           {
+               while (s1 < s0)
+               {
+                   if (strchr(REGEX_CHARS, *s1))
+                       dst[i++] = '\\';
+                   dst[i++] = *s1++;
+               }
+           }
+           else
+           {
+               char tmpbuf[80];
+               esc_str(tmpbuf, sizeof(tmpbuf), map[0], strlen(map[0]));
+               
+               strcpy(dst + i, map[0]);
+               i += strlen(map[0]);
+           }
         }
     }
     dst[i] = '\0';
     dst_term[j] = '\0';
     *src = s0;
+    
     return i;
 }
 
@@ -958,6 +1032,7 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         use_value = 1016;
     for (base_no = 0; base_no < num_bases; base_no++)
     {
+       int ord = -1;
        int attr_ok = 0;
        int regex_range = 0;
        int init_pos = 0;
@@ -976,23 +1051,48 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         }
         if (xpath_use > 0 && use_value == -2) 
         {
-            use_value = xpath_use;
+           /* xpath mode and we have a string attribute */
             attp.local_attributes = &id_xpath_attr;
             attp.attset_ordinal = VAL_IDXPATH;
             id_xpath_attr.next = 0;
+
+            use_value = xpath_use;  /* xpath_use as use-attribute now */
             id_xpath_attr.local = use_value;
         }
-        else if (curAttributeSet == VAL_IDXPATH)
+        else if (curAttributeSet == VAL_IDXPATH && use_value >= 0)
         {
+           /* X-Path attribute, use numeric value directly */
             attp.local_attributes = &id_xpath_attr;
             attp.attset_ordinal = VAL_IDXPATH;
             id_xpath_attr.next = 0;
             id_xpath_attr.local = use_value;
         }
-        else
+       else if (use_string &&
+                (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+                                                    use_string)) >= 0)
+       {
+           /* we have a match for a raw string attribute */
+            char ord_buf[32];
+            int i, ord_len;
+
+            if (prefix_len)
+                term_dict[prefix_len++] = '|';
+            else
+                term_dict[prefix_len++] = '(';
+            
+            ord_len = key_SU_encode (ord, ord_buf);
+            for (i = 0; i<ord_len; i++)
+            {
+                term_dict[prefix_len++] = 1;
+                term_dict[prefix_len++] = ord_buf[i];
+            }
+            attp.local_attributes = 0;  /* no more attributes */
+       }
+        else 
         {
+           /* lookup in the .att files . Allow string as well */
             if ((r = att_getentbyatt (zh, &attp, curAttributeSet, use_value,
-                                            use_string)))
+                                     use_string)))
             {
                 yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
                       curAttributeSet, use_value, r);
@@ -1021,29 +1121,29 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                 continue;
             }
         }
-        for (local_attr = attp.local_attributes; local_attr;
-             local_attr = local_attr->next)
-        {
-            int ord;
-            char ord_buf[32];
-            int i, ord_len;
-            
-            ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
-                                         local_attr->local);
-            if (ord < 0)
-                continue;
-            if (prefix_len)
-                term_dict[prefix_len++] = '|';
-            else
-                term_dict[prefix_len++] = '(';
-            
-            ord_len = key_SU_encode (ord, ord_buf);
-            for (i = 0; i<ord_len; i++)
-            {
-                term_dict[prefix_len++] = 1;
-                term_dict[prefix_len++] = ord_buf[i];
-            }
-        }
+       for (local_attr = attp.local_attributes; local_attr;
+            local_attr = local_attr->next)
+       {
+           char ord_buf[32];
+           int i, ord_len;
+           
+           ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+                                             attp.attset_ordinal,
+                                             local_attr->local);
+           if (ord < 0)
+               continue;
+           if (prefix_len)
+               term_dict[prefix_len++] = '|';
+           else
+               term_dict[prefix_len++] = '(';
+           
+           ord_len = key_SU_encode (ord, ord_buf);
+           for (i = 0; i<ord_len; i++)
+           {
+               term_dict[prefix_len++] = 1;
+               term_dict[prefix_len++] = ord_buf[i];
+           }
+       }
         if (!prefix_len)
         {
 #if 1
@@ -1147,6 +1247,12 @@ static int string_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         }
        if (attr_ok)
        {
+           char buf[80];
+           const char *input = term_dict + prefix_len;
+           esc_str(buf, sizeof(buf), input, strlen(input));
+       }
+       if (attr_ok)
+       {
            yaz_log(log_level_rpn, "dict_lookup_grep: %s", term_dict+prefix_len);
            r = dict_lookup_grep(zh->reg->dict, term_dict, regex_range,
                                 grep_info, &max_pos, init_pos,
@@ -1617,8 +1723,9 @@ static int numeric_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             char ord_buf[32];
             int i, ord_len;
 
-            ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
-                                          local_attr->local);
+            ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+                                             attp.attset_ordinal,
+                                             local_attr->local);
             if (ord < 0)
                 continue;
             if (prefix_len)
@@ -1858,7 +1965,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
     char term_dict[2048];
     char ord_buf[32];
     int prefix_len = 0;
-    int ord = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, use);
+    int ord = zebraExplain_lookup_attr_su(zh->reg->zei, curAttributeSet, use);
     int ord_len, i, r, max_pos;
     int term_type = Z_Term_characterString;
     const char *flags = "void";
@@ -1888,7 +1995,7 @@ static RSET xpath_trunc(ZebraHandle zh, NMEM stream,
     grep_info.isam_p_indx = 0;
     r = dict_lookup_grep(zh->reg->dict, term_dict, 0,
                           &grep_info, &max_pos, 0, grep_handle);
-    yaz_log (YLOG_LOG, "%s %d positions", term,
+    yaz_log (YLOG_DEBUG, "%s %d positions", term,
              grep_info.isam_p_indx);
     rset = rset_trunc(zh, grep_info.isam_p_buf,
                        grep_info.isam_p_indx, term, strlen(term),
@@ -2299,7 +2406,7 @@ RSET rpn_search(ZebraHandle zh, NMEM nmem, NMEM rset_nmem,
 
 struct scan_info_entry {
     char *term;
-    ISAMC_P isam_p;
+    ISAM_P isam_p;  /* NOTE(review): presumably the list[idx].isam_p that scan_handle fills via memcpy from info+1 - confirm */
 };
 
 struct scan_info {
@@ -2323,8 +2430,8 @@ static int scan_handle (char *name, const char *info, int pos, void *client)
     scan_info->list[idx].term = (char *)
         odr_malloc(scan_info->odr, strlen(name + len_prefix)+1);
     strcpy(scan_info->list[idx].term, name + len_prefix);
-    assert (*info == sizeof(ISAMC_P));
-    memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAMC_P));
+    assert (*info == sizeof(ISAM_P));
+    memcpy (&scan_info->list[idx].isam_p, info+1, sizeof(ISAM_P));
     return 0;
 }
 
@@ -2466,44 +2573,58 @@ void rpn_scan(ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         use_value = 1016;
     for (base_no = 0; base_no < num_bases && ord_no < 32; base_no++)
     {
-        int r;
-        attent attp;
-        data1_local_attribute *local_attr;
+       data1_local_attribute *local_attr;
+       attent attp;
+       int ord;
 
-        if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
-                                use_string)))
-        {
-            yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
-                  attributeset, use_value);
-            if (r == -1)
-            {
-                char val_str[32];
-                sprintf (val_str, "%d", use_value);
-                errCode = 114;
-                errString = odr_strdup (stream, val_str);
-            }   
-            else
-                errCode = 121;
-            continue;
-        }
-        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
-        {
-            zh->errString = basenames[base_no];
-            zh->errCode = 109; /* Database unavailable */
-            *num_entries = 0;
-            return;
-        }
-        bases_ok++;
-        for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
-             local_attr = local_attr->next)
-        {
-            int ord;
+       if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
+       {
+           zh->errString = basenames[base_no];
+           zh->errCode = 109; /* Database unavailable */
+           *num_entries = 0;
+           return;
+       }
 
-            ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
-                                         local_attr->local);
-            if (ord > 0)
-                ords[ord_no++] = ord;
-        }
+       if (use_string &&
+           (ord = zebraExplain_lookup_attr_str(zh->reg->zei,
+                                               use_string)) >= 0)
+       {
+           /* we have a match for a raw string attribute */
+           if (ord > 0)
+               ords[ord_no++] = ord;
+            attp.local_attributes = 0;  /* no more attributes */
+       }
+       else
+       {
+           int r;
+           
+           if ((r = att_getentbyatt (zh, &attp, attributeset, use_value,
+                                     use_string)))
+           {
+               yaz_log(YLOG_DEBUG, "att_getentbyatt fail. set=%d use=%d",
+                       attributeset, use_value);
+               if (r == -1)
+               {
+                   char val_str[32];
+                   sprintf (val_str, "%d", use_value);
+                   errCode = 114;
+                   errString = odr_strdup (stream, val_str);
+               }   
+               else
+                   errCode = 121;
+               continue;
+           }
+       }
+       bases_ok++;
+       for (local_attr = attp.local_attributes; local_attr && ord_no < 32;
+            local_attr = local_attr->next)
+       {
+           ord = zebraExplain_lookup_attr_su(zh->reg->zei,
+                                             attp.attset_ordinal,
+                                             local_attr->local);
+           if (ord > 0)
+               ords[ord_no++] = ord;
+       }
     }
     if (!bases_ok && errCode)
     {