Fix XPath searching for numeric structure
[idzebra-moved-to-github.git] / index / zrpn.c
index bd0ceab..0a2c29c 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: zrpn.c,v 1.122 2002-08-28 19:52:29 adam Exp $
+/* $Id: zrpn.c,v 1.126 2002-12-16 22:59:34 adam Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
    Index Data Aps
 
@@ -46,7 +46,20 @@ struct rpn_char_map_info {
 static const char **rpn_char_map_handler (void *vp, const char **from, int len)
 {
     struct rpn_char_map_info *p = (struct rpn_char_map_info *) vp;
-    return zebra_maps_input (p->zm, p->reg_type, from, len);
+    const char **out = zebra_maps_input (p->zm, p->reg_type, from, len);
+#if 0
+    if (out && *out)
+    {
+        const char *outp = *out;
+        yaz_log (LOG_LOG, "---");
+        while (*outp)
+        {
+            yaz_log (LOG_LOG, "%02X", *outp);
+            outp++;
+        }
+    }
+#endif
+    return out;
 }
 
 static void rpn_char_map_prepare (struct zebra_register *reg, int reg_type,
@@ -69,20 +82,12 @@ static int attr_find_ex (AttrType *src, oid_value *attributeSetP,
 {
     int num_attributes;
 
-#ifdef ASN_COMPILED
     num_attributes = src->zapt->attributes->num_attributes;
-#else
-    num_attributes = src->zapt->num_attributes;
-#endif
     while (src->major < num_attributes)
     {
         Z_AttributeElement *element;
 
-#ifdef ASN_COMPILED
         element = src->zapt->attributes->attributes[src->major];
-#else
-        element = src->zapt->attributeList[src->major];
-#endif
         if (src->type == *element->attributeType)
         {
             switch (element->which) 
@@ -167,17 +172,18 @@ struct grep_info {
 static void term_untrans  (ZebraHandle zh, int reg_type,
                           char *dst, const char *src)
 {
+    int len = 0;
     while (*src)
     {
         const char *cp = zebra_maps_output (zh->reg->zebra_maps,
                                            reg_type, &src);
-       if (!cp)
-           *dst++ = *src++;
+       if (!cp && len < IT_MAX_WORD-1)
+           dst[len++] = *src++;
        else
-           while (*cp)
-               *dst++ = *cp++;
+           while (*cp && len < IT_MAX_WORD-1)
+               dst[len++] = *cp++;
     }
-    *dst = '\0';
+    dst[len] = '\0';
 }
 
 static void add_isam_p (const char *name, const char *info,
@@ -220,7 +226,7 @@ static void add_isam_p (const char *name, const char *info,
     {
        const char *db;
        int set, use;
-       char term_tmp[512];
+       char term_tmp[IT_MAX_WORD];
        int su_code = 0;
        int len = key_SU_decode (&su_code, name);
        
@@ -266,7 +272,7 @@ static int term_pre (ZebraMaps zebra_maps, int reg_type, const char **src,
     return *s0;
 }
 
-#define REGEX_CHARS "[]()|.*+!"
+#define REGEX_CHARS " []()|.*+?!"
 
 /* term_100: handle term, where trunc=none (no operators at all) */
 static int term_100 (ZebraMaps zebra_maps, int reg_type,
@@ -982,11 +988,11 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                                  attributeSet,
                                  reg_type, space_split, term_dst))
                return 0;
-           logf (LOG_DEBUG, "dict_lookup_grep: %s", term_dict+prefix_len);
+           logf (LOG_LOG, "dict_lookup_grep: %s", term_dict+prefix_len);
            r = dict_lookup_grep (zh->reg->dict, term_dict, 0,
                                  grep_info, &max_pos, 0, grep_handle);
            if (r)
-               logf (LOG_WARN, "dict_lookup_grep fail, rel=gt: %d", r);
+               logf (LOG_WARN, "dict_lookup_grep fail %d", r);
            break;
        case 1:          /* right truncation */
            term_dict[j++] = '(';
@@ -1096,8 +1102,9 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 
-static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                       char *termz)
+/* convert APT search term to UTF-8; returns 0 on success, -1 on error */
+static int zapt_term_to_utf8 (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                              char *termz)
 {
     size_t sizez;
     Z_Term *term = zapt->term;
@@ -1113,7 +1120,6 @@ static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
             size_t outleft = IT_MAX_WORD-1;
             size_t ret;
 
-            yaz_log (LOG_DEBUG, "converting general from ISO-8859-1");
             ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft,
                         &outbuf, &outleft);
             if (ret == (size_t)(-1))
@@ -1123,13 +1129,15 @@ static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                 return -1;
             }
             *outbuf = 0;
-            return 0;
         }
-        sizez = term->u.general->len;
-        if (sizez > IT_MAX_WORD-1)
-            sizez = IT_MAX_WORD-1;
-        memcpy (termz, term->u.general->buf, sizez);
-        termz[sizez] = '\0';
+        else
+        {
+            sizez = term->u.general->len;
+            if (sizez > IT_MAX_WORD-1)
+                sizez = IT_MAX_WORD-1;
+            memcpy (termz, term->u.general->buf, sizez);
+            termz[sizez] = '\0';
+        }
         break;
     case Z_Term_characterString:
         sizez = strlen(term->u.characterString);
@@ -1140,38 +1148,47 @@ static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
         break;
     default:
         zh->errCode = 124;
+        return -1;
     }
     return 0;
 }
 
-static void trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                             char *termz, int reg_type)
+/* convert APT scan term to internal character-map form; returns 0 on success, -1 on error */
+static int trans_scan_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                            char *termz, int reg_type)
 {
-    Z_Term *term = zapt->term;
-    const char **map;
-    const char *cp = (const char *) term->u.general->buf;
-    const char *cp_end = cp + term->u.general->len;
-    const char *src;
-    int i = 0;
-    const char *space_map = NULL;
-    int len;
-    
-    while ((len = (cp_end - cp)) > 0)
+    char termz0[IT_MAX_WORD];
+
+    if (zapt_term_to_utf8(zh, zapt, termz0))
+        return -1;    /* error */
+    else
     {
-        map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
-        if (**map == *CHR_SPACE)
-            space_map = *map;
-        else
+        const char **map;
+        const char *cp = (const char *) termz0;
+        const char *cp_end = cp + strlen(cp);
+        const char *src;
+        int i = 0;
+        const char *space_map = NULL;
+        int len;
+            
+        while ((len = (cp_end - cp)) > 0)
         {
-            if (i && space_map)
-                for (src = space_map; *src; src++)
+            map = zebra_maps_input (zh->reg->zebra_maps, reg_type, &cp, len);
+            if (**map == *CHR_SPACE)
+                space_map = *map;
+            else
+            {
+                if (i && space_map)
+                    for (src = space_map; *src; src++)
+                        termz[i++] = *src;
+                space_map = NULL;
+                for (src = *map; *src; src++)
                     termz[i++] = *src;
-            space_map = NULL;
-            for (src = *map; *src; src++)
-                termz[i++] = *src;
+            }
         }
+        termz[i] = '\0';
     }
-    termz[i] = '\0';
+    return 0;
 }
 
 static RSET rpn_prox (ZebraHandle zh, RSET *rset, int rset_no,
@@ -1730,20 +1747,20 @@ static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                         oid_value attributeSet, struct grep_info *grep_info,
                         int reg_type, int complete_flag,
                         int num_bases, char **basenames,
-                        char *term_dst)
+                        char *term_dst, int xpath_use)
 {
     char term_dict[2*IT_MAX_WORD+2];
     int r, base_no;
     AttrType use;
     int use_value;
+    const char *use_string = 0;
     oid_value curAttributeSet = attributeSet;
     const char *termp;
     struct rpn_char_map_info rcmi;
 
     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
     attr_init (&use, zapt, 1);
-    use_value = attr_find (&use, &curAttributeSet);
-    logf (LOG_DEBUG, "numeric_term, use value %d", use_value);
+    use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
 
     if (use_value == -1)
         use_value = 1016;
@@ -1751,19 +1768,38 @@ static int numeric_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     for (base_no = 0; base_no < num_bases; base_no++)
     {
         attent attp;
+        data1_local_attribute id_xpath_attr;
         data1_local_attribute *local_attr;
         int max_pos, prefix_len = 0;
 
         termp = *term_sub;
-        if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
+        if (use_value == -2)  /* string attribute (assume IDXPATH/any) */
         {
-            logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
-                  curAttributeSet, use_value, r);
-           if (r == -1)
-               zh->errCode = 114;
-           else
-               zh->errCode = 121;
-            return -1;
+            use_value = xpath_use;
+            attp.local_attributes = &id_xpath_attr;
+            attp.attset_ordinal = VAL_IDXPATH;
+            id_xpath_attr.next = 0;
+            id_xpath_attr.local = use_value;
+        }
+       else if (curAttributeSet == VAL_IDXPATH)
+        {
+            attp.local_attributes = &id_xpath_attr;
+            attp.attset_ordinal = VAL_IDXPATH;
+            id_xpath_attr.next = 0;
+            id_xpath_attr.local = use_value;
+        }
+        else
+        {
+            if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
+            {
+                logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
+                      curAttributeSet, use_value, r);
+                if (r == -1)
+                    zh->errCode = 114;
+                else
+                    zh->errCode = 121;
+                return -1;
+            }
         }
         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
         {
@@ -1820,7 +1856,7 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh,
                                    oid_value attributeSet,
                                    NMEM stream,
                                    int reg_type, int complete_flag,
-                                   const char *rank_type,
+                                   const char *rank_type, int xpath_use,
                                    int num_bases, char **basenames)
 {
     char term_dst[IT_MAX_WORD+1];
@@ -1837,7 +1873,7 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh,
        grep_info.isam_p_indx = 0;
         r = numeric_term (zh, zapt, &termp, attributeSet, &grep_info,
                          reg_type, complete_flag, num_bases, basenames,
-                         term_dst);
+                         term_dst, xpath_use);
         if (r < 1)
             break;
        logf (LOG_DEBUG, "term: %s", term_dst);
@@ -1989,13 +2025,8 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
        nmem_malloc (stream, sizeof(*sks->caseSensitivity));
     *sks->caseSensitivity = 0;
 
-#ifdef ASN_COMPILED
     sks->which = Z_SortKeySpec_null;
     sks->u.null = odr_nullval ();
-#else
-    sks->missingValueAction = 0;
-#endif
-
     sort_sequence->specs[i] = sks;
 
     parms.rset_term = rset_term_create (termz, -1, rank_type,
@@ -2351,7 +2382,7 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     logf (LOG_DEBUG, "search_type=%s", search_type);
     logf (LOG_DEBUG, "rank_type=%s", rank_type);
 
-    if (trans_term (zh, zapt, termz))
+    if (zapt_term_to_utf8(zh, zapt, termz))
         return 0;
 
     if (sort_flag)
@@ -2395,6 +2426,7 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     {
        rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
                                       reg_id, complete_flag, rank_type,
+                                       xpath_use,
                                       num_bases, basenames);
     }
     else if (!strcmp (search_type, "always"))
@@ -2448,21 +2480,11 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
             r = rset_create (rset_kind_not, &bool_parms);
             break;
         case Z_Operator_prox:
-#ifdef ASN_COMPILED
             if (zop->u.prox->which != Z_ProximityOperator_known)
             {
                 zh->errCode = 132;
                 return NULL;
             }
-#else
-            if (zop->u.prox->which != Z_ProxCode_known)
-            {
-                zh->errCode = 132;
-                return NULL;
-            }
-#endif
-
-#ifdef ASN_COMPILED
             if (*zop->u.prox->u.known != Z_ProxUnit_word)
             {
                 char *val = (char *) nmem_malloc (stream, 16);
@@ -2471,16 +2493,6 @@ static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
                 sprintf (val, "%d", *zop->u.prox->u.known);
                 return NULL;
             }
-#else
-            if (*zop->u.prox->proximityUnitCode != Z_ProxUnit_word)
-            {
-                char *val = (char *) nmem_malloc (stream, 16);
-                zh->errCode = 132;
-                zh->errString = val;
-                sprintf (val, "%d", *zop->u.prox->proximityUnitCode);
-                return NULL;
-            }
-#endif
            else
            {
                RSET rsets[2];
@@ -2626,12 +2638,33 @@ static int scan_handle (char *name, const char *info, int pos, void *client)
 static void scan_term_untrans (ZebraHandle zh, NMEM stream, int reg_type,
                               char **dst, const char *src)
 {
-    char term_dst[1024];
+    char term_src[IT_MAX_WORD];
+    char term_dst[IT_MAX_WORD];
     
-    term_untrans (zh, reg_type, term_dst, src);
-    
-    *dst = (char *) nmem_malloc (stream, strlen(term_dst)+1);
-    strcpy (*dst, term_dst);
+    term_untrans (zh, reg_type, term_src, src);
+
+    if (zh->iconv_from_utf8 != 0)
+    {
+        int len;
+        char *inbuf = term_src;
+        size_t inleft = strlen(term_src);
+        char *outbuf = term_dst;
+        size_t outleft = sizeof(term_dst)-1;
+        size_t ret;
+        
+        ret = yaz_iconv (zh->iconv_from_utf8, &inbuf, &inleft,
+                         &outbuf, &outleft);
+        if (ret == (size_t)(-1))
+            len = 0;
+        else
+            len = outbuf - term_dst;
+        *dst = nmem_malloc (stream, len + 1);
+        if (len > 0)
+            memcpy (*dst, term_dst, len);
+        (*dst)[len] = '\0';
+    }
+    else
+        *dst = nmem_strdup (stream, term_src);
 }
 
 static void count_set (RSET r, int *count)
@@ -2775,7 +2808,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         termz[prefix_len] = 0;
         strcpy (scan_info->prefix, termz);
 
-        trans_scan_term (zh, zapt, termz+prefix_len, reg_id);
+        if (trans_scan_term (zh, zapt, termz+prefix_len, reg_id))
+            return ;
                     
         dict_scan (zh->reg->dict, termz, &before_tmp, &after_tmp,
                   scan_info, scan_handle);