Work on XPATH
author Adam Dickmeiss <adam@indexdata.dk>
Fri, 12 Apr 2002 14:40:42 +0000 (14:40 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Fri, 12 Apr 2002 14:40:42 +0000 (14:40 +0000)
dict/Makefile.am
index/Makefile.am
index/index.h
index/trunc.c
index/zrpn.c
recctrl/recgrs.c
test/api/Makefile.am

index 4552c6e..9fd786c 100644 (file)
@@ -1,10 +1,9 @@
-## $Id: Makefile.am,v 1.3 2001-10-15 20:57:18 adam Exp $
+## $Id: Makefile.am,v 1.4 2002-04-12 14:40:42 adam Exp $
 
 noinst_LIBRARIES = libdict.a
 noinst_PROGRAMS = dicttest dictext
 
-INCLUDES = -I$(srcdir)/../include 
-CFLAGS = @YAZINC@
+INCLUDES = -I$(srcdir)/../include @YAZINC@
 LDADD = libdict.a ../bfile/libbfile.a ../dfa/libdfa.a ../util/libutil.a @YAZLIB@ @LIBS@
 
 libdict_a_SOURCES = scan.c dopen.c dclose.c drdwr.c open.c close.c insert.c \
index 559f341..accf045 100644 (file)
@@ -1,4 +1,4 @@
-## $Id: Makefile.am,v 1.11 2002-04-05 08:46:26 adam Exp $
+## $Id: Makefile.am,v 1.12 2002-04-12 14:40:42 adam Exp $
 
 noinst_PROGRAMS = apitest kdump
 
@@ -29,7 +29,7 @@ zebrasrv_SOURCES = zserver.c
 apitest_SOURCES = apitest.c
 kdump_SOURCES=kdump.c kcompare.c
 
-CFLAGS = -I$(srcdir)/../include $(YAZINC) $(TCL_INCLUDE)
+INCLUDES = -I$(srcdir)/../include $(YAZINC) $(TCL_INCLUDE)
 
 LDADD = libzebra.a $(YAZLIB) $(TCL_LIB)
 
index 345cf42..f3ae7f6 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2002, Index Data
  * All rights reserved.
  * Sebastian Hammer, Adam Dickmeiss, Heikki Levanto
- * $Id: index.h,v 1.77 2002-04-05 08:46:26 adam Exp $
+ * $Id: index.h,v 1.78 2002-04-12 14:40:42 adam Exp $
  */
 
 #ifndef INDEX_H
@@ -306,7 +306,8 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
               int *is_partial);
 
 RSET rset_trunc (ZebraHandle zh, ISAMS_P *isam_p, int no,
-                const char *term, int length_term, const char *flags);
+                const char *term, int length_term, const char *flags,
+                 int preserve_position);
 
 void resultSetAddTerm (ZebraHandle zh, ZebraSet s, int reg_type,
                       const char *db, int set,
index 0f9128e..40c0e50 100644 (file)
@@ -4,7 +4,10 @@
  * Sebastian Hammer, Adam Dickmeiss
  *
  * $Log: trunc.c,v $
- * Revision 1.22  2002-04-05 08:46:26  adam
+ * Revision 1.23  2002-04-12 14:40:42  adam
+ * Work on XPATH
+ *
+ * Revision 1.22  2002/04/05 08:46:26  adam
  * Zebra with full functionality
  *
  * Revision 1.21  2002/04/04 14:14:13  adam
@@ -190,8 +193,8 @@ static void heap_close (struct trunc_info *ti)
 }
 
 static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
-                        const char *flags, ISAMS_P *isam_p, int from, int to,
-                         int merge_chunk)
+                          const char *flags, ISAMS_P *isam_p, int from, int to,
+                          int merge_chunk, int preserve_position)
 {
     RSET result; 
     RSFD result_rsfd;
@@ -221,10 +224,12 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
         {
             if (i_add <= to - i)
                 rset[rscur] = rset_trunc_r (zi, term, length, flags,
-                                           isam_p, i, i+i_add, merge_chunk);
+                                           isam_p, i, i+i_add,
+                                            merge_chunk, preserve_position);
             else
                 rset[rscur] = rset_trunc_r (zi, term, length, flags,
-                                            isam_p, i, to, merge_chunk);
+                                            isam_p, i, to,
+                                            merge_chunk, preserve_position);
             rscur++;
         }
         ti = heap_init (rscur, sizeof(struct it_key), key_compare_it);
@@ -341,31 +346,32 @@ static RSET rset_trunc_r (ZebraHandle zi, const char *term, int length,
             int n = ti->indx[ti->ptr[1]];
 
             rset_write (result, result_rsfd, ti->heap[ti->ptr[1]]);
-#if 0
-/* section that preserve all keys */
-            heap_delete (ti);
-            if (isc_pp_read (ispt[n], ti->tmpbuf))
-                heap_insert (ti, ti->tmpbuf, n);
-            else
-                isc_pp_close (ispt[n]);
-#else
-/* section that preserve all keys with unique sysnos */
-            while (1)
+            if (preserve_position)
             {
-                if (!isc_pp_read (ispt[n], ti->tmpbuf))
-                {
-                    heap_delete (ti);
+                heap_delete (ti);
+                if (isc_pp_read (ispt[n], ti->tmpbuf))
+                    heap_insert (ti, ti->tmpbuf, n);
+                else
                     isc_pp_close (ispt[n]);
-                    break;
-                }
-                if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
+            }
+            else
+            {
+                while (1)
                 {
-                    heap_delete (ti);
-                    heap_insert (ti, ti->tmpbuf, n);
-                    break;
+                    if (!isc_pp_read (ispt[n], ti->tmpbuf))
+                    {
+                        heap_delete (ti);
+                        isc_pp_close (ispt[n]);
+                        break;
+                    }
+                    if ((*ti->cmp)(ti->tmpbuf, ti->heap[ti->ptr[1]]) > 1)
+                    {
+                        heap_delete (ti);
+                        heap_insert (ti, ti->tmpbuf, n);
+                        break;
+                    }
                 }
             }
-#endif
         }
         heap_close (ti);
         xfree (ispt);
@@ -517,7 +523,8 @@ static int isamd_trunc_cmp (const void *p1, const void *p2)
 }
 
 RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no,
-                const char *term, int length, const char *flags)
+                const char *term, int length, const char *flags,
+                 int preserve_position)
 {
     logf (LOG_DEBUG, "rset_trunc no=%d", no);
     if (no < 1)
@@ -616,6 +623,7 @@ RSET rset_trunc (ZebraHandle zi, ISAMS_P *isam_p, int no,
         logf (LOG_WARN, "Unknown isam set in rset_trunc");
        return rset_create (rset_kind_null, NULL);
     }
-    return rset_trunc_r (zi, term, length, flags, isam_p, 0, no, 100);
+    return rset_trunc_r (zi, term, length, flags, isam_p, 0, no, 100,
+                         preserve_position);
 }
 
index 2a14aee..06f4259 100644 (file)
@@ -3,7 +3,7 @@
  * All rights reserved.
  * Sebastian Hammer, Adam Dickmeiss
  *
- * $Id: zrpn.c,v 1.112 2002-04-04 14:14:13 adam Exp $
+ * $Id: zrpn.c,v 1.113 2002-04-12 14:40:42 adam Exp $
  */
 #include <stdio.h>
 #include <assert.h>
@@ -20,6 +20,7 @@
 #include <rstemp.h>
 #include <rsnull.h>
 #include <rsbool.h>
+#include <rsbetween.h>
 
 struct rpn_char_map_info {
     ZebraMaps zm;
@@ -797,12 +798,43 @@ static int string_relation (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 }
 
 static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
-                       const char **term_sub, 
-                       oid_value attributeSet, NMEM stream,
-                       struct grep_info *grep_info,
-                       int reg_type, int complete_flag,
-                       int num_bases, char **basenames,
-                       char *term_dst)
+                        const char **term_sub, 
+                        oid_value attributeSet, NMEM stream,
+                        struct grep_info *grep_info,
+                        int reg_type, int complete_flag,
+                        int num_bases, char **basenames,
+                        char *term_dst);
+
+static RSET term_trunc (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                        const char **term_sub, 
+                        oid_value attributeSet, NMEM stream,
+                        struct grep_info *grep_info,
+                        int reg_type, int complete_flag,
+                        int num_bases, char **basenames,
+                        char *term_dst,
+                        const char *rank_type)
+{   /* Runs string_term for one term and hands what it collected in grep_info to rset_trunc. */
+    int r;  /* string_term status; anything below 1 ends the lookup */
+    grep_info->isam_p_indx = 0;  /* start the position count from zero for this term */
+    r = string_term (zh, zapt, term_sub, attributeSet, stream, grep_info,
+                     reg_type, complete_flag, num_bases, basenames,
+                     term_dst);
+    if (r < 1)
+        return 0;  /* no result set; callers test for this and leave their term loop */
+    logf (LOG_DEBUG, "term: %s", term_dst);
+    return rset_trunc (zh, grep_info->isam_p_buf,
+                       grep_info->isam_p_indx, term_dst,
+                       strlen(term_dst), rank_type, 1 /* preserve pos */);
+}
+
+
+static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                        const char **term_sub, 
+                        oid_value attributeSet, NMEM stream,
+                        struct grep_info *grep_info,
+                        int reg_type, int complete_flag,
+                        int num_bases, char **basenames,
+                        char *term_dst)
 {
     char term_dict[2*IT_MAX_WORD+4000];
     int j, r, base_no;
@@ -810,6 +842,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     int truncation_value;
     AttrType use;
     int use_value;
+    const char *use_string = 0;
     oid_value curAttributeSet = attributeSet;
     const char *termp;
     struct rpn_char_map_info rcmi;
@@ -817,7 +850,7 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
     rpn_char_map_prepare (zh->reg, reg_type, &rcmi);
     attr_init (&use, zapt, 1);
-    use_value = attr_find (&use, &curAttributeSet);
+    use_value = attr_find_ex (&use, &curAttributeSet, &use_string);
     logf (LOG_DEBUG, "string_term, use value %d", use_value);
     attr_init (&truncation, zapt, 5);
     truncation_value = attr_find (&truncation, NULL);
@@ -825,78 +858,92 @@ static int string_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
     if (use_value == -1)
         use_value = 1016;
+    if (use_value == -2)
+        use_value = 1016;
 
     for (base_no = 0; base_no < num_bases; base_no++)
     {
         attent attp;
+        data1_local_attribute id_xpath_attr;
         data1_local_attribute *local_attr;
         int max_pos, prefix_len = 0;
 
         termp = *term_sub;
-        if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
-        {
-            logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
-                  curAttributeSet, use_value, r);
-           if (r == -1)
-           {
-               char val_str[32];
-               sprintf (val_str, "%d", use_value);
-               zh->errCode = 114;
-               zh->errString = nmem_strdup (stream, val_str);
-           }
-           else
-           {
-               int oid[OID_SIZE];
-               struct oident oident;
-
-               oident.proto = PROTO_Z3950;
-               oident.oclass = CLASS_ATTSET;
-               oident.value = curAttributeSet;
-               oid_ent_to_oid (&oident, oid);
 
-               zh->errCode = 121;
-               zh->errString = nmem_strdup (stream, oident.desc);
-           }
-            return -1;
-        }
         if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
         {
             zh->errCode = 109; /* Database unavailable */
             zh->errString = basenames[base_no];
             return -1;
         }
+        if (curAttributeSet == VAL_IDXPATH)
+        {
+            attp.local_attributes = &id_xpath_attr;
+            attp.attset_ordinal = curAttributeSet;
+            id_xpath_attr.next = 0;
+            id_xpath_attr.local = use_value;
+        }
+        else
+        {
+            if ((r=att_getentbyatt (zh, &attp, curAttributeSet, use_value)))
+            {
+                logf (LOG_DEBUG, "att_getentbyatt fail. set=%d use=%d r=%d",
+                      curAttributeSet, use_value, r);
+                if (r == -1)
+                {
+                    char val_str[32];
+                    sprintf (val_str, "%d", use_value);
+                    zh->errCode = 114;
+                    zh->errString = nmem_strdup (stream, val_str);
+                }
+                else
+                {
+                    int oid[OID_SIZE];
+                    struct oident oident;
+                    
+                    oident.proto = PROTO_Z3950;
+                    oident.oclass = CLASS_ATTSET;
+                    oident.value = curAttributeSet;
+                    oid_ent_to_oid (&oident, oid);
+                    
+                    zh->errCode = 121;
+                    zh->errString = nmem_strdup (stream, oident.desc);
+                }
+                return -1;
+            }
+        }
         for (local_attr = attp.local_attributes; local_attr;
              local_attr = local_attr->next)
         {
             int ord;
-           char ord_buf[32];
-           int i, ord_len;
-
+            char ord_buf[32];
+            int i, ord_len;
+            
             ord = zebraExplain_lookupSU (zh->reg->zei, attp.attset_ordinal,
-                                          local_attr->local);
+                                         local_attr->local);
             if (ord < 0)
                 continue;
             if (prefix_len)
                 term_dict[prefix_len++] = '|';
             else
                 term_dict[prefix_len++] = '(';
-
-           ord_len = key_SU_encode (ord, ord_buf);
-           for (i = 0; i<ord_len; i++)
-           {
-               term_dict[prefix_len++] = 1;
-               term_dict[prefix_len++] = ord_buf[i];
-           }
+            
+            ord_len = key_SU_encode (ord, ord_buf);
+            for (i = 0; i<ord_len; i++)
+            {
+                term_dict[prefix_len++] = 1;
+                term_dict[prefix_len++] = ord_buf[i];
+            }
         }
         if (!prefix_len)
         {
-           char val_str[32];
-           sprintf (val_str, "%d", use_value);
-           zh->errCode = 114;
-           zh->errString = nmem_strdup (stream, val_str);
+            char val_str[32];
+            sprintf (val_str, "%d", use_value);
+            zh->errCode = 114;
+            zh->errString = nmem_strdup (stream, val_str);
             return -1;
         }
-        term_dict[prefix_len++] = ')';        
+        term_dict[prefix_len++] = ')';
         term_dict[prefix_len++] = 1;
         term_dict[prefix_len++] = reg_type;
        logf (LOG_DEBUG, "reg_type = %d", term_dict[prefix_len-1]);
@@ -1334,6 +1381,14 @@ char *normalize_term(ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     }
 }
 
+static void grep_info_delete (struct grep_info *grep_info)
+{   /* Frees the buffers referenced by grep_info; the struct itself is not freed. */
+#ifdef TERM_COUNT
+    xfree(grep_info->term_no);  /* this member is only released in TERM_COUNT builds */
+#endif
+    xfree (grep_info->isam_p_buf);
+}
+
 static int grep_info_prepare (ZebraHandle zh,
                              Z_AttributesPlusTerm *zapt,
                              struct grep_info *grep_info,
@@ -1392,7 +1447,7 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh,
 {
     char term_dst[IT_MAX_WORD+1];
     RSET rset[60], result;
-    int i, r, rset_no = 0;
+    int i, rset_no = 0;
     struct grep_info grep_info;
     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
     const char *termp = termz;
@@ -1403,24 +1458,17 @@ static RSET rpn_search_APT_phrase (ZebraHandle zh,
     while (1)
     { 
        logf (LOG_DEBUG, "APT_phrase termp=%s", termp);
-       grep_info.isam_p_indx = 0;
-        r = string_term (zh, zapt, &termp, attributeSet, stream, &grep_info,
-                       reg_type, complete_flag, num_bases, basenames,
-                       term_dst);
-        if (r < 1)
+        rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
+                                    stream, &grep_info,
+                                    reg_type, complete_flag,
+                                    num_bases, basenames,
+                                    term_dst, rank_type);
+        if (!rset[rset_no])
             break;
-       logf (LOG_DEBUG, "term: %s", term_dst);
-        rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
-                                    grep_info.isam_p_indx, term_dst,
-                                   strlen(term_dst), rank_type);
-        assert (rset[rset_no]);
         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
             break;
     }
-#ifdef TERM_COUNT
-    xfree(grep_info.term_no);
-#endif
-    xfree (grep_info.isam_p_buf);
+    grep_info_delete (&grep_info);
     if (rset_no == 0)
     {
        rset_null_parms parms;
@@ -1447,7 +1495,7 @@ static RSET rpn_search_APT_or_list (ZebraHandle zh,
 {
     char term_dst[IT_MAX_WORD+1];
     RSET rset[60], result;
-    int i, r, rset_no = 0;
+    int i, rset_no = 0;
     struct grep_info grep_info;
     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
     const char *termp = termz;
@@ -1457,24 +1505,17 @@ static RSET rpn_search_APT_or_list (ZebraHandle zh,
     while (1)
     { 
        logf (LOG_DEBUG, "APT_or_list termp=%s", termp);
-       grep_info.isam_p_indx = 0;
-        r = string_term (zh, zapt, &termp, attributeSet, stream, &grep_info,
-                       reg_type, complete_flag, num_bases, basenames,
-                       term_dst);
-        if (r < 1)
+        rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
+                                    stream, &grep_info,
+                                    reg_type, complete_flag,
+                                    num_bases, basenames,
+                                    term_dst, rank_type);
+        if (!rset[rset_no])
             break;
-       logf (LOG_DEBUG, "term: %s", term_dst);
-        rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
-                                    grep_info.isam_p_indx, term_dst,
-                                   strlen(term_dst), rank_type);
-        assert (rset[rset_no]);
         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
             break;
     }
-#ifdef TERM_COUNT
-    xfree(grep_info.term_no);
-#endif
-    xfree (grep_info.isam_p_buf);
+    grep_info_delete (&grep_info);
     if (rset_no == 0)
     {
        rset_null_parms parms;
@@ -1507,7 +1548,7 @@ static RSET rpn_search_APT_and_list (ZebraHandle zh,
 {
     char term_dst[IT_MAX_WORD+1];
     RSET rset[60], result;
-    int i, r, rset_no = 0;
+    int i, rset_no = 0;
     struct grep_info grep_info;
     char *termz = normalize_term(zh, zapt, termz_org, stream, reg_type);
     const char *termp = termz;
@@ -1517,24 +1558,18 @@ static RSET rpn_search_APT_and_list (ZebraHandle zh,
     while (1)
     { 
        logf (LOG_DEBUG, "APT_and_list termp=%s", termp);
-       grep_info.isam_p_indx = 0;
-        r = string_term (zh, zapt, &termp, attributeSet, stream, &grep_info,
-                       reg_type, complete_flag, num_bases, basenames,
-                       term_dst);
-        if (r < 1)
+        rset[rset_no] = term_trunc (zh, zapt, &termp, attributeSet,
+                                    stream, &grep_info,
+                                    reg_type, complete_flag,
+                                    num_bases, basenames,
+                                    term_dst, rank_type);
+        if (!rset[rset_no])
             break;
-       logf (LOG_DEBUG, "term: %s", term_dst);
-        rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
-                                    grep_info.isam_p_indx, term_dst,
-                                   strlen(term_dst), rank_type);
         assert (rset[rset_no]);
         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
             break;
     }
-#ifdef TERM_COUNT
-    xfree(grep_info.term_no);
-#endif
-    xfree (grep_info.isam_p_buf);
+    grep_info_delete (&grep_info);
     if (rset_no == 0)
     {
        rset_null_parms parms;
@@ -1730,15 +1765,13 @@ static RSET rpn_search_APT_numeric (ZebraHandle zh,
        logf (LOG_DEBUG, "term: %s", term_dst);
         rset[rset_no] = rset_trunc (zh, grep_info.isam_p_buf,
                                     grep_info.isam_p_indx, term_dst,
-                                   strlen(term_dst), rank_type);
+                                   strlen(term_dst), rank_type,
+                                    0 /* preserve position */);
         assert (rset[rset_no]);
         if (++rset_no >= (int) (sizeof(rset)/sizeof(*rset)))
             break;
     }
-#ifdef TERM_COUNT
-    xfree(grep_info.term_no);
-#endif
-    xfree (grep_info.isam_p_buf);
+    grep_info_delete (&grep_info);
     if (rset_no == 0)
     {
        rset_null_parms parms;
@@ -1888,6 +1921,147 @@ static RSET rpn_sort_spec (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     return rset_create (rset_kind_null, &parms);
 }
 
+/* Capacity of the dictionary search expression built for an XPath lookup. */
+#define XPATH_TERM_DICT_SIZE 2048
+/* Capacity of the buffer that receives the encoded search-unit ordinal. */
+#define XPATH_ORD_BUF_SIZE 32
+/* Longest prefix placed before the path: '(' + two bytes per encoded
+ * ordinal byte + ')' + the two register-type bytes. */
+#define XPATH_PREFIX_MAX (2*XPATH_ORD_BUF_SIZE + 4)
+
+/*
+ * Look up use_string in the dictionary under search unit ord (register
+ * type '0') and return the hits as a result set that keeps every position.
+ * The caller must already have checked that use_string is shorter than
+ * XPATH_TERM_DICT_SIZE - XPATH_PREFIX_MAX so the expression fits term_dict.
+ * grep_info->isam_p_indx is reset before the lookup.
+ */
+static RSET xpath_trunc_tag (ZebraHandle zh, struct grep_info *grep_info,
+                             int ord, const char *use_string,
+                             const char *rank_type)
+{
+    char term_dict[XPATH_TERM_DICT_SIZE];
+    char ord_buf[XPATH_ORD_BUF_SIZE];
+    int prefix_len = 0;
+    int ord_len, i, max_pos;
+
+    term_dict[prefix_len++] = '(';
+    ord_len = key_SU_encode (ord, ord_buf);
+    for (i = 0; i<ord_len; i++)
+    {
+        term_dict[prefix_len++] = 1;
+        term_dict[prefix_len++] = ord_buf[i];
+    }
+    term_dict[prefix_len++] = ')';
+    term_dict[prefix_len++] = 1;
+    term_dict[prefix_len++] = '0';
+    strcpy (term_dict+prefix_len, use_string);
+
+    grep_info->isam_p_indx = 0;
+    dict_lookup_grep (zh->reg->dict, term_dict, 0,
+                      grep_info, &max_pos, 0, grep_handle);
+    yaz_log (LOG_LOG, "%s %d positions", use_string,
+             grep_info->isam_p_indx);
+    return rset_trunc (zh, grep_info->isam_p_buf,
+                       grep_info->isam_p_indx, use_string,
+                       strlen(use_string), rank_type,
+                       1 /* preserve position */);
+}
+
+/*
+ * Restrict rset to hits that lie inside the XPath element named by the
+ * string value of the use attribute in zapt.
+ *
+ * rset is returned unchanged when it is NULL, when the attribute set is not
+ * VAL_IDXPATH, or when the use attribute has no string value.  Otherwise,
+ * for each database, rset is wrapped in a "between" result set bounded by
+ * the start-tag (use 1) and end-tag (use 2) hits for that path.  Returns 0
+ * if grep_info cannot be prepared.
+ */
+static RSET rpn_search_xpath (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
+                              oid_value attributeSet,
+                              int num_bases, char **basenames,
+                              NMEM stream, const char *rank_type, RSET rset)
+{
+    AttrType use;
+    const char *use_string = 0;
+    oid_value curAttributeSet = attributeSet;
+    int base_no;
+    struct grep_info grep_info;
+    struct rpn_char_map_info rcmi;
+
+    /* Nothing to filter: never wrap a NULL set in a between set. */
+    if (!rset)
+        return rset;
+
+    yaz_log (LOG_LOG, "rpn_search_xpath 1");
+    attr_init (&use, zapt, 1);
+    attr_find_ex (&use, &curAttributeSet, &use_string);
+
+    if (curAttributeSet != VAL_IDXPATH)
+    {
+        yaz_log (LOG_LOG, "rpn_search_xpath - not 1");
+        return rset;
+    }
+    if (!use_string)
+    {
+        yaz_log (LOG_LOG, "rpn_search_xpath - not 2");
+        return rset;
+    }
+    /* The path comes from the query; refuse one that would overflow the
+     * fixed-size search expression.  114 is the code string_term sets when
+     * a use attribute cannot be resolved. */
+    if (strlen(use_string) >= XPATH_TERM_DICT_SIZE - XPATH_PREFIX_MAX)
+    {
+        zh->errCode = 114;
+        zh->errString = nmem_strdup (stream, use_string);
+        return rset;
+    }
+
+    rpn_char_map_prepare (zh->reg, '0', &rcmi);
+
+    /* NOTE(review): rset is dropped without being released here - confirm. */
+    if (grep_info_prepare (zh, zapt, &grep_info, '0', stream))
+       return 0;
+
+    yaz_log (LOG_LOG, "rpn_search_xpath 2");
+    for (base_no = 0; base_no < num_bases; base_no++)
+    {
+        rset_between_parms parms;
+        int ord_start, ord_end;
+
+        if (zebraExplain_curDatabase (zh->reg->zei, basenames[base_no]))
+        {
+            zh->errCode = 109; /* Database unavailable */
+            zh->errString = basenames[base_no];
+            grep_info_delete (&grep_info);
+            return rset;
+        }
+
+        /* Resolve both tag markers before building anything, so a missing
+         * one skips this database without leaving a result set behind. */
+        ord_start = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, 1);
+        ord_end = zebraExplain_lookupSU (zh->reg->zei, curAttributeSet, 2);
+        if (ord_start < 0 || ord_end < 0)
+            continue;
+
+        yaz_log (LOG_LOG, "rpn_search_xpath 3 %s", use_string);
+        parms.key_size = sizeof(struct it_key);
+        parms.cmp = key_compare_it;
+        parms.rset_l = xpath_trunc_tag (zh, &grep_info, ord_start,
+                                        use_string, rank_type);
+        parms.rset_m = rset;
+        parms.rset_r = xpath_trunc_tag (zh, &grep_info, ord_end,
+                                        use_string, rank_type);
+        yaz_log (LOG_LOG, "rpn_search_xpath 4");
+        rset = rset_create (rset_kind_between, &parms);
+    }
+    grep_info_delete (&grep_info);
+
+    return rset;
+}
+
+
 
 static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
                             oid_value attributeSet, NMEM stream,
@@ -1900,6 +2055,7 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
     int complete_flag;
     int sort_flag;
     char termz[IT_MAX_WORD+1];
+    RSET rset = 0;
 
     zebra_maps_attr (zh->reg->zebra_maps, zapt, &reg_id, &search_type,
                     rank_type, &complete_flag, &sort_flag);
@@ -1922,35 +2078,37 @@ static RSET rpn_search_APT (ZebraHandle zh, Z_AttributesPlusTerm *zapt,
 
     if (!strcmp (search_type, "phrase"))
     {
-       return rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
+       rset = rpn_search_APT_phrase (zh, zapt, termz, attributeSet, stream,
                                      reg_id, complete_flag, rank_type,
                                      num_bases, basenames);
     }
     else if (!strcmp (search_type, "and-list"))
     {
-       return rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
+       rset = rpn_search_APT_and_list (zh, zapt, termz, attributeSet, stream,
                                        reg_id, complete_flag, rank_type,
                                        num_bases, basenames);
     }
     else if (!strcmp (search_type, "or-list"))
     {
-       return rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
+       rset = rpn_search_APT_or_list (zh, zapt, termz, attributeSet, stream,
                                       reg_id, complete_flag, rank_type,
                                       num_bases, basenames);
     }
     else if (!strcmp (search_type, "local"))
     {
-        return rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
+        rset = rpn_search_APT_local (zh, zapt, termz, attributeSet, stream,
                                     rank_type);
     }
     else if (!strcmp (search_type, "numeric"))
     {
-       return rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
+       rset = rpn_search_APT_numeric (zh, zapt, termz, attributeSet, stream,
                                       reg_id, complete_flag, rank_type,
                                       num_bases, basenames);
     }
-    zh->errCode = 118;
-    return NULL;
+    else
+        zh->errCode = 118;
+    return rpn_search_xpath (zh, zapt, attributeSet, num_bases, basenames,
+                             stream, rank_type, rset);
 }
 
 static RSET rpn_search_structure (ZebraHandle zh, Z_RPNStructure *zs,
@@ -2357,7 +2515,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
                           &glist[i+before].term, mterm);
         rset = rset_trunc (zh, &scan_info_array[j0].list[ptr[j0]].isam_p, 1,
                           glist[i+before].term, strlen(glist[i+before].term),
-                          NULL);
+                          NULL, 0);
 
         ptr[j0]++;
         for (j = j0+1; j<ord_no; j++)
@@ -2372,7 +2530,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
                 rset2 =
                    rset_trunc (zh, &scan_info_array[j].list[ptr[j]].isam_p, 1,
                               glist[i+before].term,
-                              strlen(glist[i+before].term), NULL);
+                              strlen(glist[i+before].term), NULL, 0);
 
                 bool_parms.key_size = sizeof(struct it_key);
                 bool_parms.cmp = key_compare_it;
@@ -2423,7 +2581,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
         rset = rset_trunc
                (zh, &scan_info_array[j0].list[before-1-ptr[j0]].isam_p, 1,
                glist[before-1-i].term, strlen(glist[before-1-i].term),
-               NULL);
+               NULL, 0);
 
         ptr[j0]++;
 
@@ -2439,7 +2597,7 @@ void rpn_scan (ZebraHandle zh, ODR stream, Z_AttributesPlusTerm *zapt,
                 rset2 = rset_trunc (zh,
                          &scan_info_array[j].list[before-1-ptr[j]].isam_p, 1,
                                    glist[before-1-i].term,
-                                   strlen(glist[before-1-i].term), NULL);
+                                   strlen(glist[before-1-i].term), NULL, 0);
 
                 bool_parms.key_size = sizeof(struct it_key);
                 bool_parms.cmp = key_compare_it;
index 7413384..deb7046 100644 (file)
@@ -3,7 +3,10 @@
  * All rights reserved.
  *
  * $Log: recgrs.c,v $
- * Revision 1.44  2002-04-11 20:09:47  adam
+ * Revision 1.45  2002-04-12 14:40:42  adam
+ * Work on XPATH
+ *
+ * Revision 1.44  2002/04/11 20:09:47  adam
  * work on string tag indexing
  *
  * Revision 1.43  2002/03/21 23:06:36  adam
@@ -324,20 +327,22 @@ static void grs_destroy(void *clientData)
     free (h);
 }
 
-static void index_string_tag (data1_node *n,
-                              struct recExtractCtrl *p,
-                              int level, RecWord *wrd,
-                              int use)
+static void index_xpath (data1_node *n, struct recExtractCtrl *p,
+                         int level, RecWord *wrd, int use)
 {
     int i;
+    char tag_path_full[1024];
+    size_t flen = 0;
+    data1_node *nn;
+
     switch (n->which)
     {
     case DATA1N_data:
         wrd->reg_type = 'w';
         wrd->string = n->u.data.data;
         wrd->length = n->u.data.len;
-        wrd->attrSet = VAL_BIB1;
-        wrd->attrUse = 1016;
+        wrd->attrSet = VAL_IDXPATH;  /* was joined to the next line by a stray ',' */
+        wrd->attrUse = use;
         if (p->flagShowRecords)
         {
             printf("%*s data=", (level + 1) * 4, "");
@@ -351,16 +356,40 @@ static void index_string_tag (data1_node *n,
         }
         break;
     case DATA1N_tag:
-        wrd->reg_type = 'w';
-        wrd->string = n->u.tag.tag;
-        wrd->length = strlen(n->u.tag.tag);
-        wrd->attrSet = VAL_BIB1;
+        for (nn = n; nn; nn = nn->parent)  /* leaf first: tag/parent/.../roottype/ */
+        {
+            if (nn->which == DATA1N_tag)  /* test the ancestor being visited, not n */
+            {
+                size_t tlen = strlen(nn->u.tag.tag);
+                if (tlen + flen > (sizeof(tag_path_full)-2))
+                    return;  /* path too long for the buffer: index nothing */
+                memcpy (tag_path_full + flen, nn->u.tag.tag, tlen);
+                flen += tlen;
+                tag_path_full[flen++] = '/';
+            }
+            else if (nn->which == DATA1N_root)
+            {
+                size_t tlen = strlen(nn->u.root.type);
+                if (tlen + flen > (sizeof(tag_path_full)-2))
+                    return;
+                memcpy (tag_path_full + flen, nn->u.root.type, tlen);
+                flen += tlen;
+                tag_path_full[flen++] = '/';
+                break;  /* the root type is the last path component */
+            }
+        }
+        wrd->reg_type = '0';
+        wrd->string = tag_path_full;  /* not NUL-terminated; wrd->length carries the size */
+        wrd->length = flen;
+        wrd->attrSet = VAL_IDXPATH;
         wrd->attrUse = use;
         if (p->flagShowRecords)
         {
             printf("%*s tag=", (level + 1) * 4, "");
-            for (i = 0; i<wrd->length && i < 8; i++)
+            for (i = 0; i<wrd->length && i < 40; i++)
                 fputc (wrd->string[i], stdout);
+            if (i == 40)
+                printf (" ..");
             printf("\n");
         }
         else
@@ -496,7 +525,7 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
             index_termlist (n, n, p, level, &wrd);
             /* index start tag */
             if (!n->root->u.root.absyn)
-                index_string_tag (n, p, level, &wrd, 1);
+                index_xpath (n, p, level, &wrd, 1);
        }
 
        if (n->child)
@@ -524,7 +553,7 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
            if (par)
                index_termlist (par, n, p, level, &wrd);
             if (!n->root->u.root.absyn)
-                index_string_tag (n, p, level, &wrd, 1016);
+                index_xpath (n, p, level, &wrd, 1016);
 
        }
 
@@ -532,7 +561,7 @@ static int dumpkeys(data1_node *n, struct recExtractCtrl *p, int level)
        {
             /* index end tag */
             if (!n->root->u.root.absyn)
-                index_string_tag (n, p, level, &wrd, 2);
+                index_xpath (n, p, level, &wrd, 2);
        }
 
 
index ba60a45..5fd0003 100644 (file)
@@ -6,6 +6,6 @@ EXTRA_DIST=t1.cfg t2.cfg
 t1_SOURCES = t1.c
 t2_SOURCES = t2.c
 
-CFLAGS = -I$(top_srcdir)/index -I$(top_srcdir)/include $(YAZINC)
+INCLUDES = -I$(top_srcdir)/index -I$(top_srcdir)/include $(YAZINC)
 
 LDADD = ../../index/libzebra.a $(YAZLIB) $(TCL_LIB)