One more RSET_DEBUG
[idzebra-moved-to-github.git] / rset / rsbetween.c
index 1c9b8f7..f631f33 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: rsbetween.c,v 1.9 2003-06-12 18:14:49 adam Exp $
+/* $Id: rsbetween.c,v 1.12 2004-02-12 15:15:54 heikki Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
    Index Data Aps
 
@@ -21,6 +21,14 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 */
 
 
+/* rsbetween is (mostly) used for xml searches. It returns the hits of the
+ * "middle" rset, that are in between the "left" and "right" rsets. For
+ * example "Shakespeare" in between "<title>" and </title>. The thing is 
+ * complicated by the inclusion of attributes (from their own rset). If attrs
+ * specified, they must match the "left" rset (start tag). "Hamlet" between
+ * "<title lang=eng>" and "</title>". (This assumes that the attributes are
+ * indexed to the same seqno as the tags).
+*/ 
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -35,6 +43,9 @@ static RSFD r_open_between (RSET ct, int flag);
 static void r_close_between (RSFD rfd);
 static void r_delete_between (RSET ct);
 static void r_rewind_between (RSFD rfd);
+static int r_forward_between(RSET ct, RSFD rfd, void *buf, int *term_index,
+                     int (*cmpfunc)(const void *p1, const void *p2),
+                     const void *untilbuf);
 static int r_count_between (RSET ct);
 static int r_read_between (RSFD rfd, void *buf, int *term_index);
 static int r_write_between (RSFD rfd, const void *buf);
@@ -47,6 +58,7 @@ static const struct rset_control control_between =
     r_close_between,
     r_delete_between,
     r_rewind_between,
+    r_forward_between, /* rset_default_forward, */
     r_count_between,
     r_read_between,
     r_write_between,
@@ -88,6 +100,19 @@ struct rset_between_rfd {
     struct rset_between_info *info;
 };    
 
+static void log2 (struct rset_between_rfd *p, char *msg, int cmp_l, int cmp_r)
+{
+    char buf_l[32];
+    char buf_m[32];
+    char buf_r[32];
+    logf(LOG_DEBUG,"btw: %s l=%s(%d/%d) m=%s(%d) r=%s(%d/%d), lev=%d",
+      msg, 
+      (*p->info->printer)(p->buf_l, buf_l), p->more_l, cmp_l,
+      (*p->info->printer)(p->buf_m, buf_m), p->more_m,
+      (*p->info->printer)(p->buf_r, buf_r), p->more_r, cmp_r,
+      p->level);
+}
+
 static void *r_create_between (RSET ct, const struct rset_control *sel,
                                void *parms)
 {
@@ -151,8 +176,8 @@ static RSFD r_open_between (RSET ct, int flag)
 
     if (flag & RSETF_WRITE)
     {
-       logf (LOG_FATAL, "between set type is read-only");
-       return NULL;
+        logf (LOG_FATAL, "between set type is read-only");
+        return NULL;
     }
     rfd = (struct rset_between_rfd *) xmalloc (sizeof(*rfd));
     rfd->next = info->rfd_list;
@@ -169,11 +194,11 @@ static RSFD r_open_between (RSET ct, int flag)
     rfd->rfd_r = rset_open (info->rset_r, RSETF_READ);
     
     rfd->more_l = rset_read (info->rset_l, rfd->rfd_l, rfd->buf_l,
-                            &rfd->term_index_l);
+                             &rfd->term_index_l);
     rfd->more_m = rset_read (info->rset_m, rfd->rfd_m, rfd->buf_m,
-                            &rfd->term_index_m);
+                             &rfd->term_index_m);
     rfd->more_r = rset_read (info->rset_r, rfd->rfd_r, rfd->buf_r,
-                            &rfd->term_index_r);
+                             &rfd->term_index_r);
     if (info->rset_attr)
     {
         int dummy;
@@ -247,24 +272,33 @@ static void r_rewind_between (RSFD rfd)
     p->level=0;
 }
 
+
+
+static int r_forward_between(RSET ct, RSFD rfd, void *buf, int *term_index,
+                     int (*cmpfunc)(const void *p1, const void *p2),
+                     const void *untilbuf)
+{
+    struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
+    struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
+    int rc;
+    log2( p, "fwd: before forward", 0,0);
+    /* It is enough to forward the m pointer here, the read will */
+    /* naturally forward the l, m, and attr pointers */
+    if (p->more_m)
+        p->more_m=rset_forward(info->rset_m,p->rfd_m, p->buf_m,
+                        &p->term_index_m, info->cmp,untilbuf);
+    log2( p, "fwd: after forward M", 0,0);
+    rc = r_read_between(rfd, buf, term_index);
+    log2( p, "fwd: after forward", 0,0);
+    return rc;
+}
+
 static int r_count_between (RSET ct)
 {
     return 0;
 }
 
 
-static void log2 (struct rset_between_rfd *p, char *msg, int cmp_l, int cmp_r)
-{
-    char buf_l[32];
-    char buf_m[32];
-    char buf_r[32];
-    logf(LOG_DEBUG,"btw: %s l=%s(%d/%d) m=%s(%d) r=%s(%d/%d), lev=%d",
-      msg, 
-      (*p->info->printer)(p->buf_l, buf_l), p->more_l, cmp_l,
-      (*p->info->printer)(p->buf_m, buf_m), p->more_m,
-      (*p->info->printer)(p->buf_r, buf_r), p->more_r, cmp_r,
-      p->level);
-}
 
 static int r_read_between (RSFD rfd, void *buf, int *term_index)
 {
@@ -278,23 +312,23 @@ static int r_read_between (RSFD rfd, void *buf, int *term_index)
     {
         log2( p, "start of loop", cmp_l, cmp_r);
 
-       /* forward L until past m, count levels, note rec boundaries */
-       if (p->more_l)
-           cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
-       else
-       {
-           p->level = 0;
-           cmp_l=2; /* past this record */
-       }
+        /* forward L until past m, count levels, note rec boundaries */
+        if (p->more_l)
+            cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
+        else
+        {
+            p->level = 0;
+            cmp_l=2; /* past this record */
+        }
         log2( p, "after first L", cmp_l, cmp_r);
 
         while (cmp_l < 0)   /* l before m */
-       {
+        {
             if (cmp_l == -2)
-               p->level=0; /* earlier record */
+                p->level=0; /* earlier record */
             if (cmp_l == -1)
             {
-               p->level++; /* relevant start tag */
+                p->level++; /* relevant start tag */
 
                 if (!info->rset_attr)
                     attr_match = 1;
@@ -313,69 +347,136 @@ static int r_read_between (RSFD rfd, void *buf, int *term_index)
                         }
                         else if (cmp_attr > 0)
                             break;
-                        p->more_attr = rset_read (info->rset_attr, p->rfd_attr,
+                        else if (cmp_attr==-1) 
+                            p->more_attr = rset_read (info->rset_attr, p->rfd_attr,
                                                   p->buf_attr, &dummy_term);
-                    }
+                            /* if we had a forward that went all the way to
+                             * the seqno, we could use that. But fwd only goes
+                             * to the sysno */
+                        else if (cmp_attr==-2) 
+                        {
+                            p->more_attr = rset_forward(
+                                      info->rset_attr, p->rfd_attr,
+                                      p->buf_attr, &dummy_term,
+                                      info->cmp, p->buf_l);
+                            logf(LOG_DEBUG, "btw: after frowarding attr m=%d",p->more_attr);
+                        }
+                    } /* while more_attr */
                 }
             }
+#define NEWCODE 1 
+#if NEWCODE                
+            if (cmp_l==-2)
+            {
+                if (p->more_l) 
+                {
+                    p->more_l=rset_forward(
+                                      info->rset_l, p->rfd_l,
+                                      p->buf_l, &p->term_index_l,
+                                      info->cmp, p->buf_m);
+                    if (p->more_l)
+                        cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
+                    else
+                        cmp_l=2;
+                    log2( p, "after forwarding L", cmp_l, cmp_r);
+                }
+            } else
+            {
+                p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l,
+                              &p->term_index_l);
+            }
+#else
             p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l,
-                          &p->term_index_l);
+                              &p->term_index_l);
+#endif
             if (p->more_l)
             {
-               cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
+                cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
             }
             else
-               cmp_l=2; 
-           log2( p, "end of L loop", cmp_l, cmp_r);
+                cmp_l=2; 
+            log2( p, "end of L loop", cmp_l, cmp_r);
         } /* forward L */
 
             
-       /* forward R until past m, count levels */
+        /* forward R until past m, count levels */
         log2( p, "Before moving R", cmp_l, cmp_r);
         if (p->more_r)
-           cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
-       else
-           cmp_r=2; 
+            cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
+        else
+            cmp_r=2; 
         log2( p, "after first R", cmp_l, cmp_r);
         while (cmp_r < 0)   /* r before m */
-       {
-           /* -2, earlier record, doesn't matter */
+        {
+             /* -2, earlier record, don't count level */
             if (cmp_r == -1)
-               p->level--; /* relevant end tag */
+                p->level--; /* relevant end tag */
             if (p->more_r)
             {
+#if NEWCODE                
+                if (cmp_r==-2)
+                {
+                    p->more_r=rset_forward(
+                                      info->rset_r, p->rfd_r,
+                                      p->buf_r, &p->term_index_r,
+                                      info->cmp, p->buf_m);
+                } else
+                {
+                    p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r,
+                                       &p->term_index_r);
+                }
+                if (p->more_r)
+                    cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
+
+#else
                 p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r,
-                                  &p->term_index_r);
-               cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
+                                       &p->term_index_r);
+                cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
+#endif
             }
             else
-               cmp_r=2; 
+                cmp_r=2; 
         log2( p, "End of R loop", cmp_l, cmp_r);
         } /* forward R */
-       
-       if ( ( p->level <= 0 ) && ! p->more_l)
-           return 0; /* no more start tags, nothing more to find */
         
-       if ( attr_match && p->level > 0)  /* within a tag pair (or deeper) */
-       {
-           memcpy (buf, p->buf_m, info->key_size);
+        if ( ( p->level <= 0 ) && ! p->more_l)
+            return 0; /* no more start tags, nothing more to find */
+        
+        if ( attr_match && p->level > 0)  /* within a tag pair (or deeper) */
+        {
+            memcpy (buf, p->buf_m, info->key_size);
             *term_index = p->term_index_m;
             log2( p, "Returning a hit (and forwarding m)", cmp_l, cmp_r);
             p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m,
                                    &p->term_index_m);
-           if (cmp_l == 2)
-               p->level = 0;
-           return 1;
-       }
-       else if ( ! p->more_l )  /* not in data, no more starts */
-       {
-           log2( p, "no more starts, exiting without a hit", cmp_l, cmp_r);
-           return 0;  /* ergo, nothing can be found. stop scanning */
-       }
-       if (cmp_l == 2)
-           p->level = 0;
+            if (cmp_l == 2)
+                p->level = 0;
+            return 1;
+        }
+        else if ( ! p->more_l )  /* not in data, no more starts */
+        {
+            log2( p, "no more starts, exiting without a hit", cmp_l, cmp_r);
+            return 0;  /* ergo, nothing can be found. stop scanning */
+        }
+#if NEWCODE                
+        if (cmp_l == 2)
+        {
+            p->level = 0;
+            p->more_m=rset_forward(
+                              info->rset_m, p->rfd_m,
+                              p->buf_m, &p->term_index_m,
+                              info->cmp, p->buf_l);
+        } else
+        {
+            p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m,
+                               &p->term_index_m);
+        }
+#else
+        if (cmp_l == 2)
+            p->level = 0;
         p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m,
                                &p->term_index_m);
+#endif
         log2( p, "End of M loop", cmp_l, cmp_r);
     } /* while more_m */