#if'd out logging in rsbetween
[idzebra-moved-to-github.git] / rset / rsbetween.c
index 517aa7a..c7b4984 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: rsbetween.c,v 1.7 2002-08-02 19:26:57 adam Exp $
+/* $Id: rsbetween.c,v 1.13 2004-06-08 15:05:16 heikki Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
    Index Data Aps
 
@@ -21,6 +21,14 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 */
 
 
+/* rsbetween is (mostly) used for xml searches. It returns the hits of the
+ * "middle" rset that are in between the "left" and "right" rsets. For
+ * example, "Shakespeare" in between "<title>" and "</title>". The thing is
+ * complicated by the inclusion of attributes (from their own rset). If attrs
+ * are specified, they must match the "left" rset (start tag), e.g. "Hamlet"
+ * between "<title lang=eng>" and "</title>". (This assumes that the
+ * attributes are indexed to the same seqno as the tags).
+*/ 
 
 #include <stdio.h>
 #include <stdlib.h>
@@ -30,11 +38,16 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <rsbetween.h>
 #include <zebrautl.h>
 
+#define RSBETWEEN_DEBUG 0
+
 static void *r_create_between(RSET ct, const struct rset_control *sel, void *parms);
 static RSFD r_open_between (RSET ct, int flag);
 static void r_close_between (RSFD rfd);
 static void r_delete_between (RSET ct);
 static void r_rewind_between (RSFD rfd);
+static int r_forward_between(RSET ct, RSFD rfd, void *buf, int *term_index,
+                     int (*cmpfunc)(const void *p1, const void *p2),
+                     const void *untilbuf);
 static int r_count_between (RSET ct);
 static int r_read_between (RSFD rfd, void *buf, int *term_index);
 static int r_write_between (RSFD rfd, const void *buf);
@@ -47,6 +60,7 @@ static const struct rset_control control_between =
     r_close_between,
     r_delete_between,
     r_rewind_between,
+    r_forward_between, /* rset_default_forward, */
     r_count_between,
     r_read_between,
     r_write_between,
@@ -88,6 +102,21 @@ struct rset_between_rfd {
     struct rset_between_info *info;
 };    
 
+#if RSBETWEEN_DEBUG
+static void log2 (struct rset_between_rfd *p, char *msg, int cmp_l, int cmp_r)
+{
+    char buf_l[32];
+    char buf_m[32];
+    char buf_r[32];
+    logf(LOG_DEBUG,"btw: %s l=%s(%d/%d) m=%s(%d) r=%s(%d/%d), lev=%d",
+      msg, 
+      (*p->info->printer)(p->buf_l, buf_l), p->more_l, cmp_l,
+      (*p->info->printer)(p->buf_m, buf_m), p->more_m,
+      (*p->info->printer)(p->buf_r, buf_r), p->more_r, cmp_r,
+      p->level);
+}
+#endif
+
 static void *r_create_between (RSET ct, const struct rset_control *sel,
                                void *parms)
 {
@@ -151,8 +180,8 @@ static RSFD r_open_between (RSET ct, int flag)
 
     if (flag & RSETF_WRITE)
     {
-       logf (LOG_FATAL, "between set type is read-only");
-       return NULL;
+        logf (LOG_FATAL, "between set type is read-only");
+        return NULL;
     }
     rfd = (struct rset_between_rfd *) xmalloc (sizeof(*rfd));
     rfd->next = info->rfd_list;
@@ -169,11 +198,11 @@ static RSFD r_open_between (RSET ct, int flag)
     rfd->rfd_r = rset_open (info->rset_r, RSETF_READ);
     
     rfd->more_l = rset_read (info->rset_l, rfd->rfd_l, rfd->buf_l,
-                            &rfd->term_index_l);
+                             &rfd->term_index_l);
     rfd->more_m = rset_read (info->rset_m, rfd->rfd_m, rfd->buf_m,
-                            &rfd->term_index_m);
+                             &rfd->term_index_m);
     rfd->more_r = rset_read (info->rset_r, rfd->rfd_r, rfd->buf_r,
-                            &rfd->term_index_r);
+                             &rfd->term_index_r);
     if (info->rset_attr)
     {
         int dummy;
@@ -230,7 +259,9 @@ static void r_rewind_between (RSFD rfd)
     struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
     struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
 
+#if RSBETWEEN_DEBUG
     logf (LOG_DEBUG, "rsbetween_rewind");
+#endif
     rset_rewind (info->rset_l, p->rfd_l);
     rset_rewind (info->rset_m, p->rfd_m);
     rset_rewind (info->rset_r, p->rfd_r);
@@ -247,49 +278,73 @@ static void r_rewind_between (RSFD rfd)
     p->level=0;
 }
 
-static int r_count_between (RSET ct)
+
+
+static int r_forward_between(RSET ct, RSFD rfd, void *buf, int *term_index,
+                     int (*cmpfunc)(const void *p1, const void *p2),
+                     const void *untilbuf)
 {
-    return 0;
+    struct rset_between_info *info = ((struct rset_between_rfd*)rfd)->info;
+    struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
+    int rc;
+#if RSBETWEEN_DEBUG
+    log2( p, "fwd: before forward", 0,0);
+#endif
+    /* It is enough to forward the m pointer here; the read will */
+    /* naturally forward the l, r, and attr pointers */
+    if (p->more_m)
+        p->more_m=rset_forward(info->rset_m,p->rfd_m, p->buf_m,
+                        &p->term_index_m, info->cmp,untilbuf);
+#if RSBETWEEN_DEBUG
+    log2( p, "fwd: after forward M", 0,0);
+#endif
+    rc = r_read_between(rfd, buf, term_index);
+#if RSBETWEEN_DEBUG
+    log2( p, "fwd: after forward", 0,0);
+#endif
+    return rc;
 }
 
-static void logit( struct rset_between_info *info, char *prefix, void *l, void *m, void *r)
+static int r_count_between (RSET ct)
 {
-    char buf_l[32];
-    char buf_m[32];
-    char buf_r[32];
-    logf(LOG_DEBUG,"btw: %s l=%s m=%s r=%s",
-      prefix, 
-      (*info->printer)(l, buf_l),
-      (*info->printer)(m, buf_m),
-      (*info->printer)(r, buf_r) );
+    return 0;
 }
 
+
+
 static int r_read_between (RSFD rfd, void *buf, int *term_index)
 {
     struct rset_between_rfd *p = (struct rset_between_rfd *) rfd;
     struct rset_between_info *info = p->info;
-    int cmp_l;
-    int cmp_r;
-    int attr_match;
+    int cmp_l=0;
+    int cmp_r=0;
+    int attr_match = 0;
 
     while (p->more_m)
     {
-        logit( info, "start of loop", p->buf_l, p->buf_m, p->buf_r);
-
-       /* forward L until past m, count levels, note rec boundaries */
-       if (p->more_l)
-           cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
-       else
-           cmp_l=2; /* past this record */
-        logf(LOG_DEBUG, "cmp_l=%d", cmp_l);
+#if RSBETWEEN_DEBUG
+        log2( p, "start of loop", cmp_l, cmp_r);
+#endif
+
+        /* forward L until past m, count levels, note rec boundaries */
+        if (p->more_l)
+            cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
+        else
+        {
+            p->level = 0;
+            cmp_l=2; /* past this record */
+        }
+#if RSBETWEEN_DEBUG
+        log2( p, "after first L", cmp_l, cmp_r);
+#endif
 
         while (cmp_l < 0)   /* l before m */
-       {
+        {
             if (cmp_l == -2)
-               p->level=0; /* earlier record */
+                p->level=0; /* earlier record */
             if (cmp_l == -1)
             {
-               p->level++; /* relevant start tag */
+                p->level++; /* relevant start tag */
 
                 if (!info->rset_attr)
                     attr_match = 1;
@@ -308,68 +363,160 @@ static int r_read_between (RSFD rfd, void *buf, int *term_index)
                         }
                         else if (cmp_attr > 0)
                             break;
-                        p->more_attr = rset_read (info->rset_attr, p->rfd_attr,
+                        else if (cmp_attr==-1) 
+                            p->more_attr = rset_read (info->rset_attr, p->rfd_attr,
                                                   p->buf_attr, &dummy_term);
-                    }
+                            /* if we had a forward that went all the way to
+                             * the seqno, we could use that. But fwd only goes
+                             * to the sysno */
+                        else if (cmp_attr==-2) 
+                        {
+                            p->more_attr = rset_forward(
+                                      info->rset_attr, p->rfd_attr,
+                                      p->buf_attr, &dummy_term,
+                                      info->cmp, p->buf_l);
+#if RSBETWEEN_DEBUG
+                            logf(LOG_DEBUG, "btw: after frowarding attr m=%d",p->more_attr);
+#endif
+                        }
+                    } /* while more_attr */
                 }
             }
-            if (p->more_l)
+#define NEWCODE 1 
+#if NEWCODE                
+            if (cmp_l==-2)
+            {
+                if (p->more_l) 
+                {
+                    p->more_l=rset_forward(
+                                      info->rset_l, p->rfd_l,
+                                      p->buf_l, &p->term_index_l,
+                                      info->cmp, p->buf_m);
+                    if (p->more_l)
+                        cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
+                    else
+                        cmp_l=2;
+#if RSBETWEEN_DEBUG
+                    log2( p, "after forwarding L", cmp_l, cmp_r);
+#endif
+                }
+            } else
             {
                 p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l,
-                                  &p->term_index_l);
-               cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
-                logit( info, "forwarded L", p->buf_l, p->buf_m, p->buf_r);
-                logf(LOG_DEBUG, "  cmp_l=%d", cmp_l);
+                              &p->term_index_l);
+            }
+#else
+            p->more_l = rset_read (info->rset_l, p->rfd_l, p->buf_l,
+                              &p->term_index_l);
+#endif
+            if (p->more_l)
+            {
+                cmp_l= (*info->cmp)(p->buf_l, p->buf_m);
             }
             else
-               cmp_l=2; 
+                cmp_l=2; 
+#if RSBETWEEN_DEBUG
+            log2( p, "end of L loop", cmp_l, cmp_r);
+#endif
         } /* forward L */
 
             
-       /* forward R until past m, count levels */
+        /* forward R until past m, count levels */
+#if RSBETWEEN_DEBUG
+        log2( p, "Before moving R", cmp_l, cmp_r);
+#endif
         if (p->more_r)
-           cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
-       else
-           cmp_r=2; 
-        logf(LOG_DEBUG, "cmp_r=%d", cmp_r);
+            cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
+        else
+            cmp_r=2; 
+#if RSBETWEEN_DEBUG
+        log2( p, "after first R", cmp_l, cmp_r);
+#endif
         while (cmp_r < 0)   /* r before m */
-       {
-           /* -2, earlier record, doesn't matter */
+        {
+             /* -2, earlier record, don't count level */
             if (cmp_r == -1)
-               p->level--; /* relevant end tag */
+                p->level--; /* relevant end tag */
             if (p->more_r)
             {
+#if NEWCODE                
+                if (cmp_r==-2)
+                {
+                    p->more_r=rset_forward(
+                                      info->rset_r, p->rfd_r,
+                                      p->buf_r, &p->term_index_r,
+                                      info->cmp, p->buf_m);
+                } else
+                {
+                    p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r,
+                                       &p->term_index_r);
+                }
+                if (p->more_r)
+                    cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
+
+#else
                 p->more_r = rset_read (info->rset_r, p->rfd_r, p->buf_r,
-                                  &p->term_index_r);
-               cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
-                logit( info, "forwarded R", p->buf_l, p->buf_m, p->buf_r);
-                logf(LOG_DEBUG, "  cmp_r=%d", cmp_r);
+                                       &p->term_index_r);
+                cmp_r= (*info->cmp)(p->buf_r, p->buf_m);
+#endif
             }
             else
-               cmp_r=2; 
+                cmp_r=2; 
+#if RSBETWEEN_DEBUG
+        log2( p, "End of R loop", cmp_l, cmp_r);
+#endif
         } /* forward R */
-       
-       if ( ( p->level <= 0 ) && ! p->more_l)
-           return 0; /* no more start tags, nothing more to find */
         
-       if ( attr_match && p->level > 0)  /* within a tag pair (or deeper) */
-       {
-           memcpy (buf, p->buf_m, info->key_size);
+        if ( ( p->level <= 0 ) && ! p->more_l)
+            return 0; /* no more start tags, nothing more to find */
+        
+        if ( attr_match && p->level > 0)  /* within a tag pair (or deeper) */
+        {
+            memcpy (buf, p->buf_m, info->key_size);
             *term_index = p->term_index_m;
-            logit( info, "Returning a hit (m)", p->buf_l, p->buf_m, p->buf_r);
+#if RSBETWEEN_DEBUG
+            log2( p, "Returning a hit (and forwarding m)", cmp_l, cmp_r);
+#endif
             p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m,
                                    &p->term_index_m);
-           return 1;
-       }
-       else
-           if ( ! p->more_l )  /* not in data, no more starts */
-               return 0;  /* ergo, nothing can be found. stop scanning */
-        
+            if (cmp_l == 2)
+                p->level = 0;
+            return 1;
+        }
+        else if ( ! p->more_l )  /* not in data, no more starts */
+        {
+#if RSBETWEEN_DEBUG
+            log2( p, "no more starts, exiting without a hit", cmp_l, cmp_r);
+#endif
+            return 0;  /* ergo, nothing can be found. stop scanning */
+        }
+#if NEWCODE                
+        if (cmp_l == 2)
+        {
+            p->level = 0;
+            p->more_m=rset_forward(
+                              info->rset_m, p->rfd_m,
+                              p->buf_m, &p->term_index_m,
+                              info->cmp, p->buf_l);
+        } else
+        {
+            p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m,
+                               &p->term_index_m);
+        }
+#else
+        if (cmp_l == 2)
+            p->level = 0;
         p->more_m = rset_read (info->rset_m, p->rfd_m, p->buf_m,
                                &p->term_index_m);
+#endif
+#if RSBETWEEN_DEBUG
+        log2( p, "End of M loop", cmp_l, cmp_r);
+#endif
     } /* while more_m */
-      
-    logf(LOG_DEBUG,"Exiting, no more stuff in m");
+    
+#if RSBETWEEN_DEBUG
+    log2( p, "Exiting, nothing more in m", cmp_l, cmp_r);
+#endif
     return 0;  /* no more data possible */