New rsbeween implementation. Much shorter and easier, and works well
authorHeikki Levanto <heikki@indexdata.dk>
Fri, 5 Nov 2004 17:44:32 +0000 (17:44 +0000)
committerHeikki Levanto <heikki@indexdata.dk>
Fri, 5 Nov 2004 17:44:32 +0000 (17:44 +0000)
with ranking as well.

examples/gils/zebra.cfg
rset/rsbetween.c
test/api/xpath4.c
test/api/xpath5.c

index 71dbcd0..6082f54 100644 (file)
@@ -1,5 +1,5 @@
 # Simple Zebra configuration file
-# $Id: zebra.cfg,v 1.5 2004-10-03 22:06:32 adam Exp $
+# $Id: zebra.cfg,v 1.6 2004-11-05 17:44:32 heikki Exp $
 #
 # Where the schema files, attribute files, etc are located.
 profilePath: .:../../tab
@@ -12,6 +12,7 @@ attset: explain.att
 recordtype: grs.sgml
 isam: c
 
+#rank: zvrank
 #recordId: (bib-1,identifier-standard)
 
 modulePath: ../../recctrl/.libs
index 321903b..3e44608 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: rsbetween.c,v 1.29 2004-11-01 15:53:57 heikki Exp $
+/* $Id: rsbetween.c,v 1.30 2004-11-05 17:44:32 heikki Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
    Index Data Aps
 
@@ -29,11 +29,6 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
  * "<title lang=eng>" and "</title>". (This assumes that the attributes are
  * indexed to the same seqno as the tags).
  *
- * Currently fails to return all hits from a record, which breaks the ranking.
- * This is bug #202. Seems like there is no decent way to get that working
- * with the current implementation. I have planned a new method for doing
- * this, see at the end of this file.
- *
 */ 
 
 #include <stdio.h>
@@ -44,71 +39,60 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
 #include <zebrautl.h>
 #include <rset.h>
 
-#define RSBETWEEN_DEBUG 0 
 
-static RSFD r_open_between (RSET ct, int flag);
-static void r_close_between (RSFD rfd);
-static void r_delete_between (RSET ct);
-static int r_forward_between(RSFD rfd, void *buf, 
+static RSFD r_open (RSET ct, int flag);
+static void r_close (RSFD rfd);
+static void r_delete (RSET ct);
+static int r_forward(RSFD rfd, void *buf, 
                     TERMID *term, const void *untilbuf);
-static int r_read_between (RSFD rfd, void *buf, TERMID *term );
-static int r_write_between (RSFD rfd, const void *buf);
-static void r_pos_between (RSFD rfd, double *current, double *total);
+static int r_read(RSFD rfd, void *buf, TERMID *term );
+static int r_write(RSFD rfd, const void *buf);
+static void r_pos(RSFD rfd, double *current, double *total);
 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
 
 static const struct rset_control control = 
 {
     "between",
-    r_delete_between,
+    r_delete,
     r_get_terms,
-    r_open_between,
-    r_close_between,
-    r_forward_between, 
-    r_pos_between,
-    r_read_between,
-    r_write_between,
+    r_open,
+    r_close,
+    r_forward, 
+    r_pos,
+    r_read,
+    r_write,
 };
 
 
 const struct rset_control *rset_kind_between = &control;
 
+#define STARTTAG 0
+#define HIT 1
+#define STOPTAG 2
+#define ATTRTAG 3
+
 struct rset_between_info {
-    RSET rset_l;  /* left arg, start tag */
-    RSET rset_m;  /* the thing itself */
-    RSET rset_r;  /* right arg, end tag */
-    RSET rset_attr; /* attributes , optional */
+    RSET andset; /* the multi-and of the above */
+    TERMID startterm; /* pseudo terms for detecting which one we read from */
+    TERMID stopterm;
+    TERMID attrterm;
 };
 
 struct rset_between_rfd {
-    RSFD rfd_l;
-    RSFD rfd_m;
-    RSFD rfd_r;
-    RSFD rfd_attr;
-    int  more_l;
-    int  more_m;
-    int  more_r;
-    int  more_attr;
-    void *buf_l;
-    void *buf_m;
-    void *buf_r;
-    void *buf_attr;
-    TERMID term_m; /* we only return terms for the mid argument */
-    int level;  /* counting start/end tags */
-    int attr_match; /* did we have a matching attr for L */
+    RSFD andrfd;
+    void *recbuf; /* a key that tells which record we are in */
+    void *startbuf; /* the start tag */
+    int startbufok; /* we have seen the first start tag */
+    void *attrbuf;  /* the attr tag. If these two match, we have attr match */
+    int attrbufok; /* we have seen the first attr tag, can compare */
+    int depth; /* number of start-tags without end-tags */
+    int attrdepth; /* on what depth the attr matched */
     zint hits;
 };    
 
-#if RSBETWEEN_DEBUG
-static void log2 (RSFD rfd, char *msg, int cmp_l, int cmp_r)
-{
-    struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
-    RSET ct = rfd->rset;
-    logf(LOG_LOG,"between: %s cmp_l=%d cmp_r=%d", msg, cmp_l, cmp_r);
-    (*ct->keycontrol->key_logdump_txt)(LOG_LOG, p->buf_l, "between: L");
-    (*ct->keycontrol->key_logdump_txt)(LOG_LOG, p->buf_m, "between: M");
-    (*ct->keycontrol->key_logdump_txt)(LOG_LOG, p->buf_r, "between: R");
-}
-#endif
+static int log_level=0;
+static int log_level_initialized=0;
+
 
 RSET rsbetween_create( NMEM nmem, const struct key_control *kcontrol,
             int scope,
@@ -117,28 +101,67 @@ RSET rsbetween_create( NMEM nmem, const struct key_control *kcontrol,
     RSET rnew=rset_create_base(&control, nmem, kcontrol, scope,0);
     struct rset_between_info *info=
         (struct rset_between_info *) nmem_malloc(rnew->nmem,sizeof(*info));
-    info->rset_l = rset_l;
-    info->rset_m = rset_m;
-    info->rset_r = rset_r;
-    info->rset_attr = rset_attr;
+    RSET rsetarray[4];
+    int n=4;
+    
+    if (!log_level_initialized)
+    {
+        log_level=yaz_log_module_level("rsbetween");
+        log_level_initialized=1;
+    }
+    rsetarray[STARTTAG] = rset_l;
+    rsetarray[HIT] = rset_m;
+    rsetarray[STOPTAG] = rset_r;
+    rsetarray[ATTRTAG] = rset_attr;
+
+    /* make sure we have decent terms for all rsets. Create dummies if needed*/
+    if (!rsetarray[STARTTAG]->term)
+    {
+        rsetarray[STARTTAG]->term=
+             rset_term_create("<starttag>",strlen("<starttag>"),"",0,nmem);
+        rsetarray[STARTTAG]->term->rset=rsetarray[STARTTAG];
+    }
+    info->startterm=rsetarray[STARTTAG]->term;
+
+    if (!rsetarray[STOPTAG]->term)
+    {
+        rsetarray[STOPTAG]->term=
+             rset_term_create("<stoptag>",strlen("<stoptag>"),"",0,nmem);
+        rsetarray[STOPTAG]->term->rset=rsetarray[STOPTAG];
+    }
+    info->stopterm=rsetarray[STOPTAG]->term;
+
+    if (rset_attr)
+    {
+        if (!rsetarray[ATTRTAG]->term)
+        {
+            rsetarray[ATTRTAG]->term=
+                 rset_term_create("<attrtag>",strlen("<attrtag>"),"",0,nmem);
+            rsetarray[ATTRTAG]->term->rset=rsetarray[ATTRTAG];
+        }
+        info->attrterm=rsetarray[ATTRTAG]->term;
+    }
+    else
+    {
+        info->attrterm=NULL;
+        n--; /* smaller and */
+    }
+    info->andset=rsmultiand_create( nmem, kcontrol, scope, n, rsetarray);
     rnew->priv=info;
+    logf(log_level,"create rset at %p",rnew);
     return rnew;
 }
 
 
-static void r_delete_between (RSET ct)
+static void r_delete (RSET ct)
 {
     struct rset_between_info *info = (struct rset_between_info *) ct->priv;
-
-    rset_delete (info->rset_l);
-    rset_delete (info->rset_m);
-    rset_delete (info->rset_r);
-    if (info->rset_attr)
-        rset_delete (info->rset_attr);
+    logf(log_level,"delete rset at %p",ct);
+    rset_delete(info->andset);
 }
 
 
-static RSFD r_open_between (RSET ct, int flag)
+static RSFD r_open (RSET ct, int flag)
 {
     struct rset_between_info *info = (struct rset_between_info *) ct->priv;
     RSFD rfd;
@@ -155,376 +178,161 @@ static RSFD r_open_between (RSET ct, int flag)
     else {
         p = (struct rset_between_rfd *) nmem_malloc(ct->nmem, (sizeof(*p)));
         rfd->priv=p;
-        p->buf_l = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
-        p->buf_m = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
-        p->buf_r = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
-        p->buf_attr = nmem_malloc(ct->nmem, (ct->keycontrol->key_size));
+        p->recbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size)); 
+        p->startbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size)); 
+        p->attrbuf = nmem_malloc(ct->nmem, (ct->keycontrol->key_size)); 
     }
-
-    p->rfd_l = rset_open (info->rset_l, RSETF_READ);
-    p->rfd_m = rset_open (info->rset_m, RSETF_READ);
-    p->rfd_r = rset_open (info->rset_r, RSETF_READ);
-    
-    p->more_l = rset_read (p->rfd_l, p->buf_l,NULL);
-    p->more_m = rset_read (p->rfd_m, p->buf_m, &p->term_m);
-    p->more_r = rset_read (p->rfd_r, p->buf_r,NULL);
-    if (info->rset_attr)
-    {
-        p->rfd_attr = rset_open (info->rset_attr, RSETF_READ);
-        p->more_attr = rset_read (p->rfd_attr, p->buf_attr, NULL);
-        p->attr_match=0;
-    } else
-        p->attr_match=1; 
-    p->level=0;
-    p->hits=0;
+    p->andrfd = rset_open (info->andset, RSETF_READ);
+    p->hits=-1;
+    p->depth=0;
+    p->attrdepth=0;
+    p->attrbufok=0;
+    p->startbufok=0;
+    logf(log_level,"open rset=%p rfd=%p", ct, rfd);
     return rfd;
 }
 
-static void r_close_between (RSFD rfd)
+static void r_close (RSFD rfd)
 {
-    struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
     struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
-
-#if RSBETWEEN_DEBUG
-    log2( rfd, "fwd: close. hits:", p->hits,0);
-#endif
-    rset_close (p->rfd_l);
-    rset_close (p->rfd_m);
-    rset_close (p->rfd_r);
-    if (info->rset_attr)
-        rset_close (p->rfd_attr);
+    logf(log_level,"close rfd=%p", rfd);
+    rset_close (p->andrfd);
     rfd_delete_base(rfd);
 }
 
 
 
-static int r_forward_between(RSFD rfd, void *buf, 
-                             TERMID *term, const void *untilbuf)
+static int r_forward(RSFD rfd, void *buf, 
+                     TERMID *term, const void *untilbuf)
 {
     struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
     int rc;
-#if RSBETWEEN_DEBUG
-    log2( rfd, "fwd: before forward", 0,0);
-#endif
-    /* It is enough to forward the m pointer here, the read will */
-    /* naturally forward the l, m, and attr pointers */
-    if (p->more_m)
-        p->more_m=rset_forward(p->rfd_m, p->buf_m, term, untilbuf);
-#if RSBETWEEN_DEBUG
-    log2( rfd, "fwd: after forward M", 0,0);
-#endif
-    rc = r_read_between(rfd, buf, term);
-#if RSBETWEEN_DEBUG
-    log2( rfd, "fwd: after forward", 0,0);
-#endif
+    logf(log_level, "forwarding ");
+    rc=rset_forward(p->andrfd,buf,term,untilbuf);
     return rc;
 }
 
 
 
-
-static int r_read_between (RSFD rfd, void *buf, TERMID *term)
+static void checkattr(RSFD rfd)
 {
     struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
     struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
     const struct key_control *kctrl=rfd->rset->keycontrol;
-    int cmp_l=0;
-    int cmp_r=0;
-/*    int attr_match = 0; */
-
-    while (p->more_m)
+    int cmp;
+    if (p->attrdepth)
+        return; /* already found one */
+    if (!info->attrterm) 
     {
-#if RSBETWEEN_DEBUG
-        log2( rfd, "start of loop", cmp_l, cmp_r);
-        logf(LOG_LOG,"level=%d. more=%d/%d/%d",
-                p->level,p->more_l,p->more_m, p->more_r);
-#endif
-
-        /* forward L until past m, count levels, note rec boundaries */
-        if (p->more_l)
-            cmp_l= (*kctrl->cmp)(p->buf_l, p->buf_m);
-        else
+        p->attrdepth=-1; /* matches always */
+        return;
+    }
+    if ( p->startbufok && p->attrbufok )
+    { /* have buffers to compare */
+        cmp=(kctrl->cmp)(p->startbuf,p->attrbuf);
+        if (0==cmp) /* and the keys match */
         {
-            p->level = 0;
-            cmp_l=rfd->rset->scope; /* past this record */
+            p->attrdepth=p->depth;
+            logf(log_level, "found attribute match at depth %d",p->attrdepth);
         }
-#if RSBETWEEN_DEBUG
-        log2( rfd, "after first L", cmp_l, cmp_r);
-        logf(LOG_LOG,"level=%d. more=%d/%d/%d",
-                p->level,p->more_l,p->more_m, p->more_r);
-#endif
+    }
+}
 
-        while (cmp_l < 0)   /* l before m */
-        {
-            if (cmp_l <= - rfd->rset->scope)  /* ==-2 */
-                p->level=0; /* earlier record */
-            if (cmp_l > - rfd->rset->scope)  /* == -1 */
-            {
-                p->level++; /* relevant start tag */
-
-                if (!info->rset_attr)
-                    p->attr_match = 1;
-                else
-                {
-                    int cmp_attr;
-                    p->attr_match = 0;
-                    while (p->more_attr)
-                    {
-                        cmp_attr = (*kctrl->cmp)(p->buf_attr, p->buf_l);
-                        if (cmp_attr == 0)
-                        {
-                            p->attr_match = 1;
-                            break;
-                        }
-                        else if (cmp_attr > 0)
-                            break;
-                        else if (cmp_attr > - rfd->rset->scope)  /* == -1 */
-                            p->more_attr = rset_read (p->rfd_attr, 
-                                                      p->buf_attr,NULL);
-                            /* if we had a forward that went all the way to
-                             * the seqno, we could use that. But fwd only goes
-                             * to the sysno */
-                        else if (cmp_attr <= - rfd->rset->scope) /* ==-2 */
-                        {
-                            p->more_attr = rset_forward( p->rfd_attr,
-                                             p->buf_attr, NULL, p->buf_l);
-#if RSBETWEEN_DEBUG
-                            logf(LOG_LOG, "btw: after frowarding attr m=%d",
-                                      p->more_attr);
-#endif
-                        }
-                    } /* while more_attr */
-                }
-            }
-#define NEWCODE 1 
-#if NEWCODE                
-            if (cmp_l <= - rfd->rset->scope )/* ==-2 */
-            {
-                if (p->more_l) 
-                {
-                    p->more_l=rset_forward(p->rfd_l, p->buf_l, NULL, p->buf_m);
-                    if (p->more_l)
-                        cmp_l= (*kctrl->cmp)(p->buf_l, p->buf_m);
-                    else
-                        cmp_l=rfd->rset->scope;  /*2*/
-#if RSBETWEEN_DEBUG
-                    log2( rfd, "after forwarding L", cmp_l, cmp_r);
-                    logf(LOG_LOG,"level=%d. more=%d/%d/%d",
-                            p->level,p->more_l,p->more_m, p->more_r);
-#endif
-                }
-            } else
-            {
-                p->more_l = rset_read (p->rfd_l, p->buf_l, NULL);
-            }
-#else
-            p->more_l = rset_read (p->rfd_l, p->buf_l, NULL);
-#endif
-            if (p->more_l)
-            {
-                cmp_l= (*kctrl->cmp)(p->buf_l, p->buf_m);
-            }
-            else
-                cmp_l=rfd->rset->scope;   /*2*/
-#if RSBETWEEN_DEBUG
-            log2( rfd, "end of L loop", cmp_l, cmp_r);
-            logf(LOG_LOG,"level=%d. more=%d/%d/%d",
-                p->level,p->more_l,p->more_m, p->more_r);
-#endif
-        } /* forward L */
-
-            
-        /* forward R until past m, count levels */
-#if RSBETWEEN_DEBUG
-        log2( rfd, "Before moving R", cmp_l, cmp_r);
-        logf(LOG_LOG,"level=%d. more=%d/%d/%d",
-                p->level,p->more_l,p->more_m, p->more_r);
-#endif
-        if (p->more_r)
-            cmp_r= (*kctrl->cmp)(p->buf_r, p->buf_m);
-        else
-            cmp_r=rfd->rset->scope;  /*2*/
-#if RSBETWEEN_DEBUG
-        log2( rfd, "after first R", cmp_l, cmp_r);
-        logf(LOG_LOG,"level=%d. more=%d/%d/%d",
-                p->level,p->more_l,p->more_m, p->more_r);
-#endif
-        while (cmp_r < 0)   /* r before m */
-        {
-             /* -2, earlier record, don't count level */
-            if (cmp_r > -rfd->rset->scope) /* == -1 */
-                p->level--; /* relevant end tag */
-            if (p->more_r)
-            {
-#if NEWCODE                
-                if (cmp_r <= - rfd->rset->scope) /* == -2 */
-                {
-                    p->more_r=rset_forward(p->rfd_r, p->buf_r, NULL, p->buf_m);
-                } else
-                {
-                    p->more_r = rset_read (p->rfd_r, p->buf_r, NULL);
-                }
-                if (p->more_r)
-                    cmp_r= (*kctrl->cmp)(p->buf_r, p->buf_m);
-
-#else
-                p->more_r = rset_read (p->rfd_r, p->buf_r, NULL);
-                cmp_r= (*kctrl->cmp)(p->buf_r, p->buf_m);
-#endif
-            }
-            else
-                cmp_r=rfd->rset->scope;  /*2*/
-#if RSBETWEEN_DEBUG
-        log2( rfd, "End of R loop", cmp_l, cmp_r);
-        logf(LOG_LOG,"level=%d. more=%d/%d/%d am=%d",
-                p->level,p->more_l,p->more_m, p->more_r, p->attr_match);
-#endif
-        } /* forward R */
-        
-        if ( ( p->level <= 0 ) && ! p->more_l)
-        {
-#if RSBETWEEN_DEBUG
-            logf(LOG_LOG,"no more_l, returning zero");
-#endif
-            return 0; /* no more start tags, nothing more to find */
+
+static int r_read (RSFD rfd, void *buf, TERMID *term)
+{
+    struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
+    struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
+    const struct key_control *kctrl=rfd->rset->keycontrol;
+    int cmp;
+    logf(log_level,"== read: term=%p",term);
+    TERMID dummyterm=0;
+    if (!term)
+        term=&dummyterm;
+    while ( rset_read(p->andrfd,buf,term) )
+    {
+        logf(log_level,"read loop term=%p d=%d ad=%d",
+                *term,p->depth, p->attrdepth);
+        if (p->hits<0) 
+        {/* first time? */
+            memcpy(p->recbuf,buf,kctrl->key_size);
+            p->hits=0;
+            cmp=rfd->rset->scope; /* force newrecord */
         }
-        
-#if RSBETWEEN_DEBUG
-        log2( rfd, "Considering M", cmp_l, cmp_r);
-        logf(LOG_LOG,"level=%d. more=%d/%d/%d am=%d",
-                p->level,p->more_l,p->more_m, p->more_r, p->attr_match);
-#endif
-        if ( p->attr_match && p->level > 0)  /* within a tag pair (or deeper) */
-        {
-            memcpy (buf, p->buf_m, kctrl->key_size);
-            if (term)
-                *term=p->term_m;
-#if RSBETWEEN_DEBUG
-            log2( rfd, "Returning a hit (and forwarding m)", cmp_l, cmp_r);
-#endif
-            p->more_m = rset_read (p->rfd_m, p->buf_m, NULL);
-#if RSBETWEEN_DEBUG
-            logf(LOG_LOG,"read m. more=%d level=%d "
-                    "cmp_l=%d scope=%d hits="ZINT_FORMAT, 
-                    p->more_m, p->level,
-                    cmp_l, rfd->rset->scope, p->hits);
-#endif
-            if (cmp_l >= rfd->rset->scope)  /* == 2 */
-                p->level = 0;
-            p->hits++;
-            return 1;
+        else {
+            cmp=(kctrl->cmp)(buf,p->recbuf);
+            logf(log_level, "cmp=%d",cmp);
+        }
+
+        if (cmp>=rfd->rset->scope)
+        { 
+            logf(log_level,"new record");
+            p->depth=0;
+            p->attrdepth=0;
+            memcpy(p->recbuf,buf,kctrl->key_size);
         }
-        else if ( ! p->more_l )  /* not in data, no more starts */
+
+        if (*term)
+            logf(log_level,"  term: '%s'", (*term)->name);
+        if (*term==info->startterm)
         {
-#if RSBETWEEN_DEBUG
-            log2( rfd, "no more starts, exiting without a hit", cmp_l, cmp_r);
-#endif
-            return 0;  /* ergo, nothing can be found. stop scanning */
+            p->depth++;
+            logf(log_level,"read start tag. d=%d",p->depth);
+            memcpy(p->startbuf,buf,kctrl->key_size);
+            p->startbufok=1;
+            checkattr(rfd); /* in case we already saw the attr here */
         }
-#if NEWCODE                
-        if (cmp_l >= rfd->rset->scope) /* == 2 */
+        else if (*term==info->stopterm)
         {
-            p->level = 0;
-            p->more_m=rset_forward(p->rfd_m, p->buf_m, &p->term_m, p->buf_l);
-        } else
+            if (p->depth == p->attrdepth)
+                p->attrdepth=0; /* ending the tag with attr match */
+            p->depth--;
+            logf(log_level,"read end tag. d=%d ad=%d",p->depth, p->attrdepth);
+        }
+        else if (*term==info->attrterm)
         {
-            p->more_m = rset_read (p->rfd_m, p->buf_m, &p->term_m);
+            logf(log_level,"read attr");
+            memcpy(p->attrbuf,buf,kctrl->key_size);
+            p->attrbufok=1;
+            checkattr(rfd); /* in case the start tag came first */
         }
-#else
-        if (cmp_l >= rfd->rset->scope )  /* == 2 */
-            p->level = 0;
-        p->more_m = rset_read (p->rfd_m, p->buf_m, &p->term_m);
-#endif
-#if RSBETWEEN_DEBUG
-        log2( rfd, "End of M loop", cmp_l, cmp_r);
-        logf(LOG_LOG,"level=%d. more=%d/%d/%d",
-                p->level,p->more_l,p->more_m, p->more_r);
-#endif
-    } /* while more_m */
-    
-#if RSBETWEEN_DEBUG
-    log2( rfd, "Exiting, nothing more in m", cmp_l, cmp_r);
-    logf(LOG_LOG,"level=%d. more=%d/%d/%d",
-                p->level,p->more_l,p->more_m, p->more_r);
-#endif
-    return 0;  /* no more data possible */
+        else 
+        { /* mut be a real hit */
+            if (p->depth && p->attrdepth)
+            {
+                p->hits++;
+                logf(log_level,"got a hit h="ZINT_FORMAT" d=%d ad=%d", 
+                        p->hits,p->depth,p->attrdepth);
+                return 1; /* we have everything in place already! */
+            } else
+                logf(log_level, "Ignoring hit. h="ZINT_FORMAT" d=%d ad=%d",
+                        p->hits,p->depth,p->attrdepth);
+        }
+    } /* while read */
 
+    return 0;
 
 }  /* r_read */
 
 
-static int r_write_between (RSFD rfd, const void *buf)
+static int r_write (RSFD rfd, const void *buf)
 {
     logf (LOG_FATAL, "between set type is read-only");
     return -1;
 }
 
 
-static void r_pos_between (RSFD rfd, double *current, double *total)
+static void r_pos (RSFD rfd, double *current, double *total)
 {
     struct rset_between_rfd *p=(struct rset_between_rfd *)rfd->priv;
-    double lcur,ltot;
-    double mcur,mtot;
-    double rcur,rtot;
-    double r;
-    ltot=-1; rtot=-1;
-    rset_pos(p->rfd_l,  &lcur, &ltot);
-    rset_pos(p->rfd_m,  &mcur, &mtot);
-    rset_pos(p->rfd_r,  &rcur, &rtot);
-    if ( (ltot<0) && (mtot<0) && (rtot<0) ) { /*no position */
-        *current=mcur;  /* return same as you got */
-        *total=mtot;    /* probably -1 for not available */
-    }
-    if ( ltot<0) { ltot=0; lcur=0;} /* if only one useful, use it */
-    if ( mtot<0) { mtot=0; mcur=0;}
-    if ( rtot<0) { rtot=0; rcur=0;}
-    if ( ltot+mtot+rtot < 1 ) { /* empty rset */
-        *current=0;
-        *total=0;
-        return;
-    }
-    r=1.0*(lcur+mcur+rcur)/(ltot+mtot+rtot); /* weighed average of l and r */
-    *current=p->hits;
-    *total=*current/r ; 
-#if RSBETWEEN_DEBUG
-    {
-    struct rset_between_info *info =(struct rset_between_info *)rfd->rset->priv;
-    yaz_log(LOG_LOG,"betw_pos: (%s/%s) %0.1f/%0.1f= %0.4f ",
-                    info->rset_l->control->desc, info->rset_r->control->desc,
-                    *current, *total, r);
-    }
-#endif
+    rset_pos(p->andrfd,current, total);
+    logf(log_level,"pos: %0.1f/%0.1f ", *current, *total);
 }
 
 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
 {
     struct rset_between_info *info = (struct rset_between_info *) ct->priv;
-    rset_getterms(info->rset_m, terms, maxterms, curterm);
+    rset_getterms(info->andset, terms, maxterms, curterm);
 }
 
-/* Better algorithm
- * One of the major problems with rsbetween is the complexity of keeping track
- * of start tags, stop tags, hits, and attributes, together with record
- * boundaries etc. 
- *
- * Things can be divided into finding the right records, and then processing
- * hits inside the record.
- *
- * Finding the record is mostly a matter of forwarding until we have a start
- * tag and a hit in the same record.
- *
- * Handling stuff inside a record, we can simplify things by implementing a
- * glorified OR operator that returns all the occurrences in proper order,
- * together with info on what type it was. Then the main logic can just keep
- * reading, and consider each type separately:
- *   - if a start tag, increment level (some trickery with attributes!)
- *   - if a stop tag, decrement level
- *   - if a hit, and we have a level, return it
- *   - if a hit, but no level, ignore it
- *
- * The attributes can be detected when ever reading start tags. The main
- * routine needs to keep a stack of attribute match bits, so when ever we read
- * a start tag, we must report back if we have a matching attribute or not.
- *
- */
  
index 0dcdb94..7141ecd 100644 (file)
@@ -1,6 +1,6 @@
 
 
-/* $Id: xpath4.c,v 1.2 2004-11-01 16:09:05 heikki Exp $
+/* $Id: xpath4.c,v 1.3 2004-11-05 17:44:32 heikki Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
    Index Data Aps
 
@@ -50,10 +50,39 @@ const char *myrec[] = {
     "</record> \n",
  
     "<record> \n"
+    "  <title lang=en>double english</title> \n"
+    "  <title lang=da>double danish</title> \n"
+    "  <author>grunt</author> \n"
+    "</record> \n",
+
+    "<record> \n"
     "  <title>hamlet</title> \n"
     "  <author>foo bar grunt grunt grunt</author> \n"
     "</record> \n",
 
+    "<record> \n"
+    "  before \n"
+    "  <nested> \n"
+    "     early \n"
+    "     <nested> \n"
+    "        middle \n"
+    "     </nested> \n"
+    "     late \n"
+    "  </nested> \n"
+    "  after \n"
+    "</record> \n",
+
+    "<record> \n"
+    "  before \n"
+    "  <nestattr level=outer> \n"
+    "     early \n"
+    "     <nestattr level=inner> \n"
+    "        middle \n"
+    "     </nestattr> \n"
+    "     late \n"
+    "  </nestattr> \n"
+    "  after \n"
+    "</record> \n",
     0};
 
 
@@ -61,6 +90,9 @@ int main(int argc, char **argv)
 {
     ZebraService zs = start_up("zebraxpath.cfg", argc, argv);
     ZebraHandle zh = zebra_open (zs);
+
+    yaz_log_init_level( yaz_log_mask_str_x("xpath4,rsbetween", LOG_DEFAULT_LEVEL));
+
     init_data(zh,myrec);
 
 #define q(qry,hits) do_query(__LINE__,zh,qry,hits)
@@ -69,6 +101,12 @@ int main(int argc, char **argv)
     q("@attr 1=/record/title bar",2);
     q("@attr 1=/record/title[@lang='da'] foo",1);
     q("@attr 1=/record/title[@lang='en'] foo",1);
+
+    q("@attr 1=/record/title[@lang='en'] english",1); 
+    q("@attr 1=/record/title[@lang='da'] english",0); 
+    q("@attr 1=/record/title[@lang='da'] danish",1);  
+    q("@attr 1=/record/title[@lang='en'] danish",0);  
+
     q("@attr 1=/record/title @and foo bar",2);
     /* The previous one returns two hits, as the and applies to the whole
     record, so it matches <title>foo</title><title>bar</title>
@@ -78,6 +116,38 @@ int main(int argc, char **argv)
 
     /* check we get all the occureences for 'grunt' */
     /* this can only be seen in the log, with debugs on. bug #202 */
-    q("@attr 1=/record/author grunt",2);
+    q("@attr 1=/record/author grunt",3);
+
+    /* check nested tags */
+    q("@attr 1=/record/nested before",0);
+    q("@attr 1=/record/nested early",1);
+    q("@attr 1=/record/nested middle",1);
+    q("@attr 1=/record/nested late",1);
+    q("@attr 1=/record/nested after",0);
+
+    q("@attr 1=/record/nested/nested before",0);
+    q("@attr 1=/record/nested/nested early",0);
+    q("@attr 1=/record/nested/nested middle",1);
+    q("@attr 1=/record/nested/nested late",0);
+    q("@attr 1=/record/nested/nested after",0);
+
+    q("@attr 1=/record/nestattr[@level='outer'] before",0);
+    q("@attr 1=/record/nestattr[@level='outer'] early",1);
+    q("@attr 1=/record/nestattr[@level='outer'] middle",1);
+    q("@attr 1=/record/nestattr[@level='outer'] late",1);
+    q("@attr 1=/record/nestattr[@level='outer'] after",0);
+
+    q("@attr 1=/record/nestattr[@level='inner'] before",0);
+    q("@attr 1=/record/nestattr[@level='inner'] early",0);
+    q("@attr 1=/record/nestattr[@level='inner'] middle",0);
+    q("@attr 1=/record/nestattr[@level='inner'] late",0);
+    q("@attr 1=/record/nestattr[@level='inner'] after",0);
+
+    q("@attr 1=/record/nestattr/nestattr[@level='inner'] before",0);
+    q("@attr 1=/record/nestattr/nestattr[@level='inner'] early",0);
+    q("@attr 1=/record/nestattr/nestattr[@level='inner'] middle",1);
+    q("@attr 1=/record/nestattr/nestattr[@level='inner'] late",0);
+    q("@attr 1=/record/nestattr/nestattr[@level='inner'] after",0);
+
     return close_down(zh, zs, 0);
 }
index b525989..1a102a6 100644 (file)
@@ -1,6 +1,6 @@
 
 
-/* $Id: xpath5.c,v 1.2 2004-11-01 16:09:05 heikki Exp $
+/* $Id: xpath5.c,v 1.3 2004-11-05 17:44:32 heikki Exp $
    Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004
    Index Data Aps
 
@@ -71,21 +71,15 @@ int main(int argc, char **argv)
     ranking_query(__LINE__,zh,qry,hits,string,score)
 
     q("@attr 1=/record/title @attr 2=102 the",
-            3,"first title",846);
+            3,"first title",952);
     q("@attr 1=/ @attr 2=102 @or third foo",
             3,"third title",802);
 
     q("@attr 1=/ @attr 2=102 foo",
             3,"second title",850);
 
-#if 0 
-    /* FIXME - This query triggers bug #202 */
-    /* http://bugs.index/show_bug.cgi?id=202 */
-    /* it seems that the ranking does not see all occurrences */
-    /* of 'foo', and thus ranks all records as high. */
     q("@attr 1=/record/ @attr 2=102 foo",
-            3,"second title",846);
-#endif
+            3,"second title",927);
 
     return close_down(zh, zs, 0);
 }