Merge branch 'master' of ssh://git.indexdata.com/home/git/pub/idzebra
author Mike Taylor <mike@miketaylor.org.uk>
Thu, 8 Oct 2009 20:22:32 +0000 (21:22 +0100)
committer Mike Taylor <mike@miketaylor.org.uk>
Thu, 8 Oct 2009 20:22:32 +0000 (21:22 +0100)
NEWS
configure.ac
debian/changelog
index/kinput.c
rset/rsprox.c
test/api/test_search.c

diff --git a/NEWS b/NEWS
index c18295b..b552f20 100644
--- a/NEWS
+++ b/NEWS
@@ -1,3 +1,14 @@
+--- 2.0.42 2009/10/08
+
+Proximity modifications. The optimized proximity now deals with more
+cases, i.e. all prox of the form @prox 0 d 1 1,2,3 k 2 ..
+That is exclusion==0, any distance, ordered (true), relation (<, <=, =),
+known, unit word.
+
+Fix bug WRT inconsistent registers, bug #3062.
+
+Fix install doc rule so it's compatible with Automake 1.11.
+
 --- 2.0.41 2009/08/24
 
 Fix typos and other errors in the documentation. Thanks to Galen
diff --git a/configure.ac b/configure.ac
index 8cf0f1e..0a8d6fb 100644
--- a/configure.ac
+++ b/configure.ac
@@ -2,7 +2,7 @@ dnl This file is part of the Zebra server.
 dnl   Copyright (C) 1994-2009 Index Data
 dnl
 AC_PREREQ(2.60)
-AC_INIT([idzebra],[2.0.41],[zebra-help@indexdata.dk])
+AC_INIT([idzebra],[2.0.42],[zebra-help@indexdata.dk])
 AC_CONFIG_SRCDIR(configure.ac)
 AC_CONFIG_AUX_DIR(config)
 AM_INIT_AUTOMAKE([1.9])
diff --git a/debian/changelog b/debian/changelog
index 532f693..f68d301 100644
--- a/debian/changelog
+++ b/debian/changelog
@@ -1,3 +1,9 @@
+idzebra (2.0.42-1indexdata) unstable; urgency=low
+
+  * Upstream.
+
+ -- Adam Dickmeiss <adam@indexdata.dk>  Thu, 08 Oct 2009 09:36:43 +0200
+
 idzebra (2.0.41-1indexdata) unstable; urgency=low
 
   * Upstream.
diff --git a/index/kinput.c b/index/kinput.c
index f5afa44..de1d563 100644
--- a/index/kinput.c
+++ b/index/kinput.c
@@ -185,7 +185,7 @@ int key_file_read(struct key_file *f, char *key)
         key[i++] = c;
         while ((c = key_file_getc(f)))
         {
-            if (i <= IT_MAX_WORD)
+            if (i < INP_NAME_MAX-2)
                 key[i++] = c;
         }
         key[i++] = '\0';
@@ -360,7 +360,7 @@ static int heap_read_one(struct heap_info *hi, char *name, char *key)
 /* for debugging only */
 void zebra_log_dict_entry(ZebraHandle zh, const char *s)
 {
-    char dst[IT_MAX_WORD+1];
+    char dst[INP_NAME_MAX+1];
     int ord;
     int len = key_SU_decode(&ord, (const unsigned char *) s);
     const char *index_type;
diff --git a/rset/rsprox.c b/rset/rsprox.c
index 6972828..fa35f38 100644
--- a/rset/rsprox.c
+++ b/rset/rsprox.c
@@ -29,13 +29,13 @@ Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 #define RSET_DEBUG 0
 #endif
 
-static RSFD r_open (RSET ct, int flag);
-static void r_close (RSFD rfd);
-static void r_delete (RSET ct);
+static RSFD r_open(RSET ct, int flag);
+static void r_close(RSFD rfd);
+static void r_delete(RSET ct);
 static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf);
-static int r_read (RSFD rfd, void *buf, TERMID *term);
-static int r_write (RSFD rfd, const void *buf);
-static void r_pos (RSFD rfd, double *current, double *total);
+static int r_read(RSFD rfd, void *buf, TERMID *term);
+static int r_write(RSFD rfd, const void *buf);
+static void r_pos(RSFD rfd, double *current, double *total);
 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm);
 
 static const struct rset_control control = 
@@ -85,11 +85,11 @@ RSET rset_create_prox(NMEM nmem, struct rset_key_control *kcontrol,
     return rnew;
 }
 
-static void r_delete (RSET ct)
+static void r_delete(RSET ct)
 {
 }
 
-static RSFD r_open (RSET ct, int flag)
+static RSFD r_open(RSET ct, int flag)
 {
     RSFD rfd;
     struct rset_prox_rfd *p;
@@ -103,10 +103,11 @@ static RSFD r_open (RSET ct, int flag)
     rfd = rfd_create_base(ct);
     if (rfd->priv)
         p = (struct rset_prox_rfd *)(rfd->priv);
-    else {
+    else
+    {
         p = (struct rset_prox_rfd *) nmem_malloc(ct->nmem,sizeof(*p));
         rfd->priv = p;
-        p->more = nmem_malloc (ct->nmem,sizeof(*p->more) * ct->no_children);
+        p->more = nmem_malloc(ct->nmem,sizeof(*p->more) * ct->no_children);
         p->buf = nmem_malloc(ct->nmem,sizeof(*p->buf) * ct->no_children);
         p->terms = nmem_malloc(ct->nmem,sizeof(*p->terms) * ct->no_children);
         for (i = 0; i < ct->no_children; i++) 
@@ -118,22 +119,23 @@ static RSFD r_open (RSET ct, int flag)
     }
     yaz_log(YLOG_DEBUG,"rsprox (%s) open [%p] n=%d", 
             ct->control->desc, rfd, ct->no_children);
-
-    for (i = 0; i < ct->no_children; i++) {
-        p->rfd[i] = rset_open (ct->children[i], RSETF_READ);
-        p->more[i] = rset_read (p->rfd[i], p->buf[i], &p->terms[i]);
+    
+    for (i = 0; i < ct->no_children; i++)
+    {
+        p->rfd[i] = rset_open(ct->children[i], RSETF_READ);
+        p->more[i] = rset_read(p->rfd[i], p->buf[i], &p->terms[i]);
     }
     p->hits = 0;
     return rfd;
 }
 
-static void r_close (RSFD rfd)
+static void r_close(RSFD rfd)
 {
     RSET ct = rfd->rset;
     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
     
     int i;
-    for (i = 0; i<ct->no_children; i++)
+    for (i = 0; i < ct->no_children; i++)
         rset_close(p->rfd[i]);
 }
 
@@ -149,13 +151,12 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
     if (untilbuf)
     {
         /* it is enough to forward first one. Other will follow. */
-        if ( p->more[0] &&   /* was: cmp >=2 */
-           ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) ) 
+        if (p->more[0] &&   /* was: cmp >=2 */
+            ((kctrl->cmp)(untilbuf, p->buf[0]) >= rfd->rset->scope) ) 
             p->more[0] = rset_forward(p->rfd[0], p->buf[0], 
                                       &p->terms[0], untilbuf);
     }
-    if (info->ordered && info->relation == 3 && info->exclusion == 0
-        && info->distance == 1)
+    if (info->ordered && info->relation <= 3 && info->exclusion == 0)
     {
         while (p->more[0]) 
         {
@@ -166,29 +167,34 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
                     p->more[0] = 0; /* saves us a goto out of while loop. */
                     break;
                 }
-                cmp = (*kctrl->cmp) (p->buf[i], p->buf[i-1]);
-                if (cmp >= rfd->rset->scope )  /* cmp>1 */
+                cmp = (*kctrl->cmp)(p->buf[i], p->buf[i-1]);
+                if (cmp >= rfd->rset->scope)  /* not same record */
                 {
-                    p->more[i-1] = rset_forward (p->rfd[i-1],
-                                                 p->buf[i-1],
-                                                 &p->terms[i-1],
-                                                 p->buf[i]);
+                    p->more[i-1] = rset_forward(p->rfd[i-1],
+                                                p->buf[i-1],
+                                                &p->terms[i-1],
+                                                p->buf[i]);
                     break;
                 }
-                else if ( cmp>0 ) /* cmp == 1*/
+                else if (cmp > 0) /* within record and ordered */
                 {
-                    if ((*kctrl->getseq)(p->buf[i-1]) +1 != 
-                        (*kctrl->getseq)(p->buf[i]))
-                    { /* FIXME - We need more flexible multilevel stuff */
-                        p->more[i-1] = rset_read ( p->rfd[i-1], p->buf[i-1],
-                                                   &p->terms[i-1]);
-                        break;
-                    }
+                    zint diff = (*kctrl->getseq)(p->buf[i]) -
+                        (*kctrl->getseq)(p->buf[i-1]);
+                    if (info->relation == 3 && diff == info->distance)
+                        continue;
+                    else if (info->relation == 2 && diff <= info->distance)
+                        continue;
+                    else if (info->relation == 1 && diff < info->distance)
+                        continue;
+                    
+                    p->more[i-1] = rset_read(p->rfd[i-1], p->buf[i-1],
+                                             &p->terms[i-1]);
+                    break;
                 }
-                else
+                else  /* within record - wrong order */
                 {
-                    p->more[i] = rset_forward (p->rfd[i], 
-                                  p->buf[i], &p->terms[i], p->buf[i-1]);
+                    p->more[i] = rset_forward(p->rfd[i], p->buf[i],
+                                              &p->terms[i], p->buf[i-1]);
                     break;
                 }
             }
@@ -210,19 +216,19 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
         {
             int cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
             if ( cmp <= - rfd->rset->scope) /* cmp<-1*/
-                p->more[0] = rset_forward (p->rfd[0], p->buf[0], 
-                                           &p->terms[0],p->buf[1]);
+                p->more[0] = rset_forward(p->rfd[0], p->buf[0], 
+                                          &p->terms[0],p->buf[1]);
             else if ( cmp >= rfd->rset->scope ) /* cmp>1 */
-                p->more[1] = rset_forward (p->rfd[1], p->buf[1], 
-                                           &p->terms[1],p->buf[0]);
+                p->more[1] = rset_forward(p->rfd[1], p->buf[1], 
+                                          &p->terms[1],p->buf[0]);
             else
             {
                 zint seqno[500]; /* FIXME - why 500 ?? */
                 int n = 0;
                 
                 seqno[n++] = (*kctrl->getseq)(p->buf[0]);
-                while ((p->more[0] = rset_read (p->rfd[0],
-                                        p->buf[0], &p->terms[0])))
+                while ((p->more[0] = rset_read(p->rfd[0],
+                                               p->buf[0], &p->terms[0])))
                 {
                     cmp = (*kctrl->cmp)(p->buf[0], p->buf[1]);
                     if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
@@ -230,51 +236,60 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
                     if (n < 500)
                         seqno[n++] = (*kctrl->getseq)(p->buf[0]);
                 }
-                for (i = 0; i<n; i++)
+                /* set up return buffer.. (save buf[1]) */
+                memcpy(buf, p->buf[1], kctrl->key_size);
+                if (term)
+                    *term = p->terms[1];
+                while (1)
                 {
-                    zint diff = (*kctrl->getseq)(p->buf[1]) - seqno[i];
-                    int excl = info->exclusion;
-                    if (!info->ordered && diff < 0)
-                        diff = -diff;
-                    switch (info->relation)
+                    for (i = 0; i < n; i++)
                     {
-                    case 1:      /* < */
-                        if (diff < info->distance && diff >= 0)
-                            excl = !excl;
-                        break;
-                    case 2:      /* <= */
-                        if (diff <= info->distance && diff >= 0)
-                            excl = !excl;
-                        break;
-                    case 3:      /* == */
-                        if (diff == info->distance && diff >= 0)
-                            excl = !excl;
-                        break;
-                    case 4:      /* >= */
-                        if (diff >= info->distance && diff >= 0)
-                            excl = !excl;
-                        break;
-                    case 5:      /* > */
-                        if (diff > info->distance && diff >= 0)
-                            excl = !excl;
+                        zint diff = (*kctrl->getseq)(p->buf[1]) - seqno[i];
+                        int excl = info->exclusion;
+                        if (!info->ordered && diff < 0)
+                            diff = -diff;
+                        switch (info->relation)
+                        {
+                        case 1:      /* < */
+                            if (diff < info->distance && diff >= 0)
+                                excl = !excl;
+                            break;
+                        case 2:      /* <= */
+                            if (diff <= info->distance && diff >= 0)
+                                excl = !excl;
+                            break;
+                        case 3:      /* == */
+                            if (diff == info->distance && diff >= 0)
+                                excl = !excl;
+                            break;
+                        case 4:      /* >= */
+                            if (diff >= info->distance && diff >= 0)
+                                excl = !excl;
+                            break;
+                        case 5:      /* > */
+                            if (diff > info->distance && diff >= 0)
+                                excl = !excl;
+                            break;
+                        case 6:      /* != */
+                            if (diff != info->distance && diff >= 0)
+                                excl = !excl;
+                            break;
+                        }
+                        if (excl)
+                        {
+                            p->more[1] = rset_read( p->rfd[1], p->buf[1],
+                                                    &p->terms[1]);
+                            p->hits++;
+                            return 1;
+                        }
+                    }
+                    p->more[1] = rset_read(p->rfd[1], p->buf[1], &p->terms[1]);
+                    if (!p->more[1])
                         break;
-                    case 6:      /* != */
-                        if (diff != info->distance && diff >= 0)
-                            excl = !excl;
+                    cmp = (*kctrl->cmp)(buf, p->buf[1]);
+                    if (cmp <= - rfd->rset->scope || cmp >= rfd->rset->scope)
                         break;
-                    }
-                    if (excl)
-                    {
-                        memcpy (buf, p->buf[1], kctrl->key_size);
-                        if (term)
-                            *term = p->terms[1];
-                        p->more[1] = rset_read ( p->rfd[1], p->buf[1],
-                                                 &p->terms[1]);
-                        p->hits++;
-                        return 1;
-                    }
                 }
-                p->more[1] = rset_read (p->rfd[1], p->buf[1], &p->terms[1]);
             }
         }
     }
@@ -282,18 +297,18 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
 }
 
 
-static int r_read (RSFD rfd, void *buf, TERMID *term)
+static int r_read(RSFD rfd, void *buf, TERMID *term)
 {
     return r_forward(rfd, buf, term, 0);
 }
 
-static int r_write (RSFD rfd, const void *buf)
+static int r_write(RSFD rfd, const void *buf)
 {
     yaz_log(YLOG_FATAL, "prox set type is read-only");
     return -1;
 }
 
-static void r_pos (RSFD rfd, double *current, double *total)
+static void r_pos(RSFD rfd, double *current, double *total)
 {
     RSET ct = rfd->rset;
     struct rset_prox_rfd *p = (struct rset_prox_rfd *)(rfd->priv);
@@ -307,30 +322,36 @@ static void r_pos (RSFD rfd, double *current, double *total)
     for (i = 0; i < ct->no_children; i++)
     {
         rset_pos(p->rfd[i],  &cur, &tot);
-        if (tot>0) {
+        if (tot > 0)
+        {
             scur += cur;
             stot += tot;
         }
     }
-    if (tot <0) {  /* nothing found */
+    if (tot < 0)
+    {  /* nothing found */
         *current = -1;
         *total = -1;
-    } else if (tot < 1) { /* most likely tot==0 */
+    }
+    else if (tot < 1)
+    { /* most likely tot==0 */
         *current = 0;
         *total = 0;
-    } else {
+    }
+    else
+    {
         r = scur/stot; 
         *current = (double) p->hits;
-        *total=*current/r ; 
+        *total = *current/r ; 
     }
     yaz_log(YLOG_DEBUG,"prox_pos: [%d] %0.1f/%0.1f= %0.4f ",
-                    i,*current, *total, r);
+            i,*current, *total, r);
 }
 
 static void r_get_terms(RSET ct, TERMID *terms, int maxterms, int *curterm)
 {
     int i;
-    for (i = 0; i<ct->no_children; i++)
+    for (i = 0; i < ct->no_children; i++)
         rset_getterms(ct->children[i], terms, maxterms, curterm);
 }
 
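diff --git a/test/api/test_search.c b/test/api/test_search.c
index 80e75d3..7dae442 100644
--- a/test/api/test_search.c
+++ b/test/api/test_search.c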
@@ -50,6 +50,7 @@ const char *myrec[] = {
         "</gils>\n",
 
         "<gils>\n<title>My title x</title><abstract>a b c c c a y</abstract>\n</gils>\n" ,
+        "<gils>\n<title>test</title><abstract>a1 a2 c a1 a2 a3</abstract>\n</gils>\n" ,
 
         "<test_search>\n"
         " <date>2107-09-19 00:00:00</date>\n"
@@ -173,8 +174,8 @@ static void tst(int argc, char **argv)
     YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 title", 2));
 
     /* always-matches relation */
-    YAZ_CHECK(tl_query(zh, "@attr 1=_ALLRECORDS @attr 2=103 {ym}", 4));
-    YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 {x my}", 3));
+    YAZ_CHECK(tl_query(zh, "@attr 1=_ALLRECORDS @attr 2=103 {ym}", 5));
+    YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 {x my}", 4));
     YAZ_CHECK(tl_query_x(zh, "@attr 1=1 @attr 2=103 {x my}", 0, 114));
 
     /* and searches */
@@ -246,6 +247,16 @@ static void tst(int argc, char **argv)
     /* exl=0 distance=1 order=1 relation=3 (=), known, unit=word */
     YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 a b", 1));
 
+
+    /* exl=0 distance=1 order=1 relation=3 (=), known, unit=word */
+    YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 c a", 1));
+    /* exl=0 distance=1 order=1 relation=2 (<=), known, unit=word */
+    YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 c a", 1));
+
+    /* nested @prox: exl=0 distance=1 order=1 relation=2 (<=), then 3 (=), known, unit=word */
+    YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 @prox 0 1 1 2 k 2 a1 a2 a3", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 @prox 0 1 1 3 k 2 a1 a2 a3", 1));
+
     /* 3 term @prox test.. */
     YAZ_CHECK(tl_query(zh, "@attr 1=1016 \"a b c\"", 1));
 
@@ -255,7 +266,7 @@ static void tst(int argc, char **argv)
     /* left associative (works fine) */
     YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 @prox 0 1 1 2 k 2 a b c", 1));
 
-    /* exl=0 distance=1 order=1 relation=3 (=), known, unit=word *
+    /* exl=0 distance=1 order=1 relation=3 (=), known, unit=word */
     /* right associative (does not work, so zero hits) */
     YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 a @prox 0 1 1 3 k 2 b c", 0));
     /* left associative (works fine) */