Use optimized proximity for more cases.
author Adam Dickmeiss <adam@indexdata.dk>
Thu, 8 Oct 2009 07:23:01 +0000 (09:23 +0200)
committer Adam Dickmeiss <adam@indexdata.dk>
Thu, 8 Oct 2009 07:23:01 +0000 (09:23 +0200)
The optimized proximity now deals with more cases, i.e. all prox of the
form @prox 0 d 1 1,2,3 k 2 .. That's exclusion==0, any distance,
ordered (true), relation 1, 2 or 3 (<, <=, =), known unit word.

rset/rsprox.c
test/api/test_search.c

index 15e5b19..2df2592 100644 (file)
@@ -154,8 +154,7 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
             p->more[0] = rset_forward(p->rfd[0], p->buf[0], 
                                       &p->terms[0], untilbuf);
     }
             p->more[0] = rset_forward(p->rfd[0], p->buf[0], 
                                       &p->terms[0], untilbuf);
     }
-    if (info->ordered && info->relation == 3 && info->exclusion == 0
-        && info->distance == 1)
+    if (info->ordered && info->relation <= 3 && info->exclusion == 0)
     {
         while (p->more[0]) 
         {
     {
         while (p->more[0]) 
         {
@@ -166,8 +165,8 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
                     p->more[0] = 0; /* saves us a goto out of while loop. */
                     break;
                 }
                     p->more[0] = 0; /* saves us a goto out of while loop. */
                     break;
                 }
-                cmp = (*kctrl->cmp) (p->buf[i], p->buf[i-1]);
-                if (cmp >= rfd->rset->scope )  /* cmp>1 */
+                cmp = (*kctrl->cmp)(p->buf[i], p->buf[i-1]);
+                if (cmp >= rfd->rset->scope)  /* not same record */
                 {
                     p->more[i-1] = rset_forward (p->rfd[i-1],
                                                  p->buf[i-1],
                 {
                     p->more[i-1] = rset_forward (p->rfd[i-1],
                                                  p->buf[i-1],
@@ -175,20 +174,25 @@ static int r_forward(RSFD rfd, void *buf, TERMID *term, const void *untilbuf)
                                                  p->buf[i]);
                     break;
                 }
                                                  p->buf[i]);
                     break;
                 }
-                else if ( cmp>0 ) /* cmp == 1*/
+                else if (cmp > 0) /* within record and ordered */
                 {
                 {
-                    if ((*kctrl->getseq)(p->buf[i-1]) +1 != 
-                        (*kctrl->getseq)(p->buf[i]))
-                    { /* FIXME - We need more flexible multilevel stuff */
-                        p->more[i-1] = rset_read ( p->rfd[i-1], p->buf[i-1],
-                                                   &p->terms[i-1]);
-                        break;
-                    }
+                    zint diff = (*kctrl->getseq)(p->buf[i]) -
+                        (*kctrl->getseq)(p->buf[i-1]);
+                    if (info->relation == 3 && diff == info->distance)
+                        continue;
+                    else if (info->relation == 2 && diff <= info->distance)
+                        continue;
+                    else if (info->relation == 1 && diff < info->distance)
+                        continue;
+
+                    p->more[i-1] = rset_read(p->rfd[i-1], p->buf[i-1],
+                                             &p->terms[i-1]);
+                    break;
                 }
                 }
-                else
+                else  /* within record - wrong order */
                 {
                 {
-                    p->more[i] = rset_forward (p->rfd[i], 
-                                  p->buf[i], &p->terms[i], p->buf[i-1]);
+                    p->more[i] = rset_forward(p->rfd[i], p->buf[i],
+                                              &p->terms[i], p->buf[i-1]);
                     break;
                 }
             }
                     break;
                 }
             }
index 9fe04ec..7dae442 100644 (file)
@@ -50,6 +50,7 @@ const char *myrec[] = {
         "</gils>\n",
 
         "<gils>\n<title>My title x</title><abstract>a b c c c a y</abstract>\n</gils>\n" ,
         "</gils>\n",
 
         "<gils>\n<title>My title x</title><abstract>a b c c c a y</abstract>\n</gils>\n" ,
+        "<gils>\n<title>test</title><abstract>a1 a2 c a1 a2 a3</abstract>\n</gils>\n" ,
 
         "<test_search>\n"
         " <date>2107-09-19 00:00:00</date>\n"
 
         "<test_search>\n"
         " <date>2107-09-19 00:00:00</date>\n"
@@ -173,8 +174,8 @@ static void tst(int argc, char **argv)
     YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 title", 2));
 
     /* always-matches relation */
     YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=5 title", 2));
 
     /* always-matches relation */
-    YAZ_CHECK(tl_query(zh, "@attr 1=_ALLRECORDS @attr 2=103 {ym}", 4));
-    YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 {x my}", 3));
+    YAZ_CHECK(tl_query(zh, "@attr 1=_ALLRECORDS @attr 2=103 {ym}", 5));
+    YAZ_CHECK(tl_query(zh, "@attr 1=4 @attr 2=103 {x my}", 4));
     YAZ_CHECK(tl_query_x(zh, "@attr 1=1 @attr 2=103 {x my}", 0, 114));
 
     /* and searches */
     YAZ_CHECK(tl_query_x(zh, "@attr 1=1 @attr 2=103 {x my}", 0, 114));
 
     /* and searches */
@@ -252,6 +253,10 @@ static void tst(int argc, char **argv)
     /* exl=0 distance=1 order=1 relation=2 (<=), known, unit=word */
     YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 c a", 1));
 
     /* exl=0 distance=1 order=1 relation=2 (<=), known, unit=word */
     YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 c a", 1));
 
+    /* exl=0 distance=1 order=1 relation=2 (<=), known, unit=word */
+    YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 2 k 2 @prox 0 1 1 2 k 2 a1 a2 a3", 1));
+    YAZ_CHECK(tl_query(zh, "@attr 1=1016 @prox 0 1 1 3 k 2 @prox 0 1 1 3 k 2 a1 a2 a3", 1));
+
     /* 3 term @prox test.. */
     YAZ_CHECK(tl_query(zh, "@attr 1=1016 \"a b c\"", 1));
 
     /* 3 term @prox test.. */
     YAZ_CHECK(tl_query(zh, "@attr 1=1016 \"a b c\"", 1));