Fixed bug #685: Optimize xelm/melm matching. Indexing the Koha collection

[idzebra-moved-to-github.git] / index / recgrs.c
diff --git a/index/recgrs.c b/index/recgrs.c

index 8a31f3c..7da47f7 100644 (file)
--- a/index/recgrs.c
+++ b/index/recgrs.c
@@ -1,4 +1,4 @@
-/* $Id: recgrs.c,v 1.3 2006-07-06 12:42:22 marc Exp $
+/* $Id: recgrs.c,v 1.6 2006-09-28 18:38:47 adam Exp $
     Copyright (C) 1995-2006
     Index Data ApS
  
     Copyright (C) 1995-2006
     Index Data ApS
  
@@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  for more details.
  
  You should have received a copy of the GNU General Public License
  for more details.
  
  You should have received a copy of the GNU General Public License
-along with Zebra; see the file LICENSE.zebra.  If not, write to the
-Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
-02111-1307, USA.
+along with this program; if not, write to the Free Software
+Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+
  */
  
  #include <stdio.h>
  */
  
  #include <stdio.h>
@@ -392,20 +392,40 @@ pop, 2003-01-17
  data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
  {
      data1_absyn *abs = n->root->u.root.absyn;
  data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
  {
      data1_absyn *abs = n->root->u.root.absyn;
-    data1_xpelement *xpe = abs->xp_elements;
+
+    data1_xpelement *xpe = 0;
      data1_node *nn;
  #ifdef ENHANCED_XELM 
      struct xpath_location_step *xp;
  #endif
      char *pexpr = xmalloc(strlen(tagpath)+5);
      data1_node *nn;
  #ifdef ENHANCED_XELM 
      struct xpath_location_step *xp;
  #endif
      char *pexpr = xmalloc(strlen(tagpath)+5);
-    int ok = 0;
      
      sprintf (pexpr, "/%s\n", tagpath);
      
      sprintf (pexpr, "/%s\n", tagpath);
-    for (; xpe; xpe = xpe->next)
+
+    for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
+        xpe->match_state = -1; /* don't know if it matches yet */
+
+    for (xpe = abs->xp_elements; xpe; xpe = xpe->next)
      {
         int i;
      {
         int i;
-       ok = dfa_match_first(xpe->dfa->states, pexpr);
+        int ok = xpe->match_state;
+        if (ok == -1)
+        {   /* don't know whether there is a match yet */
+            data1_xpelement *xpe1;
+
+            assert(xpe->dfa);
+            ok = dfa_match_first(xpe->dfa->states, pexpr);
  
  
+#if OPTIMIZE_MELM
+            /* mark this and following ones with same regexp */
+            for (xpe1 = xpe; xpe1; xpe1 = xpe1->next)
+            {
+                if (!strcmp(xpe1->regexp, xpe->regexp))
+                    xpe1->match_state = ok;
+            }
+#endif
+        }
+        assert (ok == 0 || ok == 1);
          if (ok) {
  #ifdef ENHANCED_XELM 
             /* we have to check the predicates up to the root node */
          if (ok) {
  #ifdef ENHANCED_XELM 
             /* we have to check the predicates up to the root node */
@@ -440,7 +460,7 @@ data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
      
      xfree(pexpr);
      
      
      xfree(pexpr);
      
-    if (ok) {
+    if (xpe) {
         yaz_log(YLOG_DEBUG, "Got it");
          return xpe->termlists;
      } else {
         yaz_log(YLOG_DEBUG, "Got it");
          return xpe->termlists;
      } else {
@@ -952,12 +972,7 @@ static int grs_extract_sub(void *clientData, struct recExtractCtrl *p,
      int oidtmp[OID_SIZE];
      RecWord wrd;
  
      int oidtmp[OID_SIZE];
      RecWord wrd;
  
-    gri.readf = p->readf;
-    gri.seekf = p->seekf;
-    gri.tellf = p->tellf;
-    gri.endf = p->endf;
-    gri.fh = p->fh;
-    gri.offset = p->offset;
+    gri.stream = p->stream;
      gri.mem = mem;
      gri.dh = p->dh;
      gri.clientData = clientData;
      gri.mem = mem;
      gri.dh = p->dh;
      gri.clientData = clientData;
@@ -1149,12 +1164,7 @@ int zebra_grs_retrieve(void *clientData, struct recRetrieveCtrl *p,
      int dummy;
      
      mem = nmem_create();
      int dummy;
      
      mem = nmem_create();
-    gri.readf = p->readf;
-    gri.seekf = p->seekf;
-    gri.tellf = p->tellf;
-    gri.endf = NULL;
-    gri.fh = p->fh;
-    gri.offset = 0;
+    gri.stream = p->stream;
      gri.mem = mem;
      gri.dh = p->dh;
      gri.clientData = clientData;
      gri.mem = mem;
      gri.dh = p->dh;
      gri.clientData = clientData;