More optimizations of the melm matching.
authorAdam Dickmeiss <adam@indexdata.dk>
Fri, 29 Sep 2006 10:02:45 +0000 (10:02 +0000)
committerAdam Dickmeiss <adam@indexdata.dk>
Fri, 29 Sep 2006 10:02:45 +0000 (10:02 +0000)
data1/d1_absyn.c
include/d1_absyn.h
index/recgrs.c

index 5adb241..feff7ce 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: d1_absyn.c,v 1.29 2006-09-28 18:38:44 adam Exp $
+/* $Id: d1_absyn.c,v 1.30 2006-09-29 10:02:45 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -674,7 +674,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
                                      enum DATA1_XPATH_INDEXING default_xpath)
 {
     data1_sub_elements *cur_elements = NULL;
-    data1_xpelement *cur_xpelement = NULL;
+    data1_xpelement **cur_xpelement = NULL;
     data1_attset *attset_list = data1_empty_attset(dh);
     data1_attset_child **attset_childp = &attset_list->children;
 
@@ -715,6 +715,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
     res->sub_elements = NULL;
     res->main_elements = NULL;
     res->xp_elements = NULL;
+    cur_xpelement = &res->xp_elements;
 
     while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50)))
     {
@@ -856,7 +857,8 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
            struct DFA *dfa = 0;
            data1_termlist **tp;
            char melm_xpath[128];
-            data1_xpelement *xp_old = 0;
+            data1_xpelement *xp_ele = 0;
+            data1_xpelement *last_match = 0;
             
            if (argc < 3)
            {
@@ -875,12 +877,14 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
            regexp = mk_xpath_regexp(dh, xpath_expr);
 
 #if OPTIMIZE_MELM
-            for (xp_old = res->xp_elements; xp_old; xp_old = xp_old->next)
-                if (!strcmp(xp_old->regexp, regexp))
-                    break;
+            /* get last of existing regulars with same regexp */
+            for (xp_ele = res->xp_elements; xp_ele; xp_ele = xp_ele->next)
+                if (!strcmp(xp_ele->regexp, regexp))
+                    last_match = xp_ele;
 #endif
-            if (!xp_old)
+            if (!last_match)
             {
+                /* new regular expression . Parse + generate */
                 const char *regexp_ptr = regexp;
 
                 dfa = dfa_init();
@@ -891,39 +895,31 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
                     continue;
                 }
             }
-           if (!cur_xpelement)
-           {
-                cur_xpelement = (data1_xpelement *)
-                   nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
-               res->xp_elements = cur_xpelement;
-            } else {
-                cur_xpelement->next = (data1_xpelement *)
-                    nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
-                cur_xpelement = cur_xpelement->next;
-           }
+            *cur_xpelement = (data1_xpelement *)
+                nmem_malloc(data1_nmem_get(dh), sizeof(**cur_xpelement));
+            (*cur_xpelement)->next = 0;
+            (*cur_xpelement)->match_next = 0;
+            if (last_match)
+                last_match->match_next = *cur_xpelement;
 #if OPTIMIZE_MELM
-            cur_xpelement->regexp = regexp;
+            (*cur_xpelement)->regexp = regexp;
 #endif
-           cur_xpelement->next = NULL;
-           cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh), 
-                                                   xpath_expr); 
+           (*cur_xpelement)->xpath_expr = nmem_strdup(data1_nmem_get (dh), 
+                                                       xpath_expr); 
            
             if (dfa)
                 dfa_mkstate (dfa);
-            cur_xpelement->dfa = dfa;
-
-#ifdef ENHANCED_XELM 
-            cur_xpelement->xpath_len =
-                zebra_parse_xpath_str(xpath_expr, 
-                                      cur_xpelement->xpath, XPATH_STEP_COUNT,
-                                      data1_nmem_get(dh));
+            (*cur_xpelement)->dfa = dfa;
             
-           /*
-           dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len);
-           */
+#ifdef ENHANCED_XELM 
+            (*cur_xpelement)->xpath_len =
+                zebra_parse_xpath_str(
+                    xpath_expr, 
+                    (*cur_xpelement)->xpath, XPATH_STEP_COUNT,
+                    data1_nmem_get(dh));
 #endif
-           cur_xpelement->termlists = 0;
-           tp = &cur_xpelement->termlists;
+           (*cur_xpelement)->termlists = 0;
+           tp = &(*cur_xpelement)->termlists;
             
            /* parse termList definitions */
            p = termlists;
@@ -937,6 +933,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
                }
                *tp = all; /* append any ALL entries to the list */
            }
+            cur_xpelement = &(*cur_xpelement)->next;
        }
        else if (!strcmp(cmd, "section"))
        {
index 20d5669..ad6ce11 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: d1_absyn.h,v 1.7 2006-09-28 18:38:46 adam Exp $
+/* $Id: d1_absyn.h,v 1.8 2006-09-29 10:02:46 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -44,6 +44,7 @@ typedef struct data1_xpelement
     const char *regexp;
 #endif
     int match_state;
+    struct data1_xpelement *match_next;
 } data1_xpelement;
 
 struct data1_absyn
index 7da47f7..4d621ef 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: recgrs.c,v 1.6 2006-09-28 18:38:47 adam Exp $
+/* $Id: recgrs.c,v 1.7 2006-09-29 10:02:47 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -418,11 +418,8 @@ data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n)
 
 #if OPTIMIZE_MELM
             /* mark this and following ones with same regexp */
-            for (xpe1 = xpe; xpe1; xpe1 = xpe1->next)
-            {
-                if (!strcmp(xpe1->regexp, xpe->regexp))
-                    xpe1->match_state = ok;
-            }
+            for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next)
+                xpe1->match_state = ok;
 #endif
         }
         assert (ok == 0 || ok == 1);