More optimizations of the melm matching.
[idzebra-moved-to-github.git] / data1 / d1_absyn.c
index 5adb241..feff7ce 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: d1_absyn.c,v 1.29 2006-09-28 18:38:44 adam Exp $
+/* $Id: d1_absyn.c,v 1.30 2006-09-29 10:02:45 adam Exp $
    Copyright (C) 1995-2006
    Index Data ApS
 
@@ -674,7 +674,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
                                      enum DATA1_XPATH_INDEXING default_xpath)
 {
     data1_sub_elements *cur_elements = NULL;
-    data1_xpelement *cur_xpelement = NULL;
+    data1_xpelement **cur_xpelement = NULL;
     data1_attset *attset_list = data1_empty_attset(dh);
     data1_attset_child **attset_childp = &attset_list->children;
 
@@ -715,6 +715,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
     res->sub_elements = NULL;
     res->main_elements = NULL;
     res->xp_elements = NULL;
+    cur_xpelement = &res->xp_elements;
 
     while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50)))
     {
@@ -856,7 +857,8 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
            struct DFA *dfa = 0;
            data1_termlist **tp;
            char melm_xpath[128];
-            data1_xpelement *xp_old = 0;
+            data1_xpelement *xp_ele = 0;
+            data1_xpelement *last_match = 0;
             
            if (argc < 3)
            {
@@ -875,12 +877,14 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
            regexp = mk_xpath_regexp(dh, xpath_expr);
 
 #if OPTIMIZE_MELM
-            for (xp_old = res->xp_elements; xp_old; xp_old = xp_old->next)
-                if (!strcmp(xp_old->regexp, regexp))
-                    break;
+            /* get last of existing regulars with same regexp */
+            for (xp_ele = res->xp_elements; xp_ele; xp_ele = xp_ele->next)
+                if (!strcmp(xp_ele->regexp, regexp))
+                    last_match = xp_ele;
 #endif
-            if (!xp_old)
+            if (!last_match)
             {
+                /* new regular expression . Parse + generate */
                 const char *regexp_ptr = regexp;
 
                 dfa = dfa_init();
@@ -891,39 +895,31 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
                     continue;
                 }
             }
-           if (!cur_xpelement)
-           {
-                cur_xpelement = (data1_xpelement *)
-                   nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
-               res->xp_elements = cur_xpelement;
-            } else {
-                cur_xpelement->next = (data1_xpelement *)
-                    nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement));
-                cur_xpelement = cur_xpelement->next;
-           }
+            *cur_xpelement = (data1_xpelement *)
+                nmem_malloc(data1_nmem_get(dh), sizeof(**cur_xpelement));
+            (*cur_xpelement)->next = 0;
+            (*cur_xpelement)->match_next = 0;
+            if (last_match)
+                last_match->match_next = *cur_xpelement;
 #if OPTIMIZE_MELM
-            cur_xpelement->regexp = regexp;
+            (*cur_xpelement)->regexp = regexp;
 #endif
-           cur_xpelement->next = NULL;
-           cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh), 
-                                                   xpath_expr); 
+           (*cur_xpelement)->xpath_expr = nmem_strdup(data1_nmem_get (dh), 
+                                                       xpath_expr); 
            
             if (dfa)
                 dfa_mkstate (dfa);
-            cur_xpelement->dfa = dfa;
-
-#ifdef ENHANCED_XELM 
-            cur_xpelement->xpath_len =
-                zebra_parse_xpath_str(xpath_expr, 
-                                      cur_xpelement->xpath, XPATH_STEP_COUNT,
-                                      data1_nmem_get(dh));
+            (*cur_xpelement)->dfa = dfa;
             
-           /*
-           dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len);
-           */
+#ifdef ENHANCED_XELM 
+            (*cur_xpelement)->xpath_len =
+                zebra_parse_xpath_str(
+                    xpath_expr, 
+                    (*cur_xpelement)->xpath, XPATH_STEP_COUNT,
+                    data1_nmem_get(dh));
 #endif
-           cur_xpelement->termlists = 0;
-           tp = &cur_xpelement->termlists;
+           (*cur_xpelement)->termlists = 0;
+           tp = &(*cur_xpelement)->termlists;
             
            /* parse termList definitions */
            p = termlists;
@@ -937,6 +933,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file,
                }
                *tp = all; /* append any ALL entries to the list */
            }
+            cur_xpelement = &(*cur_xpelement)->next;
        }
        else if (!strcmp(cmd, "section"))
        {