From f5fb1bd0a0df8a28471285cdf622897f4c2391b3 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Fri, 29 Sep 2006 10:02:45 +0000 Subject: [PATCH] More optimizations of the melm matching. --- data1/d1_absyn.c | 63 +++++++++++++++++++++++++--------------------------- include/d1_absyn.h | 3 ++- index/recgrs.c | 9 +++----- 3 files changed, 35 insertions(+), 40 deletions(-) diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c index 5adb241..feff7ce 100644 --- a/data1/d1_absyn.c +++ b/data1/d1_absyn.c @@ -1,4 +1,4 @@ -/* $Id: d1_absyn.c,v 1.29 2006-09-28 18:38:44 adam Exp $ +/* $Id: d1_absyn.c,v 1.30 2006-09-29 10:02:45 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -674,7 +674,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, enum DATA1_XPATH_INDEXING default_xpath) { data1_sub_elements *cur_elements = NULL; - data1_xpelement *cur_xpelement = NULL; + data1_xpelement **cur_xpelement = NULL; data1_attset *attset_list = data1_empty_attset(dh); data1_attset_child **attset_childp = &attset_list->children; @@ -715,6 +715,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, res->sub_elements = NULL; res->main_elements = NULL; res->xp_elements = NULL; + cur_xpelement = &res->xp_elements; while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50))) { @@ -856,7 +857,8 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, struct DFA *dfa = 0; data1_termlist **tp; char melm_xpath[128]; - data1_xpelement *xp_old = 0; + data1_xpelement *xp_ele = 0; + data1_xpelement *last_match = 0; if (argc < 3) { @@ -875,12 +877,14 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, regexp = mk_xpath_regexp(dh, xpath_expr); #if OPTIMIZE_MELM - for (xp_old = res->xp_elements; xp_old; xp_old = xp_old->next) - if (!strcmp(xp_old->regexp, regexp)) - break; + /* get last of existing regulars with same regexp */ + for (xp_ele = res->xp_elements; xp_ele; xp_ele = xp_ele->next) + if (!strcmp(xp_ele->regexp, regexp)) + last_match = xp_ele; #endif - if (!xp_old) + if (!last_match) { + /* new regular expression . Parse + generate */ const char *regexp_ptr = regexp; dfa = dfa_init(); @@ -891,39 +895,31 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, continue; } } - if (!cur_xpelement) - { - cur_xpelement = (data1_xpelement *) - nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement)); - res->xp_elements = cur_xpelement; - } else { - cur_xpelement->next = (data1_xpelement *) - nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement)); - cur_xpelement = cur_xpelement->next; - } + *cur_xpelement = (data1_xpelement *) + nmem_malloc(data1_nmem_get(dh), sizeof(**cur_xpelement)); + (*cur_xpelement)->next = 0; + (*cur_xpelement)->match_next = 0; + if (last_match) + last_match->match_next = *cur_xpelement; #if OPTIMIZE_MELM - cur_xpelement->regexp = regexp; + (*cur_xpelement)->regexp = regexp; #endif - cur_xpelement->next = NULL; - cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh), - xpath_expr); + (*cur_xpelement)->xpath_expr = nmem_strdup(data1_nmem_get (dh), + xpath_expr); if (dfa) dfa_mkstate (dfa); - cur_xpelement->dfa = dfa; - -#ifdef ENHANCED_XELM - cur_xpelement->xpath_len = - zebra_parse_xpath_str(xpath_expr, - cur_xpelement->xpath, XPATH_STEP_COUNT, - data1_nmem_get(dh)); + (*cur_xpelement)->dfa = dfa; - /* - dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len); - */ +#ifdef ENHANCED_XELM + (*cur_xpelement)->xpath_len = + zebra_parse_xpath_str( + xpath_expr, + (*cur_xpelement)->xpath, XPATH_STEP_COUNT, + data1_nmem_get(dh)); #endif - cur_xpelement->termlists = 0; - tp = &cur_xpelement->termlists; + (*cur_xpelement)->termlists = 0; + tp = &(*cur_xpelement)->termlists; /* parse termList definitions */ p = termlists; @@ -937,6 +933,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, } *tp = all; /* append any ALL entries to the list */ } + cur_xpelement = &(*cur_xpelement)->next; } else if (!strcmp(cmd, "section")) { diff --git a/include/d1_absyn.h b/include/d1_absyn.h index 20d5669..ad6ce11 100644 --- a/include/d1_absyn.h +++ b/include/d1_absyn.h @@ -1,4 +1,4 @@ -/* $Id: d1_absyn.h,v 1.7 2006-09-28 18:38:46 adam Exp $ +/* $Id: d1_absyn.h,v 1.8 2006-09-29 10:02:46 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -44,6 +44,7 @@ typedef struct data1_xpelement const char *regexp; #endif int match_state; + struct data1_xpelement *match_next; } data1_xpelement; struct data1_absyn diff --git a/index/recgrs.c b/index/recgrs.c index 7da47f7..4d621ef 100644 --- a/index/recgrs.c +++ b/index/recgrs.c @@ -1,4 +1,4 @@ -/* $Id: recgrs.c,v 1.6 2006-09-28 18:38:47 adam Exp $ +/* $Id: recgrs.c,v 1.7 2006-09-29 10:02:47 adam Exp $ Copyright (C) 1995-2006 Index Data ApS @@ -418,11 +418,8 @@ data1_termlist *xpath_termlist_by_tagpath(char *tagpath, data1_node *n) #if OPTIMIZE_MELM /* mark this and following ones with same regexp */ - for (xpe1 = xpe; xpe1; xpe1 = xpe1->next) - { - if (!strcmp(xpe1->regexp, xpe->regexp)) - xpe1->match_state = ok; - } + for (xpe1 = xpe; xpe1; xpe1 = xpe1->match_next) + xpe1->match_state = ok; #endif } assert (ok == 0 || ok == 1); -- 1.7.10.4