X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=data1%2Fd1_absyn.c;h=5adb241359536656b8fbdd59e5754c29293c0bac;hp=2165bc56da19ad3484f4189dfdeacd2302bbb75f;hb=396e9aaedfbed7534e329b42475cd7abe2fd3814;hpb=bc69879777443f6a65ede7dea9d46374de1c0bcd diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c index 2165bc5..5adb241 100644 --- a/data1/d1_absyn.c +++ b/data1/d1_absyn.c @@ -1,6 +1,6 @@ -/* $Id: d1_absyn.c,v 1.17 2005-01-04 16:28:34 quinn Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 - Index Data Aps +/* $Id: d1_absyn.c,v 1.29 2006-09-28 18:38:44 adam Exp $ + Copyright (C) 1995-2006 + Index Data ApS This file is part of the Zebra server. @@ -15,9 +15,9 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. 
+along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ #include @@ -28,6 +28,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include #include +#include #include #include @@ -165,7 +166,7 @@ data1_absyn *data1_absyn_search (data1_handle dh, const char *name) return p->absyn; p = p->next; } - return NULL; + return 0; } /* *ostrich* We need to destroy DFAs, in xp_element (xelm) definitions @@ -184,9 +185,10 @@ void data1_absyn_destroy (data1_handle dh) data1_xpelement *xpe = abs->xp_elements; while (xpe) { yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr); - if (xpe->dfa) { dfa_delete (&xpe->dfa); } + if (xpe->dfa) + dfa_delete (&xpe->dfa); xpe = xpe->next; - } + } } p = p->next; } @@ -205,7 +207,11 @@ void data1_absyn_trav (data1_handle dh, void *handle, } } -data1_absyn *data1_absyn_add (data1_handle dh, const char *name) +static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, + enum DATA1_XPATH_INDEXING en); + +static data1_absyn *data1_absyn_add(data1_handle dh, const char *name, + enum DATA1_XPATH_INDEXING en) { char fname[512]; NMEM mem = data1_nmem_get (dh); @@ -213,20 +219,21 @@ data1_absyn *data1_absyn_add (data1_handle dh, const char *name) data1_absyn_cache p = (data1_absyn_cache)nmem_malloc (mem, sizeof(*p)); data1_absyn_cache *pp = data1_absyn_cache_get (dh); - sprintf(fname, "%s.abs", name); - p->absyn = data1_read_absyn (dh, fname, 0); - p->name = nmem_strdup (mem, name); + sprintf(fname, "%.500s.abs", name); + p->absyn = data1_read_absyn(dh, fname, en); + p->name = nmem_strdup(mem, name); p->next = *pp; *pp = p; return p->absyn; } -data1_absyn *data1_get_absyn (data1_handle dh, const char *name) +data1_absyn *data1_get_absyn (data1_handle dh, const char *name, + enum DATA1_XPATH_INDEXING en) { data1_absyn *absyn; if (!(absyn = data1_absyn_search (dh, name))) - absyn = data1_absyn_add (dh, name); + absyn = 
data1_absyn_add (dh, name, en); return absyn; } @@ -240,7 +247,7 @@ data1_attset *data1_attset_search_name (data1_handle dh, const char *name) return p->attset; p = p->next; } - return NULL; + return 0; } data1_attset *data1_attset_search_id (data1_handle dh, int id) @@ -253,25 +260,15 @@ data1_attset *data1_attset_search_id (data1_handle dh, int id) return p->attset; p = p->next; } - return NULL; + return 0; } data1_attset *data1_attset_add (data1_handle dh, const char *name) { - char fname[512], aname[512]; NMEM mem = data1_nmem_get (dh); data1_attset *attset; - - strcpy (aname, name); - sprintf(fname, "%s.att", name); - attset = data1_read_attset (dh, fname); - if (!attset) - { - char *cp; - attset = data1_read_attset (dh, name); - if (attset && (cp = strrchr (aname, '.'))) - *cp = '\0'; - } + + attset = data1_read_attset (dh, name); if (!attset) yaz_log (YLOG_WARN|YLOG_ERRNO, "Couldn't load attribute set %s", name); else @@ -280,7 +277,7 @@ data1_attset *data1_attset_add (data1_handle dh, const char *name) nmem_malloc (mem, sizeof(*p)); data1_attset_cache *pp = data1_attset_cache_get (dh); - attset->name = p->name = nmem_strdup (mem, aname); + attset->name = p->name = nmem_strdup(mem, name); p->attset = attset; p->next = *pp; *pp = p; @@ -311,37 +308,6 @@ data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a, /* we have multiple versions of data1_getelementbyname */ #define DATA1_GETELEMENTBYTAGNAME_VERSION 1 -#if DATA1_GETELEMENTBYTAGNAME_VERSION==0 -/* straight linear search */ -data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, - data1_element *parent, - const char *tagname) -{ - data1_element *r; - - /* It's now possible to have a data1 tree with no abstract syntax */ - if ( !abs ) - return 0; - - if (!parent) - r = abs->main_elements; - else - r = parent->children; - - for (; r; r = r->next) - { - data1_name *n; - - for (n = r->tag->names; n; n = n->next) - if (!data1_matchstr(tagname, n->name)) - return r; - } - 
return 0; -} -#endif - -#if DATA1_GETELEMENTBYTAGNAME_VERSION==1 -/* using hash search */ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, data1_element *parent, const char *tagname) @@ -358,12 +324,15 @@ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, else r = parent->children; +#if DATA1_GETELEMENTBYTAGNAME_VERSION==1 + /* using hash search */ if (!r) return 0; ht = r->hash; if (!ht) { + /* build hash table (the first time) */ ht = r->hash = data1_hash_open(29, data1_nmem_get(dh)); for (; r; r = r->next) { @@ -374,8 +343,19 @@ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, } } return data1_hash_lookup(ht, tagname); -} +#else + /* using linear search */ + for (; r; r = r->next) + { + data1_name *n; + + for (n = r->tag->names; n; n = n->next) + if (!data1_matchstr(tagname, n->name)) + return r; + } + return 0; #endif +} data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn, const char *name) @@ -439,78 +419,81 @@ void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e) */ -const char * mk_xpath_regexp (data1_handle dh, char *expr) +static const char * mk_xpath_regexp (data1_handle dh, const char *expr) { - char *p = expr; - char *pp; - char *s; + const char *p = expr; int abs = 1; - int i; - int j; - int e=0; - int is_predicate = 0; - - static char *stack[32]; - static char res[1024]; - char *r = ""; + int e = 0; + char *stack[32]; + char *res_p, *res = 0; + size_t res_size = 1; - if (*p != '/') { return (""); } + if (*p != '/') + return (""); p++; - if (*p == '/') { abs=0; p++; } - - while (*p) { - i=0; - while (*p && !strchr("/",*p)) { - i++; p++; - } - stack[e] = (char *) nmem_malloc (data1_nmem_get (dh), i+1); + if (*p == '/') + { + abs =0; + p++; + } + while (*p) + { + int is_predicate = 0; + char *s; + int i, j; + for (i = 0; *p && !strchr("/",*p); i++, p++) + ; + res_size += (i+3); /* we'll add / between later .. 
*/ + stack[e] = (char *) nmem_malloc(data1_nmem_get(dh), i+1); s = stack[e]; - for (j=0; j< i; j++) { - pp = p-i+j; - if (*pp == '[') { - is_predicate=1; - } - else if (*pp == ']') { - is_predicate=0; - } - else { - if (!is_predicate) { - if (*pp == '*') - *s++ = '.'; - *s++ = *pp; + for (j = 0; j < i; j++) + { + const char *pp = p-i+j; + if (*pp == '[') + is_predicate=1; + else if (*pp == ']') + is_predicate=0; + else + { + if (!is_predicate) { + if (*pp == '*') + *s++ = '.'; + *s++ = *pp; + } } - } } *s = 0; e++; - if (*p) {p++;} + if (*p) + p++; + } + res_p = res = nmem_malloc(data1_nmem_get(dh), res_size + 10); + + if (stack[e-1][0] == '@') /* path/@attr spec (leaf is attribute) */ + strcpy(res_p, "/"); + else + strcpy(res_p, "[^@]*/"); /* path .. (index all cdata below it) */ + res_p = res_p + strlen(res_p); + while (--e >= 0) { + sprintf(res_p, "%s/", stack[e]); + res_p += strlen(stack[e]) + 1; } - e--; p = &res[0]; i=0; - sprintf (p, "^"); p++; - while (e >= 0) { - /* !!! res size is not checked !!! */ - sprintf (p, "%s/",stack[e]); - p += strlen(stack[e]) + 1; - e--; + if (!abs) + { + sprintf(res_p, ".*"); + res_p += 2; } - if (!abs) { sprintf (p, ".*"); p+=2; } - sprintf (p, "$"); p++; - r = nmem_strdup (data1_nmem_get (dh), res); - yaz_log(YLOG_DEBUG,"Got regexp: %s",r); - return (r); + sprintf (res_p, "$"); + res_p++; + yaz_log(YLOG_DEBUG, "Got regexp: %s", res); + return res; } -/* *ostrich* - - added arg xpelement... when called from xelm context, it's 1, saying - that ! means xpath, not element name as attribute name... 
- - pop, 2002-12-13 - */ -static int parse_termlists (data1_handle dh, data1_termlist ***tpp, - char *cp, const char *file, int lineno, - const char *element_name, data1_absyn *res, - int xpelement) +static int parse_termlists(data1_handle dh, data1_termlist ***tpp, + char *cp, const char *file, int lineno, + const char *element_name, data1_absyn *res, + int xpelement, + data1_attset *attset) { data1_termlist **tp = *tpp; while(1) @@ -561,22 +544,24 @@ static int parse_termlists (data1_handle dh, data1_termlist ***tpp, nmem_malloc(data1_nmem_get(dh), sizeof(**tp)); (*tp)->next = 0; - if (!xpelement) { - if (*attname == '!') + if (*attname == '!') + { + if (!xpelement && element_name) strcpy(attname, element_name); - } - if (!((*tp)->att = data1_getattbyname(dh, res->attset, - attname))) { - if ((!xpelement) || (*attname != '!')) { - yaz_log(YLOG_WARN, - "%s:%d: Couldn't find att '%s' in attset", - file, lineno, attname); - return -1; - } else { - (*tp)->att = 0; + else if (xpelement) + strcpy(attname, ZEBRA_XPATH_CDATA); + } + if (attset) + { + if (!data1_getattbyname(dh, attset, attname)) + { + yaz_log(YLOG_WARN, "Index '%s' not found in attset(s)", + attname); } - } - + } + + (*tp)->index_name = nmem_strdup(data1_nmem_get(dh), attname); + assert (*(*tp)->index_name != '!'); if (r == 2 && (source = strchr(structure, ':'))) *source++ = '\0'; /* cut off structure .. 
*/ else @@ -605,7 +590,7 @@ static int melm2xpath(char *melm, char *buf) char *field = melm; char *subfield; char *fieldtype; - if ((dollar = index(melm, '$'))) { + if ((dollar = strchr(melm, '$'))) { *dollar = '\0'; subfield = ++dollar; } else @@ -685,11 +670,13 @@ YAZ_EXPORT data1_element *data1_absyn_getelements(data1_handle dh, return absyn->main_elements; } -data1_absyn *data1_read_absyn (data1_handle dh, const char *file, - int file_must_exist) +static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, + enum DATA1_XPATH_INDEXING default_xpath) { data1_sub_elements *cur_elements = NULL; data1_xpelement *cur_xpelement = NULL; + data1_attset *attset_list = data1_empty_attset(dh); + data1_attset_child **attset_childp = &attset_list->children; data1_absyn *res = 0; FILE *f; @@ -698,7 +685,6 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, data1_maptab **maptabp; data1_marctab **marcp; data1_termlist *all = 0; - data1_attset_child **attset_childp; data1_tagset **tagset_childp; struct data1_systag **systagsp; int level = 0; @@ -706,26 +692,19 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, int argc; char *argv[50], line[512]; - if (!(f = data1_path_fopen(dh, file, "r"))) - { - yaz_log(YLOG_WARN|YLOG_ERRNO, "Couldn't open %s", file); - if (file_must_exist) - return 0; - } + f = data1_path_fopen(dh, file, "r"); res = (data1_absyn *) nmem_malloc(data1_nmem_get(dh), sizeof(*res)); res->name = 0; res->reference = VAL_NONE; res->tagset = 0; res->encoding = 0; - res->enable_xpath_indexing = (f ? 0 : 1); + res->xpath_indexing = + (f ? 
DATA1_XPATH_INDEXING_DISABLE : default_xpath); res->systags = 0; systagsp = &res->systags; tagset_childp = &res->tagset; - res->attset = data1_empty_attset (dh); - attset_childp = &res->attset->children; - res->varset = 0; res->esetnames = 0; esetpp = &res->esetnames; @@ -736,7 +715,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, res->sub_elements = NULL; res->main_elements = NULL; res->xp_elements = NULL; - + while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50))) { char *cmd = *argv; @@ -845,9 +824,8 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, p = termlists; if (*p != '-') { - assert (res->attset); - - if (parse_termlists (dh, &tp, p, file, lineno, name, res, 0)) + if (parse_termlists (dh, &tp, p, file, lineno, name, res, 0, + attset_list)) { fclose (f); return 0; @@ -875,9 +853,10 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, int i; char *p, *xpath_expr, *termlists; const char *regexp; - struct DFA *dfa = dfa = dfa_init(); + struct DFA *dfa = 0; data1_termlist **tp; char melm_xpath[128]; + data1_xpelement *xp_old = 0; if (argc < 3) { @@ -894,13 +873,24 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, } termlists = argv[2]; regexp = mk_xpath_regexp(dh, xpath_expr); - i = dfa_parse (dfa, &regexp); - if (i || *regexp) { - yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno); - dfa_delete (&dfa); - continue; - } - +#if OPTIMIZE_MELM + for (xp_old = res->xp_elements; xp_old; xp_old = xp_old->next) + if (!strcmp(xp_old->regexp, regexp)) + break; +#endif + if (!xp_old) + { + const char *regexp_ptr = regexp; + + dfa = dfa_init(); + i = dfa_parse (dfa, &regexp_ptr); + if (i || *regexp_ptr) { + yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno); + dfa_delete (&dfa); + continue; + } + } if (!cur_xpelement) { cur_xpelement = (data1_xpelement *) @@ -911,12 +901,16 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, 
nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement)); cur_xpelement = cur_xpelement->next; } +#if OPTIMIZE_MELM + cur_xpelement->regexp = regexp; +#endif cur_xpelement->next = NULL; cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh), xpath_expr); - dfa_mkstate (dfa); - cur_xpelement->dfa = dfa; + if (dfa) + dfa_mkstate (dfa); + cur_xpelement->dfa = dfa; #ifdef ENHANCED_XELM cur_xpelement->xpath_len = @@ -935,10 +929,8 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, p = termlists; if (*p != '-') { - assert (res->attset); - if (parse_termlists (dh, &tp, p, file, lineno, - xpath_expr, res, 1)) + xpath_expr, res, 1, attset_list)) { fclose (f); return 0; @@ -977,9 +969,9 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, continue; } if (!strcmp(argv[1], "enable")) - res->enable_xpath_indexing = 1; + res->xpath_indexing = DATA1_XPATH_INDEXING_ENABLE; else if (!strcmp (argv[1], "disable")) - res->enable_xpath_indexing = 0; + res->xpath_indexing = DATA1_XPATH_INDEXING_DISABLE; else { yaz_log(YLOG_WARN, "%s:%d: Expecting disable/enable " @@ -1001,7 +993,8 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, file, lineno); continue; } - if (parse_termlists (dh, &tp, argv[1], file, lineno, 0, res, 0)) + if (parse_termlists (dh, &tp, argv[1], file, lineno, 0, res, 0, + attset_list)) { fclose (f); return 0; @@ -1037,27 +1030,27 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, } else if (!strcmp(cmd, "attset")) { - char *name; - data1_attset *attset; - - if (argc != 2) - { - yaz_log(YLOG_WARN, "%s:%d: Bad # of args to attset", - file, lineno); - continue; - } - name = argv[1]; - if (!(attset = data1_get_attset (dh, name))) - { - yaz_log(YLOG_WARN, "%s:%d: Couldn't find attset %s", - file, lineno, name); - continue; - } - *attset_childp = (data1_attset_child *) - nmem_malloc (data1_nmem_get(dh), sizeof(**attset_childp)); - (*attset_childp)->child = attset; - (*attset_childp)->next = 0; - 
attset_childp = &(*attset_childp)->next; + char *name; + data1_attset *attset; + + if (argc != 2) + { + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to attset", + file, lineno); + continue; + } + name = argv[1]; + if (!(attset = data1_get_attset (dh, name))) + { + yaz_log(YLOG_WARN, "%s:%d: Couldn't find attset %s", + file, lineno, name); + continue; + } + *attset_childp = (data1_attset_child *) + nmem_malloc (data1_nmem_get(dh), sizeof(**attset_childp)); + (*attset_childp)->child = attset; + (*attset_childp)->next = 0; + attset_childp = &(*attset_childp)->next; } else if (!strcmp(cmd, "tagset")) { @@ -1206,6 +1199,13 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, fix_element_ref (dh, res, cur_elements->elements); } *systagsp = 0; - yaz_log(YLOG_DEBUG, "%s: data1_read_absyn end", file); return res; } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +