X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=data1%2Fd1_absyn.c;h=d737d35df8c93ba4e25c3ac4774f5ef12ad33b1a;hp=f660485df62332a62ad88091ae72b9a3e0483565;hb=d513d15e315601b730b0b3a6126c3163d00223fb;hpb=b88909df16157ed1e7859bc3fad6b01520d4865e diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c index f660485..d737d35 100644 --- a/data1/d1_absyn.c +++ b/data1/d1_absyn.c @@ -1,8 +1,5 @@ -/* $Id: d1_absyn.c,v 1.27 2006-06-13 12:02:02 adam Exp $ - Copyright (C) 1995-2006 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2011 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -15,18 +12,21 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. 
+along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#if HAVE_CONFIG_H +#include +#endif #include #include #include #include #include -#include +#include #include #include #include @@ -185,9 +185,10 @@ void data1_absyn_destroy (data1_handle dh) data1_xpelement *xpe = abs->xp_elements; while (xpe) { yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr); - if (xpe->dfa) { dfa_delete (&xpe->dfa); } + if (xpe->dfa) + dfa_delete (&xpe->dfa); xpe = xpe->next; - } + } } p = p->next; } @@ -249,13 +250,13 @@ data1_attset *data1_attset_search_name (data1_handle dh, const char *name) return 0; } -data1_attset *data1_attset_search_id (data1_handle dh, int id) +data1_attset *data1_attset_search_id(data1_handle dh, const Odr_oid *oid) { data1_attset_cache p = *data1_attset_cache_get (dh); while (p) { - if (id == p->attset->reference) + if (p->attset->oid && !oid_oidcmp(oid, p->attset->oid)) return p->attset; p = p->next; } @@ -307,37 +308,6 @@ data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a, /* we have multiple versions of data1_getelementbyname */ #define DATA1_GETELEMENTBYTAGNAME_VERSION 1 -#if DATA1_GETELEMENTBYTAGNAME_VERSION==0 -/* straight linear search */ -data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, - data1_element *parent, - const char *tagname) -{ - data1_element *r; - - /* It's now possible to have a data1 tree with no abstract syntax */ - if ( !abs ) - return 0; - - if (!parent) - r = abs->main_elements; - else - r = parent->children; - - for (; r; r = r->next) - { - data1_name *n; - - for (n = r->tag->names; n; n = n->next) - if (!data1_matchstr(tagname, n->name)) - return r; - } - return 0; -} -#endif - -#if DATA1_GETELEMENTBYTAGNAME_VERSION==1 -/* using hash search */ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, data1_element *parent, const char *tagname) @@ -354,12 +324,15 @@ 
data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, else r = parent->children; +#if DATA1_GETELEMENTBYTAGNAME_VERSION==1 + /* using hash search */ if (!r) return 0; ht = r->hash; if (!ht) { + /* build hash table (the first time) */ ht = r->hash = data1_hash_open(29, data1_nmem_get(dh)); for (; r; r = r->next) { @@ -370,8 +343,19 @@ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, } } return data1_hash_lookup(ht, tagname); -} +#else + /* using linear search */ + for (; r; r = r->next) + { + data1_name *n; + + for (n = r->tag->names; n; n = n->next) + if (!data1_matchstr(tagname, n->name)) + return r; + } + return 0; #endif +} data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn, const char *name) @@ -675,22 +659,26 @@ int read_absyn_line(FILE *f, int *lineno, char *line, int len, return argc; } -data1_marctab *data1_absyn_getmarctab(data1_handle dh, data1_absyn *absyn) +data1_marctab *data1_absyn_getmarctab(data1_handle dh, data1_node *root) { - return absyn->marc; + if (root->u.root.absyn) + return root->u.root.absyn->marc; + return 0; } -YAZ_EXPORT data1_element *data1_absyn_getelements(data1_handle dh, - data1_absyn *absyn) +data1_element *data1_absyn_getelements(data1_handle dh, + data1_node *root) { - return absyn->main_elements; + if (root->u.root.absyn) + return root->u.root.absyn->main_elements; + return 0; } static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, enum DATA1_XPATH_INDEXING default_xpath) { data1_sub_elements *cur_elements = NULL; - data1_xpelement *cur_xpelement = NULL; + data1_xpelement **cur_xpelement = NULL; data1_attset *attset_list = data1_empty_attset(dh); data1_attset_child **attset_childp = &attset_list->children; @@ -712,7 +700,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, res = (data1_absyn *) nmem_malloc(data1_nmem_get(dh), sizeof(*res)); res->name = 0; - res->reference = VAL_NONE; + res->oid = 0; res->tagset = 
0; res->encoding = 0; res->xpath_indexing = @@ -731,6 +719,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, res->sub_elements = NULL; res->main_elements = NULL; res->xp_elements = NULL; + cur_xpelement = &res->xp_elements; while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50))) { @@ -869,13 +858,16 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, int i; char *p, *xpath_expr, *termlists; const char *regexp; - struct DFA *dfa = dfa = dfa_init(); + struct DFA *dfa = 0; data1_termlist **tp; char melm_xpath[128]; + data1_xpelement *xp_ele = 0; + data1_xpelement *last_match = 0; - if (argc < 3) + if (argc != 3) { - yaz_log(YLOG_WARN, "%s:%d: Bad # of args to xelm", file, lineno); + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to %s", + file, lineno, cmd); continue; } @@ -888,42 +880,51 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, } termlists = argv[2]; regexp = mk_xpath_regexp(dh, xpath_expr); - i = dfa_parse (dfa, &regexp); - if (i || *regexp) { - yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno); - dfa_delete (&dfa); - continue; - } - - if (!cur_xpelement) - { - cur_xpelement = (data1_xpelement *) - nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement)); - res->xp_elements = cur_xpelement; - } else { - cur_xpelement->next = (data1_xpelement *) - nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement)); - cur_xpelement = cur_xpelement->next; - } - cur_xpelement->next = NULL; - cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh), - xpath_expr); - - dfa_mkstate (dfa); - cur_xpelement->dfa = dfa; -#ifdef ENHANCED_XELM - cur_xpelement->xpath_len = - zebra_parse_xpath_str(xpath_expr, - cur_xpelement->xpath, XPATH_STEP_COUNT, - data1_nmem_get(dh)); +#if OPTIMIZE_MELM + /* get last of existing regulars with same regexp */ + for (xp_ele = res->xp_elements; xp_ele; xp_ele = xp_ele->next) + if (!strcmp(xp_ele->regexp, regexp)) + last_match = xp_ele; +#endif + if
(!last_match) + { + /* new regular expression . Parse + generate */ + const char *regexp_ptr = regexp; + + dfa = dfa_init(); + i = dfa_parse (dfa, &regexp_ptr); + if (i || *regexp_ptr) { + yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno); + dfa_delete (&dfa); + continue; + } + } + *cur_xpelement = (data1_xpelement *) + nmem_malloc(data1_nmem_get(dh), sizeof(**cur_xpelement)); + (*cur_xpelement)->next = 0; + (*cur_xpelement)->match_next = 0; + if (last_match) + last_match->match_next = *cur_xpelement; +#if OPTIMIZE_MELM + (*cur_xpelement)->regexp = regexp; +#endif + (*cur_xpelement)->xpath_expr = nmem_strdup(data1_nmem_get (dh), + xpath_expr); + + if (dfa) + dfa_mkstate (dfa); + (*cur_xpelement)->dfa = dfa; - /* - dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len); - */ +#ifdef ENHANCED_XELM + (*cur_xpelement)->xpath_len = + zebra_parse_xpath_str( + xpath_expr, + (*cur_xpelement)->xpath, XPATH_STEP_COUNT, + data1_nmem_get(dh)); #endif - cur_xpelement->termlists = 0; - tp = &cur_xpelement->termlists; + (*cur_xpelement)->termlists = 0; + tp = &(*cur_xpelement)->termlists; /* parse termList definitions */ p = termlists; @@ -937,6 +938,7 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, } *tp = all; /* append any ALL entries to the list */ } + cur_xpelement = &(*cur_xpelement)->next; } else if (!strcmp(cmd, "section")) { @@ -1021,7 +1023,10 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, continue; } name = argv[1]; - if ((res->reference = oid_getvalbyname(name)) == VAL_NONE) + res->oid = yaz_string_to_oid_nmem(yaz_oid_std(), + CLASS_SCHEMA, name, + data1_nmem_get(dh)); + if (!res->oid) { yaz_log(YLOG_WARN, "%s:%d: Unknown tagset ref '%s'", file, lineno, name); @@ -1201,9 +1206,11 @@ static data1_absyn *data1_read_absyn(data1_handle dh, const char *file, *systagsp = 0; return res; } + /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim:
shiftwidth=4 tabstop=8 expandtab