X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=data1%2Fd1_absyn.c;h=45620d5791ecdb03d61ef00d3f9043ed518b23ef;hp=e2434c4e43be426c0a1e361eccacd59f2f03fd76;hb=4eb3b54bb2ca9af74f39f000d3d40dba99ded887;hpb=00d69171180b9edc123986794b572be57caa5bb2 diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c index e2434c4..45620d5 100644 --- a/data1/d1_absyn.c +++ b/data1/d1_absyn.c @@ -1,5 +1,5 @@ -/* $Id: d1_absyn.c,v 1.5 2002-12-16 22:59:34 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 +/* $Id: d1_absyn.c,v 1.16 2004-12-13 20:51:27 adam Exp $ + Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 Index Data Aps This file is part of the Zebra server. @@ -25,12 +25,104 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#include #include -#include +#include +#include +#include +#include #define D1_MAX_NESTING 128 +struct data1_hash_table { + NMEM nmem; + int size; + struct data1_hash_entry **ar; +}; + +struct data1_hash_entry { + void *clientData; + char *str; + struct data1_hash_entry *next; +}; + +unsigned data1_hash_calc(struct data1_hash_table *ht, const char *str) +{ + unsigned v = 0; + assert(str); + while (*str) + { + if (*str >= 'a' && *str <= 'z') + v = v*65509 + *str -'a'+10; + else if (*str >= 'A' && *str <= 'Z') + v = v*65509 + *str -'A'+10; + else if (*str >= '0' && *str <= '9') + v = v*65509 + *str -'0'; + str++; + } + return v % ht->size; +} + +struct data1_hash_table *data1_hash_open(int size, NMEM nmem) +{ + int i; + struct data1_hash_table *ht = nmem_malloc(nmem, sizeof(*ht)); + ht->nmem = nmem; + ht->size = size; + if (ht->size <= 0) + ht->size = 29; + ht->ar = nmem_malloc(nmem, sizeof(*ht->ar) * ht->size); + for (i = 0; isize; i++) + ht->ar[i] = 0; + return ht; +} + +void data1_hash_insert(struct data1_hash_table *ht, const char *str, + void *clientData, int copy) +{ + char *dstr = copy ? nmem_strdup(ht->nmem, str) : (char*) str; + if (strchr(str, '?') || strchr(str, '.')) + { + int i; + for (i = 0; isize; i++) + { + struct data1_hash_entry **he = &ht->ar[i]; + for (; *he && strcmp(str, (*he)->str); he = &(*he)->next) + ; + if (!*he) + { + *he = nmem_malloc(ht->nmem, sizeof(**he)); + (*he)->str = dstr; + (*he)->next = 0; + } + (*he)->clientData = clientData; + } + } + else + { + struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)]; + for (; *he && strcmp(str, (*he)->str); he = &(*he)->next) + ; + if (!*he) + { + *he = nmem_malloc(ht->nmem, sizeof(**he)); + (*he)->str = dstr; + (*he)->next = 0; + } + (*he)->clientData = clientData; + } +} + +void *data1_hash_lookup(struct data1_hash_table *ht, const char *str) +{ + struct data1_hash_entry **he = &ht->ar[data1_hash_calc(ht, str)]; + + for (; *he && yaz_matchstr(str, (*he)->str); he = &(*he)->next) + ; + if (*he) + return (*he)->clientData; + return 0; +} + struct data1_systag { char *name; char *value; @@ -51,13 +143,25 @@ struct data1_attset_cache_info data1_attset_cache next; }; +data1_element *data1_mk_element(data1_handle dh) +{ + data1_element *e = nmem_malloc(data1_nmem_get(dh), sizeof(*e)); + e->name = 0; + e->tag = 0; + e->termlists = 0; + e->next = e->children = 0; + e->sub_name = 0; + e->hash = 0; + return e; +} + data1_absyn *data1_absyn_search (data1_handle dh, const char *name) { data1_absyn_cache p = *data1_absyn_cache_get (dh); while (p) { - if (!strcmp (name, p->name)) + if (!yaz_matchstr (name, p->name)) return p->absyn; p = p->next; } @@ -75,12 +179,15 @@ void data1_absyn_destroy (data1_handle dh) while (p) { data1_absyn *abs = p->absyn; - data1_xpelement *xpe = abs->xp_elements; - while (xpe) { - logf (LOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr); - if (xpe->dfa) { dfa_delete (&xpe->dfa); } - xpe = xpe->next; - } + if (abs) + { + data1_xpelement *xpe = abs->xp_elements; + while (xpe) { + yaz_log (YLOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr); + if (xpe->dfa) { dfa_delete (&xpe->dfa); } + xpe = xpe->next; + } + } p = p->next; } } @@ -129,7 +236,7 @@ data1_attset *data1_attset_search_name (data1_handle dh, const char *name) while (p) { - if (!strcmp (name, p->name)) + if (!yaz_matchstr (name, p->name)) return p->attset; p = p->next; } @@ -166,7 +273,7 @@ data1_attset *data1_attset_add (data1_handle dh, const char *name) *cp = '\0'; } if (!attset) - yaz_log (LOG_WARN|LOG_ERRNO, "Couldn't load attribute set %s", name); + yaz_log (YLOG_WARN|YLOG_ERRNO, "Couldn't load attribute set %s", name); else { data1_attset_cache p = (data1_attset_cache) @@ -201,6 +308,11 @@ data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a, return 0; } +/* we have multiple versions of data1_getelementbyname */ +#define DATA1_GETELEMENTBYTAGNAME_VERSION 1 + +#if DATA1_GETELEMENTBYTAGNAME_VERSION==0 +/* straight linear search */ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, data1_element *parent, const char *tagname) @@ -226,6 +338,44 @@ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, } return 0; } +#endif + +#if DATA1_GETELEMENTBYTAGNAME_VERSION==1 +/* using hash search */ +data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, + data1_element *parent, + const char *tagname) +{ + data1_element *r; + struct data1_hash_table *ht; + + /* It's now possible to have a data1 tree with no abstract syntax */ + if ( !abs ) + return 0; + + if (!parent) + r = abs->main_elements; + else + r = parent->children; + + if (!r) + return 0; + + ht = r->hash; + if (!ht) + { + ht = r->hash = data1_hash_open(29, data1_nmem_get(dh)); + for (; r; r = r->next) + { + data1_name *n; + + for (n = r->tag->names; n; n = n->next) + data1_hash_insert(ht, n->name, r, 0); + } + } + return data1_hash_lookup(ht, tagname); +} +#endif data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn, const char *name) @@ -263,7 +413,7 @@ void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e) if (sub_e) e->children = sub_e->elements; else - yaz_log (LOG_WARN, "Unresolved reference to sub-elements %s", + yaz_log (YLOG_WARN, "Unresolved reference to sub-elements %s", e->sub_name); } } @@ -282,14 +432,23 @@ void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e) / -> none pop, 2002-12-13 + + Now [] predicates are supported + + pop, 2003-01-17 + */ const char * mk_xpath_regexp (data1_handle dh, char *expr) { char *p = expr; + char *pp; + char *s; int abs = 1; int i; + int j; int e=0; + int is_predicate = 0; static char *stack[32]; static char res[1024]; @@ -301,10 +460,28 @@ const char * mk_xpath_regexp (data1_handle dh, char *expr) while (*p) { i=0; - while (*p && !strchr("/",*p)) { i++; p++; } + while (*p && !strchr("/",*p)) { + i++; p++; + } stack[e] = (char *) nmem_malloc (data1_nmem_get (dh), i+1); - memcpy (stack[e], p - i, i); - stack[e][i] = 0; + s = stack[e]; + for (j=0; j< i; j++) { + pp = p-i+j; + if (*pp == '[') { + is_predicate=1; + } + else if (*pp == ']') { + is_predicate=0; + } + else { + if (!is_predicate) { + if (*pp == '*') + *s++ = '.'; + *s++ = *pp; + } + } + } + *s = 0; e++; if (*p) {p++;} } @@ -319,6 +496,7 @@ const char * mk_xpath_regexp (data1_handle dh, char *expr) if (!abs) { sprintf (p, ".*"); p+=2; } sprintf (p, "$"); p++; r = nmem_strdup (data1_nmem_get (dh), res); + yaz_log(YLOG_DEBUG,"Got regexp: %s",r); return (r); } @@ -330,28 +508,57 @@ const char * mk_xpath_regexp (data1_handle dh, char *expr) pop, 2002-12-13 */ static int parse_termlists (data1_handle dh, data1_termlist ***tpp, - char *p, const char *file, int lineno, + char *cp, const char *file, int lineno, const char *element_name, data1_absyn *res, int xpelement) { data1_termlist **tp = *tpp; - do + while(1) { char attname[512], structure[512]; char *source; - int r; - - if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname, - structure))) + int r, i; + int level = 0; + structure[0] = '\0'; + for (i = 0; cp[i] && inext = 0; if (!xpelement) { @@ -361,7 +568,7 @@ static int parse_termlists (data1_handle dh, data1_termlist ***tpp, if (!((*tp)->att = data1_getattbyname(dh, res->attset, attname))) { if ((!xpelement) || (*attname != '!')) { - yaz_log(LOG_WARN, + yaz_log(YLOG_WARN, "%s:%d: Couldn't find att '%s' in attset", file, lineno, attname); return -1; @@ -384,7 +591,7 @@ static int parse_termlists (data1_handle dh, data1_termlist ***tpp, nmem_strdup (data1_nmem_get (dh), structure); tp = &(*tp)->next; } - while ((p = strchr(p, ',')) && *(++p)); + *tpp = tp; return 0; } @@ -399,6 +606,58 @@ const char *data1_systag_lookup(data1_absyn *absyn, const char *tag, return default_value; } +#define l_isspace(c) ((c) == '\t' || (c) == ' ' || (c) == '\n' || (c) == '\r') + +int read_absyn_line(FILE *f, int *lineno, char *line, int len, + char *argv[], int num) +{ + char *p; + int argc; + int quoted = 0; + + while ((p = fgets(line, len, f))) + { + (*lineno)++; + while (*p && l_isspace(*p)) + p++; + if (*p && *p != '#') + break; + } + if (!p) + return 0; + + for (argc = 0; *p ; argc++) + { + if (*p == '#') /* trailing comment */ + break; + argv[argc] = p; + while (*p && !(l_isspace(*p) && !quoted)) { + if (*p =='"') quoted = 1 - quoted; + if (*p =='[') quoted = 1; + if (*p ==']') quoted = 0; + p++; + } + if (*p) + { + *(p++) = '\0'; + while (*p && l_isspace(*p)) + p++; + } + } + return argc; +} + +data1_marctab *data1_absyn_getmarctab(data1_handle dh, data1_absyn *absyn) +{ + return absyn->marc; +} + +YAZ_EXPORT data1_element *data1_absyn_getelements(data1_handle dh, + data1_absyn *absyn) +{ + return absyn->main_elements; +} + data1_absyn *data1_read_absyn (data1_handle dh, const char *file, int file_must_exist) { @@ -422,7 +681,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (!(f = data1_path_fopen(dh, file, "r"))) { - yaz_log(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file); + yaz_log(YLOG_WARN|YLOG_ERRNO, "Couldn't open %s", file); if (file_must_exist) return 0; } @@ -451,7 +710,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, res->main_elements = NULL; res->xp_elements = NULL; - while (f && (argc = readconf_line(f, &lineno, line, 512, argv, 50))) + while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50))) { char *cmd = *argv; if (!strcmp(cmd, "elm") || !strcmp(cmd, "element")) @@ -464,7 +723,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc < 4) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to elm", file, lineno); + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to elm", file, lineno); continue; } path = argv[1]; @@ -495,17 +754,12 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, } if (i > level+1) { - yaz_log(LOG_WARN, "%s:%d: Bad level increase", file, lineno); + yaz_log(YLOG_WARN, "%s:%d: Bad level increase", file, lineno); fclose(f); return 0; } level = i; - new_element = *ppl[level-1] = (data1_element *) - nmem_malloc(data1_nmem_get(dh), sizeof(*new_element)); - new_element->next = new_element->children = 0; - new_element->tag = 0; - new_element->termlists = 0; - new_element->sub_name = 0; + new_element = *ppl[level-1] = data1_mk_element(dh); tp = &new_element->termlists; ppl[level-1] = &new_element->next; @@ -523,14 +777,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, { if (!res->tagset) { - yaz_log(LOG_WARN, "%s:%d: No tagset loaded", file, lineno); + yaz_log(YLOG_WARN, "%s:%d: No tagset loaded", file, lineno); fclose(f); return 0; } if (!(new_element->tag = data1_gettagbynum (dh, res->tagset, type, value))) { - yaz_log(LOG_WARN, "%s:%d: Couldn't find tag %s in tagset", + yaz_log(YLOG_WARN, "%s:%d: Couldn't find tag %s in tagset", file, lineno, p); fclose(f); return 0; @@ -556,7 +810,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, } else { - yaz_log(LOG_WARN, "%s:%d: Bad element", file, lineno); + yaz_log(YLOG_WARN, "%s:%d: Bad element", file, lineno); fclose(f); return 0; } @@ -582,6 +836,11 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, maybe we should use a simple sscanf instead of dfa? pop, 2002-12-13 + + Now [] predicates are supported. regexps and xpath structure is + a bit redundant, however it's comfortable later... + + pop, 2003-01-17 */ else if (!strcmp(cmd, "xelm")) { @@ -589,13 +848,12 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, int i; char *p, *xpath_expr, *termlists; const char *regexp; - int type, value; struct DFA *dfa = dfa = dfa_init(); data1_termlist **tp; if (argc < 3) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to xelm", file, lineno); + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to xelm", file, lineno); continue; } xpath_expr = argv[1]; @@ -603,7 +861,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, regexp = mk_xpath_regexp(dh, xpath_expr); i = dfa_parse (dfa, ®exp); if (i || *regexp) { - yaz_log(LOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno); + yaz_log(YLOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno); dfa_delete (&dfa); continue; } @@ -624,7 +882,17 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, dfa_mkstate (dfa); cur_xpelement->dfa = dfa; + +#ifdef ENHANCED_XELM + cur_xpelement->xpath_len = + zebra_parse_xpath_str(xpath_expr, + cur_xpelement->xpath, XPATH_STEP_COUNT, + data1_nmem_get(dh)); + /* + dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len); + */ +#endif cur_xpelement->termlists = 0; tp = &cur_xpelement->termlists; @@ -649,7 +917,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc < 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to section", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to section", file, lineno); continue; } @@ -669,7 +937,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, { if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to 'xpath' directive", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'xpath' directive", file, lineno); continue; } @@ -679,7 +947,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, res->enable_xpath_indexing = 0; else { - yaz_log(LOG_WARN, "%s:%d: Expecting disable/enable " + yaz_log(YLOG_WARN, "%s:%d: Expecting disable/enable " "after 'xpath' directive", file, lineno); } } @@ -688,13 +956,13 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, data1_termlist **tp = &all; if (all) { - yaz_log(LOG_WARN, "%s:%d: Too many 'all' directives - ignored", + yaz_log(YLOG_WARN, "%s:%d: Too many 'all' directives - ignored", file, lineno); continue; } if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to 'all' directive", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to 'all' directive", file, lineno); continue; } @@ -708,7 +976,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, { if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to name directive", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to name directive", file, lineno); continue; } @@ -720,14 +988,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to reference", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to reference", file, lineno); continue; } name = argv[1]; if ((res->reference = oid_getvalbyname(name)) == VAL_NONE) { - yaz_log(LOG_WARN, "%s:%d: Unknown tagset ref '%s'", + yaz_log(YLOG_WARN, "%s:%d: Unknown tagset ref '%s'", file, lineno, name); continue; } @@ -739,14 +1007,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to attset", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to attset", file, lineno); continue; } name = argv[1]; if (!(attset = data1_get_attset (dh, name))) { - yaz_log(LOG_WARN, "%s:%d: Couldn't find attset %s", + yaz_log(YLOG_WARN, "%s:%d: Couldn't find attset %s", file, lineno, name); continue; } @@ -762,7 +1030,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, int type = 0; if (argc < 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args to tagset", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args to tagset", file, lineno); continue; } @@ -772,7 +1040,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, *tagset_childp = data1_read_tagset (dh, name, type); if (!(*tagset_childp)) { - yaz_log(LOG_WARN, "%s:%d: Couldn't load tagset %s", + yaz_log(YLOG_WARN, "%s:%d: Couldn't load tagset %s", file, lineno, name); continue; } @@ -784,14 +1052,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args in varset", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args in varset", file, lineno); continue; } name = argv[1]; if (!(res->varset = data1_read_varset (dh, name))) { - yaz_log(LOG_WARN, "%s:%d: Couldn't load Varset %s", + yaz_log(YLOG_WARN, "%s:%d: Couldn't load Varset %s", file, lineno, name); continue; } @@ -802,7 +1070,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc != 3) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args in esetname", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args in esetname", file, lineno); continue; } @@ -817,7 +1085,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, (*esetpp)->spec = 0; else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname))) { - yaz_log(LOG_WARN, "%s:%d: Espec-1 read failed for %s", + yaz_log(YLOG_WARN, "%s:%d: Espec-1 read failed for %s", file, lineno, fname); continue; } @@ -829,14 +1097,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # of args for maptab", + yaz_log(YLOG_WARN, "%s:%d: Bad # of args for maptab", file, lineno); continue; } name = argv[1]; if (!(*maptabp = data1_read_maptab (dh, name))) { - yaz_log(LOG_WARN, "%s:%d: Couldn't load maptab %s", + yaz_log(YLOG_WARN, "%s:%d: Couldn't load maptab %s", file, lineno, name); continue; } @@ -848,14 +1116,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # or args for marc", + yaz_log(YLOG_WARN, "%s:%d: Bad # or args for marc", file, lineno); continue; } name = argv[1]; if (!(*marcp = data1_read_marctab (dh, name))) { - yaz_log(LOG_WARN, "%s:%d: Couldn't read marctab %s", + yaz_log(YLOG_WARN, "%s:%d: Couldn't read marctab %s", file, lineno, name); continue; } @@ -865,7 +1133,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, { if (argc != 2) { - yaz_log(LOG_WARN, "%s:%d: Bad # or args for encoding", + yaz_log(YLOG_WARN, "%s:%d: Bad # or args for encoding", file, lineno); continue; } @@ -873,10 +1141,9 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, } else if (!strcmp(cmd, "systag")) { - struct data1_systag *st; if (argc != 3) { - yaz_log(LOG_WARN, "%s:%d: Bad # or args for systag", + yaz_log(YLOG_WARN, "%s:%d: Bad # or args for systag", file, lineno); continue; } @@ -888,7 +1155,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, } else { - yaz_log(LOG_WARN, "%s:%d: Unknown directive '%s'", file, + yaz_log(YLOG_WARN, "%s:%d: Unknown directive '%s'", file, lineno, cmd); continue; } @@ -904,6 +1171,6 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, fix_element_ref (dh, res, cur_elements->elements); } *systagsp = 0; - yaz_log (LOG_DEBUG, "%s: data1_read_absyn end", file); + yaz_log(YLOG_DEBUG, "%s: data1_read_absyn end", file); return res; }