X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=data1%2Fd1_absyn.c;h=dec05b801e8b479743620e3c74f9d036a1197d4d;hb=f280b4adca76b5b447dae9d6a01ff1f9067c08b9;hp=1cee7f622b46d2fcd3b7c969ab9b58d6b8b6ea38;hpb=02266b68e2e2c86d8a8467ee56721926b365d2d0;p=idzebra-moved-to-github.git diff --git a/data1/d1_absyn.c b/data1/d1_absyn.c index 1cee7f6..dec05b8 100644 --- a/data1/d1_absyn.c +++ b/data1/d1_absyn.c @@ -1,4 +1,4 @@ -/* $Id: d1_absyn.c,v 1.2 2002-10-22 13:19:50 adam Exp $ +/* $Id: d1_absyn.c,v 1.9 2003-06-12 18:20:24 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -28,9 +28,16 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include #include +#include #define D1_MAX_NESTING 128 +struct data1_systag { + char *name; + char *value; + struct data1_systag *next; +}; + struct data1_absyn_cache_info { char *name; @@ -57,6 +64,31 @@ data1_absyn *data1_absyn_search (data1_handle dh, const char *name) } return NULL; } +/* *ostrich* + We need to destroy DFAs, in xp_element (xelm) definitions + pop, 2002-12-13 +*/ + +void data1_absyn_destroy (data1_handle dh) +{ + data1_absyn_cache p = *data1_absyn_cache_get (dh); + + while (p) + { + data1_absyn *abs = p->absyn; + if (abs) + { + data1_xpelement *xpe = abs->xp_elements; + while (xpe) { + logf (LOG_DEBUG,"Destroy xp element %s",xpe->xpath_expr); + if (xpe->dfa) { dfa_delete (&xpe->dfa); } + xpe = xpe->next; + } + } + p = p->next; + } +} + void data1_absyn_trav (data1_handle dh, void *handle, void (*fh)(data1_handle dh, void *h, data1_absyn *a)) @@ -240,11 +272,99 @@ void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e) } } } +/* *ostrich* + + New function, a bit dummy now... I've seen it in zrpn.c... We should build + more clever regexps... 
+ + + //a -> ^a/.*$ + //a/b -> ^b/a/.*$ + /a -> ^a/$ + /a/b -> ^b/a/$ + + / -> none + + pop, 2002-12-13 + + Now [] predicates are supported + + pop, 2003-01-17 + + */ + +const char * mk_xpath_regexp (data1_handle dh, char *expr) +{ + char *p = expr; + char *pp; + char *s; + int abs = 1; + int i; + int j; + int e=0; + int is_predicate = 0; + + static char *stack[32]; + static char res[1024]; + char *r = ""; + + if (*p != '/') { return (""); } + p++; + if (*p == '/') { abs=0; p++; } + + while (*p) { + i=0; + while (*p && !strchr("/",*p)) { + i++; p++; + } + stack[e] = (char *) nmem_malloc (data1_nmem_get (dh), i+1); + s = stack[e]; + for (j=0; j< i; j++) { + pp = p-i+j; + if (*pp == '[') { + is_predicate=1; + } + else if (*pp == ']') { + is_predicate=0; + } + else { + if (!is_predicate) { + if (*pp == '*') + *s++ = '.'; + *s++ = *pp; + } + } + } + *s = 0; + e++; + if (*p) {p++;} + } + e--; p = &res[0]; i=0; + sprintf (p, "^"); p++; + while (e >= 0) { + /* !!! res size is not checked !!! */ + sprintf (p, "%s/",stack[e]); + p += strlen(stack[e]) + 1; + e--; + } + if (!abs) { sprintf (p, ".*"); p+=2; } + sprintf (p, "$"); p++; + r = nmem_strdup (data1_nmem_get (dh), res); + yaz_log(LOG_DEBUG,"Got regexp: %s",r); + return (r); +} + +/* *ostrich* + added arg xpelement... when called from xelm context, it's 1, saying + that ! means xpath, not element name as attribute name... 
+ pop, 2002-12-13 + */ static int parse_termlists (data1_handle dh, data1_termlist ***tpp, char *p, const char *file, int lineno, - const char *element_name, data1_absyn *res) + const char *element_name, data1_absyn *res, + int xpelement) { data1_termlist **tp = *tpp; do @@ -261,19 +381,27 @@ static int parse_termlists (data1_handle dh, data1_termlist ***tpp, file, lineno, p); return -1; } - if (*attname == '!') - strcpy(attname, element_name); + *tp = (data1_termlist *) - nmem_malloc(data1_nmem_get(dh), sizeof(**tp)); + nmem_malloc(data1_nmem_get(dh), sizeof(**tp)); (*tp)->next = 0; + + if (!xpelement) { + if (*attname == '!') + strcpy(attname, element_name); + } if (!((*tp)->att = data1_getattbyname(dh, res->attset, - attname))) - { - yaz_log(LOG_WARN, - "%s:%d: Couldn't find att '%s' in attset", - file, lineno, attname); - return -1; + attname))) { + if ((!xpelement) || (*attname != '!')) { + yaz_log(LOG_WARN, + "%s:%d: Couldn't find att '%s' in attset", + file, lineno, attname); + return -1; + } else { + (*tp)->att = 0; + } } + if (r == 2 && (source = strchr(structure, ':'))) *source++ = '\0'; /* cut off structure .. 
*/ else @@ -293,10 +421,64 @@ static int parse_termlists (data1_handle dh, data1_termlist ***tpp, return 0; } +const char *data1_systag_lookup(data1_absyn *absyn, const char *tag, + const char *default_value) +{ + struct data1_systag *p = absyn->systags; + for (; p; p = p->next) + if (!strcmp(p->name, tag)) + return p->value; + return default_value; +} + +#define l_isspace(c) ((c) == '\t' || (c) == ' ' || (c) == '\n' || (c) == '\r') + +int read_absyn_line(FILE *f, int *lineno, char *line, int len, + char *argv[], int num) +{ + char *p; + int argc; + int quoted = 0; + + while ((p = fgets(line, len, f))) + { + (*lineno)++; + while (*p && l_isspace(*p)) + p++; + if (*p && *p != '#') + break; + } + if (!p) + return 0; + + for (argc = 0; *p ; argc++) + { + if (*p == '#') /* trailing comment */ + break; + argv[argc] = p; + while (*p && !(l_isspace(*p) && !quoted)) { + if (*p =='"') quoted = 1 - quoted; + if (*p =='[') quoted = 1; + if (*p ==']') quoted = 0; + p++; + } + if (*p) + { + *(p++) = '\0'; + while (*p && l_isspace(*p)) + p++; + } + } + return argc; +} + + data1_absyn *data1_read_absyn (data1_handle dh, const char *file, int file_must_exist) { data1_sub_elements *cur_elements = NULL; + data1_xpelement *cur_xpelement = NULL; + data1_absyn *res = 0; FILE *f; data1_element **ppl[D1_MAX_NESTING]; @@ -306,6 +488,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, data1_termlist *all = 0; data1_attset_child **attset_childp; data1_tagset **tagset_childp; + struct data1_systag **systagsp; int level = 0; int lineno = 0; int argc; @@ -324,6 +507,8 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, res->tagset = 0; res->encoding = 0; res->enable_xpath_indexing = (f ? 
0 : 1); + res->systags = 0; + systagsp = &res->systags; tagset_childp = &res->tagset; res->attset = data1_empty_attset (dh); @@ -336,11 +521,11 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, maptabp = &res->maptabs; res->marc = 0; marcp = &res->marc; - res->sub_elements = NULL; res->main_elements = NULL; + res->xp_elements = NULL; - while (f && (argc = readconf_line(f, &lineno, line, 512, argv, 50))) + while (f && (argc = read_absyn_line(f, &lineno, line, 512, argv, 50))) { char *cmd = *argv; if (!strcmp(cmd, "elm") || !strcmp(cmd, "element")) @@ -455,7 +640,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, { assert (res->attset); - if (parse_termlists (dh, &tp, p, file, lineno, name, res)) + if (parse_termlists (dh, &tp, p, file, lineno, name, res, 0)) { fclose (f); return 0; @@ -464,6 +649,88 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, } new_element->name = nmem_strdup(data1_nmem_get (dh), name); } + /* *ostrich* + New code to support xelm directive + for each xelm a dfa is built. xelms are stored in res->xp_elements + + maybe we should use a simple sscanf instead of dfa? + + pop, 2002-12-13 + + Now [] predicates are supported. regexps and xpath structure is + a bit redundant, however it's comfortable later... 
+ + pop, 2003-01-17 + */ + + else if (!strcmp(cmd, "xelm")) { + + int i; + char *p, *xpath_expr, *termlists; + const char *regexp; + struct DFA *dfa = dfa = dfa_init(); + data1_termlist **tp; + + if (argc < 3) + { + yaz_log(LOG_WARN, "%s:%d: Bad # of args to xelm", file, lineno); + continue; + } + xpath_expr = argv[1]; + termlists = argv[2]; + regexp = mk_xpath_regexp(dh, xpath_expr); + i = dfa_parse (dfa, &regexp); + if (i || *regexp) { + yaz_log(LOG_WARN, "%s:%d: Bad xpath to xelm", file, lineno); + dfa_delete (&dfa); + continue; + } + + if (!cur_xpelement) + { + cur_xpelement = (data1_xpelement *) + nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement)); + res->xp_elements = cur_xpelement; + } else { + cur_xpelement->next = (data1_xpelement *) + nmem_malloc(data1_nmem_get(dh), sizeof(*cur_xpelement)); + cur_xpelement = cur_xpelement->next; + } + cur_xpelement->next = NULL; + cur_xpelement->xpath_expr = nmem_strdup(data1_nmem_get (dh), + xpath_expr); + + dfa_mkstate (dfa); + cur_xpelement->dfa = dfa; + +#ifdef ENHANCED_XELM + cur_xpelement->xpath_len = + zebra_parse_xpath_str(xpath_expr, + cur_xpelement->xpath, XPATH_STEP_COUNT, + data1_nmem_get(dh)); + + /* + dump_xp_steps(cur_xpelement->xpath,cur_xpelement->xpath_len); + */ +#endif + cur_xpelement->termlists = 0; + tp = &cur_xpelement->termlists; + + /* parse termList definitions */ + p = termlists; + if (*p != '-') + { + assert (res->attset); + + if (parse_termlists (dh, &tp, p, file, lineno, + xpath_expr, res, 1)) + { + fclose (f); + return 0; + } + *tp = all; /* append any ALL entries to the list */ + } + } else if (!strcmp(cmd, "section")) { char *name; @@ -471,7 +738,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, if (argc < 2) { yaz_log(LOG_WARN, "%s:%d: Bad # of args to section", - file, lineno); + file, lineno); continue; } name = argv[1]; @@ -519,7 +786,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, file, lineno); continue; } - if (parse_termlists (dh, &tp, 
argv[1], file, lineno, 0, res)) + if (parse_termlists (dh, &tp, argv[1], file, lineno, 0, res, 0)) { fclose (f); return 0; @@ -692,6 +959,20 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, } res->encoding = nmem_strdup (data1_nmem_get(dh), argv[1]); } + else if (!strcmp(cmd, "systag")) + { + if (argc != 3) + { + yaz_log(LOG_WARN, "%s:%d: Bad # or args for systag", + file, lineno); + continue; + } + *systagsp = nmem_malloc (data1_nmem_get(dh), sizeof(**systagsp)); + + (*systagsp)->name = nmem_strdup(data1_nmem_get(dh), argv[1]); + (*systagsp)->value = nmem_strdup(data1_nmem_get(dh), argv[2]); + systagsp = &(*systagsp)->next; + } else { yaz_log(LOG_WARN, "%s:%d: Unknown directive '%s'", file, @@ -709,6 +990,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file, res->main_elements = cur_elements->elements; fix_element_ref (dh, res, cur_elements->elements); } + *systagsp = 0; yaz_log (LOG_DEBUG, "%s: data1_read_absyn end", file); return res; }