X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=retrieval%2Fd1_absyn.c;h=7813015cbbdb97cac10acebe7e8117beceb1d973;hp=142b9983cc6927963a1b9e315410d0b70268790b;hb=ec29e37cd269b330eb493157dedf9aacf5c6ca46;hpb=3ffa5dbc563e15bccb012f3d3d00a993f87ace82 diff --git a/retrieval/d1_absyn.c b/retrieval/d1_absyn.c index 142b998..7813015 100644 --- a/retrieval/d1_absyn.c +++ b/retrieval/d1_absyn.c @@ -1,10 +1,75 @@ /* - * Copyright (c) 1995, Index Data. + * Copyright (c) 1995-1998, Index Data. * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * * $Log: d1_absyn.c,v $ - * Revision 1.3 1995-11-01 16:34:55 quinn + * Revision 1.21 1998-06-09 13:55:07 adam + * Minor changes. + * + * Revision 1.20 1998/05/18 13:07:02 adam + * Changed the way attribute sets are handled by the retrieval module. + * Extended Explain conversion / schema. + * Modified server and client to work with ASN.1 compiled protocol handlers. + * + * Revision 1.19 1998/03/05 08:15:32 adam + * Implemented data1_add_insert_taggeddata utility which is more flexible + * than data1_insert_taggeddata. + * + * Revision 1.18 1998/02/27 14:08:04 adam + * Added const to some char pointer arguments. + * Reworked data1_read_node so that it doesn't create a tree with + * pointers to original "SGML"-buffer. + * + * Revision 1.17 1998/02/11 11:53:34 adam + * Changed code so that it compiles as C++. + * + * Revision 1.16 1997/12/18 10:51:30 adam + * Implemented sub-trees feature for schemas - including forward + * references. + * + * Revision 1.15 1997/12/09 16:18:16 adam + * Work on EXPLAIN schema. First implementation of sub-schema facility + * in the *.abs files. + * + * Revision 1.14 1997/10/31 12:20:09 adam + * Improved memory debugging for xmalloc/nmem.c. References to NMEM + * instead of ODR in ESPEC-1 handling in source d1_espec.c. + * Bug fix: missing fclose in data1_read_espec1. 
+ * + * Revision 1.13 1997/10/27 13:54:18 adam + * Changed structure field in data1 node to be simple string which + * is "unknown" to the retrieval system itself. + * + * Revision 1.12 1997/09/17 12:10:34 adam + * YAZ version 1.4. + * + * Revision 1.11 1997/09/05 09:50:55 adam + * Removed global data1_tabpath - uses data1_get_tabpath() instead. + * + * Revision 1.10 1997/05/14 06:54:01 adam + * C++ support. + * + * Revision 1.9 1997/02/19 14:46:15 adam + * The "all" specifier only affects elements that are indexed (and not + * all elements). + * + * Revision 1.8 1997/01/02 10:47:59 quinn + * Added optional, physical ANY + * + * Revision 1.7 1996/06/10 08:56:01 quinn + * Work on Summary. + * + * Revision 1.6 1996/05/31 13:52:21 quinn + * Fixed uninitialized variable for local tags in abstract syntax. + * + * Revision 1.5 1996/05/09 07:27:43 quinn + * Multiple local attributes values supported. + * + * Revision 1.4 1996/05/01 12:45:28 quinn + * Support use of local tag names in abs file. 
+ * + * Revision 1.3 1995/11/01 16:34:55 quinn * Making data1 look for tables in data1_tabpath * * Revision 1.2 1995/11/01 13:54:44 quinn @@ -22,7 +87,6 @@ #include #include -#include #include #include #include @@ -30,39 +94,140 @@ #include #define D1_MAX_NESTING 128 -#define DATA1_MAX_SYNTAXES 30 /* max no of syntaxes to handle in one session */ -static struct /* cache of abstract syntaxes */ +struct data1_absyn_cache_info { char *name; data1_absyn *absyn; -} syntaxes[DATA1_MAX_SYNTAXES] = {{0,0}}; + data1_absyn_cache next; +}; -data1_absyn *data1_get_absyn(char *name) +struct data1_attset_cache_info { - char fname[512]; - int i; + char *name; + data1_attset *attset; + data1_attset_cache next; +}; - for (i = 0; syntaxes[i].name; i++) - if (!strcmp(name, syntaxes[i].name)) - return syntaxes[i].absyn; +data1_absyn *data1_absyn_search (data1_handle dh, const char *name) +{ + data1_absyn_cache p = *data1_absyn_cache_get (dh); - if (i >= DATA1_MAX_SYNTAXES - 1) + while (p) { - logf(LOG_WARN, "Too many abstract syntaxes loaded"); - return 0; + if (!strcmp (name, p->name)) + return p->absyn; + p = p->next; } + return NULL; +} + +void data1_absyn_trav (data1_handle dh, void *handle, + void (*fh)(data1_handle dh, void *h, data1_absyn *a)) +{ + data1_absyn_cache p = *data1_absyn_cache_get (dh); + + while (p) + { + (*fh)(dh, handle, p->absyn); + p = p->next; + } +} + +data1_absyn *data1_absyn_add (data1_handle dh, const char *name) +{ + char fname[512]; + NMEM mem = data1_nmem_get (dh); + + data1_absyn_cache p = (data1_absyn_cache)nmem_malloc (mem, sizeof(*p)); + data1_absyn_cache *pp = data1_absyn_cache_get (dh); + sprintf(fname, "%s.abs", name); - if (!(syntaxes[i].absyn = data1_read_absyn(fname))) - return 0; - if (!(syntaxes[i].name = xmalloc(strlen(name)+1))) - abort(); - strcpy(syntaxes[i].name, name); - syntaxes[i+1].name = 0; - return syntaxes[i].absyn; + p->absyn = data1_read_absyn (dh, fname); + p->name = nmem_strdup (mem, name); + p->next = *pp; + *pp = p; + 
return p->absyn; } -data1_esetname *data1_getesetbyname(data1_absyn *a, char *name) +data1_absyn *data1_get_absyn (data1_handle dh, const char *name) +{ + data1_absyn *absyn; + + if (!(absyn = data1_absyn_search (dh, name))) + absyn = data1_absyn_add (dh, name); + return absyn; +} + +data1_attset *data1_attset_search_name (data1_handle dh, const char *name) +{ + data1_attset_cache p = *data1_attset_cache_get (dh); + + while (p) + { + if (!strcmp (name, p->name)) + return p->attset; + p = p->next; + } + return NULL; +} + +data1_attset *data1_attset_search_id (data1_handle dh, int id) +{ + data1_attset_cache p = *data1_attset_cache_get (dh); + + while (p) + { + if (id == p->attset->reference) + return p->attset; + p = p->next; + } + return NULL; +} + +data1_attset *data1_attset_add (data1_handle dh, const char *name) +{ + char fname[512], aname[512]; + NMEM mem = data1_nmem_get (dh); + data1_attset *attset; + + strcpy (aname, name); + sprintf(fname, "%s.att", name); + attset = data1_read_attset (dh, fname); + if (!attset) + { + char *cp; + attset = data1_read_attset (dh, name); + if (attset && (cp = strrchr (aname, '.'))) + *cp = '\0'; + } + if (!attset) + logf (LOG_WARN|LOG_ERRNO, "couldn't load attribute set %s", name); + else + { + data1_attset_cache p = (data1_attset_cache) + nmem_malloc (mem, sizeof(*p)); + data1_attset_cache *pp = data1_attset_cache_get (dh); + + attset->name = p->name = nmem_strdup (mem, aname); + p->attset = attset; + p->next = *pp; + *pp = p; + } + return attset; +} + +data1_attset *data1_get_attset (data1_handle dh, const char *name) +{ + data1_attset *attset; + + if (!(attset = data1_attset_search_name (dh, name))) + attset = data1_attset_add (dh, name); + return attset; +} + +data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a, + const char *name) { data1_esetname *r; @@ -72,15 +237,17 @@ data1_esetname *data1_getesetbyname(data1_absyn *a, char *name) return 0; } -data1_element *data1_getelementbytagname(data1_absyn *abs, 
- data1_element *parent, char *tagname) +data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, + data1_element *parent, + const char *tagname) { data1_element *r; if (!parent) - r = abs->elements; + r = abs->main_elements; else r = parent->children; + assert (abs->main_elements); for (; r; r = r->next) { data1_name *n; @@ -92,48 +259,75 @@ data1_element *data1_getelementbytagname(data1_absyn *abs, return 0; } -data1_element *data1_getelementbyname(data1_absyn *absyn, char *name) +data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn, + const char *name) { data1_element *r; - - for (r = absyn->elements; r; r = r->next) + assert (absyn->main_elements); + for (r = absyn->main_elements; r; r = r->next) if (!data1_matchstr(r->name, name)) return r; return 0; } -data1_absyn *data1_read_absyn(char *file) + +void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e) +{ + for (; e; e = e->next) + { + if (!e->sub_name) + { + if (e->children) + fix_element_ref (dh, absyn, e->children); + } + else + { + data1_sub_elements *sub_e = absyn->sub_elements; + while (sub_e && strcmp (e->sub_name, sub_e->name)) + sub_e = sub_e->next; + if (sub_e) + e->children = sub_e->elements; + else + logf (LOG_WARN, "Unresolved reference to sub-elements %s", + e->sub_name); + } + } +} + +data1_absyn *data1_read_absyn (data1_handle dh, const char *file) { char line[512], *r, cmd[512], args[512]; + data1_sub_elements *cur_elements = NULL; data1_absyn *res = 0; FILE *f; data1_element **ppl[D1_MAX_NESTING]; data1_esetname **esetpp; data1_maptab **maptabp; data1_marctab **marcp; + data1_termlist *all = 0; int level = 0; - if (!(f = yaz_path_fopen(data1_tabpath, file, "r"))) + if (!(f = yaz_path_fopen(data1_get_tabpath (dh), file, "r"))) { - logf(LOG_WARN|LOG_ERRNO, "%s", file); + logf(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file); return 0; } - if (!(res = xmalloc(sizeof(*res)))) - abort(); + res = (data1_absyn 
*)nmem_malloc(data1_nmem_get(dh), sizeof(*res)); res->name = 0; res->reference = VAL_NONE; res->tagset = 0; res->attset = 0; res->varset = 0; res->esetnames = 0; + esetpp = &res->esetnames; res->maptabs = 0; maptabp = &res->maptabs; res->marc = 0; marcp = &res->marc; - res->elements = 0; - ppl[0] = &res->elements; - esetpp = &res->esetnames; + + res->sub_elements = NULL; + res->main_elements = NULL; for (;;) { @@ -145,20 +339,30 @@ data1_absyn *data1_read_absyn(char *file) break; } if (!r) - { - fclose(f); - return res; - } + break; if (sscanf(r, "%s %[^\n]", cmd, args) < 2) *args = '\0'; if (!strcmp(cmd, "elm")) { - data1_element *new; + data1_element *new_element; int i; - char path[512], name[512], att[512], *p; + char path[512], name[512], termlists[512], *p, *sub_p; int type, value; + data1_termlist **tp; - if (sscanf(args, "%s %s %s", path, name, att) < 3) + if (!cur_elements) + { + cur_elements = (data1_sub_elements *)nmem_malloc(data1_nmem_get(dh), + sizeof(*cur_elements)); + cur_elements->next = res->sub_elements; + cur_elements->elements = NULL; + cur_elements->name = "main"; + res->sub_elements = cur_elements; + + level = 0; + ppl[level] = &cur_elements->elements; + } + if (sscanf(args, "%511s %511s %511s", path, name, termlists) < 3) { logf(LOG_WARN, "Bad # of args to elm in %s: '%s'", file, args); @@ -177,40 +381,73 @@ data1_absyn *data1_read_absyn(char *file) } if (i > level + 1) { - logf(LOG_WARN, "Bad level inc in %s in '%'", file, args); + logf(LOG_WARN, "Bad level inc in %s in '%s'", file, args); fclose(f); return 0; } level = i; - if (!(new = *ppl[level] = xmalloc(sizeof(*new)))) - abort; - new ->next = new->children = 0; - ppl[level] = &new->next; - ppl[level+1] = &new->children; + new_element = *ppl[level] = + (data1_element *)nmem_malloc(data1_nmem_get(dh), sizeof(*new_element)); + new_element->next = new_element->children = 0; + new_element->tag = 0; + new_element->termlists = 0; + new_element->sub_name = 0; - if (sscanf(p, "(%d,%d)", &type, 
&value) < 2) + tp = &new_element->termlists; + ppl[level] = &new_element->next; + ppl[level+1] = &new_element->children; + + /* consider subtree (if any) ... */ + if ((sub_p = strchr (p, ':')) && sub_p[1]) { - logf(LOG_WARN, "Malformed element '%s' in %s", p, file); - fclose(f); - return 0; + *sub_p++ = '\0'; + new_element->sub_name = + nmem_strdup (data1_nmem_get(dh), sub_p); } - if (!res->tagset) + /* well-defined tag */ + if (sscanf(p, "(%d,%d)", &type, &value) == 2) { - logf(LOG_WARN, "No tagset loaded in %s", file); - fclose(f); - return 0; + if (!res->tagset) + { + logf(LOG_WARN, "No tagset loaded in %s", file); + fclose(f); + return 0; + } + if (!(new_element->tag = data1_gettagbynum (dh, res->tagset, + type, value))) + { + logf(LOG_WARN, "Couldn't find tag %s in tagset in %s", + p, file); + fclose(f); + return 0; + } + } + /* private tag */ + else if (*p) + { + data1_tag *nt = + new_element->tag = (data1_tag *)nmem_malloc(data1_nmem_get (dh), + sizeof(*new_element->tag)); + nt->which = DATA1T_string; + nt->value.string = nmem_strdup(data1_nmem_get (dh), p); + nt->names = (data1_name *)nmem_malloc(data1_nmem_get(dh), + sizeof(*new_element->tag->names)); + nt->names->name = nt->value.string; + nt->names->next = 0; + nt->kind = DATA1K_string; + nt->next = 0; + nt->tagset = 0; } - if (!(new->tag = data1_gettagbynum(res->tagset, type, value))) + else { - logf(LOG_WARN, "Couldn't find tag %s in tagset in %s", - p, file); + logf(LOG_WARN, "Bad element is %s", file); fclose(f); return 0; } - if (*att == '!') - strcpy(att, name); - if (*att == '-') - new->att = 0; + /* parse termList definitions */ + p = termlists; + if (*p == '-') + new_element->termlists = 0; else { if (!res->attset) @@ -219,30 +456,130 @@ data1_absyn *data1_read_absyn(char *file) fclose(f); return 0; } - if (!(new->att = data1_getattbyname(res->attset, att))) + do { - logf(LOG_WARN, "Couldn't find att '%s' in attset", att); + char attname[512], structure[512]; + int r; + + if (!(r = sscanf(p, 
"%511[^:,]:%511[^,]", attname, + structure))) + { + logf(LOG_WARN, "Syntax error in termlistspec in %s", + file); + fclose(f); + return 0; + } + if (*attname == '!') + strcpy(attname, name); + *tp = (data1_termlist *)nmem_malloc(data1_nmem_get(dh), sizeof(**tp)); + (*tp)->next = 0; + if (!((*tp)->att = data1_getattbyname(dh, res->attset, + attname))) + { + logf(LOG_WARN, "Couldn't find att '%s' in attset", + attname); + fclose(f); + return 0; + } + if (r < 2) /* is the structure qualified? */ + (*tp)->structure = "w"; + else + { + (*tp)->structure = (char *)nmem_malloc (data1_nmem_get (dh), + strlen(structure)+1); + strcpy ((*tp)->structure, structure); + } + tp = &(*tp)->next; + } + while ((p = strchr(p, ',')) && *(++p)); + *tp = all; /* append any ALL entries to the list */ + } + new_element->name = nmem_strdup(data1_nmem_get (dh), name); + } + else if (!strcmp(cmd, "section")) + { + char name[512]; + if (sscanf(args, "%511s", name) < 1) + { + logf(LOG_WARN, "Bad # of args to sub in %s: '%s'", + file, args); + continue; + } + cur_elements = (data1_sub_elements *)nmem_malloc(data1_nmem_get(dh), + sizeof(*cur_elements)); + cur_elements->next = res->sub_elements; + cur_elements->elements = NULL; + cur_elements->name = nmem_strdup (data1_nmem_get(dh), name); + res->sub_elements = cur_elements; + + level = 0; + ppl[level] = &cur_elements->elements; + } + else if (!strcmp(cmd, "all")) + { + char *p; + data1_termlist **tp = &all; + + if (all) + { + logf(LOG_WARN, "Too many ALL declarations in %s - ignored", + file); + continue; + } + + p = args; + if (!res->attset) + { + logf(LOG_WARN, "No attset loaded in %s", file); + fclose(f); + return 0; + } + do + { + char attname[512], structure[512]; + int r; + + if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname, + structure))) + { + logf(LOG_WARN, "Syntax error in termlistspec in %s", + file); + fclose(f); + return 0; + } + *tp = (data1_termlist *)nmem_malloc(data1_nmem_get(dh), sizeof(**tp)); + if (!((*tp)->att = 
data1_getattbyname (dh, res->attset, + attname))) + { + logf(LOG_WARN, "Couldn't find att '%s' in attset", + attname); fclose(f); return 0; } + if (r < 2) /* is the structure qualified? */ + (*tp)->structure = "w"; + else + { + (*tp)->structure = (char *)nmem_malloc (data1_nmem_get (dh), + strlen(structure)+1); + strcpy ((*tp)->structure, structure); + } + (*tp)->next = 0; + tp = &(*tp)->next; } - if (!(new->name = xmalloc(strlen(name)+1))) - abort(); - strcpy(new->name, name); + while ((p = strchr(p, ',')) && *(++p)); } else if (!strcmp(cmd, "name")) { char name[512]; - if (!sscanf(args, "%s", name)) + if (!sscanf(args, "%511s", name)) { - logf(LOG_WARN, "%s malformed name directive in %s", file); + logf(LOG_WARN, "Malformed name directive in %s", file); fclose(f); return 0; } - if (!(res->name = xmalloc(strlen(args)+1))) - abort(); - strcpy(res->name, name); + res->name = nmem_strdup(data1_nmem_get(dh), name); } else if (!strcmp(cmd, "reference")) { @@ -250,7 +587,7 @@ data1_absyn *data1_read_absyn(char *file) if (!sscanf(args, "%s", name)) { - logf(LOG_WARN, "%s malformed reference directive in %s", file); + logf(LOG_WARN, "Malformed reference in %s", file); fclose(f); return 0; } @@ -267,11 +604,11 @@ data1_absyn *data1_read_absyn(char *file) if (!sscanf(args, "%s", name)) { - logf(LOG_WARN, "%s malformed attset directive in %s", file); + logf(LOG_WARN, "Malformed attset directive in %s", file); fclose(f); return 0; } - if (!(res->attset = data1_read_attset(name))) + if (!(res->attset = data1_get_attset (dh, name))) { logf(LOG_WARN, "Attset failed in %s", file); fclose(f); @@ -284,11 +621,11 @@ data1_absyn *data1_read_absyn(char *file) if (!sscanf(args, "%s", name)) { - logf(LOG_WARN, "%s malformed tagset directive in %s", file); + logf(LOG_WARN, "Malformed tagset directive in %s", file); fclose(f); return 0; } - if (!(res->tagset = data1_read_tagset(name))) + if (!(res->tagset = data1_read_tagset (dh, name))) { logf(LOG_WARN, "Tagset failed in %s", file); 
fclose(f); @@ -301,11 +638,11 @@ data1_absyn *data1_read_absyn(char *file) if (!sscanf(args, "%s", name)) { - logf(LOG_WARN, "%s malformed varset directive in %s", file); + logf(LOG_WARN, "Malformed varset directive in %s", file); fclose(f); return 0; } - if (!(res->varset = data1_read_varset(name))) + if (!(res->varset = data1_read_varset (dh, name))) { logf(LOG_WARN, "Varset failed in %s", file); fclose(f); @@ -318,22 +655,22 @@ data1_absyn *data1_read_absyn(char *file) if (sscanf(args, "%s %s", name, fname) != 2) { - logf(LOG_WARN, "%s: Two arg's required for esetname directive"); + logf(LOG_WARN, "Two arg's required for esetname in %s", + file); fclose(f); return 0; } - *esetpp = xmalloc(sizeof(**esetpp)); - (*esetpp)->name = xmalloc(strlen(name)+1); - strcpy((*esetpp)->name, name); + *esetpp = (data1_esetname *)nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp)); + (*esetpp)->name = nmem_strdup(data1_nmem_get(dh), name); + (*esetpp)->next = 0; if (*fname == '@') (*esetpp)->spec = 0; - else if (!((*esetpp)->spec = data1_read_espec1(fname, 0))) + else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname))) { logf(LOG_WARN, "%s: Espec-1 read failed", file); fclose(f); return 0; } - (*esetpp)->next = 0; esetpp = &(*esetpp)->next; } else if (!strcmp(cmd, "maptab")) @@ -342,13 +679,14 @@ data1_absyn *data1_read_absyn(char *file) if (sscanf(args, "%s", name) != 1) { - logf(LOG_WARN, "%s: One argument required for maptab directive", - file); + logf(LOG_WARN, "One argument for maptab directive in %s", + file); continue; } - if (!(*maptabp = data1_read_maptab(name))) + if (!(*maptabp = data1_read_maptab (dh, name))) { - logf(LOG_WARN, "%s: Failed to read maptab."); + logf(LOG_WARN, "Failed to read maptab %s in %s", + name, file); continue; } maptabp = &(*maptabp)->next; @@ -359,13 +697,14 @@ data1_absyn *data1_read_absyn(char *file) if (sscanf(args, "%s", name) != 1) { - logf(LOG_WARN, "%s: One argument required for marc directive", + logf(LOG_WARN, "One argument for marc 
directive in %s", file); continue; } - if (!(*marcp = data1_read_marctab(name))) + if (!(*marcp = data1_read_marctab (dh, name))) { - logf(LOG_WARN, "%s: Failed to read marctab."); + logf(LOG_WARN, "%Failed to read marctab %s in %s", + name, file); continue; } marcp = &(*marcp)->next; @@ -377,4 +716,15 @@ data1_absyn *data1_read_absyn(char *file) return 0; } } + fclose(f); + + for (cur_elements = res->sub_elements; cur_elements; + cur_elements = cur_elements->next) + { + if (!strcmp (cur_elements->name, "main")) + res->main_elements = cur_elements->elements; + fix_element_ref (dh, res, cur_elements->elements); + } + logf (LOG_DEBUG, "end data1_read_absyn file=%s", file); + return res; }