X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=retrieval%2Fd1_absyn.c;h=349e4d54f174778dcf34c5813e50c0dab8404691;hb=5b690aebb8dc2d05cad8f668de8fd821a1c231fa;hp=71e35de0afdcb2154ba7ee643028856d398f5f3f;hpb=21a7fea425de8b32ac71bbbb981e88e77e6a3428;p=yaz-moved-to-github.git diff --git a/retrieval/d1_absyn.c b/retrieval/d1_absyn.c index 71e35de..349e4d5 100644 --- a/retrieval/d1_absyn.c +++ b/retrieval/d1_absyn.c @@ -1,107 +1,19 @@ /* - * Copyright (c) 1995-1998, Index Data. + * Copyright (c) 1995-2002, Index Data. * See the file LICENSE for details. * Sebastian Hammer, Adam Dickmeiss * - * $Log: d1_absyn.c,v $ - * Revision 1.23 1998-10-15 08:29:16 adam - * Tag set type may be specified in reference to it using "tagset" - * directive in .abs-files and "include" directive in .tag-files. - * - * Revision 1.22 1998/10/13 16:09:47 adam - * Added support for arbitrary OID's for tagsets, schemas and attribute sets. - * Added support for multiple attribute set references and tagset references - * from an abstract syntax file. - * Fixed many bad logs-calls in routines that read the various - * specifications regarding data1 (*.abs,*.att,...) and made the messages - * consistent whenever possible. - * Added extra 'lineno' argument to function readconf_line. - * - * Revision 1.21 1998/06/09 13:55:07 adam - * Minor changes. - * - * Revision 1.20 1998/05/18 13:07:02 adam - * Changed the way attribute sets are handled by the retriaval module. - * Extended Explain conversion / schema. - * Modified server and client to work with ASN.1 compiled protocol handlers. - * - * Revision 1.19 1998/03/05 08:15:32 adam - * Implemented data1_add_insert_taggeddata utility which is more flexible - * than data1_insert_taggeddata. - * - * Revision 1.18 1998/02/27 14:08:04 adam - * Added const to some char pointer arguments. - * Reworked data1_read_node so that it doesn't create a tree with - * pointers to original "SGML"-buffer. - * - * Revision 1.17 1998/02/11 11:53:34 adam - * Changed code so that it compiles as C++. - * - * Revision 1.16 1997/12/18 10:51:30 adam - * Implemented sub-trees feature for schemas - including forward - * references. - * - * Revision 1.15 1997/12/09 16:18:16 adam - * Work on EXPLAIN schema. First implementation of sub-schema facility - * in the *.abs files. - * - * Revision 1.14 1997/10/31 12:20:09 adam - * Improved memory debugging for xmalloc/nmem.c. References to NMEM - * instead of ODR in n ESPEC-1 handling in source d1_espec.c. - * Bug fix: missing fclose in data1_read_espec1. - * - * Revision 1.13 1997/10/27 13:54:18 adam - * Changed structure field in data1 node to be simple string which - * is "unknown" to the retrieval system itself. - * - * Revision 1.12 1997/09/17 12:10:34 adam - * YAZ version 1.4. - * - * Revision 1.11 1997/09/05 09:50:55 adam - * Removed global data1_tabpath - uses data1_get_tabpath() instead. - * - * Revision 1.10 1997/05/14 06:54:01 adam - * C++ support. - * - * Revision 1.9 1997/02/19 14:46:15 adam - * The "all" specifier only affects elements that are indexed (and not - * all elements). - * - * Revision 1.8 1997/01/02 10:47:59 quinn - * Added optional, physical ANY - * - * Revision 1.7 1996/06/10 08:56:01 quinn - * Work on Summary. - * - * Revision 1.6 1996/05/31 13:52:21 quinn - * Fixed uninitialized variable for local tags in abstract syntax. - * - * Revision 1.5 1996/05/09 07:27:43 quinn - * Multiple local attributes values supported. - * - * Revision 1.4 1996/05/01 12:45:28 quinn - * Support use of local tag names in abs file. - * - * Revision 1.3 1995/11/01 16:34:55 quinn - * Making data1 look for tables in data1_tabpath - * - * Revision 1.2 1995/11/01 13:54:44 quinn - * Minor adjustments - * - * Revision 1.1 1995/11/01 11:56:06 quinn - * Added Retrieval (data management) functions en masse. - * + * $Id: d1_absyn.c,v 1.34 2002-08-29 21:26:08 mike Exp $ */ -#include #include #include #include #include -#include -#include -#include +#include +#include +#include #define D1_MAX_NESTING 128 @@ -153,7 +65,7 @@ data1_absyn *data1_absyn_add (data1_handle dh, const char *name) data1_absyn_cache *pp = data1_absyn_cache_get (dh); sprintf(fname, "%s.abs", name); - p->absyn = data1_read_absyn (dh, fname); + p->absyn = data1_read_absyn (dh, fname, 0); p->name = nmem_strdup (mem, name); p->next = *pp; *pp = p; @@ -212,7 +124,7 @@ data1_attset *data1_attset_add (data1_handle dh, const char *name) *cp = '\0'; } if (!attset) - logf (LOG_WARN|LOG_ERRNO, "Couldn't load attribute set %s", name); + yaz_log (LOG_WARN|LOG_ERRNO, "Couldn't load attribute set %s", name); else { data1_attset_cache p = (data1_attset_cache) @@ -253,11 +165,15 @@ data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs, { data1_element *r; + /* It's now possible to have a data1 tree with no abstract syntax */ + if ( !abs ) + return 0; + if (!parent) r = abs->main_elements; else r = parent->children; - assert (abs->main_elements); + for (; r; r = r->next) { data1_name *n; @@ -273,7 +189,10 @@ data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn, const char *name) { data1_element *r; - assert (absyn->main_elements); + + /* It's now possible to have a data1 tree with no abstract syntax */ + if ( !absyn ) + return 0; for (r = absyn->main_elements; r; r = r->next) if (!data1_matchstr(r->name, name)) return r; @@ -283,6 +202,10 @@ data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn, void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e) { + /* It's now possible to have a data1 tree with no abstract syntax */ + if ( !absyn ) + return; + for (; e; e = e->next) { if (!e->sub_name) @@ -298,13 +221,66 @@ void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e) if (sub_e) e->children = sub_e->elements; else - logf (LOG_WARN, "Unresolved reference to sub-elements %s", + yaz_log (LOG_WARN, "Unresolved reference to sub-elements %s", e->sub_name); } } } -data1_absyn *data1_read_absyn (data1_handle dh, const char *file) + +static int parse_termlists (data1_handle dh, data1_termlist ***tpp, + char *p, const char *file, int lineno, + const char *element_name, data1_absyn *res) +{ + data1_termlist **tp = *tpp; + do + { + char attname[512], structure[512]; + char *source; + int r; + + if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname, + structure))) + { + yaz_log(LOG_WARN, + "%s:%d: Syntax error in termlistspec '%s'", + file, lineno, p); + return -1; + } + if (*attname == '!') + strcpy(attname, element_name); + *tp = (data1_termlist *) + nmem_malloc(data1_nmem_get(dh), sizeof(**tp)); + (*tp)->next = 0; + if (!((*tp)->att = data1_getattbyname(dh, res->attset, + attname))) + { + yaz_log(LOG_WARN, + "%s:%d: Couldn't find att '%s' in attset", + file, lineno, attname); + return -1; + } + if (r == 2 && (source = strchr(structure, ':'))) + *source++ = '\0'; /* cut off structure .. */ + else + source = "data"; /* ok: default is leaf data */ + (*tp)->source = (char *) + nmem_strdup (data1_nmem_get (dh), source); + + if (r < 2) /* is the structure qualified? */ + (*tp)->structure = "w"; + else + (*tp)->structure = (char *) + nmem_strdup (data1_nmem_get (dh), structure); + tp = &(*tp)->next; + } + while ((p = strchr(p, ',')) && *(++p)); + *tpp = tp; + return 0; +} + +data1_absyn *data1_read_absyn (data1_handle dh, const char *file, + int file_must_exist) { data1_sub_elements *cur_elements = NULL; data1_absyn *res = 0; @@ -321,16 +297,19 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) int argc; char *argv[50], line[512]; - if (!(f = yaz_path_fopen(data1_get_tabpath (dh), file, "r"))) + if (!(f = data1_path_fopen(dh, file, "r"))) { - logf(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file); - return 0; + yaz_log(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file); + if (file_must_exist) + return 0; } res = (data1_absyn *) nmem_malloc(data1_nmem_get(dh), sizeof(*res)); res->name = 0; res->reference = VAL_NONE; res->tagset = 0; + res->encoding = 0; + res->enable_xpath_indexing = (f ? 0 : 1); tagset_childp = &res->tagset; res->attset = data1_empty_attset (dh); @@ -346,11 +325,11 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) res->sub_elements = NULL; res->main_elements = NULL; - - while ((argc = readconf_line(f, &lineno, line, 512, argv, 50))) + + while (f && (argc = readconf_line(f, &lineno, line, 512, argv, 50))) { char *cmd = *argv; - if (!strcmp(cmd, "elm")) + if (!strcmp(cmd, "elm") || !strcmp(cmd, "element")) { data1_element *new_element; int i; @@ -360,7 +339,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) if (argc < 4) { - logf(LOG_WARN, "%s:%d: Bad # of args to elm", file, lineno); + yaz_log(LOG_WARN, "%s:%d: Bad # of args to elm", file, lineno); continue; } path = argv[1]; @@ -380,7 +359,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) ppl[level] = &cur_elements->elements; } p = path; - for (i = 0;; i++) + for (i = 1;; i++) { char *e; @@ -389,14 +368,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) else break; } - if (i > level + 1) + if (i > level+1) { - logf(LOG_WARN, "%s:%d: Bad level increase", file, lineno); + yaz_log(LOG_WARN, "%s:%d: Bad level increase", file, lineno); fclose(f); return 0; } level = i; - new_element = *ppl[level] = (data1_element *) + new_element = *ppl[level-1] = (data1_element *) nmem_malloc(data1_nmem_get(dh), sizeof(*new_element)); new_element->next = new_element->children = 0; new_element->tag = 0; @@ -404,8 +383,8 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) new_element->sub_name = 0; tp = &new_element->termlists; - ppl[level] = &new_element->next; - ppl[level+1] = &new_element->children; + ppl[level-1] = &new_element->next; + ppl[level] = &new_element->children; /* consider subtree (if any) ... */ if ((sub_p = strchr (p, ':')) && sub_p[1]) @@ -419,14 +398,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) { if (!res->tagset) { - logf(LOG_WARN, "%s:%d: No tagset loaded", file, lineno); + yaz_log(LOG_WARN, "%s:%d: No tagset loaded", file, lineno); fclose(f); return 0; } if (!(new_element->tag = data1_gettagbynum (dh, res->tagset, type, value))) { - logf(LOG_WARN, "%s:%d: Couldn't find tag %s in tagset", + yaz_log(LOG_WARN, "%s:%d: Couldn't find tag %s in tagset", file, lineno, p); fclose(f); return 0; @@ -452,57 +431,21 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) } else { - logf(LOG_WARN, "%s:%d: Bad element", file, lineno); + yaz_log(LOG_WARN, "%s:%d: Bad element", file, lineno); fclose(f); return 0; } /* parse termList definitions */ p = termlists; - if (*p == '-') - new_element->termlists = 0; - else + if (*p != '-') { assert (res->attset); - do + + if (parse_termlists (dh, &tp, p, file, lineno, name, res)) { - char attname[512], structure[512]; - int r; - - if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname, - structure))) - { - logf(LOG_WARN, - "%s:%d: Syntax error in termlistspec '%s'", - file, lineno, p); - fclose(f); - return 0; - } - if (*attname == '!') - strcpy(attname, name); - *tp = (data1_termlist *) - nmem_malloc(data1_nmem_get(dh), sizeof(**tp)); - (*tp)->next = 0; - if (!((*tp)->att = data1_getattbyname(dh, res->attset, - attname))) - { - logf(LOG_WARN, - "%s:%d: Couldn't find att '%s' in attset", - file, lineno, attname); - fclose(f); - return 0; - } - if (r < 2) /* is the structure qualified? */ - (*tp)->structure = "w"; - else - { - (*tp)->structure = (char *) - nmem_malloc (data1_nmem_get (dh), - strlen(structure)+1); - strcpy ((*tp)->structure, structure); - } - tp = &(*tp)->next; + fclose (f); + return 0; } - while ((p = strchr(p, ',')) && *(++p)); *tp = all; /* append any ALL entries to the list */ } new_element->name = nmem_strdup(data1_nmem_get (dh), name); @@ -513,7 +456,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) if (argc < 2) { - logf(LOG_WARN, "%s:%d: Bad # of args to section", + yaz_log(LOG_WARN, "%s:%d: Bad # of args to section", file, lineno); continue; } @@ -529,68 +472,50 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) level = 0; ppl[level] = &cur_elements->elements; } + else if (!strcmp(cmd, "xpath")) + { + if (argc != 2) + { + yaz_log(LOG_WARN, "%s:%d: Bad # of args to 'xpath' directive", + file, lineno); + continue; + } + if (!strcmp(argv[1], "enable")) + res->enable_xpath_indexing = 1; + else if (!strcmp (argv[1], "disable")) + res->enable_xpath_indexing = 0; + else + { + yaz_log(LOG_WARN, "%s:%d: Expecting disable/enable " + "after 'xpath' directive", file, lineno); + } + } else if (!strcmp(cmd, "all")) { - char *p; data1_termlist **tp = &all; - if (all) { - logf(LOG_WARN, "%s:%d: Too many 'all' directives - ignored", + yaz_log(LOG_WARN, "%s:%d: Too many 'all' directives - ignored", file, lineno); continue; } - if (argc != 2) { - logf(LOG_WARN, "%s:%d: Bad # of args to 'all' directive", + yaz_log(LOG_WARN, "%s:%d: Bad # of args to 'all' directive", file, lineno); continue; } - p = argv[1]; - assert (res->attset); - do + if (parse_termlists (dh, &tp, argv[1], file, lineno, 0, res)) { - char attname[512], structure[512]; - int r; - - if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname, - structure))) - { - logf(LOG_WARN, "%s:%d: Syntax error in termlistspec", - file, lineno); - fclose(f); - return 0; - } - *tp = (data1_termlist *) - nmem_malloc(data1_nmem_get(dh), sizeof(**tp)); - if (!((*tp)->att = - data1_getattbyname (dh, res->attset, attname))) - { - logf(LOG_WARN, "%s:%d: Couldn't find att '%s' in attset", - file, lineno, attname); - fclose(f); - return 0; - } - if (r < 2) /* is the structure qualified? */ - (*tp)->structure = "w"; - else - { - (*tp)->structure = - (char *)nmem_malloc (data1_nmem_get (dh), - strlen(structure)+1); - strcpy ((*tp)->structure, structure); - } - (*tp)->next = 0; - tp = &(*tp)->next; + fclose (f); + return 0; } - while ((p = strchr(p, ',')) && *(++p)); } else if (!strcmp(cmd, "name")) { if (argc != 2) { - logf(LOG_WARN, "%s:%d: Bad # of args to name directive", + yaz_log(LOG_WARN, "%s:%d: Bad # of args to name directive", file, lineno); continue; } @@ -602,14 +527,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) if (argc != 2) { - logf(LOG_WARN, "%s:%d: Bad # of args to reference", + yaz_log(LOG_WARN, "%s:%d: Bad # of args to reference", file, lineno); continue; } name = argv[1]; if ((res->reference = oid_getvalbyname(name)) == VAL_NONE) { - logf(LOG_WARN, "%s:%d: Unknown tagset ref '%s'", + yaz_log(LOG_WARN, "%s:%d: Unknown tagset ref '%s'", file, lineno, name); continue; } @@ -621,14 +546,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) if (argc != 2) { - logf(LOG_WARN, "%s:%d: Bad # of args to attset", + yaz_log(LOG_WARN, "%s:%d: Bad # of args to attset", file, lineno); continue; } name = argv[1]; if (!(attset = data1_get_attset (dh, name))) { - logf(LOG_WARN, "%s:%d: Couldn't find attset %s", + yaz_log(LOG_WARN, "%s:%d: Couldn't find attset %s", file, lineno, name); continue; } @@ -644,7 +569,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) int type = 0; if (argc < 2) { - logf(LOG_WARN, "%s:%d: Bad # of args to tagset", + yaz_log(LOG_WARN, "%s:%d: Bad # of args to tagset", file, lineno); continue; } @@ -654,7 +579,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) *tagset_childp = data1_read_tagset (dh, name, type); if (!(*tagset_childp)) { - logf(LOG_WARN, "%s:%d: Couldn't load tagset %s", + yaz_log(LOG_WARN, "%s:%d: Couldn't load tagset %s", file, lineno, name); continue; } @@ -666,14 +591,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) if (argc != 2) { - logf(LOG_WARN, "%s:%d: Bad # of args in varset", + yaz_log(LOG_WARN, "%s:%d: Bad # of args in varset", file, lineno); continue; } name = argv[1]; if (!(res->varset = data1_read_varset (dh, name))) { - logf(LOG_WARN, "%s:%d: Couldn't load Varset %s", + yaz_log(LOG_WARN, "%s:%d: Couldn't load Varset %s", file, lineno, name); continue; } @@ -684,7 +609,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) if (argc != 3) { - logf(LOG_WARN, "%s:%d: Bad # of args in esetname", + yaz_log(LOG_WARN, "%s:%d: Bad # of args in esetname", file, lineno); continue; } @@ -699,7 +624,7 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) (*esetpp)->spec = 0; else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname))) { - logf(LOG_WARN, "%s:%d: Espec-1 read failed for %s", + yaz_log(LOG_WARN, "%s:%d: Espec-1 read failed for %s", file, lineno, fname); continue; } @@ -711,14 +636,14 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) if (argc != 2) { - logf(LOG_WARN, "%s:%d: Bad # of args for maptab", + yaz_log(LOG_WARN, "%s:%d: Bad # of args for maptab", file, lineno); continue; } name = argv[1]; if (!(*maptabp = data1_read_maptab (dh, name))) { - logf(LOG_WARN, "%s:%d: Couldn't load maptab %s", + yaz_log(LOG_WARN, "%s:%d: Couldn't load maptab %s", file, lineno, name); continue; } @@ -730,26 +655,38 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) if (argc != 2) { - logf(LOG_WARN, "%s:%d: Bad # or args for marc", + yaz_log(LOG_WARN, "%s:%d: Bad # or args for marc", file, lineno); continue; } name = argv[1]; if (!(*marcp = data1_read_marctab (dh, name))) { - logf(LOG_WARN, "%s:%d: Couldn't read marctab %s", + yaz_log(LOG_WARN, "%s:%d: Couldn't read marctab %s", file, lineno, name); continue; } marcp = &(*marcp)->next; } + else if (!strcmp(cmd, "encoding")) + { + if (argc != 2) + { + yaz_log(LOG_WARN, "%s:%d: Bad # or args for encoding", + file, lineno); + continue; + } + res->encoding = nmem_strdup (data1_nmem_get(dh), argv[1]); + } else { - logf(LOG_WARN, "%s:%d: Unknown directive '%s'", file, lineno, cmd); + yaz_log(LOG_WARN, "%s:%d: Unknown directive '%s'", file, + lineno, cmd); continue; } } - fclose(f); + if (f) + fclose(f); for (cur_elements = res->sub_elements; cur_elements; cur_elements = cur_elements->next) @@ -758,6 +695,6 @@ data1_absyn *data1_read_absyn (data1_handle dh, const char *file) res->main_elements = cur_elements->elements; fix_element_ref (dh, res, cur_elements->elements); } - logf (LOG_DEBUG, "%s: data1_read_absyn end", file); + yaz_log (LOG_DEBUG, "%s: data1_read_absyn end", file); return res; }