2 * Copyright (c) 1995-1998, Index Data.
3 * See the file LICENSE for details.
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.22 1998-10-13 16:09:47 adam
8 * Added support for arbitrary OID's for tagsets, schemas and attribute sets.
9 * Added support for multiple attribute set references and tagset references
10 * from an abstract syntax file.
11 * Fixed many bad logs-calls in routines that read the various
12 * specifications regarding data1 (*.abs,*.att,...) and made the messages
13 * consistent whenever possible.
14 * Added extra 'lineno' argument to function readconf_line.
16 * Revision 1.21 1998/06/09 13:55:07 adam
19 * Revision 1.20 1998/05/18 13:07:02 adam
20 * Changed the way attribute sets are handled by the retrieval module.
21 * Extended Explain conversion / schema.
22 * Modified server and client to work with ASN.1 compiled protocol handlers.
24 * Revision 1.19 1998/03/05 08:15:32 adam
25 * Implemented data1_add_insert_taggeddata utility which is more flexible
26 * than data1_insert_taggeddata.
28 * Revision 1.18 1998/02/27 14:08:04 adam
29 * Added const to some char pointer arguments.
30 * Reworked data1_read_node so that it doesn't create a tree with
31 * pointers to original "SGML"-buffer.
33 * Revision 1.17 1998/02/11 11:53:34 adam
34 * Changed code so that it compiles as C++.
36 * Revision 1.16 1997/12/18 10:51:30 adam
37 * Implemented sub-trees feature for schemas - including forward
40 * Revision 1.15 1997/12/09 16:18:16 adam
41 * Work on EXPLAIN schema. First implementation of sub-schema facility
44 * Revision 1.14 1997/10/31 12:20:09 adam
45 * Improved memory debugging for xmalloc/nmem.c. References to NMEM
46 * instead of ODR in ESPEC-1 handling in source d1_espec.c.
47 * Bug fix: missing fclose in data1_read_espec1.
49 * Revision 1.13 1997/10/27 13:54:18 adam
50 * Changed structure field in data1 node to be simple string which
51 * is "unknown" to the retrieval system itself.
53 * Revision 1.12 1997/09/17 12:10:34 adam
56 * Revision 1.11 1997/09/05 09:50:55 adam
57 * Removed global data1_tabpath - uses data1_get_tabpath() instead.
59 * Revision 1.10 1997/05/14 06:54:01 adam
62 * Revision 1.9 1997/02/19 14:46:15 adam
63 * The "all" specifier only affects elements that are indexed (and not
66 * Revision 1.8 1997/01/02 10:47:59 quinn
67 * Added optional, physical ANY
69 * Revision 1.7 1996/06/10 08:56:01 quinn
72 * Revision 1.6 1996/05/31 13:52:21 quinn
73 * Fixed uninitialized variable for local tags in abstract syntax.
75 * Revision 1.5 1996/05/09 07:27:43 quinn
76 * Multiple local attributes values supported.
78 * Revision 1.4 1996/05/01 12:45:28 quinn
79 * Support use of local tag names in abs file.
81 * Revision 1.3 1995/11/01 16:34:55 quinn
82 * Making data1 look for tables in data1_tabpath
84 * Revision 1.2 1995/11/01 13:54:44 quinn
87 * Revision 1.1 1995/11/01 11:56:06 quinn
88 * Added Retrieval (data management) functions en masse.
/* Capacity of the per-level link-pointer stack (ppl) in data1_read_absyn. */
#define D1_MAX_NESTING 128
104 struct data1_absyn_cache_info
108 data1_absyn_cache next;
111 struct data1_attset_cache_info
114 data1_attset *attset;
115 data1_attset_cache next;
118 data1_absyn *data1_absyn_search (data1_handle dh, const char *name)
120 data1_absyn_cache p = *data1_absyn_cache_get (dh);
124 if (!strcmp (name, p->name))
131 void data1_absyn_trav (data1_handle dh, void *handle,
132 void (*fh)(data1_handle dh, void *h, data1_absyn *a))
134 data1_absyn_cache p = *data1_absyn_cache_get (dh);
138 (*fh)(dh, handle, p->absyn);
143 data1_absyn *data1_absyn_add (data1_handle dh, const char *name)
146 NMEM mem = data1_nmem_get (dh);
148 data1_absyn_cache p = (data1_absyn_cache)nmem_malloc (mem, sizeof(*p));
149 data1_absyn_cache *pp = data1_absyn_cache_get (dh);
151 sprintf(fname, "%s.abs", name);
152 p->absyn = data1_read_absyn (dh, fname);
153 p->name = nmem_strdup (mem, name);
159 data1_absyn *data1_get_absyn (data1_handle dh, const char *name)
163 if (!(absyn = data1_absyn_search (dh, name)))
164 absyn = data1_absyn_add (dh, name);
168 data1_attset *data1_attset_search_name (data1_handle dh, const char *name)
170 data1_attset_cache p = *data1_attset_cache_get (dh);
174 if (!strcmp (name, p->name))
181 data1_attset *data1_attset_search_id (data1_handle dh, int id)
183 data1_attset_cache p = *data1_attset_cache_get (dh);
187 if (id == p->attset->reference)
194 data1_attset *data1_attset_add (data1_handle dh, const char *name)
196 char fname[512], aname[512];
197 NMEM mem = data1_nmem_get (dh);
198 data1_attset *attset;
200 strcpy (aname, name);
201 sprintf(fname, "%s.att", name);
202 attset = data1_read_attset (dh, fname);
206 attset = data1_read_attset (dh, name);
207 if (attset && (cp = strrchr (aname, '.')))
211 logf (LOG_WARN|LOG_ERRNO, "Couldn't load attribute set %s", name);
214 data1_attset_cache p = (data1_attset_cache)
215 nmem_malloc (mem, sizeof(*p));
216 data1_attset_cache *pp = data1_attset_cache_get (dh);
218 attset->name = p->name = nmem_strdup (mem, aname);
226 data1_attset *data1_get_attset (data1_handle dh, const char *name)
228 data1_attset *attset;
230 if (!(attset = data1_attset_search_name (dh, name)))
231 attset = data1_attset_add (dh, name);
235 data1_esetname *data1_getesetbyname(data1_handle dh, data1_absyn *a,
240 for (r = a->esetnames; r; r = r->next)
241 if (!data1_matchstr(r->name, name))
246 data1_element *data1_getelementbytagname (data1_handle dh, data1_absyn *abs,
247 data1_element *parent,
253 r = abs->main_elements;
255 r = parent->children;
256 assert (abs->main_elements);
257 for (; r; r = r->next)
261 for (n = r->tag->names; n; n = n->next)
262 if (!data1_matchstr(tagname, n->name))
268 data1_element *data1_getelementbyname (data1_handle dh, data1_absyn *absyn,
272 assert (absyn->main_elements);
273 for (r = absyn->main_elements; r; r = r->next)
274 if (!data1_matchstr(r->name, name))
280 void fix_element_ref (data1_handle dh, data1_absyn *absyn, data1_element *e)
282 for (; e; e = e->next)
287 fix_element_ref (dh, absyn, e->children);
291 data1_sub_elements *sub_e = absyn->sub_elements;
292 while (sub_e && strcmp (e->sub_name, sub_e->name))
295 e->children = sub_e->elements;
297 logf (LOG_WARN, "Unresolved reference to sub-elements %s",
303 data1_absyn *data1_read_absyn (data1_handle dh, const char *file)
305 data1_sub_elements *cur_elements = NULL;
306 data1_absyn *res = 0;
308 data1_element **ppl[D1_MAX_NESTING];
309 data1_esetname **esetpp;
310 data1_maptab **maptabp;
311 data1_marctab **marcp;
312 data1_termlist *all = 0;
313 data1_attset_child **attset_childp;
314 data1_tagset **tagset_childp;
318 char *argv[50], line[512];
320 if (!(f = yaz_path_fopen(data1_get_tabpath (dh), file, "r")))
322 logf(LOG_WARN|LOG_ERRNO, "Couldn't open %s", file);
326 res = (data1_absyn *) nmem_malloc(data1_nmem_get(dh), sizeof(*res));
328 res->reference = VAL_NONE;
330 tagset_childp = &res->tagset;
332 res->attset = data1_empty_attset (dh);
333 attset_childp = &res->attset->children;
337 esetpp = &res->esetnames;
339 maptabp = &res->maptabs;
343 res->sub_elements = NULL;
344 res->main_elements = NULL;
346 while ((argc = readconf_line(f, &lineno, line, 512, argv, 50)))
349 if (!strcmp(cmd, "elm"))
351 data1_element *new_element;
353 char *p, *sub_p, *path, *name, *termlists;
359 logf(LOG_WARN, "%s:%d: Bad # of args to elm", file, lineno);
368 cur_elements = (data1_sub_elements *)
369 nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
370 cur_elements->next = res->sub_elements;
371 cur_elements->elements = NULL;
372 cur_elements->name = "main";
373 res->sub_elements = cur_elements;
376 ppl[level] = &cur_elements->elements;
383 if ((e = strchr(p, '/')))
390 logf(LOG_WARN, "%s:%d: Bad level increase", file, lineno);
395 new_element = *ppl[level] = (data1_element *)
396 nmem_malloc(data1_nmem_get(dh), sizeof(*new_element));
397 new_element->next = new_element->children = 0;
398 new_element->tag = 0;
399 new_element->termlists = 0;
400 new_element->sub_name = 0;
402 tp = &new_element->termlists;
403 ppl[level] = &new_element->next;
404 ppl[level+1] = &new_element->children;
406 /* consider subtree (if any) ... */
407 if ((sub_p = strchr (p, ':')) && sub_p[1])
410 new_element->sub_name =
411 nmem_strdup (data1_nmem_get(dh), sub_p);
413 /* well-defined tag */
414 if (sscanf(p, "(%d,%d)", &type, &value) == 2)
418 logf(LOG_WARN, "%s:%d: No tagset loaded", file, lineno);
422 if (!(new_element->tag = data1_gettagbynum (dh, res->tagset,
425 logf(LOG_WARN, "%s:%d: Couldn't find tag %s in tagset",
435 new_element->tag = (data1_tag *)
436 nmem_malloc(data1_nmem_get (dh),
437 sizeof(*new_element->tag));
438 nt->which = DATA1T_string;
439 nt->value.string = nmem_strdup(data1_nmem_get (dh), p);
440 nt->names = (data1_name *)
441 nmem_malloc(data1_nmem_get(dh),
442 sizeof(*new_element->tag->names));
443 nt->names->name = nt->value.string;
445 nt->kind = DATA1K_string;
451 logf(LOG_WARN, "%s:%d: Bad element", file, lineno);
455 /* parse termList definitions */
458 new_element->termlists = 0;
461 assert (res->attset);
464 char attname[512], structure[512];
467 if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname,
471 "%s:%d: Syntax error in termlistspec '%s'",
477 strcpy(attname, name);
478 *tp = (data1_termlist *)
479 nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
481 if (!((*tp)->att = data1_getattbyname(dh, res->attset,
485 "%s:%d: Couldn't find att '%s' in attset",
486 file, lineno, attname);
490 if (r < 2) /* is the structure qualified? */
491 (*tp)->structure = "w";
494 (*tp)->structure = (char *)
495 nmem_malloc (data1_nmem_get (dh),
496 strlen(structure)+1);
497 strcpy ((*tp)->structure, structure);
501 while ((p = strchr(p, ',')) && *(++p));
502 *tp = all; /* append any ALL entries to the list */
504 new_element->name = nmem_strdup(data1_nmem_get (dh), name);
506 else if (!strcmp(cmd, "section"))
512 logf(LOG_WARN, "%s:%d: Bad # of args to section",
518 cur_elements = (data1_sub_elements *)
519 nmem_malloc(data1_nmem_get(dh), sizeof(*cur_elements));
520 cur_elements->next = res->sub_elements;
521 cur_elements->elements = NULL;
522 cur_elements->name = nmem_strdup (data1_nmem_get(dh), name);
523 res->sub_elements = cur_elements;
526 ppl[level] = &cur_elements->elements;
528 else if (!strcmp(cmd, "all"))
531 data1_termlist **tp = &all;
535 logf(LOG_WARN, "%s:%d: Too many 'all' directives - ignored",
542 logf(LOG_WARN, "%s:%d: Bad # of args to 'all' directive",
547 assert (res->attset);
550 char attname[512], structure[512];
553 if (!(r = sscanf(p, "%511[^:,]:%511[^,]", attname,
556 logf(LOG_WARN, "%s:%d: Syntax error in termlistspec",
561 *tp = (data1_termlist *)
562 nmem_malloc(data1_nmem_get(dh), sizeof(**tp));
564 data1_getattbyname (dh, res->attset, attname)))
566 logf(LOG_WARN, "%s:%d: Couldn't find att '%s' in attset",
567 file, lineno, attname);
571 if (r < 2) /* is the structure qualified? */
572 (*tp)->structure = "w";
576 (char *)nmem_malloc (data1_nmem_get (dh),
577 strlen(structure)+1);
578 strcpy ((*tp)->structure, structure);
583 while ((p = strchr(p, ',')) && *(++p));
585 else if (!strcmp(cmd, "name"))
589 logf(LOG_WARN, "%s:%d: Bad # of args to name directive",
593 res->name = nmem_strdup(data1_nmem_get(dh), argv[1]);
595 else if (!strcmp(cmd, "reference"))
601 logf(LOG_WARN, "%s:%d: Bad # of args to reference",
606 if ((res->reference = oid_getvalbyname(name)) == VAL_NONE)
608 logf(LOG_WARN, "%s:%d: Unknown tagset ref '%s'",
613 else if (!strcmp(cmd, "attset"))
616 data1_attset *attset;
620 logf(LOG_WARN, "%s:%d: Bad # of args to attset",
625 if (!(attset = data1_get_attset (dh, name)))
627 logf(LOG_WARN, "%s:%d: Couldn't find attset %s",
631 *attset_childp = (data1_attset_child *)
632 nmem_malloc (data1_nmem_get(dh), sizeof(**attset_childp));
633 (*attset_childp)->child = attset;
634 (*attset_childp)->next = 0;
635 attset_childp = &(*attset_childp)->next;
637 else if (!strcmp(cmd, "tagset"))
642 logf(LOG_WARN, "%s:%d: Bad # of args to tagset",
647 if (!(*tagset_childp = data1_read_tagset (dh, name)))
649 logf(LOG_WARN, "%s:%d: Couldn't load tagset %s",
653 tagset_childp = &(*tagset_childp)->next;
655 else if (!strcmp(cmd, "varset"))
661 logf(LOG_WARN, "%s:%d: Bad # of args in varset",
666 if (!(res->varset = data1_read_varset (dh, name)))
668 logf(LOG_WARN, "%s:%d: Couldn't load Varset %s",
673 else if (!strcmp(cmd, "esetname"))
679 logf(LOG_WARN, "%s:%d: Bad # of args in esetname",
686 *esetpp = (data1_esetname *)
687 nmem_malloc(data1_nmem_get(dh), sizeof(**esetpp));
688 (*esetpp)->name = nmem_strdup(data1_nmem_get(dh), name);
692 else if (!((*esetpp)->spec = data1_read_espec1 (dh, fname)))
694 logf(LOG_WARN, "%s:%d: Espec-1 read failed for %s",
695 file, lineno, fname);
698 esetpp = &(*esetpp)->next;
700 else if (!strcmp(cmd, "maptab"))
706 logf(LOG_WARN, "%s:%d: Bad # of args for maptab",
711 if (!(*maptabp = data1_read_maptab (dh, name)))
713 logf(LOG_WARN, "%s:%d: Couldn't load maptab %s",
717 maptabp = &(*maptabp)->next;
719 else if (!strcmp(cmd, "marc"))
725 logf(LOG_WARN, "%s:%d: Bad # or args for marc",
730 if (!(*marcp = data1_read_marctab (dh, name)))
732 logf(LOG_WARN, "%s:%d: Couldn't read marctab %s",
736 marcp = &(*marcp)->next;
740 logf(LOG_WARN, "%s:%d: Unknown directive '%s'", file, lineno, cmd);
746 for (cur_elements = res->sub_elements; cur_elements;
747 cur_elements = cur_elements->next)
749 if (!strcmp (cur_elements->name, "main"))
750 res->main_elements = cur_elements->elements;
751 fix_element_ref (dh, res, cur_elements->elements);
753 logf (LOG_DEBUG, "%s: data1_read_absyn end", file);