From: Sebastian Hammer Date: Wed, 1 Nov 1995 11:56:06 +0000 (+0000) Subject: Added Retrieval (data management) functions en masse. X-Git-Tag: YAZ.1.8~889 X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=commitdiff_plain;h=cea212fba995de3fa0d6d6723d828f6fd7ca31df Added Retrieval (data management) functions en masse. --- diff --git a/retrieval/Makefile b/retrieval/Makefile new file mode 100644 index 0000000..84f90a6 --- /dev/null +++ b/retrieval/Makefile @@ -0,0 +1,46 @@ +# $Id: Makefile,v 1.1 1995-11-01 11:56:06 quinn Exp $ + +SHELL=/bin/sh +RANLIB=ranlib +INCLUDE=-I../include -I. +CFLAGS=-g -Wall -pedantic -ansi +DEFS=$(INCLUDE) +LIB=../lib/data1.a +PO = d1_read.o d1_attset.o d1_tagset.o d1_absyn.o d1_grs.o d1_grs_key.o \ + attribute.o d1_matchstr.o d1_sutrs.o d1_varset.o d1_espec.o \ + d1_doespec.o d1_map.o d1_marc.o +CPP=$(CC) -E + +all: $(LIB) + +tst: tst.c $(LIB) + $(CC) -g -o tst $(INCLUDE) tst.c \ + ../lib/data1.a $(YAZ)/lib/libyaz.a + +$(LIB): $(PO) + rm -f $(LIB) + ar qc $(LIB) $(PO) + $(RANLIB) $(LIB) + +.c.o: + $(CC) -c $(DEFS) $(CFLAGS) $< + +clean: + rm -f *.[oa] $(TPROG) core mon.out gmon.out errlist test isam-test issh + +depend: depend2 + +depend1: + mv Makefile Makefile.tmp + sed '/^#Depend/q' Makefile + $(CPP) $(INCLUDE) -M *.c >>Makefile + -rm Makefile.tmp + +depend2: + $(CPP) $(INCLUDE) -M *.c >.depend + +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + +#Depend --- DOT NOT DELETE THIS LINE diff --git a/retrieval/d1_absyn.c b/retrieval/d1_absyn.c new file mode 100644 index 0000000..f93d39b --- /dev/null +++ b/retrieval/d1_absyn.c @@ -0,0 +1,373 @@ +/* + * Copyright (c) 1995, Index Data. + * See the file LICENSE for details. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: d1_absyn.c,v $ + * Revision 1.1 1995-11-01 11:56:06 quinn + * Added Retrieval (data management) functions en masse. 
+ * + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include "data1.h" + +#define D1_MAX_NESTING 128 +#define DATA1_MAX_SYNTAXES 30 /* max no of syntaxes to handle in one session */ + +static struct /* cache of abstract syntaxes */ +{ + char *name; + data1_absyn *absyn; +} syntaxes[DATA1_MAX_SYNTAXES] = {{0,0}}; + +data1_absyn *data1_get_absyn(char *name) +{ + char fname[512]; + int i; + + for (i = 0; syntaxes[i].name; i++) + if (!strcmp(name, syntaxes[i].name)) + return syntaxes[i].absyn; + + if (i >= DATA1_MAX_SYNTAXES - 1) + { + logf(LOG_WARN, "Too many abstract syntaxes loaded"); + return 0; + } + sprintf(fname, "%s.abs", name); + if (!(syntaxes[i].absyn = data1_read_absyn(fname))) + return 0; + if (!(syntaxes[i].name = xmalloc(strlen(name)+1))) + abort(); + strcpy(syntaxes[i].name, name); + syntaxes[i+1].name = 0; + return syntaxes[i].absyn; +} + +data1_esetname *data1_getesetbyname(data1_absyn *a, char *name) +{ + data1_esetname *r; + + for (r = a->esetnames; r; r = r->next) + if (!data1_matchstr(r->name, name)) + return r; + return 0; +} + +data1_element *data1_getelementbytagname(data1_absyn *abs, + data1_element *parent, char *tagname) +{ + data1_element *r; + + if (!parent) + r = abs->elements; + else + r = parent->children; + for (; r; r = r->next) + { + data1_name *n; + + for (n = r->tag->names; n; n = n->next) + if (!data1_matchstr(tagname, n->name)) + return r; + } + return 0; +} + +data1_element *data1_getelementbyname(data1_absyn *absyn, char *name) +{ + data1_element *r; + + for (r = absyn->elements; r; r = r->next) + if (!data1_matchstr(r->name, name)) + return r; + return 0; +} + +data1_absyn *data1_read_absyn(char *file) +{ + char line[512], *r, cmd[512], args[512]; + data1_absyn *res = 0; + FILE *f; + data1_element **ppl[D1_MAX_NESTING]; + data1_esetname **esetpp; + data1_maptab **maptabp; + data1_marctab **marcp; + int level = 0; + + if (!(f = fopen(file, "r"))) + { + logf(LOG_WARN|LOG_ERRNO, 
"%s", file); + return 0; + } + + if (!(res = xmalloc(sizeof(*res)))) + abort(); + res->name = 0; + res->reference = VAL_NONE; + res->tagset = 0; + res->attset = 0; + res->varset = 0; + res->esetnames = 0; + res->maptabs = 0; + maptabp = &res->maptabs; + res->marc = 0; + marcp = &res->marc; + res->elements = 0; + ppl[0] = &res->elements; + esetpp = &res->esetnames; + + for (;;) + { + while ((r = fgets(line, 512, f))) + { + while (*r && isspace(*r)) + r++; + if (*r && *r != '#') + break; + } + if (!r) + { + fclose(f); + return res; + } + if (sscanf(r, "%s %[^\n]", cmd, args) < 2) + *args = '\0'; + if (!strcmp(cmd, "elm")) + { + data1_element *new; + int i; + char path[512], name[512], att[512], *p; + int type, value; + + if (sscanf(args, "%s %s %s", path, name, att) < 3) + { + logf(LOG_WARN, "Bad # of args to elm in %s: '%s'", + file, args); + fclose(f); + return 0; + } + p = path; + for (i = 0;; i++) + { + char *e; + + if ((e = strchr(p, '/'))) + p = e+1; + else + break; + } + if (i > level + 1) + { + logf(LOG_WARN, "Bad level inc in %s in '%'", file, args); + fclose(f); + return 0; + } + level = i; + if (!(new = *ppl[level] = xmalloc(sizeof(*new)))) + abort; + new ->next = new->children = 0; + ppl[level] = &new->next; + ppl[level+1] = &new->children; + + if (sscanf(p, "(%d,%d)", &type, &value) < 2) + { + logf(LOG_WARN, "Malformed element '%s' in %s", p, file); + fclose(f); + return 0; + } + if (!res->tagset) + { + logf(LOG_WARN, "No tagset loaded in %s", file); + fclose(f); + return 0; + } + if (!(new->tag = data1_gettagbynum(res->tagset, type, value))) + { + logf(LOG_WARN, "Couldn't find tag %s in tagset in %s", + p, file); + fclose(f); + return 0; + } + if (*att == '!') + strcpy(att, name); + if (*att == '-') + new->att = 0; + else + { + if (!res->attset) + { + logf(LOG_WARN, "No attset loaded in %s", file); + fclose(f); + return 0; + } + if (!(new->att = data1_getattbyname(res->attset, att))) + { + logf(LOG_WARN, "Couldn't find att '%s' in attset", att); + 
fclose(f); + return 0; + } + } + if (!(new->name = xmalloc(strlen(name)+1))) + abort(); + strcpy(new->name, name); + } + else if (!strcmp(cmd, "name")) + { + char name[512]; + + if (!sscanf(args, "%s", name)) + { + logf(LOG_WARN, "%s malformed name directive in %s", file); + fclose(f); + return 0; + } + if (!(res->name = xmalloc(strlen(args)+1))) + abort(); + strcpy(res->name, name); + } + else if (!strcmp(cmd, "reference")) + { + char name[512]; + + if (!sscanf(args, "%s", name)) + { + logf(LOG_WARN, "%s malformed reference directive in %s", file); + fclose(f); + return 0; + } + if ((res->reference = oid_getvalbyname(name)) == VAL_NONE) + { + logf(LOG_WARN, "Unknown tagset ref '%s' in %s", name, file); + fclose(f); + return 0; + } + } + else if (!strcmp(cmd, "attset")) + { + char name[512]; + + if (!sscanf(args, "%s", name)) + { + logf(LOG_WARN, "%s malformed attset directive in %s", file); + fclose(f); + return 0; + } + if (!(res->attset = data1_read_attset(name))) + { + logf(LOG_WARN, "Attset failed in %s", file); + fclose(f); + return 0; + } + } + else if (!strcmp(cmd, "tagset")) + { + char name[512]; + + if (!sscanf(args, "%s", name)) + { + logf(LOG_WARN, "%s malformed tagset directive in %s", file); + fclose(f); + return 0; + } + if (!(res->tagset = data1_read_tagset(name))) + { + logf(LOG_WARN, "Tagset failed in %s", file); + fclose(f); + return 0; + } + } + else if (!strcmp(cmd, "varset")) + { + char name[512]; + + if (!sscanf(args, "%s", name)) + { + logf(LOG_WARN, "%s malformed varset directive in %s", file); + fclose(f); + return 0; + } + if (!(res->varset = data1_read_varset(name))) + { + logf(LOG_WARN, "Varset failed in %s", file); + fclose(f); + return 0; + } + } + else if (!strcmp(cmd, "esetname")) + { + char name[512], fname[512]; + + if (sscanf(args, "%s %s", name, fname) != 2) + { + logf(LOG_WARN, "%s: Two arg's required for esetname directive"); + fclose(f); + return 0; + } + *esetpp = xmalloc(sizeof(**esetpp)); + (*esetpp)->name = 
xmalloc(strlen(name)+1); + strcpy((*esetpp)->name, name); + if (*fname == '@') + (*esetpp)->spec = 0; + else if (!((*esetpp)->spec = data1_read_espec1(fname, 0))) + { + logf(LOG_WARN, "%s: Espec-1 read failed", file); + fclose(f); + return 0; + } + (*esetpp)->next = 0; + esetpp = &(*esetpp)->next; + } + else if (!strcmp(cmd, "maptab")) + { + char name[512]; + + if (sscanf(args, "%s", name) != 1) + { + logf(LOG_WARN, "%s: One argument required for maptab directive", + file); + continue; + } + if (!(*maptabp = data1_read_maptab(name))) + { + logf(LOG_WARN, "%s: Failed to read maptab."); + continue; + } + maptabp = &(*maptabp)->next; + } + else if (!strcmp(cmd, "marc")) + { + char name[512]; + + if (sscanf(args, "%s", name) != 1) + { + logf(LOG_WARN, "%s: One argument required for marc directive", + file); + continue; + } + if (!(*marcp = data1_read_marctab(name))) + { + logf(LOG_WARN, "%s: Failed to read marctab."); + continue; + } + marcp = &(*marcp)->next; + } + else + { + logf(LOG_WARN, "Unknown directive '%s' in %s", cmd, file); + fclose(f); + return 0; + } + } +} diff --git a/retrieval/d1_attset.c b/retrieval/d1_attset.c new file mode 100644 index 0000000..44dec99 --- /dev/null +++ b/retrieval/d1_attset.c @@ -0,0 +1,169 @@ +/* + * Copyright (c) 1995, Index Data. + * See the file LICENSE for details. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: d1_attset.c,v $ + * Revision 1.1 1995-11-01 11:56:07 quinn + * Added Retrieval (data management) functions en masse. 
+ * + * + */ + +#include +#include +#include +#include + +#include +#include +#include + +data1_att *data1_getattbyname(data1_attset *s, char *name) +{ + data1_att *r; + + for (; s; s = s->next) + { + /* scan local set */ + for (r = s->atts; r; r = r->next) + if (!strcmp(r->name, name)) + return r; + /* scan included sets */ + if (s->children && (r = data1_getattbyname(s->children, name))) + return r; + } + return 0; +} + +data1_attset *data1_read_attset(char *file) +{ + char line[512], *r, cmd[512], args[512]; + data1_attset *res = 0, **childp; + data1_att **attp; + FILE *f; + + if (!(f = fopen(file, "r"))) + { + logf(LOG_WARN|LOG_ERRNO, "%s", file); + return 0; + } + + if (!(res = xmalloc(sizeof(*res)))) + abort(); + res->name = 0; + res->reference = VAL_NONE; + res->ordinal = -1; + res->atts = 0; + res->children = res->next = 0; + childp = &res->children; + attp = &res->atts; + + for (;;) + { + while ((r = fgets(line, 512, f))) + { + while (*r && isspace(*r)) + r++; + if (*r && *r != '#') + break; + } + if (!r) + { + return res; + fclose(f); + } + if (sscanf(r, "%s %[^\n]", cmd, args) < 2) + *args = '\0'; + if (!strcmp(cmd, "att")) + { + int num, local, rr; + char name[512]; + data1_att *t; + + if ((rr = sscanf(args, "%d %s %d", &num, name, &local)) < 2) + { + logf(LOG_WARN, "Not enough arguments to att in '%s' in %s", + args, file); + fclose(f); + return 0; + } + if (rr < 3) + local = num; + if (!(t = *attp = xmalloc(sizeof(*t)))) + abort(); + t->parent = res; + if (!(t->name = xmalloc(strlen(name)+1))) + abort(); + strcpy(t->name, name); + t->value = num; + t->local = local; + t->next = 0; + attp = &t->next; + } + else if (!strcmp(cmd, "name")) + { + char name[512]; + + if (!sscanf(args, "%s", name)) + { + logf(LOG_WARN, "%s malformed name directive in %s", file); + fclose(f); + return 0; + } + if (!(res->name = xmalloc(strlen(args)+1))) + abort(); + strcpy(res->name, name); + } + else if (!strcmp(cmd, "reference")) + { + char name[512]; + + if (!sscanf(args, 
"%s", name)) + { + logf(LOG_WARN, "%s malformed reference directive in %s", file); + fclose(f); + return 0; + } + if ((res->reference = oid_getvalbyname(name)) == VAL_NONE) + { + logf(LOG_WARN, "Unknown attset name '%s' in %s", name, file); + fclose(f); + return 0; + } + } + else if (!strcmp(cmd, "ordinal")) + { + if (!sscanf(args, "%d", &res->ordinal)) + { + logf(LOG_WARN, "%s malformed ordinal directive in %s", file); + fclose(f); + return 0; + } + } + else if (!strcmp(cmd, "include")) + { + char name[512]; + + if (!sscanf(args, "%s", name)) + { + logf(LOG_WARN, "%s malformed reference directive in %s", file); + fclose(f); + return 0; + } + if (!(*childp = data1_read_attset(name))) + { + logf(LOG_WARN, "Inclusion failed in %s", file); + fclose(f); + return 0; + } + childp = &(*childp)->next; + } + else + { + logf(LOG_WARN, "Unknown directive '%s' in %s", cmd, file); + fclose(f); + return 0; + } + } +} diff --git a/retrieval/d1_doespec.c b/retrieval/d1_doespec.c new file mode 100644 index 0000000..bc16d07 --- /dev/null +++ b/retrieval/d1_doespec.c @@ -0,0 +1,134 @@ +/* + * Copyright (c) 1995, Index Data. + * See the file LICENSE for details. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: d1_doespec.c,v $ + * Revision 1.1 1995-11-01 11:56:07 quinn + * Added Retrieval (data management) functions en masse. 
+ * + * + */ + + +#include +#include +#include +#include "data1.h" + +static int match_children(data1_node *n, Z_ETagUnit **t, int num); + +static int match_children_wildpath(data1_node *n, Z_ETagUnit **t, int num) +{return 0;} + +static int match_children_here(data1_node *n, Z_ETagUnit **t, int num) +{ + int counter = 0, hits = 0; + data1_node *c; + Z_ETagUnit *tp = *t; + Z_Occurrences *occur; + + for (c = n->child; c ; c = c->next) + { + data1_tag *tag = 0; + + if (c->which != DATA1N_tag) + return 0; + + if (tp->which == Z_ETagUnit_specificTag) + { + Z_SpecificTag *want = tp->u.specificTag; + occur = want->occurrences; + if (c->u.tag.element) + tag = c->u.tag.element->tag; + if (*want->tagType != (tag ? tag->tagset->type : 3)) + continue; + if (want->tagValue->which == Z_StringOrNumeric_numeric) + { + if (!tag || tag->which != DATA1T_numeric) + continue; + if (*want->tagValue->u.numeric != tag->value.numeric) + continue; + } + else + { + assert(want->tagValue->which == Z_StringOrNumeric_string); + if (tag && tag->which != DATA1T_string) + continue; + if (data1_matchstr(want->tagValue->u.string, + tag ? tag->value.string : c->u.tag.tag)) + continue; + } + } + else + occur = tp->u.wildThing; + + /* + * Ok, so we have a matching tag. Are we within occurrences-range? + */ + counter++; + if (occur && occur->which == Z_Occurrences_last) + { + logf(LOG_WARN, "Can't do occurrences=last (yet)"); + return 0; + } + if (!occur || occur->which == Z_Occurrences_all || + (occur->which == Z_Occurrences_values && counter >= + *occur->u.values->start)) + { + if (match_children(c, t + 1, num - 1)) + { + c->u.tag.node_selected = 1; + hits++; + /* + * have we looked at enough children? 
+ */ + if (!occur || (occur->which == Z_Occurrences_values && + counter - *occur->u.values->start >= + *occur->u.values->howMany - 1)) + return hits; + } + } + } + return hits; +} + +static void mark_children(data1_node *n) +{ + data1_node *c; + + for (c = n->child; c; c = c->next) + { + if (c->which != DATA1N_tag) + continue; + c->u.tag.node_selected = 1; + mark_children(c); + } +} + +static int match_children(data1_node *n, Z_ETagUnit **t, int num) +{ + if (!num) + { + mark_children(n); /* Here there shall be variants, like, dude */ + return 1; + } + switch (t[0]->which) + { + case Z_ETagUnit_wildThing: + case Z_ETagUnit_specificTag: return match_children_here(n, t, num); + case Z_ETagUnit_wildPath: return match_children_wildpath(n, t, num); + default: + abort(); + } +} + +int data1_doespec1(data1_node *n, Z_Espec1 *e) +{ + int i; + + for (i = 0; i < e->num_elements; i++) + match_children(n, e->elements[i]->u.simpleElement->path->tags, + e->elements[i]->u.simpleElement->path->num_tags); + return 0; +} diff --git a/retrieval/d1_espec.c b/retrieval/d1_espec.c new file mode 100644 index 0000000..181e256 --- /dev/null +++ b/retrieval/d1_espec.c @@ -0,0 +1,177 @@ +/* + * Copyright (c) 1995, Index Data. + * See the file LICENSE for details. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: d1_espec.c,v $ + * Revision 1.1 1995-11-01 11:56:07 quinn + * Added Retrieval (data management) functions en masse. + * + * + */ + + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +/* + * Read an element-set specification from a file. If !o, use xmalloc for + * memory allocation. 
+ */ +Z_Espec1 *data1_read_espec1(char *file, ODR o) +{ + FILE *f; + int argc, size_esn = 0; + char *argv[50], line[512]; + Z_Espec1 *res = odr_malloc(o, sizeof(*res)); + + if (!(f = fopen(file, "r"))) + { + logf(LOG_WARN|LOG_ERRNO, "%s", file); + return 0; + } + + res->num_elementSetNames = 0; + res->elementSetNames = 0; + res->defaultVariantSetId = 0; + res->defaultVariantRequest = 0; + res->defaultTagType = 0; + res->num_elements = 0; + res->elements = 0; + + while ((argc = readconf_line(f, line, 512, argv, 50))) + if (!strcmp(argv[0], "elementsetnames")) + { + int nnames = argc-1, i; + + if (!nnames) + { + logf(LOG_WARN, "%s: Empty elementsetnames directive", + file); + continue; + } + + res->elementSetNames = odr_malloc(o, sizeof(char*)*nnames); + for (i = 0; i < nnames; i++) + { + res->elementSetNames[i] = odr_malloc(o, strlen(argv[i+1])+1); + strcpy(res->elementSetNames[i], argv[i+1]); + } + res->num_elementSetNames = nnames; + } + else if (!strcmp(argv[0], "defaultvariantsetid")) + { + if (argc != 2 || !(res->defaultVariantSetId = + odr_getoidbystr(o, argv[1]))) + { + logf(LOG_WARN, "%s: Bad defaultvariantsetid directive", file); + continue; + } + } + else if (!strcmp(argv[0], "defaulttagtype")) + { + if (argc != 2) + { + logf(LOG_WARN, "%s: Bad defaulttagtype directive", file); + continue; + } + res->defaultTagType = odr_malloc(o, sizeof(int)); + *res->defaultTagType = atoi(argv[1]); + } + else if (!strcmp(argv[0], "defaultvariantrequest")) + { + abort(); + } + else if (!strcmp(argv[0], "simpleelement")) + { + Z_ElementRequest *er; + Z_SimpleElement *se; + Z_ETagPath *tp; + char *path = argv[1]; + char *ep; + int num, i = 0; + + if (!res->elements) + res->elements = odr_malloc(o, size_esn = 24*sizeof(*er)); + else if (res->num_elements >= size_esn) + { + size_esn *= 2; + res->elements = o ? 
odr_malloc(o, size_esn) : + xrealloc(res->elements, size_esn); + } + if (argc < 2) + { + logf(LOG_WARN, "%s: Empty simpleelement directive", file); + continue; + } + res->elements[res->num_elements++] = er = + odr_malloc(o, sizeof(*er)); + er->which = Z_ERequest_simpleElement; + er->u.simpleElement = se = odr_malloc(o, sizeof(*se)); + se->variantRequest = 0; + se->path = tp = odr_malloc(o, sizeof(*tp)); + tp->num_tags = 0; + for (num = 1, ep = path; (ep = strchr(ep, '/')); num++, ep++); + tp->tags = odr_malloc(o, sizeof(Z_ETagUnit*)*num); + + for ((ep = strchr(path, '/')) ; path ; (void)((path = ep) && + (ep = strchr(path, '/')))) + { + int type; + char value[512]; + Z_ETagUnit *u; + + if (ep) + ep++; + + assert(itags[tp->num_tags++] = u = odr_malloc(o, sizeof(*u)); + if (sscanf(path, "(%d,%[^)])", &type, value) == 2) + { + int numval; + Z_SpecificTag *t; + char *valp = value; + int force_string = 0; + + if (*valp == '\'') + { + valp++; + force_string = 1; + } + u->which = Z_ETagUnit_specificTag; + u->u.specificTag = t = odr_malloc(o, sizeof(*t)); + t->tagType = odr_malloc(o, sizeof(*t->tagType)); + *t->tagType = type; + t->tagValue = odr_malloc(o, sizeof(*t->tagValue)); + if (!force_string && (numval = atoi(valp))) + { + t->tagValue->which = Z_StringOrNumeric_numeric; + t->tagValue->u.numeric = odr_malloc(o, sizeof(int)); + *t->tagValue->u.numeric = numval; + } + else + { + t->tagValue->which = Z_StringOrNumeric_string; + t->tagValue->u.string = odr_malloc(o, strlen(valp)+1); + strcpy(t->tagValue->u.string, valp); + } + t->occurrences = 0; /* for later */ + } + } + } + else + { + logf(LOG_WARN, "%s: Unknown directive %s", file, argv[0]); + fclose(f); + return 0; + } + + return res; +} diff --git a/retrieval/d1_grs.c b/retrieval/d1_grs.c new file mode 100644 index 0000000..650f9c7 --- /dev/null +++ b/retrieval/d1_grs.c @@ -0,0 +1,255 @@ +/* + * Copyright (c) 1995, Index Data. + * See the file LICENSE for details. 
+ * Sebastian Hammer, Adam Dickmeiss + * + * $Log: d1_grs.c,v $ + * Revision 1.1 1995-11-01 11:56:07 quinn + * Added Retrieval (data management) functions en masse. + * + * + */ + +#include +#include + +#include +#include + +#include "data1.h" + +#define D1_VARIANTARRAY 20 /* fixed max length on sup'd variant-list. Lazy me */ + +Z_GenericRecord *data1_nodetogr(data1_node *n, int select, ODR o); + +static Z_ElementMetaData *get_ElementMetaData(ODR o) +{ + Z_ElementMetaData *r = odr_malloc(o, sizeof(*r)); + + r->seriesOrder = 0; + r->usageRight = 0; + r->num_hits = 0; + r->hits = 0; + r->displayName = 0; + r->num_supportedVariants = 0; + r->supportedVariants = 0; + r->message = 0; + r->elementDescriptor = 0; + r->surrogateFor = 0; + r->surrogateElement = 0; + r->other = 0; + + return r; +} + +/* + * N should point to the *last* (leaf) triple in a sequence. Construct a variant + * from each of the triples beginning (ending) with 'n', up to the + * nearest parent tag. num should equal the number of triples in the + * sequence. + */ +static Z_Variant *make_variant(data1_node *n, int num, ODR o) +{ + Z_Variant *v = odr_malloc(o, sizeof(*v)); + data1_node *p; + + v->globalVariantSetId = 0; + v->num_triples = num; + v->triples = odr_malloc(o, sizeof(Z_Triple*) * num); + + /* + * cycle back up through the tree of variants + * (traversing exactly 'level' variants). 
+ */ + for (p = n, num--; p && num >= 0; p = p->parent, num--) + { + Z_Triple *t; + + assert(p->which == DATA1N_variant); + t = v->triples[num] = odr_malloc(o, sizeof(*t)); + t->variantSetId = 0; + t->class = odr_malloc(o, sizeof(int)); + *t->class = p->u.variant.type->class->class; + t->type = odr_malloc(o, sizeof(int)); + *t->type = p->u.variant.type->type; + + switch (p->u.variant.type->datatype) + { + case DATA1K_string: + t->which = Z_Triple_internationalString; + t->value.internationalString = odr_malloc(o, + strlen(p->u.variant.value)+1); + strcpy(t->value.internationalString, p->u.variant.value); + break; + default: + logf(LOG_WARN, "Unable to handle value for variant %s", + p->u.variant.type->name); + return 0; + } + } + return v; +} + +/* + * Traverse the variant children of n, constructing a supportedVariant list. + */ +static int traverse_triples(data1_node *n, int level, Z_ElementMetaData *m, + ODR o) +{ + data1_node *c; + + for (c = n->child; c; c = c->next) + if (c->which == DATA1N_data && level) + { + if (!m->supportedVariants) + m->supportedVariants = odr_malloc(o, sizeof(Z_Variant*) * + D1_VARIANTARRAY); + else if (m->num_supportedVariants >= D1_VARIANTARRAY) + { + logf(LOG_WARN, "Too many variants (D1_VARIANTARRAY==%d)", + D1_VARIANTARRAY); + return -1; + } + + if (!(m->supportedVariants[m->num_supportedVariants++] = + make_variant(n, level, o))) + return -1; + } + else if (c->which == DATA1N_variant) + if (traverse_triples(c, level+1, m, o) < 0) + return -1; + return 0; +} + +static Z_ElementData *nodetoelementdata(data1_node *n, int select, int leaf, + ODR o) +{ + Z_ElementData *res = odr_malloc(o, sizeof(*res)); + + if (!n) + { + res->which = Z_ElementData_elementNotThere; + res->u.elementNotThere = ODR_NULLVAL; + } + else if (n->which == DATA1N_data && (leaf || n->parent->num_children == 1)) + { + switch (n->u.data.what) + { + case DATA1I_num: + res->which = Z_ElementData_numeric; + res->u.numeric = odr_malloc(o, sizeof(int)); + 
*res->u.numeric = atoi(n->u.data.data); + break; + case DATA1I_text: + res->which = Z_ElementData_string; + res->u.string = odr_malloc(o, n->u.data.len+1); + memcpy(res->u.string, n->u.data.data, n->u.data.len); + res->u.string[n->u.data.len] = '\0'; + break; + default: + logf(LOG_WARN, "Can't handle datatype."); + return 0; + } + } + else + { + res->which = Z_ElementData_subtree; + if (!(res->u.subtree = data1_nodetogr(n->parent, select, o))) + return 0; + } + return res; +} + +static Z_TaggedElement *nodetotaggedelement(data1_node *n, int select, ODR o) +{ + Z_TaggedElement *res = odr_malloc(o, sizeof(*res)); + data1_tag *tag = 0; + data1_node *data; + int leaf; + + if (n->which == DATA1N_tag) + { + if (n->u.tag.element) + tag = n->u.tag.element->tag; + data = n->child; + leaf = 0; + } + else if (n->which == DATA1N_data || n->which == DATA1N_variant) + { + if (!(tag = data1_gettagbyname(n->root->u.root.absyn->tagset, + "wellKnown"))) + { + logf(LOG_WARN, "Unable to locate tag for 'wellKnown'"); + return 0; + } + data = n; + leaf = 1; + } + else + { + logf(LOG_WARN, "Bad data."); + return 0; + } + + res->tagType = odr_malloc(o, sizeof(int)); + *res->tagType = tag ? 
tag->tagset->type : 3; + res->tagValue = odr_malloc(o, sizeof(Z_StringOrNumeric)); + if (tag && tag->which == DATA1T_numeric) + { + res->tagValue->which = Z_StringOrNumeric_numeric; + res->tagValue->u.numeric = odr_malloc(o, sizeof(int)); + *res->tagValue->u.numeric = tag->value.numeric; + } + else + { + char *tagstr; + + if (tag) /* well-known tag */ + tagstr = tag->value.string; + else /* tag local to this file */ + tagstr = n->u.tag.tag; + + res->tagValue->which = Z_StringOrNumeric_string; + res->tagValue->u.string = odr_malloc(o, strlen(tagstr)+1); + strcpy(res->tagValue->u.string, tagstr); + } + res->tagOccurrence = 0; + res->appliedVariant = 0; + res->metaData = 0; + if (n->which == DATA1N_variant || (data && data->which == + DATA1N_variant && data->parent->num_children == 1)) + { + int nvars = 0; + + res->metaData = get_ElementMetaData(o); + if (traverse_triples(data, 0, res->metaData, o) < 0) + return 0; + while (data && data->which == DATA1N_variant) + { + nvars++; + data = data->child; + } + res->appliedVariant = make_variant(data->parent, nvars-1, o); + } + if (!(res->content = nodetoelementdata(data, select, leaf, o))) + return 0; + return res; +} + +Z_GenericRecord *data1_nodetogr(data1_node *n, int select, ODR o) +{ + Z_GenericRecord *res = odr_malloc(o, sizeof(*res)); + data1_node *c; + + res->elements = odr_malloc(o, sizeof(Z_TaggedElement *) * n->num_children); + res->num_elements = 0; + for (c = n->child; c; c = c->next) + { + if (c->which == DATA1N_tag && select && !c->u.tag.node_selected) + continue; + if (!(res->elements[res->num_elements++] = + nodetotaggedelement(c, select, o))) + return 0; + } + return res; +} diff --git a/retrieval/d1_map.c b/retrieval/d1_map.c new file mode 100644 index 0000000..cbc2e7c --- /dev/null +++ b/retrieval/d1_map.c @@ -0,0 +1,315 @@ +/* + * Copyright (c) 1995, Index Data. + * See the file LICENSE for details. 
+ * Sebastian Hammer, Adam Dickmeiss + * + * $Log: d1_map.c,v $ + * Revision 1.1 1995-11-01 11:56:08 quinn + * Added Retrieval (data management) functions en masse. + * + * + */ + +#include +#include +#include + +#include +#include +#include +#include + +#include "data1.h" +#include "d1_map.h" + +data1_maptab *data1_read_maptab(char *file) +{ + data1_maptab *res = xmalloc(sizeof(*res)); + FILE *f; + int argc; + char *argv[50], line[512]; + data1_mapunit **mapp; + + if (!(f = fopen(file, "r"))) + { + logf(LOG_WARN|LOG_ERRNO, "%s", file); + return 0; + } + + res->name = 0; + res->target_absyn_ref = ODR_NONE; + res->map = 0; + mapp = &res->map; + res->next = 0; + + while ((argc = readconf_line(f, line, 512, argv, 50))) + if (!strcmp(argv[0], "targetref")) + { + if (argc != 2) + { + logf(LOG_WARN, "%s: one argument required for targetref", + file); + continue; + } + if ((res->target_absyn_ref = oid_getvalbyname(argv[1])) == ODR_NONE) + { + logf(LOG_WARN, "%s: Unknown reference '%s'", file, argv[1]); + continue; + } + } + else if (!strcmp(argv[0], "targetname")) + { + if (argc != 2) + { + logf(LOG_WARN, "%s: one argument required for targetref", + file); + continue; + } + res->target_absyn_name = xmalloc(strlen(argv[1])+1); + strcpy(res->target_absyn_name, argv[1]); + } + else if (!strcmp(argv[0], "name")) + { + if (argc != 2) + { + logf(LOG_WARN, "%s: one argument required for name", + file); + continue; + } + res->name = xmalloc(strlen(argv[1])+1); + strcpy(res->name, argv[1]); + } + else if (!strcmp(argv[0], "map")) + { + data1_maptag **mtp; + char *ep, *path = argv[2]; + + if (argc < 3) + { + logf(LOG_WARN, "%s: At least 2 arguments required for map", + file); + continue; + } + *mapp = xmalloc(sizeof(**mapp)); + (*mapp)->next = 0; + if (argc > 3 && !data1_matchstr(argv[3], "nodata")) + (*mapp)->no_data = 1; + else + (*mapp)->no_data = 0; + (*mapp)->source_element_name = xmalloc(strlen(argv[1])+1); + strcpy((*mapp)->source_element_name, argv[1]); + mtp = 
&(*mapp)->target_path; + if (*path == '/') + path++; + for (ep = strchr(path, '/'); path; (void)((path = ep) && + (ep = strchr(path, '/')))) + { + int type, np; + char valstr[512], parm[512]; + int numval; + + if (ep) + ep++; + if ((np = sscanf(path, "(%d,%[^)]):%[^/]", &type, valstr, + parm)) < 2) + { + logf(LOG_WARN, "%s: Syntax error in map directive: %s", + file, argv[2]); + fclose(f); + return 0; + } + *mtp = xmalloc(sizeof(**mtp)); + (*mtp)->next = 0; + (*mtp)->type = type; + if (np > 2 && !data1_matchstr(parm, "new")) + (*mtp)->new_field = 1; + else + (*mtp)->new_field = 0; +#if 0 + if ((numval = atoi(valstr))) + { + (*mtp)->which = D1_MAPTAG_numeric; + (*mtp)->value.numeric = numval; + } + else + { +#endif + (*mtp)->which = D1_MAPTAG_string; + (*mtp)->value.string = xmalloc(strlen(valstr)+1); + strcpy((*mtp)->value.string, valstr); +#if 0 + } +#endif + mtp = &(*mtp)->next; + } + mapp = &(*mapp)->next; + } + else + logf(LOG_WARN, "%s: Unknown directive '%s'", argv[0]); + + fclose(f); + return res; +} + +/* + * Locate node with givel elementname. + * NOTE: This is stupid - we don't find repeats this way. + */ +static data1_node *find_node(data1_node *p, char *elementname) +{ + data1_node *c, *r; + + for (c = p->child; c; c = c->next) + if (c->which == DATA1N_tag && c->u.tag.element && + !data1_matchstr(c->u.tag.element->name, elementname)) + return c; + else if ((r = find_node(c, elementname))) + return r; + return 0; +} + +/* + * See if the node n is equivalent to the tag t. 
+ */ +static int tagmatch(data1_node *n, data1_maptag *t) +{ + if (n->which != DATA1N_tag) + return 0; + if (n->u.tag.element) + { + if (n->u.tag.element->tag->tagset->type != t->type) + return 0; + if (n->u.tag.element->tag->which == DATA1T_numeric) + { + if (t->which != D1_MAPTAG_numeric) + return 0; + if (n->u.tag.element->tag->value.numeric != t->value.numeric) + return 0; + } + else + { + if (t->which != D1_MAPTAG_string) + return 0; + if (data1_matchstr(n->u.tag.element->tag->value.string, + t->value.string)) + return 0; + } + } + else /* local tag */ + { + char str[10]; + + if (t->type != 3) + return 0; + if (t->which == D1_MAPTAG_numeric) + sprintf(str, "%d", t->value.numeric); + else + strcpy(str, t->value.string); + if (data1_matchstr(n->u.tag.tag, str)) + return 0; + } + return 1; +} + +static int map_children(data1_node *n, data1_maptab *map, data1_node *res) +{ + data1_node *c; + data1_mapunit *m; + /* + * locate each source element in turn. + */ + for (c = n->child; c; c = c->next) + if (c->which == DATA1N_tag && c->u.tag.element) + { + for (m = map->map; m; m = m->next) + { + if (!data1_matchstr(m->source_element_name, + c->u.tag.element->name)) + { + data1_node *pn = res; + data1_maptag *mt; + data1_node *l; + + /* + * process the target path specification. + */ + for (mt = m->target_path; mt; mt = mt->next) + { + int match = 0; + data1_node *cur; + data1_node *last; + + for (l = pn->child, last = 0; l; last = l, l = l->next) + if (!match) + match = tagmatch(l, mt); + else + if (!tagmatch(l, mt)) + break; + if (!match || !mt->next || mt->new_field) + { + cur = data1_mk_node(); + cur->which = DATA1N_tag; + cur->u.tag.element = 0; + cur->u.tag.tag = mt->value.string; + cur->u.tag.node_selected = 0; + cur->parent = pn; + cur->root = pn->root; + if (!last) + { + cur->next = pn->child; + pn->child = cur; + } + else + { + cur->next = last->next; + last->next = cur; + } + pn->num_children++; + } + else + cur = last ? 
last : pn->child; + + if (mt ->next) + pn = cur; + else if (!m->no_data) + { + cur->child = c->child; + cur->num_children = c->num_children; + c->child = 0; + c->num_children = 0; + } + } + } + } + if (map_children(c, map, res) < 0) + return -1; + } + return 0; +} + +/* + * Create a (possibly lossy) copy of the given record based on the + * table. The new copy will refer back to the data of the original record, + * which should not be discarded during the lifetime of the copy. + */ +data1_node *data1_map_record(data1_node *n, data1_maptab *map) +{ + data1_node *res = data1_mk_node(); + + res->which = DATA1N_root; + res->u.root.type = map->target_absyn_name; + if (!(res->u.root.absyn = data1_get_absyn(map->target_absyn_name))) + { + logf(LOG_WARN, "%s: Failed to load target absyn '%s'", + map->name, map->target_absyn_name); + } + res->parent = 0; + res->root = res; + + if (map_children(n, map, res) < 0) + { + data1_free_tree(res); + return 0; + } + return res; +} diff --git a/retrieval/d1_marc.c b/retrieval/d1_marc.c new file mode 100644 index 0000000..2ac1842 --- /dev/null +++ b/retrieval/d1_marc.c @@ -0,0 +1,266 @@ +/* + * Copyright (c) 1995, Index Data. + * See the file LICENSE for details. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: d1_marc.c,v $ + * Revision 1.1 1995-11-01 11:56:08 quinn + * Added Retrieval (data management) functions en masse. 
+ *
+ *
+ */
+
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+#include "data1.h"
+
+#define ISO2709_RS 035
+#define ISO2709_FS 036
+#define ISO2709_IDFS 037
+
+/*
+ * Read a MARC table description from 'file'.
+ *
+ * The table starts out with built-in defaults and is then modified by
+ * the directives name, reference, length-data-entry, length-starting,
+ * length-implementation and future-use. A malformed or unknown directive
+ * is logged and skipped. Returns the new table, or 0 if the file cannot
+ * be opened.
+ */
+data1_marctab *data1_read_marctab(char *file)
+{
+    FILE *f;
+    data1_marctab *res;
+    char line[512], *argv[50];
+    int argc;
+
+    if (!(f = fopen(file, "r")))
+    {
+        logf(LOG_WARN|LOG_ERRNO, "%s", file);
+        return 0;
+    }
+    /* allocate only once the file is open, so nothing leaks on failure */
+    res = xmalloc(sizeof(*res));
+
+    res->name = 0;
+    res->reference = VAL_NONE;
+    res->next = 0;
+    res->length_data_entry = 4;
+    res->length_starting = 5;
+    res->length_implementation = 0;
+    strcpy(res->future_use, "4");
+
+    strcpy(res->record_status, "n");
+    /*
+     * nodetomarc() copies 4 bytes of implementation_codes and 3 bytes of
+     * user_systems into the leader, so the whole of each field is
+     * blank-filled instead of just its first character.
+     */
+    memset(res->implementation_codes, ' ',
+        sizeof(res->implementation_codes) - 1);
+    res->implementation_codes[sizeof(res->implementation_codes) - 1] = '\0';
+    res->indicator_length = 2;
+    res->identifier_length = 2;
+    memset(res->user_systems, ' ', sizeof(res->user_systems) - 1);
+    res->user_systems[0] = 'z';
+    res->user_systems[sizeof(res->user_systems) - 1] = '\0';
+
+    while ((argc = readconf_line(f, line, 512, argv, 50)))
+        if (!strcmp(argv[0], "name"))
+        {
+            if (argc != 2)
+            {
+                logf(LOG_WARN, "%s: Bad name directive", file);
+                continue;
+            }
+            res->name = xmalloc(strlen(argv[1])+1);
+            strcpy(res->name, argv[1]);
+        }
+        else if (!strcmp(argv[0], "reference"))
+        {
+            if (argc != 2)
+            {
+                logf(LOG_WARN, "%s: Bad reference directive", file);
+                continue;
+            }
+            if ((res->reference = oid_getvalbyname(argv[1])) == VAL_NONE)
+            {
+                logf(LOG_WARN, "%s: Unknown tagset ref '%s'", file,
+                    argv[1]);
+                continue;
+            }
+        }
+        else if (!strcmp(argv[0], "length-data-entry"))
+        {
+            if (argc != 2)
+            {
+                logf(LOG_WARN, "%s: Bad length-data-entry", file);
+                continue;
+            }
+            res->length_data_entry = atoi(argv[1]);
+        }
+        else if (!strcmp(argv[0], "length-starting"))
+        {
+            if (argc != 2)
+            {
+                logf(LOG_WARN, "%s: Bad length-starting", file);
+                continue;
+            }
+            res->length_starting = atoi(argv[1]);
+        }
+        else if (!strcmp(argv[0], "length-implementation"))
+        {
+            if (argc != 2)
+            {
+                logf(LOG_WARN, "%s: Bad length-implementation", file);
+                continue;
+            }
+            res->length_implementation = atoi(argv[1]);
+        }
+        else if (!strcmp(argv[0], "future-use"))
+        {
+            if (argc != 2)
+            {
+                logf(LOG_WARN, "%s: Bad future-use", file);
+                continue;
+            }
+            /*
+             * Only the first character is ever written to the leader;
+             * store it and keep the field terminated.
+             */
+            res->future_use[0] = argv[1][0];
+            res->future_use[1] = '\0';
+        }
+        else
+            logf(LOG_WARN, "%s: Bad directive '%s'", file, argv[0]);
+
+    fclose(f);
+    return res;
+}
+
+/*
+ * Locate some data under this node. This routine should handle variants
+ * prettily.
+ *
+ * Follows the first-child chain from n until a data node is reached.
+ * Returns a pointer to the text and stores its length in *len; the text
+ * is not necessarily NUL-terminated. If no text data node is found, a
+ * fixed placeholder string is returned instead.
+ */
+static char *get_data(data1_node *n, int *len)
+{
+    char *r;
+
+    while (n->which != DATA1N_data && n->child)
+        n = n->child;
+    if (n->which != DATA1N_data || n->u.data.what != DATA1I_text)
+    {
+        r = "[Structured/included data]";
+        *len = strlen(r);
+        return r;
+    }
+
+    *len = n->u.data.len;
+    return n->u.data.data;
+}
+
+/*
+ * Store val at p as a decimal number of exactly len characters, padded
+ * with zeros on the left. No terminating NUL is written. If val has more
+ * than len digits, only the low-order len digits are stored. For
+ * len == 1, val must be in the range 0..9.
+ */
+static void memint (char *p, int val, int len)
+{
+    char digits[32];            /* ample room for any int */
+    int n;
+
+    if (len <= 0)
+        return;
+    if (len == 1)
+    {
+        *p = val + '0';
+        return;
+    }
+    sprintf (digits, "%08d", val);
+    n = strlen(digits);
+    if (len <= n)
+        memcpy (p, digits + n - len, len);
+    else
+    {
+        /* field is wider than the formatted number: pad on the left */
+        memset (p, '0', len - n);
+        memcpy (p + len - n, digits, n);
+    }
+}
+
+/*
+ * Format the children of n as an ISO2709 record, using the parameters
+ * in p. Each child of n is a field and each child of a field is a
+ * subfield; both must be tag nodes. If 'selected' is nonzero, fields
+ * whose node_selected flag is clear are left out.
+ *
+ * The record is built in *buf, which is allocated or enlarged as needed
+ * (*size holds its current size). Returns the length of the record, or
+ * -1 if the tree is not composed of tag nodes as described.
+ */
+static int nodetomarc(data1_marctab *p, data1_node *n, int selected,
+    char **buf, int *size)
+{
+    int len = 26;               /* leader + the two terminators */
+    int dlen;
+    int base_address = 25;      /* leader + directory terminator */
+    int entry_p, data_p;
+    char *op;
+    data1_node *field, *subf;
+
+    /* first pass: work out the record length and the base address */
+    for (field = n->child; field; field = field->next)
+    {
+        if (field->which != DATA1N_tag)
+        {
+            logf(LOG_WARN, "Malformed field composition for marc output.");
+            return -1;
+        }
+        if (selected && !field->u.tag.node_selected)
+            continue;
+        len += 4 + p->length_data_entry + p->length_starting
+            + p->length_implementation;
+        base_address += 3 + p->length_data_entry + p->length_starting
+            + p->length_implementation;
+        if (strncmp(field->u.tag.tag, "00", 2))
+            len += p->indicator_length;      /* this is fairly bogus */
+        for (subf = field->child; subf; subf = subf->next)
+        {
+            if (subf->which != DATA1N_tag)
+            {
+                logf(LOG_WARN,
+                    "Malformed subfield composition for marc output.");
+                return -1;
+            }
+            if (strncmp(field->u.tag.tag, "00", 2))
+                len += p->identifier_length;
+            get_data(subf, &dlen);
+            len += dlen;
+        }
+    }
+
+    if (!*buf)
+        *buf = xmalloc(*size = len);
+    else if (*size <= len)
+        *buf = xrealloc(*buf, *size = len);
+
+    /* the 24-byte leader */
+    op = *buf;
+    memint (op, len, 5);
+    memcpy (op+5, p->record_status, 1);
+    memcpy (op+6, p->implementation_codes, 4);
+    memint (op+10, p->indicator_length, 1);
+    memint (op+11, p->identifier_length, 1);
+    memint (op+12, base_address, 5);
+    memcpy (op+17, p->user_systems, 3);
+    memint (op+20, p->length_data_entry, 1);
+    memint (op+21, p->length_starting, 1);
+    memint (op+22, p->length_implementation, 1);
+    memcpy (op+23, p->future_use, 1);
+
+    entry_p = 24;
+    data_p = base_address;
+
+    /* second pass: write the directory entries and the field data */
+    for (field = n->child; field; field = field->next)
+    {
+        int data_0 = data_p;
+        if (selected && !field->u.tag.node_selected)
+            continue;
+        if (strncmp(field->u.tag.tag, "00", 2))   /* bogus */
+        {
+            /* blank indicators, whatever the configured length is */
+            memset (op + data_p, ' ', p->indicator_length);
+            data_p += p->indicator_length;
+        }
+        for (subf = field->child; subf; subf = subf->next)
+        {
+            char *data;
+
+            if (strncmp(field->u.tag.tag, "00", 2))
+            {
+                op[data_p] = ISO2709_IDFS;
+                memcpy (op + data_p+1, subf->u.tag.tag, p->identifier_length-1);
+                data_p += p->identifier_length;
+            }
+            data = get_data(subf, &dlen);
+            memcpy (op + data_p, data, dlen);
+            data_p += dlen;
+        }
+        op[data_p++] = ISO2709_FS;
+
+        memcpy (op + entry_p, field->u.tag.tag, 3);
+        entry_p += 3;
+        memint (op + entry_p, data_p - data_0, p->length_data_entry);
+        entry_p += p->length_data_entry;
+        memint (op + entry_p, data_0 - base_address, p->length_starting);
+        entry_p += p->length_starting;
+        /*
+         * NOTE(review): the implementation-defined part of the entry is
+         * skipped without being written - confirm what it should hold
+         * when length_implementation is not 0.
+         */
+        entry_p += p->length_implementation;
+    }
+    op[entry_p++] = ISO2709_FS;
+    assert (entry_p == base_address);
+    op[data_p++] = ISO2709_RS;
+    assert (data_p == len);
+    return len;
+}
+
+/*
+ * Return the record below n in ISO2709 format, see nodetomarc(). The
+ * buffer is owned by this function and is reused on the next call.
+ * *len receives the record length, or -1 on failure, in which case the
+ * returned pointer must not be used.
+ */
+char *data1_nodetomarc(data1_marctab *p, data1_node *n, int selected, int *len)
+{
+    static char *buf = 0;
+    static int size = 0;
+
+    *len = nodetomarc(p, n, selected, &buf, &size);
+    return buf;
+}
diff --git a/retrieval/d1_read.c b/retrieval/d1_read.c
new file mode 100644
index 0000000..c951c97
--- /dev/null
+++ b/retrieval/d1_read.c
@@ -0,0 +1,431 @@
+/*
+ * Copyright (c) 1995, Index Data.
+ * See the file LICENSE for details.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: d1_read.c,v $
+ * Revision 1.1 1995-11-01 11:56:09 quinn
+ * Added Retrieval (data management) functions en masse.
+ *
+ * Revision 1.14 1995/10/30 12:40:55 quinn
+ * Fixed a couple of bugs.
+ *
+ * Revision 1.13 1995/10/25 16:00:47 quinn
+ * USMARC support is now almost operational
+ *
+ * Revision 1.12 1995/10/16 14:02:55 quinn
+ * Changes to support element set names and espec1
+ *
+ * Revision 1.11 1995/10/13 16:05:08 quinn
+ * Adding Espec1-processing
+ *
+ * Revision 1.10 1995/10/11 14:53:44 quinn
+ * Work on variants.
+ *
+ * Revision 1.9 1995/10/06 16:56:50 quinn
+ * Fixed ranked result.
+ *
+ * Revision 1.8 1995/10/06 16:44:13 quinn
+ * Work on attribute set mapping, etc.
+ *
+ * Revision 1.7 1995/10/06 12:58:35 quinn
+ * SUTRS support
+ *
+ * Revision 1.6 1995/10/04 09:29:49 quinn
+ * Adjustments to support USGS test data
+ *
+ * Revision 1.5 1995/10/03 17:56:43 quinn
+ * Fixing GRS code.
+ *
+ * Revision 1.4 1995/10/02 15:53:19 quinn
+ * Work
+ *
+ * Revision 1.3 1995/10/02 14:55:21 quinn
+ * *** empty log message ***
+ *
+ * Revision 1.2 1995/09/14 15:18:13 quinn
+ * Work
+ *
+ * Revision 1.1 1995/09/12 11:24:30 quinn
+ * Beginning to add code for structured records.
+ *
+ *
+ */
+
+#include
+#include
+#include
+
+#include
+#include
+#include "data1.h"
+
+/* released nodes, linked through their 'next' member */
+static data1_node *freelist = 0;
+
+/*
+ * get the tag which is the immediate parent of this node (this may mean
+ * traversing intermediate things like variants and stuff).
+ *
+ * The search starts at n itself and follows the parent links. Returns 0
+ * if the root (or the end of the chain) is reached first.
+ */
+data1_node *get_parent_tag(data1_node *n)
+{
+    for (; n && n->which != DATA1N_root; n = n->parent)
+        if (n->which == DATA1N_tag)
+            return n;
+    return 0;
+}
+
+/*
+ * Return a node, taken from the free list if possible and allocated
+ * otherwise. Only the next, child, parent and num_children members are
+ * reset; every other member (which, root, u, ...) must be set by the
+ * caller.
+ */
+data1_node *data1_mk_node(void)
+{
+    data1_node *r;
+
+    if ((r = freelist))
+        freelist = r->next;
+    else
+        if (!(r = xmalloc(sizeof(*r))))
+            abort();
+    r->next = r->child = r->parent = 0;
+    r->num_children = 0;
+    return r;
+}
+
+/*
+ * Put a single node on the free list for reuse by data1_mk_node().
+ */
+static void fr_node(data1_node *n)
+{
+    n->next = freelist;
+    freelist = n;
+}
+
+/*
+ * Release the node t and everything below it. A null t is ignored.
+ */
+void data1_free_tree(data1_node *t)
+{
+    data1_node *p, *pn;
+
+    if (!t)
+        return;
+    p = t->child;
+    while (p)
+    {
+        pn = p->next;   /* fr_node() reuses the 'next' member */
+        data1_free_tree(p);
+        p = pn;
+    }
+    fr_node(t);
+}
+
+/*
+ * Insert a tagged node into the record 'root' as the first child of the
+ * node 'at' (which should be the root or a tag itself). The tag is looked
+ * up by name among the top-level elements of the root's abstract syntax.
+ * Returns a pointer to the new, empty data node below the tag, which can
+ * then be filled in by the caller, or 0 if the tag name is unknown.
+ */
+data1_node *data1_insert_taggeddata(data1_node *root, data1_node *at,
+    char *tagname)
+{
+    data1_node *tagn = data1_mk_node();
+    data1_node *datn;
+
+    tagn->which = DATA1N_tag;
+    tagn->line = -1;
+    tagn->u.tag.tag = 0;
+    tagn->u.tag.node_selected = 0;
+    if (!(tagn->u.tag.element = data1_getelementbytagname(root->u.root.absyn,
+        0, tagname)))
+    {
+        fr_node(tagn);
+        return 0;
+    }
+    /* data1_mk_node() does not reset 'root', so set it on both nodes */
+    tagn->root = root;
+    tagn->child = datn = data1_mk_node();
+    tagn->num_children = 1;
+    datn->parent = tagn;
+    datn->root = root;
+    datn->which = DATA1N_data;
+    tagn->next = at->child;
+    tagn->parent = at;
+    at->child = tagn;
+    at->num_children++;
+    return datn;
+}
+
+/*
+ * Ugh. Sometimes functions just grow and grow on you. This one reads a
+ * 'node' and its children.
+ */
+
+/*
+ * *buf points into a writable, NUL-terminated text buffer and is advanced
+ * past whatever is consumed. The buffer is modified in place: tags are
+ * NUL-terminated where they stand and runs of whitespace in text are
+ * squeezed to single blanks. The nodes refer directly to this buffer.
+ *
+ * parent is the node being filled in (0 at the top level), *line is a
+ * running line counter used in messages, and absyn is the abstract syntax
+ * of the record (0 at the top level, where it is looked up from the name
+ * of the first tag).
+ *
+ * Returns the node read, or 0 at the end of the input, at an end tag, at
+ * a tag that has to be handled by an enclosing level, or on error.
+ */
+data1_node *data1_read_node(char **buf, data1_node *parent, int *line,
+    data1_absyn *absyn)
+{
+    data1_node *res;
+
+    while (**buf && isspace((unsigned char) **buf))
+    {
+        if (**buf == '\n')
+            (*line)++;
+        (*buf)++;
+    }
+    if (!**buf)
+        return 0;
+
+    if (**buf == '<') /* beginning of tag */
+    {
+        char *tag = (*buf) + 1;
+        char *args = 0;
+        char *t = tag;
+        data1_node **pp;
+        data1_element *elem = 0;
+
+        for (; *t && *t != '>' && !isspace((unsigned char) *t); t++);
+        if (*t != '>' && !isspace((unsigned char) *t))
+        {
+            logf(LOG_WARN, "d1: %d: Malformed tag", *line);
+            return 0;
+        }
+        if (isspace((unsigned char) *t)) /* the tag has arguments */
+        {
+            while (isspace((unsigned char) *t))
+                t++;
+            if (*t != '>')
+            {
+                args = t;
+                for (; *t && *t != '>'; t++);
+                if (*t != '>' && !isspace((unsigned char) *t))
+                {
+                    logf(LOG_WARN, "d1: %d: Malformed tag", *line);
+                    return 0;
+                }
+            }
+        }
+
+        /*
+         * if end-tag, see if we terminate parent. If so, consume and return.
+         * Else, return.
+         */
+        *t = '\0';
+        if (*tag == '/')
+        {
+            if (!parent)
+                return 0;
+            if (!*(tag +1) || (parent->which == DATA1N_root && !strcmp(tag + 1,
+                parent->u.root.type)) ||
+                (parent->which == DATA1N_tag && !strcmp(tag + 1,
+                parent->u.tag.tag)))
+            {
+                *buf = t + 1;
+                return 0;
+            }
+            else
+            {
+                /* not ours: restore the '>' so an outer level can see it */
+                *t = '>';
+                return 0;
+            }
+        }
+
+        if (!absyn) /* parent node - what are we? */
+        {
+            if (!(absyn = data1_get_absyn(tag)))
+            {
+                logf(LOG_WARN, "Unable to acquire abstract syntax for '%s'",
+                    tag);
+                return 0;
+            }
+            res = data1_mk_node();
+            res->which = DATA1N_root;
+            res->u.root.type = tag;
+            res->u.root.absyn = absyn;
+            res->root = res;
+            *buf = t + 1;
+        }
+        else if (!strncmp(tag, "var", 3))
+        {
+            char class[DATA1_MAX_SYMBOL], type[DATA1_MAX_SYMBOL];
+            char fmt[64];
+            data1_vartype *tp;
+            int val_offset;
+            data1_node *p;
+
+            /*
+             * Build the scan format at run time so that both %s
+             * conversions are limited to the size of their buffers.
+             * A variant tag without any arguments leaves args null
+             * and is malformed as well.
+             */
+            sprintf(fmt, "%%%ds %%%ds %%n", (int) DATA1_MAX_SYMBOL - 1,
+                (int) DATA1_MAX_SYMBOL - 1);
+            if (!args || sscanf(args, fmt, class, type, &val_offset) != 2)
+            {
+                logf(LOG_WARN, "Malformed variant triple at '%s'", tag);
+                return 0;
+            }
+            if (!(tp = data1_getvartypebyct(parent->root->u.root.absyn->varset,
+                class, type)))
+                return 0;
+
+            /*
+             * If we're the first variant in this group, create a parent var,
+             * and insert it before the current variant.
+             */
+            if (parent->which != DATA1N_variant)
+            {
+                res = data1_mk_node();
+                res->which = DATA1N_variant;
+                res->u.variant.type = 0;
+                res->u.variant.value = 0;
+                res->root = parent->root;
+                /*
+                 * *buf is left where it is, so the same tag is read
+                 * again as the first child of this new node.
+                 */
+                *t = '>';
+            }
+            else
+            {
+                /*
+                 * now determine if one of our ancestor triples is of same type.
+                 * If so, we break here. This will make the parser unwind until
+                 * we become a sibling (alternate variant) to the aforementioned
+                 * triple. It stinks that we re-parse these tags on every
+                 * iteration of this. This is a function in need of a rewrite.
+                 */
+                for (p = parent; p->which == DATA1N_variant; p = p->parent)
+                    if (p->u.variant.type == tp)
+                    {
+                        *t = '>';
+                        return 0;
+                    }
+
+                res = data1_mk_node();
+                res->which = DATA1N_variant;
+                res->root = parent->root;
+                res->u.variant.type = tp;
+                res->u.variant.value = args + val_offset;
+                *buf = t + 1;
+            }
+        }
+        else /* acquire our element in the abstract syntax */
+        {
+            data1_node *partag = get_parent_tag(parent);
+            data1_element *e = 0;
+            int localtag = 0;
+
+            if (parent->which == DATA1N_variant)
+            {
+                *t = '>';
+                return 0;
+            }
+            if (partag)
+                if (!(e = partag->u.tag.element))
+                    localtag = 1; /* our parent is a local tag */
+
+#if 0
+            if (!localtag && !(elem = data1_getelementbytagname(absyn,
+                e, tag)) && (data1_gettagbyname(absyn->tagset, tag)))
+            {
+                if (parent->which == DATA1N_root)
+                    logf(LOG_WARN, "Tag '%s' used out of context", tag);
+                *t = '>';
+                return 0;
+            }
+#else
+            elem = data1_getelementbytagname(absyn, e, tag);
+#endif
+            res = data1_mk_node();
+            res->which = DATA1N_tag;
+            res->u.tag.element = elem;
+            res->u.tag.tag = tag;
+            res->u.tag.node_selected = 0;
+            res->root = parent->root;
+            *buf = t + 1;
+        }
+
+        res->parent = parent;
+        res->num_children = 0;
+
+        pp = &res->child;
+        /*
+         * Read child nodes.
+         */
+        while ((*pp = data1_read_node(buf, res, line, absyn)))
+        {
+            res->num_children++;
+            pp = &(*pp)->next;
+        }
+    }
+    else /* != '<'... this is a body of text */
+    {
+        int len = 0;
+        char *data = *buf, *pp = *buf;
+#if 0
+        data1_node *partag = get_parent_tag(parent);
+#endif
+
+        /* text needs an enclosing node to supply its root */
+        if (!parent)
+        {
+            logf(LOG_WARN, "d1: %d: Data found outside of the root element",
+                *line);
+            return 0;
+        }
+
+        /* Determine length and remove newlines/extra blanks */
+        while (**buf && **buf != '<')
+        {
+            if (isspace((unsigned char) **buf))
+            {
+                /* squeeze the run to one blank, counting every newline */
+                *(pp++) = ' ';
+                while (isspace((unsigned char) **buf))
+                {
+                    if (**buf == '\n')
+                        (*line)++;
+                    (*buf)++;
+                }
+            }
+            else
+                *(pp++) = *((*buf)++);
+            len++;
+        }
+        /*
+         * Leading whitespace was skipped on entry, so data[0] is not a
+         * blank and this loop stops before len reaches 0.
+         */
+        while (isspace((unsigned char) data[len-1]))
+            len--;
+        res = data1_mk_node();
+        res->parent = parent;
+        res->which = DATA1N_data;
+        res->u.data.what = DATA1I_text;
+        res->u.data.len = len;
+        res->u.data.data = data;
+        res->root = parent->root;
+
+        /*
+         * if the parent is structured, we'll insert a 'wellKnown' marker
+         * in front of the data.
+         */
+#if 0
+        if (partag->u.tag.element && partag->u.tag.element->tag->kind ==
+            DATA1K_structured)
+        {
+            data1_node *wk = mk_node();
+            static data1_element wk_element = { 0, 0, 0, 0, 0};
+
+            wk->parent = partag;
+            wk->root = partag->root;
+            wk->which = DATA1N_tag;
+            wk->u.tag.tag = 0;
+            /*
+             * get well-known tagdef if required.
+             */
+            if (!wk_element.tag && !(wk_element.tag =
+                data1_gettagbynum(wk->root->u.root.absyn->tagset, 1, 19)))
+            {
+                logf(LOG_WARN,
+                    "Failed to initialize 'wellknown' tag from tagsetM");
+                return 0;
+            }
+            wk->u.tag.element = &wk_element;
+            wk->child = partag->child;
+            if (wk->child)
+                wk->child->parent = wk;
+            partag->child = wk;
+        }
+#endif
+    }
+    return res;
+}
+
+/*
+ * Read a record in the native syntax.
+ *
+ * The whole input is read through rf(fd, buffer, count), which is
+ * expected to return the number of bytes delivered, 0 at end of input
+ * and a negative value on error. The text is kept in a buffer owned by
+ * this function and reused on the next call; the nodes returned refer to
+ * that buffer. Returns the root node, or 0 on a read or parse failure.
+ */
+data1_node *data1_read_record(int (*rf)(int, char *, size_t), int fd)
+{
+    static char *buf = 0;
+    char *bp;
+    static int size;
+    int rd = 0, res;
+    int line = 0;
+
+    if (!buf && !(buf = xmalloc(size = 4096)))
+        abort();
+    for (;;)
+    {
+        if (rd + 4096 > size && !(buf = realloc(buf, size *= 2)))
+            abort();
+        if ((res = (*rf)(fd, buf + rd, 4096)) <= 0)
+        {
+            if (!res)
+            {
+                /*
+                 * The parser stops at a NUL, so terminate the text. There
+                 * is room for it: the check above guarantees at least
+                 * 4096 free bytes beyond buf + rd.
+                 */
+                buf[rd] = '\0';
+                bp = buf;
+                return data1_read_node(&bp, 0, &line, 0);
+            }
+            else
+                return 0;
+        }
+        rd += res;
+    }
+}
diff --git a/retrieval/d1_sutrs.c b/retrieval/d1_sutrs.c
new file mode 100644
index 0000000..0411616
--- /dev/null
+++ b/retrieval/d1_sutrs.c
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 1995, Index Data.
+ * See the file LICENSE for details.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: d1_sutrs.c,v $
+ * Revision 1.1 1995-11-01 11:56:09 quinn
+ * Added Retrieval (data management) functions en masse.
+ *
+ *
+ */
+
+#include
+
+#include
+
+#include "data1.h"
+
+#define NTOBUF_INDENT 2
+#define NTOBUF_MARGIN 75
+
+/*
+ * Return the length of the word at b: the number of characters up to
+ * the first whitespace character or NUL, but never more than max. The
+ * limit is needed because data items are counted, not NUL-terminated.
+ */
+static int wordlen(char *b, int max)
+{
+    int l = 0;
+
+    while (l < max && *b && !isspace((unsigned char) *b))
+        l++, b++;
+    return l;
+}
+
+/*
+ * Append n blanks to b.
+ */
+static void put_indent(WRBUF b, int n)
+{
+    while (n-- > 0)
+        wrbuf_putc(b, ' ');
+}
+
+/*
+ * Append a plain-text rendering of the children of n to b.
+ *
+ * Each tag is written on a line of its own as "name:", indented by
+ * NTOBUF_INDENT blanks per level, except that tags named "wellknown"
+ * produce no heading. Text data is written word by word: a new line is
+ * started when a word would cross NTOBUF_MARGIN, and a word too long for
+ * a line is split with a trailing '='. If 'select' is nonzero, tags
+ * whose node_selected flag is clear are skipped together with their
+ * children. 'col' is the current output column. Returns 0 on success
+ * and a negative value on failure.
+ */
+static int nodetobuf(data1_node *n, int select, WRBUF b, int indent, int col)
+{
+    data1_node *c;
+
+    for (c = n->child; c; c = c->next)
+    {
+        char *tag;
+
+        if (c->which == DATA1N_tag)
+        {
+            if (select && !c->u.tag.node_selected)
+                continue;
+            if (c->u.tag.element && c->u.tag.element->tag)
+                tag = c->u.tag.element->tag->names->name; /* first name */
+            else
+                tag = c->u.tag.tag; /* local string tag */
+            if (data1_matchstr(tag, "wellknown")) /* skip wellknown */
+            {
+                if (col)
+                    wrbuf_putc(b, '\n');
+                /* written piecewise, so the tag name may be any length */
+                put_indent(b, indent * NTOBUF_INDENT);
+                wrbuf_write(b, tag, strlen(tag));
+                wrbuf_putc(b, ':');
+                col = indent * NTOBUF_INDENT + strlen(tag) + 1;
+            }
+            if (nodetobuf(c, select, b, indent+1, col) < 0)
+                return -1;
+        }
+        else if (c->which == DATA1N_data)
+        {
+            char *p = c->u.data.data;
+            int l = c->u.data.len;
+            int first = 0;
+
+            if (c->u.data.what == DATA1I_text)
+            {
+                while (l)
+                {
+                    int wlen;
+
+                    while (l && isspace((unsigned char) *p))
+                        p++, l--;
+                    if (!l)
+                        break;
+                    /* break if we'll cross margin and word is not too long */
+                    if (col + (wlen = wordlen(p, l)) > NTOBUF_MARGIN && wlen <
+                        NTOBUF_MARGIN - indent * NTOBUF_INDENT)
+                    {
+                        wrbuf_putc(b, '\n');
+                        put_indent(b, indent * NTOBUF_INDENT);
+                        col = indent * NTOBUF_INDENT;
+                        first = 1;
+                    }
+                    /* no separating blank at the start of a fresh line */
+                    if (!first)
+                    {
+                        wrbuf_putc(b, ' ');
+                        col++;
+                    }
+                    while (l && !isspace((unsigned char) *p))
+                    {
+                        if (col > NTOBUF_MARGIN)
+                        {
+                            wrbuf_putc(b, '=');
+                            wrbuf_putc(b, '\n');
+                            put_indent(b, indent * NTOBUF_INDENT);
+                            col = indent * NTOBUF_INDENT;
+                        }
+                        wrbuf_putc(b, *p);
+                        p++;
+                        l--;
+                        col++;
+                    }
+                    first = 0;
+                }
+            }
+            else if (c->u.data.what == DATA1I_num)
+            {
+                wrbuf_putc(b, ' ');
+                wrbuf_write(b, c->u.data.data, c->u.data.len);
+            }
+        }
+    }
+    return 0;
+}
+
+/*
+ * Return area containing SUTRS-formatted data. Ownership of this data
+ * remains in this module, and the buffer is reused on next call. This may
+ * need changing.
+ *
+ * *len receives the length of the data. Returns 0 if the tree could not
+ * be formatted.
+ */
+
+char *data1_nodetobuf(data1_node *n, int select, int *len)
+{
+    static WRBUF b = 0;
+
+    if (!b)
+        b = wrbuf_alloc();
+    else
+        wrbuf_rewind(b);
+    if (nodetobuf(n, select, b, 0, 0))
+        return 0;
+    wrbuf_putc(b, '\n');
+    *len = wrbuf_len(b);
+    return wrbuf_buf(b);
+}
diff --git a/retrieval/d1_tagset.c b/retrieval/d1_tagset.c
new file mode 100644
index 0000000..794315e
--- /dev/null
+++ b/retrieval/d1_tagset.c
@@ -0,0 +1,252 @@
+/*
+ * Copyright (c) 1995, Index Data.
+ * See the file LICENSE for details.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: d1_tagset.c,v $
+ * Revision 1.1 1995-11-01 11:56:09 quinn
+ * Added Retrieval (data management) functions en masse.
+ *
+ *
+ */
+
+#include
+#include
+#include
+#include
+
+#include
+#include
+
+#include "data1.h"
+
+/*
+ * We'll probably want to add some sort of hashed index to these lookup-
+ * functions eventually.
+ */
+
+/*
+ * Map the name of a datatype, as used in tagset and varset files, to
+ * its data1_datatype value. Returns 0 if the name is not known.
+ */
+data1_datatype data1_maptype(char *t)
+{
+    static struct
+    {
+        char *tname;
+        data1_datatype type;
+    } types[] =
+    {
+        {"structured", DATA1K_structured},
+        {"string", DATA1K_string},
+        {"numeric", DATA1K_numeric},
+        {"oid", DATA1K_oid},
+        {"bool", DATA1K_bool},
+        {"generalizedtime", DATA1K_generalizedtime},
+        {"intunit", DATA1K_intunit},
+        {"int", DATA1K_int},
+        {"octetstring", DATA1K_octetstring},
+        {0, -1}         /* end of table */
+    };
+    int i;
+
+    for (i = 0; types[i].tname; i++)
+        if (!data1_matchstr(types[i].tname, t))
+            return types[i].type;
+    return 0;
+}
+
+/*
+ * Find the numeric tag 'value' of tagset type 'type'. The list of
+ * tagsets starting at s is searched, and for each set the sets it
+ * includes. Returns 0 if there is no such tag.
+ */
+data1_tag *data1_gettagbynum(data1_tagset *s, int type, int value)
+{
+    data1_tag *r;
+
+    for (; s; s = s->next)
+    {
+        /* scan local set */
+        if (type == s->type)
+            for (r = s->tags; r; r = r->next)
+                if (r->which == DATA1T_numeric && r->value.numeric == value)
+                    return r;
+        /* scan included sets */
+        if (s->children && (r = data1_gettagbynum(s->children, type, value)))
+            return r;
+    }
+    return 0;
+}
+
+/*
+ * Find a tag by any of its names. The list of tagsets starting at s is
+ * searched, and for each set the sets it includes. Returns 0 if no tag
+ * has that name.
+ */
+data1_tag *data1_gettagbyname(data1_tagset *s, char *name)
+{
+    data1_tag *r;
+
+    for (; s; s = s->next)
+    {
+        /* scan local set */
+        for (r = s->tags; r; r = r->next)
+        {
+            data1_name *np;
+
+            for (np = r->names; np; np = np->next)
+                if (!data1_matchstr(np->name, name))
+                    return r;
+        }
+        /* scan included sets */
+        if (s->children && (r = data1_gettagbyname(s->children, name)))
+            return r;
+    }
+    return 0;
+}
+
+/*
+ * Read a tagset definition from 'file'.
+ *
+ * Blank lines and lines starting with '#' are skipped. The directives
+ * are tag, name, reference, type and include; an included file is read
+ * with a recursive call and becomes a child set. Returns the new tagset,
+ * or 0 if the file cannot be opened or contains an error.
+ *
+ * NOTE(review): what has been built so far is not released when an error
+ * is found part-way through the file.
+ */
+data1_tagset *data1_read_tagset(char *file)
+{
+    char line[512], *r, cmd[512], args[512];
+    data1_tagset *res = 0, **childp;
+    data1_tag **tagp;
+    FILE *f;
+
+    if (!(f = fopen(file, "r")))
+    {
+        logf(LOG_WARN|LOG_ERRNO, "%s", file);
+        return 0;
+    }
+
+    if (!(res = xmalloc(sizeof(*res))))
+        abort();
+    res->name = 0;
+    res->reference = VAL_NONE;  /* until a reference directive is seen */
+    res->type = 0;
+    res->tags = 0;
+    res->children = 0;
+    res->next = 0;
+    childp = &res->children;
+    tagp = &res->tags;
+
+    for (;;)
+    {
+        /* find the next line that holds a directive */
+        while ((r = fgets(line, 512, f)))
+        {
+            while (*r && isspace((unsigned char) *r))
+                r++;
+            if (*r && *r != '#')
+                break;
+        }
+        if (!r)
+        {
+            fclose(f);
+            return res;
+        }
+        if (sscanf(r, "%s %[^\n]", cmd, args) < 2)
+            *args = '\0';
+        if (!strcmp(cmd, "tag"))
+        {
+            int value;
+            char names[512], type[512], *nm;
+            data1_tag *rr;
+            data1_name **npp;
+
+            if (sscanf(args, "%d %s %s", &value, names, type) < 3)
+            {
+                logf(LOG_WARN, "Bad number of parms in '%s' in %s",
+                    args, file);
+                fclose(f);
+                return 0;
+            }
+            if (!(rr = *tagp = xmalloc(sizeof(*rr))))
+                abort();
+
+            rr->tagset = res;
+            rr->next = 0;
+            rr->which = DATA1T_numeric;
+            rr->value.numeric = value;
+            /*
+             * how to deal with local numeric tags?
+             */
+
+            if (!(rr->kind = data1_maptype(type)))
+            {
+                logf(LOG_WARN, "Unknown datatype %s in %s", type, file);
+                fclose(f);
+                return 0;
+            }
+
+            /* read namelist: alternative names are separated by '/' */
+            nm = names;
+            npp = &rr->names;
+            do
+            {
+                char *e;
+
+                if (!(*npp = xmalloc(sizeof(**npp))))
+                    abort();
+                if ((e = strchr(nm, '/')))
+                    *(e++) = '\0';
+                if (!((*npp)->name = xmalloc(strlen(nm)+1)))
+                    abort();
+                strcpy((*npp)->name, nm);
+                (*npp)->next = 0;
+                npp = &(*npp)->next;
+                nm = e;
+            }
+            while (nm);
+            tagp = &rr->next;
+        }
+        else if (!strcmp(cmd, "name"))
+        {
+            char name[512];
+
+            /* sscanf returns EOF, not 0, when args is empty */
+            if (sscanf(args, "%s", name) != 1)
+            {
+                logf(LOG_WARN, "Malformed name directive in %s", file);
+                fclose(f);
+                return 0;
+            }
+            if (!(res->name = xmalloc(strlen(name)+1)))
+                abort();
+            strcpy(res->name, name);
+        }
+        else if (!strcmp(cmd, "reference"))
+        {
+            char name[512];
+
+            if (sscanf(args, "%s", name) != 1)
+            {
+                logf(LOG_WARN, "Malformed reference directive in %s", file);
+                fclose(f);
+                return 0;
+            }
+            if ((res->reference = oid_getvalbyname(name)) == VAL_NONE)
+            {
+                logf(LOG_WARN, "Unknown tagset ref '%s' in %s", name, file);
+                fclose(f);
+                return 0;
+            }
+        }
+        else if (!strcmp(cmd, "type"))
+        {
+            if (sscanf(args, "%d", &res->type) != 1)
+            {
+                logf(LOG_WARN, "Malformed type directive in %s", file);
+                fclose(f);
+                return 0;
+            }
+        }
+        else if (!strcmp(cmd, "include"))
+        {
+            char name[512];
+
+            if (sscanf(args, "%s", name) != 1)
+            {
+                logf(LOG_WARN, "Malformed include directive in %s", file);
+                fclose(f);
+                return 0;
+            }
+            if (!(*childp = data1_read_tagset(name)))
+            {
+                logf(LOG_WARN, "Inclusion failed in %s", file);
+                fclose(f);
+                return 0;
+            }
+            childp = &(*childp)->next;
+        }
+        else
+        {
+            logf(LOG_WARN, "Unknown directive '%s' in %s", cmd, file);
+            fclose(f);
+            return 0;
+        }
+    }
+}
diff --git a/retrieval/d1_varset.c b/retrieval/d1_varset.c
new file mode 100644
index 0000000..2d20727
--- /dev/null
+++ b/retrieval/d1_varset.c
@@ -0,0 +1,142 @@
+/*
+ * Copyright (c) 1995, Index Data.
+ * See the file LICENSE for details.
+ * Sebastian Hammer, Adam Dickmeiss
+ *
+ * $Log: d1_varset.c,v $
+ * Revision 1.1 1995-11-01 11:56:09 quinn
+ * Added Retrieval (data management) functions en masse.
+ *
+ *
+ */
+
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+#include "data1.h"
+
+/*
+ * Look up the variant type named 'type' in the class named 'class' of
+ * the given variant set. Returns 0, after logging a warning, if the
+ * class or the type is unknown. A null set is treated as a set without
+ * classes.
+ */
+data1_vartype *data1_getvartypebyct(data1_varset *set, char *class, char *type)
+{
+    data1_varclass *c;
+    data1_vartype *t;
+
+    for (c = set ? set->classes : 0; c; c = c->next)
+        if (!data1_matchstr(c->name, class))
+        {
+            for (t = c->types; t; t = t->next)
+                if (!data1_matchstr(t->name, type))
+                    return t;
+            logf(LOG_WARN, "Unknown variant type %s in class %s", type, class);
+            return 0;
+        }
+    logf(LOG_WARN, "Unknown variant class %s", class);
+    return 0;
+}
+
+/*
+ * Read a variant set definition from 'file'.
+ *
+ * The directives are class, type, name and reference; each type belongs
+ * to the class directive that precedes it. An unknown directive is
+ * logged and skipped. Returns the new set, or 0 if the file cannot be
+ * opened or one of the known directives is in error.
+ *
+ * NOTE(review): what has been built so far is not released when an error
+ * is found part-way through the file.
+ */
+data1_varset *data1_read_varset(char *file)
+{
+    data1_varset *res;
+    data1_varclass **classp, *class = 0;
+    data1_vartype **typep = 0;
+    FILE *f;
+    int argc;
+    char *argv[50],line[512];
+
+    if (!(f = fopen(file, "r")))
+    {
+        logf(LOG_WARN|LOG_ERRNO, "%s", file);
+        return 0;
+    }
+    /* allocate only once the file is open, so nothing leaks on failure */
+    res = xmalloc(sizeof(*res));
+    res->name = 0;
+    res->reference = VAL_NONE;
+    res->classes = 0;
+    classp = &res->classes;
+
+    while ((argc = readconf_line(f, line, 512, argv, 50)))
+        if (!strcmp(argv[0], "class"))
+        {
+            data1_varclass *r;
+
+            if (argc != 3)
+            {
+                logf(LOG_FATAL, "%s: malformed class directive", file);
+                fclose(f);
+                return 0;
+            }
+            *classp = r = class = xmalloc(sizeof(*r));
+            r->set = res;
+            r->class = atoi(argv[1]);
+            r->name = xmalloc(strlen(argv[2])+1);
+            strcpy(r->name, argv[2]);
+            r->types = 0;
+            typep = &r->types;  /* following types go into this class */
+            r->next = 0;
+            classp = &r->next;
+        }
+        else if (!strcmp(argv[0], "type"))
+        {
+            data1_vartype *r;
+
+            if (!typep)
+            {
+                logf(LOG_WARN, "%s: class directive must precede type", file);
+                fclose(f);
+                return 0;
+            }
+            if (argc != 4)
+            {
+                logf(LOG_WARN, "%s: Malformed type directive", file);
+                fclose(f);
+                return 0;
+            }
+            *typep = r = xmalloc(sizeof(*r));
+            r->name = xmalloc(strlen(argv[2])+1);
+            strcpy(r->name, argv[2]);
+            r->class = class;
+            r->type = atoi(argv[1]);
+            if (!(r->datatype = data1_maptype(argv[3])))
+            {
+                logf(LOG_WARN, "%s: Unknown datatype '%s'", file, argv[3]);
+                fclose(f);
+                return 0;
+            }
+            r->next = 0;
+            typep = &r->next;
+        }
+        else if (!strcmp(argv[0], "name"))
+        {
+            if (argc != 2)
+            {
+                logf(LOG_WARN, "%s name: Expected 1 argument", file);
+                fclose(f);
+                return 0;
+            }
+            res->name = xmalloc(strlen(argv[1])+1);
+            strcpy(res->name, argv[1]);
+        }
+        else if (!strcmp(argv[0], "reference"))
+        {
+            if (argc != 2)
+            {
+                logf(LOG_WARN, "%s: reference: Expected 1 argument", file);
+                fclose(f);
+                return 0;
+            }
+            if ((res->reference = oid_getvalbyname(argv[1])) == VAL_NONE)
+            {
+                logf(LOG_WARN, "Unknown reference '%s' in %s", argv[1], file);
+                fclose(f);
+                return 0;
+            }
+        }
+        else
+            logf(LOG_WARN, "varset: Unknown directive '%s' in %s", argv[0],
+                file);
+
+    fclose(f);
+    return res;
+}