From 1f793b6c2f61fd47c7a26c0274f0c7e6ab9d1a07 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Thu, 4 Sep 1997 13:54:39 +0000 Subject: [PATCH] Added MARC filter - type grs.marc.<syntax> where syntax refers to abstract syntax. New method tellf in retrieve/extract method. --- recctrl/Makefile | 4 +- recctrl/grsread.h | 8 +- recctrl/marcread.c | 260 ++++++++++++++++++++++++++++++++++++++++++++++++++++ recctrl/recgrs.c | 9 +- recctrl/sgmlread.c | 10 +- 5 files changed, 283 insertions(+), 8 deletions(-) create mode 100644 recctrl/marcread.c diff --git a/recctrl/Makefile b/recctrl/Makefile index 4fb31b0..fd41ee9 100644 --- a/recctrl/Makefile +++ b/recctrl/Makefile @@ -1,7 +1,7 @@ # Copyright (C) 1995-1996, Index Data I/S # All rights reserved. # Sebastian Hammer, Adam Dickmeiss -# $Id: Makefile,v 1.1 1996-10-11 10:57:22 adam Exp $ +# $Id: Makefile,v 1.2 1997-09-04 13:54:39 adam Exp $ SHELL=/bin/sh RANLIB=ranlib @@ -13,7 +13,7 @@ INCLUDE=-I../include $(YAZINC) DEFS=$(INCLUDE) LIB=../lib/recctrl.a PROG= -PO=recctrl.o recgrs.o regxread.o sgmlread.o rectext.o +PO=recctrl.o recgrs.o sgmlread.o regxread.o marcread.o rectext.o CPP=$(CC) -E all: $(LIB) diff --git a/recctrl/grsread.h b/recctrl/grsread.h index b0f2ca1..247c615 100644 --- a/recctrl/grsread.h +++ b/recctrl/grsread.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: grsread.h,v $ - * Revision 1.2 1997-04-30 08:56:08 quinn + * Revision 1.3 1997-09-04 13:54:40 adam + * Added MARC filter - type grs.marc.<syntax> where syntax refers + * to abstract syntax. New method tellf in retrieve/extract method. 
+ * + * Revision 1.2 1997/04/30 08:56:08 quinn * null * * Revision 1.1 1996/10/11 10:57:23 adam @@ -19,6 +23,7 @@ struct grs_read_info { int (*readf)(void *, char *, size_t); off_t (*seekf)(void *, off_t); + off_t (*tellf)(void *); void (*endf)(void *, off_t); void *fh; off_t offset; @@ -28,4 +33,5 @@ struct grs_read_info { data1_node *grs_read_regx (struct grs_read_info *p); data1_node *grs_read_sgml (struct grs_read_info *p); +data1_node *grs_read_marc (struct grs_read_info *p); #endif diff --git a/recctrl/marcread.c b/recctrl/marcread.c new file mode 100644 index 0000000..3f09254 --- /dev/null +++ b/recctrl/marcread.c @@ -0,0 +1,260 @@ +/* + * Copyright (C) 1997, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: marcread.c,v $ + * Revision 1.1 1997-09-04 13:54:40 adam + * Added MARC filter - type grs.marc.<syntax> where syntax refers + * to abstract syntax. New method tellf in retrieve/extract method. + * + */ +#include +#include +#include + +#include +#include +#include +#include "grsread.h" + +data1_node *data1_mk_node_wp (NMEM mem, data1_node *parent) +{ + data1_node *res = data1_mk_node (mem); + + if (!parent) + res->root = res; + else + { + res->root = parent->root; + res->parent = parent; + if (!parent->child) + parent->child = parent->last_child = res; + else + parent->last_child->next = res; + parent->num_children++; + parent->last_child = res; + } + return res; +} + +static void destroy_data (struct data1_node *n) +{ + assert (n->which == DATA1N_data); + xfree (n->u.data.data); +} + +data1_node *data1_mk_node_text (NMEM mem, data1_node *parent, + const char *buf, size_t len) +{ + data1_node *res = data1_mk_node_wp (mem, parent); + res->which = DATA1N_data; + res->u.data.formatted_text = 0; + res->u.data.what = DATA1I_text; + res->u.data.len = len; + if (res->u.data.len > DATA1_LOCALDATA) { + res->u.data.data = xmalloc (res->u.data.len); + res->destroy = destroy_data; + } + else + res->u.data.data = res->lbuf; + memcpy 
(res->u.data.data, buf, res->u.data.len); + return res; +} + +data1_node *data1_mk_node_tag (NMEM mem, data1_node *parent, + const char *tag, size_t len) +{ + data1_element *elem = NULL; + data1_node *partag = get_parent_tag(parent); + data1_node *res; + data1_element *e = NULL; + int localtag = 0; + + res = data1_mk_node_wp (mem, parent); + + res->which = DATA1N_tag; + res->u.tag.tag = res->lbuf; + res->u.tag.get_bytes = -1; + + if (len >= DATA1_LOCALDATA) + len = DATA1_LOCALDATA-1; + + memcpy (res->u.tag.tag, tag, len); + res->u.tag.tag[len] = '\0'; + + if (parent->which == DATA1N_variant) + return res; + if (partag) + if (!(e = partag->u.tag.element)) + localtag = 1; + + elem = data1_getelementbytagname (res->root->u.root.absyn, e, + res->u.tag.tag); + res->u.tag.element = elem; + res->u.tag.node_selected = 0; + res->u.tag.make_variantlist = 0; + res->u.tag.no_data_requested = 0; + return res; +} + +#define MARC_DEBUG 0 + +data1_node *grs_read_marc (struct grs_read_info *p) +{ + char buf[100000]; + int entry_p; + int record_length; + int indicator_length; + int identifier_length; + int base_address; + int length_data_entry; + int length_starting; + int length_implementation; + int read_bytes; +#if MARC_DEBUG + FILE *outf = stdout; +#endif + + data1_node *res_root; + data1_absyn *absyn; + char *absynName; + + if ((*p->readf)(p->fh, buf, 5) != 5) + return NULL; + record_length = atoi_n (buf, 5); + if (record_length < 25) + { + logf (LOG_WARN, "MARC record length < 25, is %d", record_length); + return NULL; + } + /* read remaining part - attempt to read one byte furhter... 
*/ + read_bytes = (*p->readf)(p->fh, buf+5, record_length-4); + if (read_bytes < record_length-5) + { + logf (LOG_WARN, "Couldn't read whole MARC record"); + return NULL; + } + if (read_bytes == record_length - 4) + { + off_t cur_offset = (*p->tellf)(p->fh); + assert (cur_offset > 26); + if (p->endf) + (*p->endf)(p->fh, cur_offset - 1); + } + absynName = p->type; + logf (LOG_DEBUG, "absynName = %s", absynName); + if (!(absyn = data1_get_absyn (absynName))) + { + logf (LOG_WARN, "Unknown abstract syntax: %s", absynName); + return NULL; + } + res_root = data1_mk_node_wp (p->mem, NULL); + res_root->u.root.type = nmem_malloc (p->mem, strlen(absynName)+1); + strcpy (res_root->u.root.type, absynName); + res_root->u.root.absyn = absyn; + + indicator_length = atoi_n (buf+10, 1); + identifier_length = atoi_n (buf+11, 1); + base_address = atoi_n (buf+12, 4); + + length_data_entry = atoi_n (buf+20, 1); + length_data_entry = atoi_n (buf+20, 1); + length_data_entry = atoi_n (buf+20, 1); + length_starting = atoi_n (buf+21, 1); + length_implementation = atoi_n (buf+22, 1); + + for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + entry_p += 3+length_data_entry+length_starting; + base_address = entry_p+1; + for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) + { + int data_length; + int data_offset; + int end_offset; + int i, i0; + char tag[4]; + data1_node *res; + data1_node *parent = res_root; + + memcpy (tag, buf+entry_p, 3); + entry_p += 3; + tag[3] = '\0'; + + /* generate field node */ + res = data1_mk_node_tag (p->mem, res_root, tag, 3); + +#if MARC_DEBUG + fprintf (outf, "%s ", tag); +#endif + data_length = atoi_n (buf+entry_p, length_data_entry); + entry_p += length_data_entry; + data_offset = atoi_n (buf+entry_p, length_starting); + entry_p += length_starting; + i = data_offset + base_address; + end_offset = i+data_length-1; + + if (memcmp (tag, "00", 2) && indicator_length) + { + /* generate indicator node */ +#if MARC_DEBUG + int j; +#endif + res = data1_mk_node_tag (p->mem, 
res, buf+i, indicator_length); +#if MARC_DEBUG + for (j = 0; jmem, parent, buf+i+1, + identifier_length-1); +#if MARC_DEBUG + fprintf (outf, " $"); + for (j = 1; jmem, res, buf + i0, i - i0); + i0 = i; + } + else + { +#if MARC_DEBUG + fprintf (outf, "%c", buf[i]); +#endif + i++; + } + } + if (i > i0) + { + data1_node *res = data1_mk_node_tag (p->mem, parent, "@", 1); + data1_mk_node_text (p->mem, res, buf + i0, i - i0); + } +#if MARC_DEBUG + fprintf (outf, "\n"); + if (i < end_offset) + fprintf (outf, "-- separator but not at end of field\n"); + if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) + fprintf (outf, "-- no separator at end of field\n"); +#endif + } + return res_root; +} diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index 3f57db8..f23dbb1 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: recgrs.c,v $ - * Revision 1.5 1997-07-15 16:29:03 adam + * Revision 1.6 1997-09-04 13:54:40 adam + * Added MARC filter - type grs.marc.<syntax> where syntax refers + * to abstract syntax. New method tellf in retrieve/extract method. + * + * Revision 1.5 1997/07/15 16:29:03 adam * Initialized dummy variable to keep checker gcc happy. 
* * Revision 1.4 1997/04/30 08:56:08 quinn @@ -128,6 +132,7 @@ static data1_node *read_grs_type (struct grs_read_info *p, const char *type) } tab[] = { { "sgml", grs_read_sgml }, { "regx", grs_read_regx }, + { "marc", grs_read_marc }, { NULL, NULL } }; const char *cp = strchr (type, '.'); @@ -384,6 +389,7 @@ static int grs_extract(struct recExtractCtrl *p) gri.readf = p->readf; gri.seekf = p->seekf; + gri.tellf = p->tellf; gri.endf = p->endf; gri.fh = p->fh; gri.offset = p->offset; @@ -479,6 +485,7 @@ static int grs_retrieve(struct recRetrieveCtrl *p) gri.readf = p->readf; gri.seekf = p->seekf; + gri.tellf = p->tellf; gri.endf = NULL; gri.fh = p->fh; gri.offset = 0; diff --git a/recctrl/sgmlread.c b/recctrl/sgmlread.c index 3f0566d..f75a1c5 100644 --- a/recctrl/sgmlread.c +++ b/recctrl/sgmlread.c @@ -4,16 +4,18 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: sgmlread.c,v $ - * Revision 1.2 1997-04-30 08:56:08 quinn + * Revision 1.3 1997-09-04 13:54:41 adam + * Added MARC filter - type grs.marc.<syntax> where syntax refers + * to abstract syntax. New method tellf in retrieve/extract method. + * + * Revision 1.2 1997/04/30 08:56:08 quinn * null * * Revision 1.1 1996/10/11 10:57:32 adam * New module recctrl. Used to manage records (extract/retrieval). * */ -#include -#include -#include +#include #include "grsread.h" -- 1.7.10.4