X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fmarcread.c;fp=recctrl%2Fmarcread.c;h=0000000000000000000000000000000000000000;hb=ea48a53fe407f3162fbe9b37046459b265b69d4b;hp=5d74c35aae275d04816036e2ebfb8942c2ec84ff;hpb=5ba92997dbb29abf9a102711533e1fb73424d956;p=idzebra-moved-to-github.git diff --git a/recctrl/marcread.c b/recctrl/marcread.c deleted file mode 100644 index 5d74c35..0000000 --- a/recctrl/marcread.c +++ /dev/null @@ -1,899 +0,0 @@ -/* $Id: marcread.c,v 1.34 2006-05-10 08:13:28 adam Exp $ - Copyright (C) 1995-2005 - Index Data ApS - -This file is part of the Zebra server. - -Zebra is free software; you can redistribute it and/or modify it under -the terms of the GNU General Public License as published by the Free -Software Foundation; either version 2, or (at your option) any later -version. - -Zebra is distributed in the hope that it will be useful, but WITHOUT ANY -WARRANTY; without even the implied warranty of MERCHANTABILITY or -FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License -for more details. - -You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. -*/ - -#include -#include -#include - -#include -#include -#include -#include "marcomp.h" -#include "inline.h" - -#define MARC_DEBUG 0 -#define MARCOMP_DEBUG 0 - -struct marc_info { - char type[256]; -}; - -static data1_node *grs_read_iso2709 (struct grs_read_info *p, int marc_xml) -{ - struct marc_info *mi = (struct marc_info*) p->clientData; - char buf[100000]; - int entry_p; - int record_length; - int indicator_length; - int identifier_length; - int base_address; - int end_of_directory; - int length_data_entry; - int length_starting; - int length_implementation; - int read_bytes; -#if MARC_DEBUG - FILE *outf = stdout; -#endif - data1_node *res_root, *res_top; - char *absynName; - data1_marctab *marctab; - - if ((*p->readf)(p->fh, buf, 5) != 5) - return NULL; - while (*buf < '0' || *buf > '9') - { - int i; - - yaz_log(YLOG_WARN, "MARC: Skipping bad byte %d (0x%02X)", - *buf & 0xff, *buf & 0xff); - for (i = 0; i<4; i++) - buf[i] = buf[i+1]; - - if ((*p->readf)(p->fh, buf+4, 1) != 1) - return NULL; - } - record_length = atoi_n (buf, 5); - if (record_length < 25) - { - yaz_log (YLOG_WARN, "MARC record length < 25, is %d", record_length); - return NULL; - } - /* read remaining part - attempt to read one byte furhter... */ - read_bytes = (*p->readf)(p->fh, buf+5, record_length-4); - if (read_bytes < record_length-5) - { - yaz_log (YLOG_WARN, "Couldn't read whole MARC record"); - return NULL; - } - if (read_bytes == record_length - 4) - { - off_t cur_offset = (*p->tellf)(p->fh); - if (cur_offset <= 27) - return NULL; - if (p->endf) - (*p->endf)(p->fh, cur_offset - 1); - } - absynName = mi->type; - res_root = data1_mk_root (p->dh, p->mem, absynName); - if (!res_root) - { - yaz_log (YLOG_WARN, "cannot read MARC without an abstract syntax"); - return 0; - } - if (marc_xml) - { - data1_node *lead; - const char *attr[] = { "xmlns", "http://www.loc.gov/MARC21/slim", 0}; - - res_top = data1_mk_tag (p->dh, p->mem, "record", attr, res_root); - - lead = data1_mk_tag(p->dh, p->mem, "leader", 0, res_top); - data1_mk_text_n(p->dh, p->mem, buf, 24, lead); - } - else - res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root); - - if ((marctab = data1_absyn_getmarctab(p->dh, res_root->u.root.absyn))) - { - memcpy(marctab->leader, buf, 24); - memcpy(marctab->implementation_codes, buf+6, 4); - marctab->implementation_codes[4] = '\0'; - memcpy(marctab->user_systems, buf+17, 3); - marctab->user_systems[3] = '\0'; - } - - if (marctab && marctab->force_indicator_length >= 0) - indicator_length = marctab->force_indicator_length; - else - indicator_length = atoi_n (buf+10, 1); - if (marctab && marctab->force_identifier_length >= 0) - identifier_length = marctab->force_identifier_length; - else - identifier_length = atoi_n (buf+11, 1); - base_address = atoi_n (buf+12, 5); - - length_data_entry = atoi_n (buf+20, 1); - length_starting = atoi_n (buf+21, 1); - length_implementation = atoi_n (buf+22, 1); - - for (entry_p = 24; buf[entry_p] != ISO2709_FS; ) - { - int l = 3 + length_data_entry + length_starting; - if (entry_p + l >= record_length) - { - yaz_log(YLOG_WARN, "MARC: Directory offset %d: end of record.", - entry_p); - return 0; - } - /* check for digits in length info */ - while (--l >= 3) - if (!isdigit(*(const unsigned char *) (buf + entry_p+l))) - break; - if (l >= 3) - { - /* not all digits, so stop directory scan */ - yaz_log(YLOG_LOG, "MARC: Bad directory"); - break; - } - entry_p += 3 + length_data_entry + length_starting; - } - end_of_directory = entry_p; - if (base_address != entry_p+1) - { - yaz_log(YLOG_WARN, "MARC: Base address does not follow directory"); - } - for (entry_p = 24; entry_p != end_of_directory; ) - { - int data_length; - int data_offset; - int end_offset; - int i, i0; - char tag[4]; - data1_node *res; - data1_node *parent = res_top; - - memcpy (tag, buf+entry_p, 3); - entry_p += 3; - tag[3] = '\0'; - - if (marc_xml) - res = parent; - else - res = data1_mk_tag_n (p->dh, p->mem, tag, 3, 0 /* attr */, parent); - -#if MARC_DEBUG - fprintf (outf, "%s ", tag); -#endif - data_length = atoi_n (buf+entry_p, length_data_entry); - entry_p += length_data_entry; - data_offset = atoi_n (buf+entry_p, length_starting); - entry_p += length_starting; - i = data_offset + base_address; - end_offset = i+data_length-1; - - if (data_length <= 0 || data_offset < 0 || end_offset >= record_length) - { - yaz_log(YLOG_WARN, "MARC: Bad offsets in data. Skipping rest"); - break; - } - - if (memcmp (tag, "00", 2) && indicator_length) - { - /* generate indicator node */ - if (marc_xml) - { - const char *attr[10]; - int j; - - attr[0] = "tag"; - attr[1] = tag; - attr[2] = 0; - - res = data1_mk_tag(p->dh, p->mem, "datafield", attr, res); - - for (j = 0; jdh, p->mem, res, attr); - } - } - else - { -#if MARC_DEBUG - int j; -#endif - res = data1_mk_tag_n (p->dh, p->mem, - buf+i, indicator_length, 0 /* attr */, res); -#if MARC_DEBUG - for (j = 0; jdh, p->mem, "controlfield", attr, res); - } - } - parent = res; - /* traverse sub fields */ - i0 = i; - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) - { - if (memcmp (tag, "00", 2) && identifier_length) - { - data1_node *res; - if (marc_xml) - { - int j; - const char *attr[3]; - char code[10]; - - for (j = 1; jdh, p->mem, "subfield", - attr, parent); - } - else - { - res = data1_mk_tag_n (p->dh, p->mem, - buf+i+1, identifier_length-1, - 0 /* attr */, parent); - } -#if MARC_DEBUG - fprintf (outf, " $"); - for (j = 1; jdh, p->mem, buf + i0, i - i0, res); - i0 = i; - } - else - { -#if MARC_DEBUG - fprintf (outf, "%c", buf[i]); -#endif - i++; - } - } - if (i > i0) - { - data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, parent); - } -#if MARC_DEBUG - fprintf (outf, "\n"); - if (i < end_offset) - fprintf (outf, "-- separator but not at end of field\n"); - if (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS) - fprintf (outf, "-- no separator at end of field\n"); -#endif - } - return res_root; -} - -/* - * Locate some data under this node. This routine should handle variants - * prettily. - */ -static char *get_data(data1_node *n, int *len) -{ - char *r; - - while (n) - { - if (n->which == DATA1N_data) - { - int i; - *len = n->u.data.len; - - for (i = 0; i<*len; i++) - if (!d1_isspace(n->u.data.data[i])) - break; - while (*len && d1_isspace(n->u.data.data[*len - 1])) - (*len)--; - *len = *len - i; - if (*len > 0) - return n->u.data.data + i; - } - if (n->which == DATA1N_tag) - n = n->child; - else if (n->which == DATA1N_data) - n = n->next; - else - break; - } - r = ""; - *len = strlen(r); - return r; -} - -static data1_node *lookup_subfield(data1_node *node, const char *name) -{ - data1_node *p; - - for (p=node; p; p=p->next) - { - if (!yaz_matchstr(p->u.tag.tag, name)) - return p; - } - return 0; -} - -static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, - const char *name) -{ - inline_subfield *p; - - for (p=pisf; p; p=p->next) - { - if (!yaz_matchstr(p->name, name)) - return p; - } - return 0; -} - -static inline_subfield *cat_inline_subfield(mc_subfield *psf, WRBUF buf, - inline_subfield *pisf) -{ - mc_subfield *p; - - for (p = psf; p && pisf; p = p->next) - { - if (p->which == MC_SF) - { - inline_subfield *found = lookup_inline_subfield(pisf, p->name); - - if (found) - { - if (strcmp(p->prefix, "_")) - { - wrbuf_puts(buf, " "); - wrbuf_puts(buf, p->prefix); - } - if (p->interval.start == -1) - { - wrbuf_puts(buf, found->data); - } - else - { - wrbuf_write(buf, found->data+p->interval.start, - p->interval.end-p->interval.start); - wrbuf_puts(buf, ""); - } - if (strcmp(p->suffix, "_")) - { - wrbuf_puts(buf, p->suffix); - wrbuf_puts(buf, " "); - } -#if MARCOMP_DEBUG - yaz_log(YLOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name); -#endif - pisf = found->next; - } - } - else if (p->which == MC_SFVARIANT) - { - inline_subfield *next; - - do { - next = cat_inline_subfield(p->u.child, buf, pisf); - if (next == pisf) - break; - pisf = next; - } while (pisf); - } - else if (p->which == MC_SFGROUP) - { - mc_subfield *pp; - int found; - - for (pp = p->u.child, found = 0; pp; pp = pp->next) - { - if (!yaz_matchstr(pisf->name, p->name)) - { - found = 1; - break; - } - } - if (found) - { - wrbuf_puts(buf, " ("); - pisf = cat_inline_subfield(p->u.child, buf, pisf); - wrbuf_puts(buf, ") "); - } - } - } - return pisf; -} - -static void cat_inline_field(mc_field *pf, WRBUF buf, data1_node *subfield) -{ - if (!pf || !subfield) - return; - - for (;subfield;) - { - int len; - inline_field *pif=NULL; - data1_node *psubf; - - if (yaz_matchstr(subfield->u.tag.tag, "1")) - { - subfield = subfield->next; - continue; - } - - psubf = subfield; - pif = inline_mk_field(); - do - { - int i; - if ((i=inline_parse(pif, psubf->u.tag.tag, get_data(psubf, &len)))<0) - { - yaz_log(YLOG_WARN, "inline subfield ($%s): parse error", - psubf->u.tag.tag); - inline_destroy_field(pif); - return; - } - psubf = psubf->next; - } while (psubf && yaz_matchstr(psubf->u.tag.tag, "1")); - - subfield = psubf; - - if (pif && !yaz_matchstr(pif->name, pf->name)) - { - if (!pf->list && pif->list) - { - wrbuf_puts(buf, pif->list->data); - } - else - { - int ind1, ind2; - - /* - check indicators - */ - - ind1 = (pif->ind1[0] == ' ') ? '_':pif->ind1[0]; - ind2 = (pif->ind2[0] == ' ') ? '_':pif->ind2[0]; - - if (((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) && - ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0]))) - { - cat_inline_subfield(pf->list, buf, pif->list); - - /* - add separator for inline fields - */ - if (wrbuf_len(buf)) - { - wrbuf_puts(buf, "\n"); - } - } - else - { - yaz_log(YLOG_WARN, "In-line field %s missed -- indicators do not match", pif->name); - } - } - } - inline_destroy_field(pif); - } -#if MARCOMP_DEBUG - yaz_log(YLOG_LOG, "cat_inline_field(): got buffer {%s}", buf); -#endif -} - -static data1_node *cat_subfield(mc_subfield *psf, WRBUF buf, - data1_node *subfield) -{ - mc_subfield *p; - - for (p = psf; p && subfield; p = p->next) - { - if (p->which == MC_SF) - { - data1_node *found = lookup_subfield(subfield, p->name); - - if (found) - { - int len; - - if (strcmp(p->prefix, "_")) - { - wrbuf_puts(buf, " "); - wrbuf_puts(buf, p->prefix); - } - - if (p->u.in_line) - { - cat_inline_field(p->u.in_line, buf, found); - } - else if (p->interval.start == -1) - { - wrbuf_puts(buf, get_data(found, &len)); - } - else - { - wrbuf_write(buf, get_data(found, &len)+p->interval.start, - p->interval.end-p->interval.start); - wrbuf_puts(buf, ""); - } - if (strcmp(p->suffix, "_")) - { - wrbuf_puts(buf, p->suffix); - wrbuf_puts(buf, " "); - } -#if MARCOMP_DEBUG - yaz_log(YLOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag); -#endif - subfield = found->next; - } - } - else if (p->which == MC_SFVARIANT) - { - data1_node *next; - do { - next = cat_subfield(p->u.child, buf, subfield); - if (next == subfield) - break; - subfield = next; - } while (subfield); - } - else if (p->which == MC_SFGROUP) - { - mc_subfield *pp; - int found; - - for (pp = p->u.child, found = 0; pp; pp = pp->next) - { - if (!yaz_matchstr(subfield->u.tag.tag, pp->name)) - { - found = 1; - break; - } - } - if (found) - { - wrbuf_puts(buf, " ("); - subfield = cat_subfield(p->u.child, buf, subfield); - wrbuf_puts(buf, ") "); - } - } - } - return subfield; -} - -static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, - WRBUF buf, data1_node *field) -{ - data1_node *subfield; - int ind1, ind2; - - if (!pf || !field) - return 0; - - - if (yaz_matchstr(field->u.tag.tag, pf->name)) - return field->next; - - subfield = field->child; - - if (!subfield) - return field->next; - - /* - check subfield without indicators - */ - - if (!pf->list && subfield->which == DATA1N_data) - { - int len; - - if (pf->interval.start == -1) - { - wrbuf_puts(buf, get_data(field, &len)); - } - else - { - wrbuf_write(buf, get_data(field, &len)+pf->interval.start, - pf->interval.end-pf->interval.start); - wrbuf_puts(buf, ""); - } -#if MARCOMP_DEBUG - yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf); -#endif - return field->next; - } - - /* - check indicators - */ - - ind1 = (subfield->u.tag.tag[0] == ' ') ? '_':subfield->u.tag.tag[0]; - ind2 = (subfield->u.tag.tag[1] == ' ') ? '_':subfield->u.tag.tag[1]; - - if (!( - ((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) && - ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0])) - )) - { -#if MARCOMP_DEBUG - yaz_log(YLOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag); -#endif - return field->next; - } - - subfield = subfield->child; - - if (!subfield) - return field->next; - - cat_subfield(pf->list, buf, subfield); - -#if MARCOMP_DEBUG - yaz_log(YLOG_LOG, "cat_field(): got buffer {%s}", buf); -#endif - - return field->next; -} - -static int is_empty(char *s) -{ - char *p = s; - - for (p = s; *p; p++) - { - if (!isspace(*(unsigned char *)p)) - return 0; - } - return 1; -} - -static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, - data1_node *root) -{ - data1_marctab *marctab = data1_absyn_getmarctab(p->dh, root->u.root.absyn); - data1_node *top = root->child; - data1_node *field; - mc_context *c; - mc_field *pf; - WRBUF buf; - - c = mc_mk_context(mc_stmnt+3); - - if (!c) - return; - - pf = mc_getfield(c); - - if (!pf) - { - mc_destroy_context(c); - return; - } - buf = wrbuf_alloc(); -#if MARCOMP_DEBUG - yaz_log(YLOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt); -#endif - if (!yaz_matchstr(pf->name, "ldr")) - { - data1_node *new; -#if MARCOMP_DEBUG - yaz_log(YLOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions", - pf->interval.start, pf->interval.end); -#endif - new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top); - data1_mk_text_n(p->dh, p->mem, marctab->leader+pf->interval.start, - pf->interval.end-pf->interval.start+1, new); - } - else - { - field=top->child; - - while(field) - { - if (!yaz_matchstr(field->u.tag.tag, pf->name)) - { - data1_node *new; - char *pb; -#if MARCOMP_DEBUG - yaz_log(YLOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag); -#endif - wrbuf_rewind(buf); - wrbuf_puts(buf, ""); - - field = cat_field(p, pf, buf, field); - - pb = wrbuf_buf(buf); - for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n")) - { - if (!is_empty(pb)) - { - new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top); - data1_mk_text_n(p->dh, p->mem, pb, strlen(pb), new); - } - } - } - else - { - field = field->next; - } - } - } - mc_destroy_field(pf); - mc_destroy_context(c); - wrbuf_free(buf, 1); -} - -data1_node *grs_read_marcxml(struct grs_read_info *p) -{ - data1_node *root = grs_read_iso2709(p, 1); - data1_element *e; - - if (!root) - return 0; - - for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next) - { - data1_tag *tag = e->tag; - - if (tag && tag->which == DATA1T_string && - !yaz_matchstr(tag->value.string, "mc?")) - parse_data1_tree(p, tag->value.string, root); - } - return root; -} - -data1_node *grs_read_marc(struct grs_read_info *p) -{ - data1_node *root = grs_read_iso2709(p, 0); - data1_element *e; - - if (!root) - return 0; - - for (e = data1_absyn_getelements(p->dh, root->u.root.absyn); e; e=e->next) - { - data1_tag *tag = e->tag; - - if (tag && tag->which == DATA1T_string && - !yaz_matchstr(tag->value.string, "mc?")) - parse_data1_tree(p, tag->value.string, root); - } - return root; -} - -static void *init_marc(Res res, RecType rt) -{ - struct marc_info *p = xmalloc(sizeof(*p)); - strcpy(p->type, ""); - return p; -} - -static ZEBRA_RES config_marc(void *clientData, Res res, const char *args) -{ - struct marc_info *p = (struct marc_info*) clientData; - if (strlen(args) < sizeof(p->type)) - strcpy(p->type, args); - return ZEBRA_OK; -} - -static void destroy_marc(void *clientData) -{ - struct marc_info *p = (struct marc_info*) clientData; - xfree (p); -} - - -static int extract_marc(void *clientData, struct recExtractCtrl *ctrl) -{ - return zebra_grs_extract(clientData, ctrl, grs_read_marc); -} - -static int retrieve_marc(void *clientData, struct recRetrieveCtrl *ctrl) -{ - return zebra_grs_retrieve(clientData, ctrl, grs_read_marc); -} - -static struct recType marc_type = { - 0, - "grs.marc", - init_marc, - config_marc, - destroy_marc, - extract_marc, - retrieve_marc, -}; - -static int extract_marcxml(void *clientData, struct recExtractCtrl *ctrl) -{ - return zebra_grs_extract(clientData, ctrl, grs_read_marcxml); -} - -static int retrieve_marcxml(void *clientData, struct recRetrieveCtrl *ctrl) -{ - return zebra_grs_retrieve(clientData, ctrl, grs_read_marcxml); -} - -static struct recType marcxml_type = { - 0, - "grs.marcxml", - init_marc, - config_marc, - destroy_marc, - extract_marcxml, - retrieve_marcxml, -}; - -RecType -#ifdef IDZEBRA_STATIC_GRS_MARC -idzebra_filter_grs_marc -#else -idzebra_filter -#endif - -[] = { - &marc_type, - &marcxml_type, - 0, -}; - -/* - * Local variables: - * c-basic-offset: 4 - * indent-tabs-mode: nil - * End: - * vim: shiftwidth=4 tabstop=8 expandtab - */ -