X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fmarcread.c;fp=recctrl%2Fmarcread.c;h=f168081e30355302e061c1f5a0cc16125b398f58;hb=0a5aa3b65fe14789bcada545a0e399545725d1ff;hp=49a27fcbbc4d293a7709aaec6b6e0b5d9e2791b6;hpb=52fad02a1584f75a0c7ea56c5e3381f84c3f1ce0;p=idzebra-moved-to-github.git diff --git a/recctrl/marcread.c b/recctrl/marcread.c index 49a27fc..f168081 100644 --- a/recctrl/marcread.c +++ b/recctrl/marcread.c @@ -1,4 +1,4 @@ -/* $Id: marcread.c,v 1.17 2002-08-02 19:26:56 adam Exp $ +/* $Id: marcread.c,v 1.18 2003-02-28 12:33:39 oleg Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -20,7 +20,6 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. */ - #include #include #include @@ -29,10 +28,13 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include #include "grsread.h" +#include "marcomp.h" +#include "inline.h" #define MARC_DEBUG 0 +#define MARCOMP_DEBUG 0 -data1_node *grs_read_marc (struct grs_read_info *p) +static data1_node *grs_read_iso2709 (struct grs_read_info *p) { char buf[100000]; int entry_p; @@ -83,7 +85,14 @@ data1_node *grs_read_marc (struct grs_read_info *p) } res_top = data1_mk_tag (p->dh, p->mem, absynName, 0, res_root); - marctab = res_root->u.root.absyn->marc; + if (marctab = res_root->u.root.absyn->marc) + { + memcpy(marctab->leader, buf, 24); + memcpy(marctab->implementation_codes, buf+6, 4); + marctab->implementation_codes[4] = '\0'; + memcpy(marctab->user_systems, buf+17, 3); + marctab->user_systems[3] = '\0'; + } if (marctab && marctab->force_indicator_length >= 0) indicator_length = marctab->force_indicator_length; @@ -149,7 +158,37 @@ data1_node *grs_read_marc (struct grs_read_info *p) i0 = i; while (buf[i] != ISO2709_RS && buf[i] != ISO2709_FS && i < end_offset) { - if (memcmp (tag, "00", 2) && identifier_length) + + if (!memcmp(tag, "4", 1) && (!yaz_matchstr(absynName, "UNIMARC")|| + !yaz_matchstr(absynName, "RUSMARC"))) + { + int go = 1; + data1_node *res = + data1_mk_tag_n (p->dh, p->mem, + buf+i+1, identifier_length-1, + 0 /* attr */, parent); + i += identifier_length; + i0 = i; + do { + while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && + buf[i] != ISO2709_FS && i < end_offset) + { + i++; + } + if (!memcmp(buf+i+1, "1", 1) && idh, p->mem, buf + i0, i - i0, res); + i0 = i; + } + else if (memcmp (tag, "00", 2) && identifier_length) { data1_node *res = data1_mk_tag_n (p->dh, p->mem, @@ -163,15 +202,15 @@ data1_node *grs_read_marc (struct grs_read_info *p) #endif i += identifier_length; i0 = i; - while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && - buf[i] != ISO2709_FS && i < end_offset) - { + while (buf[i] != ISO2709_RS && buf[i] != ISO2709_IDFS && + buf[i] != ISO2709_FS && i < end_offset) + { #if MARC_DEBUG - fprintf (outf, "%c", buf[i]); + fprintf (outf, "%c", buf[i]); #endif - i++; - } - data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res); + i++; + } + data1_mk_text_n (p->dh, p->mem, buf + i0, i - i0, res); i0 = i; } else @@ -195,8 +234,435 @@ data1_node *grs_read_marc (struct grs_read_info *p) #endif } return res_root; -} +} +/* + * Locate some data under this node. This routine should handle variants + * prettily. + */ +static char *get_data(data1_node *n, int *len) +{ + char *r; + while (n) + { + if (n->which == DATA1N_data) + { + int i; + *len = n->u.data.len; + + for (i = 0; i<*len; i++) + if (!d1_isspace(n->u.data.data[i])) + break; + while (*len && d1_isspace(n->u.data.data[*len - 1])) + (*len)--; + *len = *len - i; + if (*len > 0) + return n->u.data.data + i; + } + if (n->which == DATA1N_tag) + n = n->child; + else if (n->which == DATA1N_data) + n = n->next; + else + break; + } + r = ""; + *len = strlen(r); + return r; +} +static char *tr(char *s, int c1, int c2) +{ + char *p = s; + + while(*p) + { + if (*p == c1) + *p = c2; + p++; + } + return s; +} +static data1_node *lookup_subfield(data1_node *node, const char *name) +{ + data1_node *p; + + for (p=node; p; p=p->next) + { + if (!yaz_matchstr(p->u.tag.tag, name)) + return p; + } + return 0; +} +static inline_subfield *lookup_inline_subfield(inline_subfield *pisf, const char *name) +{ + inline_subfield *p; + + for (p=pisf; p; p=p->next) + { + if (!yaz_matchstr(p->name, name)) + return p; + } + return 0; +} +static inline_subfield *cat_inline_subfield(mc_subfield *psf, char *buf, inline_subfield *pisf) +{ + mc_subfield *p; + + for (p = psf; p && pisf; p = p->next) + { + if (p->which == MC_SF) + { + inline_subfield *found = lookup_inline_subfield(pisf, p->name); + + if (found) + { + if (strcmp(p->prefix, "_")) + { + strcat(strcat(buf, " "), p->prefix); + } + if (p->interval.start == -1) + { + strcat(buf, found->data); + } + else + { + strncat(buf, found->data+p->interval.start, + p->interval.end-p->interval.start+1); + } + if (strcmp(p->suffix, "_")) + { + strcat(strcat(buf, p->suffix), " "); + } +#if MARCOMP_DEBUG + logf(LOG_LOG, "cat_inline_subfield(): add subfield $%s", found->name); +#endif + pisf = found->next; + } + } + else if (p->which == MC_SFVARIANT) + { + inline_subfield *next; + + do { + next = cat_inline_subfield(p->u.child, buf, pisf); + if (next == pisf) + break; + pisf = next; + } while (pisf); + } + else if (p->which == MC_SFGROUP) + { + mc_subfield *pp; + int found; + + for (pp = p->u.child, found = 0; pp; pp = pp->next) + { + if (!yaz_matchstr(pisf->name, p->name)) + { + found = 1; + break; + } + } + if (found) + { + strcat(buf, " ("); + pisf = cat_inline_subfield(p->u.child, buf, pisf); + strcat(buf, ") "); + } + } + } + return pisf; +} +static void cat_inline_field(mc_field *pf, char *buf, data1_node *subfield) +{ + + if (!pf || !subfield) + return; + + for (;subfield; subfield = subfield->next) + { + int len; + inline_field *pif = inline_parse(get_data(subfield,&len)); + + if (pif && !yaz_matchstr(pif->name, pf->name)) + { + if (!pf->list && pif->list) + { + strcat(buf, pif->list->data); + } + else + { + int ind1, ind2; + + /* + check indicators + */ + + ind1 = (pif->ind1[0] == ' ') ? '_':pif->ind1[0]; + ind2 = (pif->ind2[0] == ' ') ? '_':pif->ind2[0]; + + if (((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) && + ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0]))) + { + cat_inline_subfield(pf->list, buf, pif->list); + + /* + add separator for inline fields + */ + if (strlen(buf)) + { + strcat(buf, "\n"); + } + } + else + { + logf(LOG_WARN, "In-line field %s missed -- indicators does not match", pif->name); + } + } + } + inline_destroy_field(pif); + } +#if MARCOMP_DEBUG + logf(LOG_LOG, "cat_inline_field(): got buffer {%s}", buf); +#endif +} +static data1_node *cat_subfield(mc_subfield *psf, char *buf, data1_node *subfield) +{ + mc_subfield *p; + + for (p = psf; p && subfield; p = p->next) + { + if (p->which == MC_SF) + { + data1_node *found = lookup_subfield(subfield, p->name); + + if (found) + { + int len; + + if (strcmp(p->prefix, "_")) + { + strcat(strcat(buf, " "), p->prefix); + } + + if (p->u.in_line) + { + cat_inline_field(p->u.in_line, buf, found); + } + else if (p->interval.start == -1) + { + strcat(buf, get_data(found, &len)); + } + else + { + strncat(buf, get_data(found, &len)+p->interval.start, + p->interval.end-p->interval.start+1); + } + if (strcmp(p->suffix, "_")) + { + strcat(strcat(buf, p->suffix), " "); + } +#if MARCOMP_DEBUG + logf(LOG_LOG, "cat_subfield(): add subfield $%s", found->u.tag.tag); +#endif + subfield = found->next; + } + } + else if (p->which == MC_SFVARIANT) + { + data1_node *next; + do { + next = cat_subfield(p->u.child, buf, subfield); + if (next == subfield) + break; + subfield = next; + } while (subfield); + } + else if (p->which == MC_SFGROUP) + { + mc_subfield *pp; + int found; + + for (pp = p->u.child, found = 0; pp; pp = pp->next) + { + if (!yaz_matchstr(subfield->u.tag.tag, pp->name)) + { + found = 1; + break; + } + } + if (found) + { + strcat(buf, " ("); + subfield = cat_subfield(p->u.child, buf, subfield); + strcat(buf, ") "); + } + } + } + return subfield; +} +static data1_node *cat_field(struct grs_read_info *p, mc_field *pf, char *buf, data1_node *field) +{ + data1_node *subfield; + int ind1, ind2; + + if (!pf || !field) + return 0; + + + if (yaz_matchstr(field->u.tag.tag, pf->name)) + return field->next; + + subfield = field->child; + + if (!subfield) + return field->next; + + /* + check subfield without indicators + */ + + if (!pf->list && subfield->which == DATA1N_data) + { + int len; + + strcat(buf, get_data(field, &len)); +#if MARCOMP_DEBUG + logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); +#endif + return field->next; + } + + /* + check indicators + */ + + ind1 = (subfield->u.tag.tag[0] == ' ') ? '_':subfield->u.tag.tag[0]; + ind2 = (subfield->u.tag.tag[1] == ' ') ? '_':subfield->u.tag.tag[1]; + + if (!( + ((pf->ind1[0] == '.') || (ind1 == pf->ind1[0])) && + ((pf->ind2[0] == '.') || (ind2 == pf->ind2[0])) + )) + { +#if MARCOMP_DEBUG + logf(LOG_WARN, "Field %s missed -- does not match indicators", field->u.tag.tag); +#endif + return field->next; + } + + subfield = subfield->child; + + if (!subfield) + return field->next; + + cat_subfield(pf->list, buf, subfield); + +#if MARCOMP_DEBUG + logf(LOG_LOG, "cat_field(): got buffer {%s}", buf); +#endif + + return field->next; +} +static int is_empty(char *s) +{ + char *p = s; + + for (p = s; *p; p++) + { + if (!isspace(*p)) + return 0; + } + return 1; +} +static void parse_data1_tree(struct grs_read_info *p, const char *mc_stmnt, data1_node *root) +{ + data1_marctab *marctab = root->u.root.absyn->marc; + data1_node *top = root->child; + data1_node *field; + mc_context *c; + mc_field *pf; + char buf[1000000]; + + c = mc_mk_context(mc_stmnt+3); + + if (!c) + return; + + pf = mc_getfield(c); + + if (!pf) + { + mc_destroy_context(c); + return; + } +#if MARCOMP_DEBUG + logf(LOG_LOG, "parse_data1_tree(): statement -{%s}", mc_stmnt); +#endif + if (!yaz_matchstr(pf->name, "ldr")) + { + data1_node *new; +#if MARCOMP_DEBUG + logf(LOG_LOG,"parse_data1_tree(): try LEADER from {%d} to {%d} positions", + pf->interval.start, pf->interval.end); +#endif + new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top); + data1_mk_text_n(p->dh, p->mem, marctab->leader+pf->interval.start, + pf->interval.end-pf->interval.start+1, new); + } + else + { + field=top->child; + + while(field) + { + if (!yaz_matchstr(field->u.tag.tag, pf->name)) + { + data1_node *new; + char *pb = buf; +#if MARCOMP_DEBUG + logf(LOG_LOG, "parse_data1_tree(): try field {%s}", field->u.tag.tag); +#endif + *buf = '\0'; + field = cat_field(p, pf, buf, field); + + for (pb = strtok(pb, "\n"); pb; pb = strtok(NULL, "\n")) + { + if (!is_empty(pb)) + { + new = data1_mk_tag_n(p->dh, p->mem, mc_stmnt, strlen(mc_stmnt), 0, top); + data1_mk_text_n(p->dh, p->mem, pb, strlen(pb), new); + } + } + } + else + { + field = field->next; + } + } + } + mc_destroy_field(pf); + mc_destroy_context(c); +} + +data1_node *grs_read_marc(struct grs_read_info *p) +{ + data1_node *root = grs_read_iso2709(p); + data1_element *e; + + if (!root) + return 0; + + for (e=root->u.root.absyn->main_elements; e; e=e->next) + { + data1_tag *tag = e->tag; + + if (tag && tag->which == DATA1T_string && + !yaz_matchstr(tag->value.string, "mc?")) + parse_data1_tree(p, tag->value.string, root); + } + return root; +} static void *grs_init_marc(void) { return 0;