X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarc_read_line.c;h=eefeb84f0ce6bdc7ca8282d24346c7ea437acfea;hp=c05af1bbf11e0fdf7fe8ea76733adb1217f587ab;hb=23b8fd79b9b42dd8655bcd2e0af8a3905348f059;hpb=54db174ec4113c44b05f3766dc6f8ad665968cfb diff --git a/src/marc_read_line.c b/src/marc_read_line.c index c05af1b..eefeb84 100644 --- a/src/marc_read_line.c +++ b/src/marc_read_line.c @@ -1,13 +1,11 @@ -/* - * Copyright (C) 1995-2006, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2010 Index Data * See the file LICENSE for details. - * - * $Id: marc_read_line.c,v 1.1 2006-12-15 12:37:18 adam Exp $ */ /** - * \file marc_read_iso2709.c - * \brief Implements reading of MARC as ISO2709 + * \file marc_read_line.c + * \brief Implements reading of MARC in line format */ #if HAVE_CONFIG_H @@ -18,6 +16,7 @@ #include #endif +#include #include #include #include @@ -26,19 +25,234 @@ #include #include +int yaz_gets(int (*getbyte)(void *client_data), + void (*ungetbyte)(int b, void *client_data), + void *client_data, + WRBUF w) +{ + size_t sz = 0; + int ch = getbyte(client_data); + + while (ch != '\0' && ch != '\r' && ch != '\n') + { + wrbuf_putc(w, ch); + sz++; + ch = getbyte(client_data); + } + if (ch == '\r') + { + ch = getbyte(client_data); + if (ch != '\n' && ch != '\0') + ungetbyte(ch, client_data); + } + else if (ch == '\n') + { + ch = getbyte(client_data); + if (ch != '\r' && ch != '\0') + ungetbyte(ch, client_data); + } + if (sz) + { + return 1; + } + return 0; +} + +static int yaz_marc_line_gets(int (*getbyte)(void *client_data), + void (*ungetbyte)(int b, void *client_data), + void *client_data, + WRBUF w) +{ + int more; + + wrbuf_rewind(w); + more = yaz_gets(getbyte, ungetbyte, client_data, w); + if (!more) + return 0; + + while (more) + { + int i; + for (i = 0; i<4; i++) + { + int ch = getbyte(client_data); + if (ch != ' ') + { + if (ch) + ungetbyte(ch, client_data); + return 1; + } + } + if (wrbuf_len(w) > 60 && wrbuf_buf(w)[wrbuf_len(w)-1] == '=') + wrbuf_cut_right(w, 1); + else + wrbuf_puts(w, " "); + more = yaz_gets(getbyte, ungetbyte, client_data, w); + } + return 1; +} + + int yaz_marc_read_line(yaz_marc_t mt, int (*getbyte)(void *client_data), void (*ungetbyte)(int b, void *client_data), void *client_data) { + int indicator_length; + int identifier_length; + int base_address; + int length_data_entry; + int length_starting; + int length_implementation; + int marker_ch = 0; + int marker_skip = 0; + int header_created = 0; + WRBUF wrbuf_line = wrbuf_alloc(); + yaz_marc_reset(mt); - return -1; + while (yaz_marc_line_gets(getbyte, ungetbyte, client_data, wrbuf_line)) + { + const char *line = wrbuf_cstr(wrbuf_line); + int val; + size_t line_len = strlen(line); + if (line_len == 0) /* empty line indicates end of record */ + { + if (header_created) + break; + } + else if (line[0] == '$') /* indicates beginning/end of record */ + { + if (header_created) + break; + } + else if (line[0] == '(') /* annotation, skip it */ + ; + else if (line_len == 24 && atoi_n_check(line, 5, &val) && val >= 24) + { + /* deal with header lines: 00366nam 22001698a 4500 + */ + + if (header_created) + break; + yaz_marc_set_leader(mt, line, + &indicator_length, + &identifier_length, + &base_address, + &length_data_entry, + &length_starting, + &length_implementation); + header_created = 1; + } + else if (line_len > 4 && line[0] != ' ' && line[1] != ' ' + && line[2] != ' ' && line[3] == ' ' ) + { + /* deal with data/control lines: 245 12 ........ */ + char tag[4]; + const char *datafield_start = line+6; + marker_ch = 0; + marker_skip = 0; + + memcpy(tag, line, 3); + tag[3] = '\0'; + if (line_len >= 8) /* control - or datafield ? */ + { + if (*datafield_start == ' ') + datafield_start++; /* skip blank after indicator */ + + if (strchr("$_*", *datafield_start)) + { + marker_ch = *datafield_start; + if (datafield_start[2] == ' ') + marker_skip = 1; /* subfields has blank before data */ + } + } + if (!header_created) + { + const char *leader = "01000cam 2200265 i 4500"; + + yaz_marc_set_leader(mt, leader, + &indicator_length, + &identifier_length, + &base_address, + &length_data_entry, + &length_starting, + &length_implementation); + header_created = 1; + } + + if (marker_ch == 0) + { /* control field */ + yaz_marc_add_controlfield(mt, tag, line+4, strlen(line+4)); + } + else + { /* data field */ + const char *indicator = line+4; + int indicator_len = 2; + const char *cp = datafield_start; + + yaz_marc_add_datafield(mt, tag, indicator, indicator_len); + for (;;) + { + const char *next; + size_t len; + + assert(cp[0] == marker_ch); + cp++; + next = cp; + while ((next = strchr(next, marker_ch))) + { + if ((next[1] >= 'A' && next[1] <= 'Z') + ||(next[1] >= 'a' && next[1] <= 'z')) + { + if (!marker_skip) + break; + else if (next[2] == ' ') + break; + } + next++; + } + len = strlen(cp); + if (next) + len = next - cp - marker_skip; + + if (marker_skip) + { + /* remove ' ' after subfield marker */ + char *cp_blank = strchr(cp, ' '); + if (cp_blank) + { + len--; + while (cp_blank != cp) + { + cp_blank[0] = cp_blank[-1]; + cp_blank--; + } + cp++; + } + } + yaz_marc_add_subfield(mt, cp, len); + if (!next) + break; + cp = next; + } + } + } + else + { + yaz_marc_cprintf(mt, "Ignoring line: %s", line); + } + } + wrbuf_destroy(wrbuf_line); + if (!header_created) + return -1; + return 0; } /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab