X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarc_read_line.c;h=d6695765076c467d570962547cd3319232cf6ad0;hp=90a35216a8f24685a1a8f4de18445a4f565ad11d;hb=30a1eadf63ab5d1357d10847f905243a250703b8;hpb=cd2f012a70ebadf660acb71936e32192287dc30a diff --git a/src/marc_read_line.c b/src/marc_read_line.c index 90a3521..d669576 100644 --- a/src/marc_read_line.c +++ b/src/marc_read_line.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 1995-2006, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2013 Index Data * See the file LICENSE for details. - * - * $Id: marc_read_line.c,v 1.2 2006-12-15 19:28:47 adam Exp $ */ /** @@ -21,23 +19,23 @@ #include #include #include -#include #include #include #include -int yaz_gets(int (*getbyte)(void *client_data), - void (*ungetbyte)(int b, void *client_data), - void *client_data, - char *buf, size_t len) +static int yaz_gets(int (*getbyte)(void *client_data), + void (*ungetbyte)(int b, void *client_data), + void *client_data, + WRBUF w) { size_t sz = 0; int ch = getbyte(client_data); + while (ch != '\0' && ch != '\r' && ch != '\n') { - if (sz < len-1) - buf[sz++] = ch; + wrbuf_putc(w, ch); + sz++; ch = getbyte(client_data); } if (ch == '\r') @@ -52,12 +50,48 @@ int yaz_gets(int (*getbyte)(void *client_data), if (ch != '\r' && ch != '\0') ungetbyte(ch, client_data); } - buf[sz] = '\0'; if (sz) + { return 1; + } return 0; } - + +static int yaz_marc_line_gets(int (*getbyte)(void *client_data), + void (*ungetbyte)(int b, void *client_data), + void *client_data, + WRBUF w) +{ + int more; + + wrbuf_rewind(w); + more = yaz_gets(getbyte, ungetbyte, client_data, w); + if (!more) + return 0; + + while (more) + { + int i; + for (i = 0; i<4; i++) + { + int ch = getbyte(client_data); + if (ch != ' ') + { + if (ch) + ungetbyte(ch, client_data); + return 1; + } + } + if (wrbuf_len(w) > 60 && wrbuf_buf(w)[wrbuf_len(w)-1] == '=') + wrbuf_cut_right(w, 1); + else + wrbuf_puts(w, " "); + more = yaz_gets(getbyte, ungetbyte, client_data, w); + } + return 1; +} + + int yaz_marc_read_line(yaz_marc_t mt, int (*getbyte)(void *client_data), void (*ungetbyte)(int b, void *client_data), @@ -72,17 +106,15 @@ int yaz_marc_read_line(yaz_marc_t mt, int marker_ch = 0; int marker_skip = 0; int header_created = 0; - char line[4096]; + WRBUF wrbuf_line = wrbuf_alloc(); yaz_marc_reset(mt); - while (yaz_gets(getbyte, ungetbyte, client_data, line, sizeof(line))) + while (yaz_marc_line_gets(getbyte, ungetbyte, client_data, wrbuf_line)) { + const char *line = wrbuf_cstr(wrbuf_line); int val; size_t line_len = strlen(line); - /* see if have leader lines of the form: - 00366nam 22001698a 4500 - */ if (line_len == 0) /* empty line indicates end of record */ { if (header_created) @@ -95,8 +127,11 @@ int yaz_marc_read_line(yaz_marc_t mt, } else if (line[0] == '(') /* annotation, skip it */ ; - else if (line_len == 24 && atoi_n_check(line, 5, &val) && val >= 24) + else if (line_len == 24 && atoi_n_check(line, 5, &val)) { + /* deal with header lines: 00366nam 22001698a 4500 + */ + if (header_created) break; yaz_marc_set_leader(mt, line, @@ -108,14 +143,12 @@ int yaz_marc_read_line(yaz_marc_t mt, &length_implementation); header_created = 1; } - else if (line_len > 5 && memcmp(line, " ", 4) == 0) - { /* continuation line */ - ; - } - else if (line_len > 5 && line[3] == ' ') + else if (line_len > 4 && line[0] != ' ' && line[1] != ' ' + && line[2] != ' ' && line[3] == ' ' ) { + /* deal with data/control lines: 245 12 ........ */ char tag[4]; - char *datafield_start = line+6; + const char *datafield_start = line+6; marker_ch = 0; marker_skip = 0; @@ -155,21 +188,22 @@ int yaz_marc_read_line(yaz_marc_t mt, { /* data field */ const char *indicator = line+4; int indicator_len = 2; - char *cp = datafield_start; + const char *cp = datafield_start; yaz_marc_add_datafield(mt, tag, indicator, indicator_len); for (;;) { - char *next; + const char *next; size_t len; - + assert(cp[0] == marker_ch); cp++; next = cp; while ((next = strchr(next, marker_ch))) { if ((next[1] >= 'A' && next[1] <= 'Z') - ||(next[1] >= 'a' && next[1] <= 'z')) + ||(next[1] >= 'a' && next[1] <= 'z') + ||(next[1] >= '0' && next[1] <= '9')) { if (!marker_skip) break; @@ -197,8 +231,6 @@ int yaz_marc_read_line(yaz_marc_t mt, cp++; } } - assert(len >= 0); - assert(len < 399); yaz_marc_add_subfield(mt, cp, len); if (!next) break; @@ -206,7 +238,12 @@ int yaz_marc_read_line(yaz_marc_t mt, } } } + else + { + yaz_marc_cprintf(mt, "Ignoring line: %s", line); + } } + wrbuf_destroy(wrbuf_line); if (!header_created) return -1; return 0; @@ -215,6 +252,7 @@ int yaz_marc_read_line(yaz_marc_t mt, /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab