X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=util%2Fmarcdump.c;h=cec66ab347d4320f050eaa2499c31524bfb3ad69;hp=f5087e46f9ff8523fda5923fc2c64493037c061c;hb=725b07a551ac42b73d3b621e6a9b696cd13f42d0;hpb=7d280285b7dd17c9a71b8d458171f40f35dd6682 diff --git a/util/marcdump.c b/util/marcdump.c index f5087e4..cec66ab 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 1995-2006, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2013 Index Data * See the file LICENSE for details. - * - * $Id: marcdump.c,v 1.39 2006-06-12 16:12:58 mike Exp $ */ #define _FILE_OFFSET_BITS 64 @@ -11,13 +9,23 @@ #include #endif -#if HAVE_XML2 +#if YAZ_HAVE_XML2 #include #include - #include #include +/* Libxml2 version < 2.6.15. xmlreader not reliable/present */ +#if LIBXML_VERSION < 20615 +#define USE_XMLREADER 0 +#else +#define USE_XMLREADER 1 +#endif + +#if USE_XMLREADER +#include +#endif + #endif #include @@ -34,6 +42,7 @@ #endif #include +#include #include #include #include @@ -48,47 +57,215 @@ static char *prog; +static int no_errors = 0; + static void usage(const char *prog) { - fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-X] [-e] [-I] [-v] [-s splitfname] file...\n", - prog); -} + fprintf(stderr, "Usage: %s [-i format] [-o format] [-f from] [-t to] " + "[-l pos=value] [-c cfile] [-s prefix] [-C size] [-n] " + "[-p] [-v] [-V] file...\n", + prog); +} -#if HAVE_XML2 -static void marcdump_read_xml(yaz_marc_t mt, const char *fname) +static void show_version(void) { - xmlNodePtr ptr; - xmlDocPtr doc = xmlParseFile(fname); - if (!doc) - return; + char vstr[20], sha1_str[41]; + + yaz_version(vstr, sha1_str); + printf("YAZ version: %s %s\n", YAZ_VERSION, YAZ_VERSION_SHA1); + if (strcmp(sha1_str, YAZ_VERSION_SHA1)) + printf("YAZ DLL/SO: %s %s\n", vstr, sha1_str); + exit(0); +} + +static int getbyte_stream(void *client_data) +{ + FILE *f = (FILE*) client_data; + + int c = fgetc(f); + if (c == EOF) + return 0; + return c; +} + +static void ungetbyte_stream(int c, void *client_data) +{ + FILE *f = (FILE*) client_data; + + if (c == 0) + c = EOF; + ungetc(c, f); +} + +static void marcdump_read_line(yaz_marc_t mt, const char *fname) +{ + FILE *inf = fopen(fname, "rb"); + if (!inf) + { + fprintf(stderr, "%s: cannot open %s:%s\n", + prog, fname, strerror(errno)); + exit(1); + } - ptr = xmlDocGetRootElement(doc); - if (ptr) + while (yaz_marc_read_line(mt, getbyte_stream, + ungetbyte_stream, inf) == 0) { - int r; WRBUF wrbuf = wrbuf_alloc(); - r = yaz_marc_read_xml(mt, ptr); - if (r) - fprintf(stderr, "yaz_marc_read_xml failed\n"); - yaz_marc_write_mode(mt, wrbuf); + fputs(wrbuf_cstr(wrbuf), stdout); + wrbuf_destroy(wrbuf); + } + fclose(inf); +} - fputs(wrbuf_buf(wrbuf), stdout); +static void marcdump_read_json(yaz_marc_t mt, const char *fname) +{ + FILE *inf = fopen(fname, "rb"); + if (!inf) + { + fprintf(stderr, "%s: cannot open %s:%s\n", + prog, fname, strerror(errno)); + exit(1); + } + else + { + const char *errmsg; + size_t errpos; + WRBUF w = wrbuf_alloc(); + struct json_node *n; + int c; - wrbuf_free(wrbuf, 1); + while ((c = getc(inf)) != EOF) + wrbuf_putc(w, c); + n = json_parse2(wrbuf_cstr(w), &errmsg, &errpos); + if (n) + { + int r = yaz_marc_read_json_node(mt, n); + if (r == 0) + { + wrbuf_rewind(w); + yaz_marc_write_mode(mt, w); + fputs(wrbuf_cstr(w), stdout); + wrbuf_rewind(w); + } + else + { + fprintf(stderr, "%s: JSON MARC parsing failed ret=%d\n", fname, + r); + } + } + else + { + fprintf(stderr, "%s: JSON parse error: %s . pos=%ld\n", fname, + errmsg, (long) errpos); + } + wrbuf_destroy(w); + fclose(inf); + } +} + + +#if YAZ_HAVE_XML2 +static void marcdump_read_xml(yaz_marc_t mt, const char *fname) +{ + WRBUF wrbuf = wrbuf_alloc(); +#if USE_XMLREADER + xmlTextReaderPtr reader = xmlReaderForFile(fname, 0 /* encoding */, + 0 /* options */); + + if (reader) + { + int ret; + while ((ret = xmlTextReaderRead(reader)) == 1) + { + int type = xmlTextReaderNodeType(reader); + if (type == XML_READER_TYPE_ELEMENT) + { + char *name = (char *) xmlTextReaderLocalName(reader); + if (!strcmp(name, "record") || !strcmp(name, "r")) + { + xmlNodePtr ptr = xmlTextReaderExpand(reader); + + int r = yaz_marc_read_xml(mt, ptr); + if (r) + { + no_errors++; + fprintf(stderr, "yaz_marc_read_xml failed\n"); + } + else + { + int write_rc = yaz_marc_write_mode(mt, wrbuf); + if (write_rc) + { + yaz_log(YLOG_WARN, "yaz_marc_write_mode: " + "write error: %d", write_rc); + no_errors++; + } + fputs(wrbuf_cstr(wrbuf), stdout); + wrbuf_rewind(wrbuf); + } + } + xmlFree(name); + } + } + xmlFreeTextReader(reader); + } +#else + xmlDocPtr doc = xmlParseFile(fname); + if (doc) + { + xmlNodePtr ptr = xmlDocGetRootElement(doc); + for (; ptr; ptr = ptr->next) + { + if (ptr->type == XML_ELEMENT_NODE) + { + if (!strcmp((const char *) ptr->name, "collection")) + { + ptr = ptr->children; + continue; + } + if (!strcmp((const char *) ptr->name, "record") || + !strcmp((const char *) ptr->name, "r")) + { + int r = yaz_marc_read_xml(mt, ptr); + if (r) + { + no_errors++; + fprintf(stderr, "yaz_marc_read_xml failed\n"); + } + else + { + yaz_marc_write_mode(mt, wrbuf); + + fputs(wrbuf_cstr(wrbuf), stdout); + wrbuf_rewind(wrbuf); + } + } + } + } + xmlFreeDoc(doc); } - xmlFreeDoc(doc); +#endif + fputs(wrbuf_cstr(wrbuf), stdout); + wrbuf_destroy(wrbuf); } #endif static void dump(const char *fname, const char *from, const char *to, - int read_xml, int xml, - int print_offset, const char *split_fname, int verbose, - FILE *cfile) + int input_format, int output_format, + int write_using_libxml2, + int print_offset, const char *split_fname, int split_chunk, + int verbose, FILE *cfile, const char *leader_spec) { yaz_marc_t mt = yaz_marc_create(); yaz_iconv_t cd = 0; - + + if (yaz_marc_leader_spec(mt, leader_spec)) + { + fprintf(stderr, "bad leader spec: %s\n", leader_spec); + yaz_marc_destroy(mt); + exit(2); + } if (from && to) { cd = yaz_iconv_open(to, from); @@ -96,150 +273,209 @@ static void dump(const char *fname, const char *from, const char *to, { fprintf(stderr, "conversion from %s to %s " "unsupported\n", from, to); + yaz_marc_destroy(mt); exit(2); } yaz_marc_iconv(mt, cd); } - yaz_marc_xml(mt, xml); + yaz_marc_enable_collection(mt); + yaz_marc_xml(mt, output_format); + yaz_marc_write_using_libxml2(mt, write_using_libxml2); yaz_marc_debug(mt, verbose); - if (read_xml) + if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TURBOMARC || input_format == YAZ_MARC_XCHANGE) { -#if HAVE_XML2 +#if YAZ_HAVE_XML2 marcdump_read_xml(mt, fname); -#else - return; #endif } - else + else if (input_format == YAZ_MARC_LINE) + { + marcdump_read_line(mt, fname); + } + else if (input_format == YAZ_MARC_JSON) + { + marcdump_read_json(mt, fname); + } + else if (input_format == YAZ_MARC_ISO2709) { FILE *inf = fopen(fname, "rb"); - int count = 0; int num = 1; + int marc_no = 0; + int split_file_no = -1; if (!inf) { - fprintf (stderr, "%s: cannot open %s:%s\n", - prog, fname, strerror (errno)); + fprintf(stderr, "%s: cannot open %s:%s\n", + prog, fname, strerror(errno)); exit(1); } if (cfile) - fprintf (cfile, "char *marc_records[] = {\n"); - if (1) + fprintf(cfile, "char *marc_records[] = {\n"); + for(;; marc_no++) { - int marc_no = 0; - for(;; marc_no++) + const char *result = 0; + size_t len; + size_t rlen; + size_t len_result; + size_t r; + char buf[100001]; + + r = fread(buf, 1, 5, inf); + if (r < 5) { - size_t len; - char *result = 0; - size_t rlen; - size_t r; - char buf[100001]; - - r = fread (buf, 1, 5, inf); - if (r < 5) - { - if (r && print_offset && verbose) - printf ("\n", - (long) r); + if (r == 0) /* normal EOF, all good */ break; - } - while (*buf < '0' || *buf > '9') + if (print_offset && verbose) { - int i; - long off = ftell(inf) - 5; - if (verbose || print_offset) - printf("\n", - *buf & 0xff, *buf & 0xff, - off, off); - for (i = 0; i<4; i++) - buf[i] = buf[i+1]; - r = fread(buf+4, 1, 1, inf); - if (r < 1) - break; + printf("\n", + (long) r); } + break; + } + while (*buf < '0' || *buf > '9') + { + int i; + long off = ftell(inf) - 5; + printf("\n", + *buf & 0xff, *buf & 0xff, + off, off); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + r = fread(buf+4, 1, 1, inf); + no_errors++; if (r < 1) + break; + } + if (r < 1) + { + if (verbose || print_offset) + printf("\n"); + break; + } + if (print_offset) + { + long off = ftell(inf) - 5; + printf("\n", + num, off, off); + } + len = atoi_n(buf, 5); + if (len < 25 || len > 100000) + { + long off = ftell(inf) - 5; + printf("\n", + (long)len, (long) off, (long) off); + no_errors++; + break; + } + rlen = len - 5; + r = fread(buf + 5, 1, rlen, inf); + if (r < rlen) + { + long off = ftell(inf); + printf("\n", + (long) off, (long) off); + no_errors++; + break; + } + while (buf[len-1] != ISO2709_RS) + { + if (len > sizeof(buf)-2) { - if (verbose || print_offset) - printf ("\n"); + r = 0; break; } - if (print_offset) + r = fread(buf + len, 1, 1, inf); + if (r != 1) + break; + len++; + } + if (r < 1) + { + printf("\n"); + no_errors++; + break; + } + if (split_fname) + { + char fname[256]; + const char *mode = 0; + FILE *sf; + if ((marc_no % split_chunk) == 0) { - long off = ftell(inf) - 5; - printf ("\n", - num, off, off); + mode = "wb"; + split_file_no++; } - len = atoi_n(buf, 5); - if (len < 25 || len > 100000) + else + mode = "ab"; + sprintf(fname, "%.200s%07d", split_fname, split_file_no); + sf = fopen(fname, mode); + if (!sf) { - long off = ftell(inf) - 5; - printf("Bad Length %d read at offset %ld (%lx)\n", - len, (long) off, (long) off); - break; + fprintf(stderr, "Could not open %s\n", fname); + split_fname = 0; } - rlen = len - 5; - r = fread (buf + 5, 1, rlen, inf); - if (r < rlen) - break; - if (split_fname) + else { - char fname[256]; - FILE *sf; - sprintf(fname, "%.200s%07d", split_fname, marc_no); - sf = fopen(fname, "wb"); - if (!sf) + if (fwrite(buf, 1, len, sf) != len) { - fprintf(stderr, "Could not open %s\n", fname); + fprintf(stderr, "Could write content to %s\n", + fname); split_fname = 0; + no_errors++; } - else - { - if (fwrite(buf, 1, len, sf) != len) - { - fprintf(stderr, "Could write content to %s\n", - fname); - split_fname = 0; - } - fclose(sf); - } + fclose(sf); } - { int rlentmp = (int) rlen; - r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlentmp); - rlen = (size_t) rlentmp; } - if (r > 0 && result) + } + len_result = rlen; + r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result); + if (r == -1) + no_errors++; + if (r > 0 && result && len_result) + { + if (fwrite(result, len_result, 1, stdout) != 1) { - fwrite (result, rlen, 1, stdout); + fprintf(stderr, "Write to stdout failed\n"); + no_errors++; + break; } - if (r > 0 && cfile) + } + if (r > 0 && cfile) + { + char *p = buf; + size_t i; + if (marc_no) + fprintf(cfile, ","); + fprintf(cfile, "\n"); + for (i = 0; i < r; i++) { - char *p = buf; - size_t i; - if (count) - fprintf (cfile, ","); - fprintf (cfile, "\n"); - for (i = 0; i < r; i++) - { - if ((i & 15) == 0) - fprintf (cfile, " \""); - fprintf (cfile, "\\x%02X", p[i] & 255); - - if (i < r - 1 && (i & 15) == 15) - fprintf (cfile, "\"\n"); - - } - fprintf (cfile, "\"\n"); + if ((i & 15) == 0) + fprintf(cfile, " \""); + if (p[i] < 32 || p[i] > 126) + fprintf(cfile, "\" \"\\x%02X\" \"", p[i] & 255); + else + fputc(p[i], cfile); + + if (i < r - 1 && (i & 15) == 15) + fprintf(cfile, "\"\n"); + } - num++; - if (verbose) - printf("\n"); + fprintf(cfile, "\"\n"); } - count++; + num++; + if (verbose) + printf("\n"); } if (cfile) - fprintf (cfile, "};\n"); + fprintf(cfile, "};\n"); fclose(inf); } + { + WRBUF wrbuf = wrbuf_alloc(); + yaz_marc_write_trailer(mt, wrbuf); + fputs(wrbuf_cstr(wrbuf), stdout); + wrbuf_destroy(wrbuf); + } if (cd) yaz_iconv_close(cd); yaz_marc_destroy(mt); @@ -252,12 +488,15 @@ int main (int argc, char **argv) char *arg; int verbose = 0; int no = 0; - int xml = 0; + int output_format = YAZ_MARC_LINE; FILE *cfile = 0; char *from = 0, *to = 0; - int read_xml = 0; + int input_format = YAZ_MARC_ISO2709; + int split_chunk = 1; const char *split_fname = 0; - + const char *leader_spec = 0; + int write_using_libxml2 = 0; + #if HAVE_LOCALE_H setlocale(LC_CTYPE, ""); #endif @@ -268,11 +507,52 @@ int main (int argc, char **argv) #endif prog = *argv; - while ((r = options("pvc:xOeXIf:t:s:", argv, argc, &arg)) != -2) + while ((r = options("i:o:C:npc:xOeXIf:t:s:l:Vv", argv, argc, &arg)) != -2) { no++; switch (r) { + case 'i': + input_format = yaz_marc_decode_formatstr(arg); + if (input_format == -1) + { + fprintf(stderr, "%s: bad input format: %s\n", prog, arg); + exit(1); + } +#if YAZ_HAVE_XML2 +#else + if (input_format == YAZ_MARC_MARCXML + || input_format == YAZ_MARC_XCHANGE) + { + fprintf(stderr, "%s: Libxml2 support not enabled\n", prog); + exit(3); + } +#endif + break; + case 'o': + /* dirty hack so we can make Libxml2 do the writing .. + rather than WRBUF */ + if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0) + { + /* Only supported for Libxml2 2.6.0 or later */ +#if LIBXML_VERSION >= 20600 + arg = arg + 4; + write_using_libxml2 = 1; +#else + fprintf(stderr, "%s: output using Libxml2 unsupported\n", prog); + exit(4); +#endif + } + output_format = yaz_marc_decode_formatstr(arg); + if (output_format == -1) + { + fprintf(stderr, "%s: bad output format: %s\n", prog, arg); + exit(1); + } + break; + case 'l': + leader_spec = arg; + break; case 'f': from = arg; break; @@ -281,17 +561,13 @@ int main (int argc, char **argv) break; case 'c': if (cfile) - fclose (cfile); + fclose(cfile); cfile = fopen(arg, "w"); break; case 'x': -#if HAVE_XML2 - read_xml = 1; -#else - fprintf(stderr, "%s: -x not supported." - " YAZ not compiled with Libxml2 support\n", prog); - exit(3); -#endif + fprintf(stderr, "%s: -x no longer supported. " + "Use -i marcxml instead\n", prog); + exit(1); break; case 'O': fprintf(stderr, "%s: OAI MARC no longer supported." @@ -299,13 +575,22 @@ int main (int argc, char **argv) exit(1); break; case 'e': - xml = YAZ_MARC_XCHANGE; + fprintf(stderr, "%s: -e no longer supported. " + "Use -o marcxchange instead\n", prog); + exit(1); break; case 'X': - xml = YAZ_MARC_MARCXML; + fprintf(stderr, "%s: -X no longer supported. " + "Use -o marcxml instead\n", prog); + exit(1); break; case 'I': - xml = YAZ_MARC_ISO2709; + fprintf(stderr, "%s: -I no longer supported. " + "Use -o marc instead\n", prog); + exit(1); + break; + case 'n': + output_format = YAZ_MARC_CHECK; break; case 'p': print_offset = 1; @@ -313,30 +598,41 @@ int main (int argc, char **argv) case 's': split_fname = arg; break; + case 'C': + split_chunk = atoi(arg); + break; case 0: - dump(arg, from, to, read_xml, xml, - print_offset, split_fname, verbose, cfile); + dump(arg, from, to, input_format, output_format, + write_using_libxml2, + print_offset, split_fname, split_chunk, + verbose, cfile, leader_spec); break; case 'v': verbose++; break; + case 'V': + show_version(); + break; default: usage(prog); - exit (1); + exit(1); } } if (cfile) - fclose (cfile); + fclose(cfile); if (!no) { usage(prog); - exit (1); + exit(1); } - exit (0); + if (no_errors) + exit(5); + exit(0); } /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab