X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=util%2Fmarcdump.c;h=d94250a2683600256dd282bf96b624d35a7a1220;hp=8e9c4d8ee4ffe515fa3a5afa926925e7911155f3;hb=e5c00c8cfb4a34c9d2dde1e9d8ffa29817b38b7e;hpb=861f9deb72bd92679ea08d528b40993ada55cdb9 diff --git a/util/marcdump.c b/util/marcdump.c index 8e9c4d8..d94250a 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 1995-2006, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2010 Index Data * See the file LICENSE for details. - * - * $Id: marcdump.c,v 1.46 2006-12-18 10:33:52 adam Exp $ */ #define _FILE_OFFSET_BITS 64 @@ -14,10 +12,20 @@ #if YAZ_HAVE_XML2 #include #include - #include #include +/* Libxml2 version < 2.6.15. xmlreader not reliable/present */ +#if LIBXML_VERSION < 20615 +#define USE_XMLREADER 0 +#else +#define USE_XMLREADER 1 +#endif + +#if USE_XMLREADER +#include +#endif + #endif #include @@ -50,12 +58,23 @@ static char *prog; static void usage(const char *prog) { - fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] " - "[-i format] [-o format] " - "[-n] [-l pos=value] [-v] [-C chunk] [-s splitfname] file...\n", + fprintf (stderr, "Usage: %s [-i format] [-o format] [-f from] [-t to] " + "[-l pos=value] [-c cfile] [-s prefix] [-C size] [-n] " + "[-p] [-v] [-V] file...\n", prog); } +static void show_version(void) +{ + char vstr[20], sha1_str[41]; + + yaz_version(vstr, sha1_str); + printf("YAZ version: %s %s\n", YAZ_VERSION, YAZ_VERSION_SHA1); + if (strcmp(sha1_str, YAZ_VERSION_SHA1)) + printf("YAZ DLL/SO: %s %s\n", vstr, sha1_str); + exit(0); +} + static int getbyte_stream(void *client_data) { FILE *f = (FILE*) client_data; @@ -90,8 +109,8 @@ static void marcdump_read_line(yaz_marc_t mt, const char *fname) { WRBUF wrbuf = wrbuf_alloc(); yaz_marc_write_mode(mt, wrbuf); - fputs(wrbuf_buf(wrbuf), stdout); - wrbuf_free(wrbuf, 1); + fputs(wrbuf_cstr(wrbuf), stdout); + wrbuf_destroy(wrbuf); } fclose(inf); } 
#if YAZ_HAVE_XML2
/*
 * Read every <record> element found in the XML file `fname`, decode it
 * into `mt` with yaz_marc_read_xml() and print what yaz_marc_write_mode()
 * produces on stdout.
 *
 *   mt     MARC handle used for both decoding and writing.
 *   fname  path of the XML input file.
 *
 * Nothing is returned.  A record that fails to decode is reported on
 * stderr and processing continues with the next one.  If no reader (or
 * document) can be created for `fname`, no records are processed.
 */
static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
{
    WRBUF wrbuf = wrbuf_alloc();
#if USE_XMLREADER
    xmlTextReaderPtr reader = xmlReaderForFile(fname, 0 /* encoding */,
                                               0 /* options */);

    if (reader)
    {
        while (xmlTextReaderRead(reader) == 1)
        {
            xmlChar *name;
            int is_record;
            xmlNodePtr ptr;

            if (xmlTextReaderNodeType(reader) != XML_READER_TYPE_ELEMENT)
                continue;

            /* The local name is a copy owned by the caller: compare it,
               then release it right away so it is not leaked once per
               element. It may be NULL on error, so guard the strcmp. */
            name = xmlTextReaderLocalName(reader);
            is_record = name && !strcmp((const char *) name, "record");
            if (name)
                xmlFree(name);
            if (!is_record)
                continue;

            /* Expand the current element into a subtree; the subtree
               stays owned by the reader. */
            ptr = xmlTextReaderExpand(reader);
            if (!ptr)
            {
                fprintf(stderr, "xmlTextReaderExpand failed\n");
                continue;
            }

            if (yaz_marc_read_xml(mt, ptr))
                fprintf(stderr, "yaz_marc_read_xml failed\n");
            else
            {
                int write_rc = yaz_marc_write_mode(mt, wrbuf);
                if (write_rc)
                    yaz_log(YLOG_WARN, "yaz_marc_write_mode: write error: %d", write_rc);

                fputs(wrbuf_cstr(wrbuf), stdout);
                /* reuse the same buffer for the next record */
                wrbuf_rewind(wrbuf);
            }
        }
        /* release the reader and everything it still owns */
        xmlFreeTextReader(reader);
    }
#else
    xmlDocPtr doc = xmlParseFile(fname);
    if (doc)
    {
        xmlNodePtr ptr = xmlDocGetRootElement(doc);

        /* A while loop is used on purpose: after descending into
           <collection> the first child must be examined as-is (it may be
           NULL for an empty collection, or already a <record>), so the
           step to ->next must not run on that iteration. */
        while (ptr)
        {
            if (ptr->type == XML_ELEMENT_NODE)
            {
                if (!strcmp((const char *) ptr->name, "collection"))
                {
                    ptr = ptr->children;
                    continue;
                }
                if (!strcmp((const char *) ptr->name, "record"))
                {
                    if (yaz_marc_read_xml(mt, ptr))
                        fprintf(stderr, "yaz_marc_read_xml failed\n");
                    else
                    {
                        int write_rc = yaz_marc_write_mode(mt, wrbuf);
                        if (write_rc)
                            yaz_log(YLOG_WARN, "yaz_marc_write_mode: write error: %d", write_rc);

                        fputs(wrbuf_cstr(wrbuf), stdout);
                        wrbuf_rewind(wrbuf);
                    }
                }
            }
            ptr = ptr->next;
        }
        xmlFreeDoc(doc);
    }
#endif
    /* flush whatever is still held in the buffer, then drop it */
    fputs(wrbuf_cstr(wrbuf), stdout);
    wrbuf_destroy(wrbuf);
}
#endif
+217,13 @@ static void dump(const char *fname, const char *from, const char *to, } yaz_marc_iconv(mt, cd); } - yaz_marc_xml(mt, output_format); + yaz_marc_enable_collection(mt); + yaz_marc_set_read_format(mt, input_format); + yaz_marc_set_write_format(mt, output_format); yaz_marc_write_using_libxml2(mt, write_using_libxml2); yaz_marc_debug(mt, verbose); - if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_XCHANGE) + if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TMARCXML || input_format == YAZ_MARC_XCHANGE) { #if YAZ_HAVE_XML2 marcdump_read_xml(mt, fname); @@ -181,10 +249,10 @@ static void dump(const char *fname, const char *from, const char *to, fprintf (cfile, "char *marc_records[] = {\n"); for(;; marc_no++) { - char *result = 0; + const char *result = 0; size_t len; size_t rlen; - int len_result; + size_t len_result; size_t r; char buf[100001]; @@ -235,6 +303,15 @@ static void dump(const char *fname, const char *from, const char *to, r = fread (buf + 5, 1, rlen, inf); if (r < rlen) break; + while (buf[len-1] != ISO2709_RS) + { + if (len > sizeof(buf)-2) + break; + r = fread (buf + len, 1, 1, inf); + if (r != 1) + break; + len++; + } if (split_fname) { char fname[256]; @@ -265,11 +342,15 @@ static void dump(const char *fname, const char *from, const char *to, fclose(sf); } } - len_result = (int) rlen; + len_result = rlen; r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result); - if (r > 0 && result) + if (r > 0 && result && len_result) { - fwrite (result, len_result, 1, stdout); + if (fwrite(result, len_result, 1, stdout) != 1) + { + fprintf(stderr, "Write to stdout failed\n"); + break; + } } if (r > 0 && cfile) { @@ -298,6 +379,12 @@ static void dump(const char *fname, const char *from, const char *to, fprintf (cfile, "};\n"); fclose(inf); } + { + WRBUF wrbuf = wrbuf_alloc(); + yaz_marc_write_trailer(mt, wrbuf); + fputs(wrbuf_cstr(wrbuf), stdout); + wrbuf_destroy(wrbuf); + } if (cd) yaz_iconv_close(cd); yaz_marc_destroy(mt); 
@@ -318,7 +405,7 @@ int main (int argc, char **argv) const char *split_fname = 0; const char *leader_spec = 0; int write_using_libxml2 = 0; - + #if HAVE_LOCALE_H setlocale(LC_CTYPE, ""); #endif @@ -329,7 +416,7 @@ int main (int argc, char **argv) #endif prog = *argv; - while ((r = options("i:o:C:npvc:xOeXIf:t:s:l:", argv, argc, &arg)) != -2) + while ((r = options("i:o:C:npc:xOeXIf:t:s:l:Vv", argv, argc, &arg)) != -2) { no++; switch (r) @@ -341,6 +428,15 @@ int main (int argc, char **argv) fprintf(stderr, "%s: bad input format: %s\n", prog, arg); exit(1); } +#if YAZ_HAVE_XML2 +#else + if (input_format == YAZ_MARC_MARCXML + || input_format == YAZ_MARC_XCHANGE) + { + fprintf(stderr, "%s: Libxml2 support not enabled\n", prog); + exit(3); + } +#endif break; case 'o': /* dirty hack so we can make Libxml2 do the writing .. @@ -417,6 +513,9 @@ int main (int argc, char **argv) case 'v': verbose++; break; + case 'V': + show_version(); + break; default: usage(prog); exit(1); @@ -434,6 +533,7 @@ int main (int argc, char **argv) /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab