X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=util%2Fmarcdump.c;h=f5087e46f9ff8523fda5923fc2c64493037c061c;hp=82377b30d2b679afe3882e4492b175c2f16a6a77;hb=7d280285b7dd17c9a71b8d458171f40f35dd6682;hpb=b414bc74229f18537e6742b081aad7c4679006c8 diff --git a/util/marcdump.c b/util/marcdump.c index 82377b3..f5087e4 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -1,10 +1,12 @@ /* - * Copyright (C) 1995-2005, Index Data ApS + * Copyright (C) 1995-2006, Index Data ApS * See the file LICENSE for details. * - * $Id: marcdump.c,v 1.29 2005-04-20 13:04:04 adam Exp $ + * $Id: marcdump.c,v 1.39 2006-06-12 16:12:58 mike Exp $ */ +#define _FILE_OFFSET_BITS 64 + #if HAVE_CONFIG_H #include #endif @@ -43,76 +45,218 @@ #define SEEK_END 2 #endif + +static char *prog; + static void usage(const char *prog) { - fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-e] [-I] [-v] file...\n", + fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-X] [-e] [-I] [-v] [-s splitfname] file...\n", prog); } #if HAVE_XML2 -void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) { - xmlNodePtr cur; - int size; - int i; - - assert(output); - size = (nodes) ? nodes->nodeNr : 0; - - fprintf(output, "Result (%d nodes):\n", size); - for(i = 0; i < size; ++i) { - assert(nodes->nodeTab[i]); - - if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) - { - xmlNsPtr ns; - - ns = (xmlNsPtr)nodes->nodeTab[i]; - cur = (xmlNodePtr)ns->next; - if(cur->ns) { - fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", - ns->prefix, ns->href, cur->ns->href, cur->name); - } else { - fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", - ns->prefix, ns->href, cur->name); - } - } - else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE) - { - cur = nodes->nodeTab[i]; - if(cur->ns) { - fprintf(output, "= element node \"%s:%s\"\n", - cur->ns->href, cur->name); - } - else - { - fprintf(output, "= element node \"%s\"\n", - cur->name); - } - } - else - { - cur = nodes->nodeTab[i]; - fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type); - } +static void marcdump_read_xml(yaz_marc_t mt, const char *fname) +{ + xmlNodePtr ptr; + xmlDocPtr doc = xmlParseFile(fname); + if (!doc) + return; + + ptr = xmlDocGetRootElement(doc); + if (ptr) + { + int r; + WRBUF wrbuf = wrbuf_alloc(); + r = yaz_marc_read_xml(mt, ptr); + if (r) + fprintf(stderr, "yaz_marc_read_xml failed\n"); + + yaz_marc_write_mode(mt, wrbuf); + + fputs(wrbuf_buf(wrbuf), stdout); + + wrbuf_free(wrbuf, 1); } + xmlFreeDoc(doc); } #endif +static void dump(const char *fname, const char *from, const char *to, + int read_xml, int xml, + int print_offset, const char *split_fname, int verbose, + FILE *cfile) +{ + yaz_marc_t mt = yaz_marc_create(); + yaz_iconv_t cd = 0; + + if (from && to) + { + cd = yaz_iconv_open(to, from); + if (!cd) + { + fprintf(stderr, "conversion from %s to %s " + "unsupported\n", from, to); + exit(2); + } + yaz_marc_iconv(mt, cd); + } + yaz_marc_xml(mt, xml); + yaz_marc_debug(mt, verbose); + + if (read_xml) + { +#if HAVE_XML2 + marcdump_read_xml(mt, fname); +#else + return; +#endif + } + else + { + FILE *inf = fopen(fname, "rb"); + int count = 0; + int num = 1; + if (!inf) + { + fprintf (stderr, "%s: cannot open %s:%s\n", + prog, fname, strerror (errno)); + exit(1); + } + if (cfile) + fprintf (cfile, "char *marc_records[] = {\n"); + if (1) + { + int marc_no = 0; + for(;; marc_no++) + { + size_t len; + char *result = 0; + size_t rlen; + size_t r; + char buf[100001]; + + r = fread (buf, 1, 5, inf); + if (r < 5) + { + if (r && print_offset && verbose) + printf ("\n", + (long) r); + break; + } + while (*buf < '0' || *buf > '9') + { + int i; + long off = ftell(inf) - 5; + if (verbose || print_offset) + printf("\n", + *buf & 0xff, *buf & 0xff, + off, off); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + r = fread(buf+4, 1, 1, inf); + if (r < 1) + break; + } + if (r < 1) + { + if (verbose || print_offset) + printf ("\n"); + break; + } + if (print_offset) + { + long off = ftell(inf) - 5; + printf ("\n", + num, off, off); + } + len = atoi_n(buf, 5); + if (len < 25 || len > 100000) + { + long off = ftell(inf) - 5; + printf("Bad Length %d read at offset %ld (%lx)\n", + len, (long) off, (long) off); + break; + } + rlen = len - 5; + r = fread (buf + 5, 1, rlen, inf); + if (r < rlen) + break; + if (split_fname) + { + char fname[256]; + FILE *sf; + sprintf(fname, "%.200s%07d", split_fname, marc_no); + sf = fopen(fname, "wb"); + if (!sf) + { + fprintf(stderr, "Could not open %s\n", fname); + split_fname = 0; + } + else + { + if (fwrite(buf, 1, len, sf) != len) + { + fprintf(stderr, "Could write content to %s\n", + fname); + split_fname = 0; + } + fclose(sf); + } + } + { int rlentmp = (int) rlen; + r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlentmp); + rlen = (size_t) rlentmp; } + if (r > 0 && result) + { + fwrite (result, rlen, 1, stdout); + } + if (r > 0 && cfile) + { + char *p = buf; + size_t i; + if (count) + fprintf (cfile, ","); + fprintf (cfile, "\n"); + for (i = 0; i < r; i++) + { + if ((i & 15) == 0) + fprintf (cfile, " \""); + fprintf (cfile, "\\x%02X", p[i] & 255); + + if (i < r - 1 && (i & 15) == 15) + fprintf (cfile, "\"\n"); + + } + fprintf (cfile, "\"\n"); + } + num++; + if (verbose) + printf("\n"); + } + count++; + } + if (cfile) + fprintf (cfile, "};\n"); + fclose(inf); + } + if (cd) + yaz_iconv_close(cd); + yaz_marc_destroy(mt); +} + int main (int argc, char **argv) { int r; - int libxml_dom_test = 0; int print_offset = 0; char *arg; int verbose = 0; - FILE *inf; - char buf[100001]; - char *prog = *argv; int no = 0; int xml = 0; FILE *cfile = 0; char *from = 0, *to = 0; - int num = 1; + int read_xml = 0; + const char *split_fname = 0; #if HAVE_LOCALE_H setlocale(LC_CTYPE, ""); @@ -123,10 +267,10 @@ int main (int argc, char **argv) #endif #endif - while ((r = options("pvc:xOeXIf:t:2", argv, argc, &arg)) != -2) + prog = *argv; + while ((r = options("pvc:xOeXIf:t:s:", argv, argc, &arg)) != -2) { - int count; - no++; + no++; switch (r) { case 'f': @@ -135,185 +279,46 @@ int main (int argc, char **argv) case 't': to = arg; break; - case 'c': - if (cfile) - fclose (cfile); - cfile = fopen (arg, "w"); - break; + case 'c': + if (cfile) + fclose (cfile); + cfile = fopen(arg, "w"); + break; case 'x': - xml = YAZ_MARC_SIMPLEXML; +#if HAVE_XML2 + read_xml = 1; +#else + fprintf(stderr, "%s: -x not supported." + " YAZ not compiled with Libxml2 support\n", prog); + exit(3); +#endif break; case 'O': - xml = YAZ_MARC_OAIMARC; + fprintf(stderr, "%s: OAI MARC no longer supported." + " Use MARCXML instead.\n", prog); + exit(1); + break; + case 'e': + xml = YAZ_MARC_XCHANGE; break; - case 'e': - xml = YAZ_MARC_XCHANGE; - break; case 'X': xml = YAZ_MARC_MARCXML; break; - case 'I': - xml = YAZ_MARC_ISO2709; - break; - case 'p': - print_offset = 1; - break; - case '2': - libxml_dom_test = 1; - break; + case 'I': + xml = YAZ_MARC_ISO2709; + break; + case 'p': + print_offset = 1; + break; + case 's': + split_fname = arg; + break; case 0: - inf = fopen (arg, "rb"); - count = 0; - if (!inf) - { - fprintf (stderr, "%s: cannot open %s:%s\n", - prog, arg, strerror (errno)); - exit(1); - } - if (cfile) - fprintf (cfile, "char *marc_records[] = {\n"); - if (1) - { - yaz_marc_t mt = yaz_marc_create(); - yaz_iconv_t cd = 0; - - if (from && to) - { - cd = yaz_iconv_open(to, from); - if (!cd) - { - fprintf(stderr, "conversion from %s to %s " - "unsupported\n", from, to); - exit(2); - } - yaz_marc_iconv(mt, cd); - } - yaz_marc_xml(mt, xml); - yaz_marc_debug(mt, verbose); - while (1) - { - int len; - char *result = 0; - int rlen; - - r = fread (buf, 1, 5, inf); - if (r < 5) - { - if (r && print_offset && verbose) - printf ("Extra %d bytes at end of file", r); - break; - } - while (*buf < '0' || *buf > '9') - { - int i; - long off = ftell(inf) - 5; - if (verbose || print_offset) - printf("Skipping bad byte %d (0x%02X) at offset " - "%ld (0x%lx)\n", - *buf & 0xff, *buf & 0xff, - off, off); - for (i = 0; i<4; i++) - buf[i] = buf[i+1]; - r = fread(buf+4, 1, 1, inf); - if (r < 1) - break; - } - if (r < 1) - { - if (verbose || print_offset) - printf ("End of file with extra garbage\n"); - break; - } - if (print_offset) - { - long off = ftell(inf) - 5; - printf ("Record %d offset %ld (0x%lx)\n", num, - off, off); - } - len = atoi_n(buf, 5); - if (len < 25 || len > 100000) - { - long off = ftell(inf) - 5; - printf("Bad Length %d read at offset %ld (%lx)\n", - len, (long) off, (long) off); - break; - } - len = len - 5; - r = fread (buf + 5, 1, len, inf); - if (r < len) - break; - r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen); - if (result) - fwrite (result, rlen, 1, stdout); -#if HAVE_XML2 - if (r > 0 && libxml_dom_test) - { - xmlDocPtr doc = xmlParseMemory(result, rlen); - if (!doc) - fprintf(stderr, "xmLParseMemory failed\n"); - else - { - int i; - xmlXPathContextPtr xpathCtx; - xmlXPathObjectPtr xpathObj; - static const char *xpathExpr[] = { - "/record/datafield[@tag='245']/subfield[@code='a']", - "/record/datafield[@tag='100']/subfield", - "/record/datafield[@tag='245']/subfield[@code='a']", - "/record/datafield[@tag='650']/subfield", - "/record/datafield[@tag='650']", - 0}; - - xpathCtx = xmlXPathNewContext(doc); - - for (i = 0; xpathExpr[i]; i++) { - xpathObj = xmlXPathEvalExpression(xpathExpr[i], xpathCtx); - if(xpathObj == NULL) { - fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]); - } - else - { - print_xpath_nodes(xpathObj->nodesetval, stdout); - xmlXPathFreeObject(xpathObj); - } - } - xmlXPathFreeContext(xpathCtx); - xmlFreeDoc(doc); - } - } -#endif - if (r > 0 && cfile) - { - char *p = buf; - int i; - if (count) - fprintf (cfile, ","); - fprintf (cfile, "\n"); - for (i = 0; i < r; i++) - { - if ((i & 15) == 0) - fprintf (cfile, " \""); - fprintf (cfile, "\\x%02X", p[i] & 255); - - if (i < r - 1 && (i & 15) == 15) - fprintf (cfile, "\"\n"); - - } - fprintf (cfile, "\"\n"); - } - num++; - } - count++; - if (cd) - yaz_iconv_close(cd); - yaz_marc_destroy(mt); - } - if (cfile) - fprintf (cfile, "};\n"); - fclose(inf); + dump(arg, from, to, read_xml, xml, + print_offset, split_fname, verbose, cfile); break; case 'v': - verbose++; + verbose++; break; default: usage(prog); @@ -321,11 +326,19 @@ int main (int argc, char **argv) } } if (cfile) - fclose (cfile); + fclose (cfile); if (!no) { usage(prog); - exit (1); + exit (1); } exit (0); } +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +