/*
- * Copyright (c) 1995-2001, Index Data
+ * Copyright (C) 1995-2007, Index Data ApS
* See the file LICENSE for details.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: marcdump.c,v $
- * Revision 1.13 2001-02-10 01:21:59 adam
- * Dumper only keeps one record at a time in memory.
- *
- * Revision 1.12 2000/10/02 11:07:45 adam
- * Added peer_name member for bend_init handler. Changed the YAZ
- * client so that tcp: can be avoided in target spec.
- *
- * Revision 1.11 2000/07/04 08:53:22 adam
- * Fixed bug.
- *
- * Revision 1.10 2000/02/29 13:44:55 adam
- * Check for config.h (currently not generated).
- *
- * Revision 1.9 1999/11/30 13:47:12 adam
- * Improved installation. Moved header files to include/yaz.
- *
- * Revision 1.8 1999/05/26 07:49:35 adam
- * C++ compilation.
- *
- * Revision 1.7 1998/02/11 11:53:36 adam
- * Changed code so that it compiles as C++.
- *
- * Revision 1.6 1997/12/12 06:32:33 adam
- * Added include of string.h.
- *
- * Revision 1.5 1997/09/24 13:29:40 adam
- * Added verbose option -v to marcdump utility.
- *
- * Revision 1.4 1995/11/01 13:55:05 quinn
- * Minor adjustments
- *
- * Revision 1.3 1995/05/16 08:51:12 quinn
- * License, documentation, and memory fixes
- *
- * Revision 1.2 1995/05/15 11:56:56 quinn
- * Debuggng & adjustments.
- *
- * Revision 1.1 1995/04/10 10:28:47 quinn
- * Added copy of CCL and MARC display
*
+ * $Id: marcdump.c,v 1.55 2007-12-18 21:13:06 adam Exp $
*/
+#define _FILE_OFFSET_BITS 64
+
#if HAVE_CONFIG_H
#include <config.h>
#endif
+#if YAZ_HAVE_XML2
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+/* Libxml2 version < 2.6.15. xmlreader not reliable/present */
+#if LIBXML_VERSION < 20615
+#define USE_XMLREADER 0
+#else
+#define USE_XMLREADER 1
+#endif
+
+#if USE_XMLREADER
+#include <libxml/xmlreader.h>
+#endif
+
+#endif
+
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <errno.h>
+#include <assert.h>
+
+#if HAVE_LOCALE_H
+#include <locale.h>
+#endif
+#if HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
+
#include <yaz/marcdisp.h>
#include <yaz/yaz-util.h>
#include <yaz/xmalloc.h>
#ifndef SEEK_END
#define SEEK_END 2
#endif
-
+
+
+static char *prog;
+
+/*
+ * Print the one-line command synopsis to stderr.
+ * prog is the program name shown after "Usage:".
+ */
+static void usage(const char *prog)
+{
+    static const char option_summary[] =
+        "[-c cfile] [-f from] [-t to] "
+        "[-i format] [-o format] "
+        "[-n] [-l pos=value] [-v] [-C chunk] [-s splitfname] [-p] file...";
+
+    fprintf(stderr, "Usage: %s %s\n", prog, option_summary);
+}
+
+/*
+ * Byte-reader callback: fetch the next byte from the stdio stream passed
+ * as client_data. End of input (EOF from fgetc) is reported as 0.
+ */
+static int getbyte_stream(void *client_data)
+{
+    FILE *stream = (FILE *) client_data;
+    int ch = fgetc(stream);
+
+    return ch == EOF ? 0 : ch;
+}
+
+/*
+ * Push-back callback: hand byte c back to the stdio stream passed as
+ * client_data. A value of 0 is translated to EOF before calling ungetc().
+ */
+static void ungetbyte_stream(int c, void *client_data)
+{
+    FILE *stream = (FILE *) client_data;
+
+    ungetc(c == 0 ? EOF : c, stream);
+}
+
+/*
+ * Read records from the file fname with yaz_marc_read_line() and print
+ * each one to stdout through yaz_marc_write_mode(), then print the
+ * trailer. Exits with status 1 when the file cannot be opened.
+ *
+ * The trailer is written here, so a caller that writes one as well may
+ * emit it twice.
+ */
+static void marcdump_read_line(yaz_marc_t mt, const char *fname)
+{
+    WRBUF out;
+    FILE *stream = fopen(fname, "rb");
+
+    if (stream == NULL)
+    {
+        fprintf (stderr, "%s: cannot open %s:%s\n",
+                 prog, fname, strerror (errno));
+        exit(1);
+    }
+
+    /* one buffer, emptied after every record and reused for the trailer */
+    out = wrbuf_alloc();
+    for (;;)
+    {
+        if (yaz_marc_read_line(mt, getbyte_stream,
+                               ungetbyte_stream, stream) != 0)
+            break;
+        yaz_marc_write_mode(mt, out);
+        fputs(wrbuf_cstr(out), stdout);
+        wrbuf_rewind(out);
+    }
+    yaz_marc_write_trailer(mt, out);
+    fputs(wrbuf_cstr(out), stdout);
+    wrbuf_destroy(out);
+    fclose(stream);
+}
+
+#if YAZ_HAVE_XML2
+/*
+ * Read the XML file fname, convert every <record> element with
+ * yaz_marc_read_xml() and print it to stdout through
+ * yaz_marc_write_mode(); finally print the trailer exactly once.
+ *
+ * With USE_XMLREADER the document is streamed through the libxml2
+ * xmlreader API, otherwise it is parsed into a tree and walked.
+ * Problems opening or parsing the file are reported on stderr; the
+ * trailer is still written in that case.
+ *
+ * The trailer is written here, so a caller that writes one as well may
+ * emit it twice.
+ */
+static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
+{
+    WRBUF wrbuf = wrbuf_alloc();
+#if USE_XMLREADER
+    xmlTextReaderPtr reader = xmlReaderForFile(fname, 0 /* encoding */,
+                                               0 /* options */);
+
+    if (!reader)
+        fprintf(stderr, "%s: cannot open %s\n", prog, fname);
+    else
+    {
+        int ret;
+        while ((ret = xmlTextReaderRead(reader)) == 1)
+        {
+            int type = xmlTextReaderNodeType(reader);
+            if (type == XML_READER_TYPE_ELEMENT)
+            {
+                const char *name = (const char *)
+                    xmlTextReaderConstName(reader);
+                if (!strcmp(name, "record"))
+                {
+                    xmlNodePtr ptr = xmlTextReaderExpand(reader);
+
+                    int r = yaz_marc_read_xml(mt, ptr);
+                    if (r)
+                        fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    else
+                    {
+                        yaz_marc_write_mode(mt, wrbuf);
+
+                        fputs(wrbuf_cstr(wrbuf), stdout);
+                        wrbuf_rewind(wrbuf);
+                    }
+                }
+            }
+        }
+        /* the loop ends with 0 at end of input; anything else is an error */
+        if (ret != 0)
+            fprintf(stderr, "%s: failed to parse %s\n", prog, fname);
+        /* the trailer is written once below, shared with the tree branch */
+        xmlFreeTextReader(reader);
+    }
+#else
+    xmlDocPtr doc = xmlParseFile(fname);
+    if (!doc)
+        fprintf(stderr, "%s: cannot parse %s\n", prog, fname);
+    else
+    {
+        xmlNodePtr ptr = xmlDocGetRootElement(doc);
+        while (ptr)
+        {
+            if (ptr->type == XML_ELEMENT_NODE)
+            {
+                if (!strcmp((const char *) ptr->name, "collection"))
+                {
+                    /* descend without advancing, so that the first child
+                       is examined and an empty collection ends the walk */
+                    ptr = ptr->children;
+                    continue;
+                }
+                if (!strcmp((const char *) ptr->name, "record"))
+                {
+                    int r = yaz_marc_read_xml(mt, ptr);
+                    if (r)
+                        fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    else
+                    {
+                        yaz_marc_write_mode(mt, wrbuf);
+
+                        fputs(wrbuf_cstr(wrbuf), stdout);
+                        wrbuf_rewind(wrbuf);
+                    }
+                }
+            }
+            ptr = ptr->next;
+        }
+        xmlFreeDoc(doc);
+    }
+#endif
+    yaz_marc_write_trailer(mt, wrbuf);
+    fputs(wrbuf_cstr(wrbuf), stdout);
+    wrbuf_destroy(wrbuf);
+}
+#endif
+
+/*
+ * Convert the records of one input file and print them to stdout.
+ *
+ *  fname                input file
+ *  from, to             character sets; conversion is set up only when
+ *                       both are non-NULL
+ *  input_format         YAZ_MARC_MARCXML / YAZ_MARC_XCHANGE,
+ *                       YAZ_MARC_LINE or YAZ_MARC_ISO2709; any other
+ *                       value reads nothing
+ *  output_format        passed to yaz_marc_xml()
+ *  write_using_libxml2  passed to yaz_marc_write_using_libxml2()
+ *  print_offset         print record offsets as XML comments
+ *  split_fname          when non-NULL, prefix of files that receive the
+ *                       raw ISO2709 records, split_chunk records per file
+ *  split_chunk          records per split file; values below 1 are
+ *                       treated as 1
+ *  verbose              passed to yaz_marc_debug(); also enables extra
+ *                       diagnostics
+ *  cfile                when non-NULL, receives the raw records as a C
+ *                       array of hex-escaped string literals
+ *  leader_spec          passed to yaz_marc_leader_spec()
+ *
+ * Exits with status 1 when the input file cannot be opened and with
+ * status 2 on a bad leader spec or an unsupported conversion.
+ */
+static void dump(const char *fname, const char *from, const char *to,
+                 int input_format, int output_format,
+                 int write_using_libxml2,
+                 int print_offset, const char *split_fname, int split_chunk,
+                 int verbose, FILE *cfile, const char *leader_spec)
+{
+    yaz_marc_t mt = yaz_marc_create();
+    yaz_iconv_t cd = 0;
+    /* set once a helper that writes the trailer itself has run */
+    int trailer_written = 0;
+
+    /* marc_no % split_chunk below must never divide by zero */
+    if (split_chunk < 1)
+        split_chunk = 1;
+
+    if (yaz_marc_leader_spec(mt, leader_spec))
+    {
+        fprintf(stderr, "bad leader spec: %s\n", leader_spec);
+        yaz_marc_destroy(mt);
+        exit(2);
+    }
+    if (from && to)
+    {
+        cd = yaz_iconv_open(to, from);
+        if (!cd)
+        {
+            fprintf(stderr, "conversion from %s to %s "
+                    "unsupported\n", from, to);
+            yaz_marc_destroy(mt);
+            exit(2);
+        }
+        yaz_marc_iconv(mt, cd);
+    }
+    yaz_marc_xml(mt, output_format);
+    yaz_marc_enable_collection(mt);
+    yaz_marc_write_using_libxml2(mt, write_using_libxml2);
+    yaz_marc_debug(mt, verbose);
+
+    if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_XCHANGE)
+    {
+#if YAZ_HAVE_XML2
+        marcdump_read_xml(mt, fname);
+        trailer_written = 1;
+#endif
+    }
+    else if (input_format == YAZ_MARC_LINE)
+    {
+        marcdump_read_line(mt, fname);
+        trailer_written = 1;
+    }
+    else if (input_format == YAZ_MARC_ISO2709)
+    {
+        FILE *inf = fopen(fname, "rb");
+        int num = 1;
+        int marc_no = 0;
+        int split_file_no = -1;
+        if (!inf)
+        {
+            fprintf (stderr, "%s: cannot open %s:%s\n",
+                     prog, fname, strerror (errno));
+            exit(1);
+        }
+        if (cfile)
+            fprintf (cfile, "char *marc_records[] = {\n");
+        for(;; marc_no++)
+        {
+            const char *result = 0;
+            size_t len;
+            size_t rlen;
+            size_t len_result;
+            size_t r;       /* byte counts returned by fread */
+            int decoded;    /* result of yaz_marc_decode_buf */
+            char buf[100001];
+
+            /* the first 5 bytes are parsed as the record length */
+            r = fread (buf, 1, 5, inf);
+            if (r < 5)
+            {
+                if (r && print_offset && verbose)
+                    printf ("<!-- Extra %ld bytes at end of file -->\n",
+                            (long) r);
+                break;
+            }
+            /* slide the 5-byte window forward until it starts with a digit */
+            while (*buf < '0' || *buf > '9')
+            {
+                int i;
+                long off = ftell(inf) - 5;
+                if (verbose || print_offset)
+                    printf("<!-- Skipping bad byte %d (0x%02X) at offset "
+                           "%ld (0x%lx) -->\n",
+                           *buf & 0xff, *buf & 0xff,
+                           off, off);
+                for (i = 0; i<4; i++)
+                    buf[i] = buf[i+1];
+                r = fread(buf+4, 1, 1, inf);
+                if (r < 1)
+                    break;
+            }
+            if (r < 1)
+            {
+                if (verbose || print_offset)
+                    printf ("<!-- End of file with data -->\n");
+                break;
+            }
+            if (print_offset)
+            {
+                long off = ftell(inf) - 5;
+                printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
+                        num, off, off);
+            }
+            len = atoi_n(buf, 5);
+            if (len < 25 || len > 100000)
+            {
+                long off = ftell(inf) - 5;
+                printf("Bad Length %ld read at offset %ld (%lx)\n",
+                       (long)len, (long) off, (long) off);
+                break;
+            }
+            rlen = len - 5;
+            r = fread (buf + 5, 1, rlen, inf);
+            if (r < rlen)
+                break;
+            /* extend the record one byte at a time until it ends with
+               ISO2709_RS or the buffer is nearly full */
+            while (buf[len-1] != ISO2709_RS)
+            {
+                if (len > sizeof(buf)-2)
+                    break;
+                r = fread (buf + len, 1, 1, inf);
+                if (r != 1)
+                    break;
+                len++;
+            }
+            if (split_fname)
+            {
+                /* "%.200s%07d" needs at most 200 + 11 + 1 bytes */
+                char split_path[256];
+                const char *mode = 0;
+                FILE *sf;
+                if ((marc_no % split_chunk) == 0)
+                {
+                    mode = "wb";
+                    split_file_no++;
+                }
+                else
+                    mode = "ab";
+                sprintf(split_path, "%.200s%07d", split_fname, split_file_no);
+                sf = fopen(split_path, mode);
+                if (!sf)
+                {
+                    fprintf(stderr, "Could not open %s\n", split_path);
+                    split_fname = 0;    /* stop splitting */
+                }
+                else
+                {
+                    if (fwrite(buf, 1, len, sf) != len)
+                    {
+                        fprintf(stderr, "Could not write content to %s\n",
+                                split_path);
+                        split_fname = 0;    /* stop splitting */
+                    }
+                    fclose(sf);
+                }
+            }
+            len_result = rlen;
+            /* Keep the result signed: stored in a size_t, a negative
+               return would pass the "> 0" tests and drive the loop below
+               far beyond buf.
+               NOTE(review): assumes yaz_marc_decode_buf() returns int,
+               negative on failure -- confirm against yaz/marcdisp.h */
+            decoded = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
+            if (decoded > 0 && result)
+            {
+                fwrite (result, len_result, 1, stdout);
+            }
+            if (decoded > 0 && cfile)
+            {
+                char *p = buf;
+                int i;
+                if (marc_no)
+                    fprintf (cfile, ",");
+                fprintf (cfile, "\n");
+                /* 16 hex-escaped bytes per string literal line */
+                for (i = 0; i < decoded; i++)
+                {
+                    if ((i & 15) == 0)
+                        fprintf (cfile, "  \"");
+                    fprintf (cfile, "\\x%02X", p[i] & 255);
+
+                    if (i < decoded - 1 && (i & 15) == 15)
+                        fprintf (cfile, "\"\n");
+
+                }
+                fprintf (cfile, "\"\n");
+            }
+            num++;
+            if (verbose)
+                printf("\n");
+        }
+        if (cfile)
+            fprintf (cfile, "};\n");
+        fclose(inf);
+    }
+    /* marcdump_read_xml() and marcdump_read_line() write the trailer
+       themselves; do not write it a second time for those inputs */
+    if (!trailer_written)
+    {
+        WRBUF wrbuf = wrbuf_alloc();
+        yaz_marc_write_trailer(mt, wrbuf);
+        fputs(wrbuf_cstr(wrbuf), stdout);
+        wrbuf_destroy(wrbuf);
+    }
+    if (cd)
+        yaz_iconv_close(cd);
+    yaz_marc_destroy(mt);
+}
+
+/*
+ * Entry point: parse the command line with options() and call dump()
+ * for every file argument, using the option values seen so far.
+ * Exits with status 1 on usage errors, a bad format name, an output
+ * file that cannot be opened or a bad chunk size; with status 3 when
+ * XML input is requested without Libxml2 support; with 0 otherwise.
+ */
int main (int argc, char **argv)
{
int r;
+ int print_offset = 0;
char *arg;
int verbose = 0;
- FILE *inf;
- char buf[100001];
- char *prog = *argv;
int no = 0;
+ int output_format = YAZ_MARC_LINE;
FILE *cfile = 0;
+ char *from = 0, *to = 0;
+ int input_format = YAZ_MARC_ISO2709;
+ int split_chunk = 1;
+ const char *split_fname = 0;
+ const char *leader_spec = 0;
+ int write_using_libxml2 = 0;
+
+#if HAVE_LOCALE_H
+ setlocale(LC_CTYPE, "");
+#endif
+#if HAVE_LANGINFO_H
+#ifdef CODESET
+ to = nl_langinfo(CODESET);
+#endif
+#endif
- while ((r = options("vc:", argv, argc, &arg)) != -2)
+ prog = *argv;
+ while ((r = options("i:o:C:npvc:xOeXIf:t:s:l:", argv, argc, &arg)) != -2)
{
- int count;
- no++;
+ no++;
switch (r)
{
- case 'c':
- if (cfile)
- fclose (cfile);
- cfile = fopen (arg, "w");
- break;
+ case 'i':
+ input_format = yaz_marc_decode_formatstr(arg);
+ if (input_format == -1)
+ {
+ fprintf(stderr, "%s: bad input format: %s\n", prog, arg);
+ exit(1);
+ }
+#if YAZ_HAVE_XML2
+#else
+ if (input_format == YAZ_MARC_MARCXML
+ || input_format == YAZ_MARC_XCHANGE)
+ {
+ fprintf(stderr, "%s: Libxml2 support not enabled\n", prog);
+ exit(3);
+ }
+#endif
+ break;
+ case 'o':
+ /* dirty hack so we can make Libxml2 do the writing ..
+ rather than WRBUF */
+ if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0)
+ {
+ arg = arg + 4;
+ write_using_libxml2 = 1;
+ }
+ output_format = yaz_marc_decode_formatstr(arg);
+ if (output_format == -1)
+ {
+ fprintf(stderr, "%s: bad output format: %s\n", prog, arg);
+ exit(1);
+ }
+ break;
+ case 'l':
+ leader_spec = arg;
+ break;
+ case 'f':
+ from = arg;
+ break;
+ case 't':
+ to = arg;
+ break;
+ case 'c':
+ if (cfile)
+ fclose (cfile);
+ cfile = fopen(arg, "w");
+ /* without this check the C output would be dropped silently */
+ if (!cfile)
+ {
+ fprintf(stderr, "%s: cannot open %s:%s\n",
+ prog, arg, strerror(errno));
+ exit(1);
+ }
+ break;
+ case 'x':
+ fprintf(stderr, "%s: -x no longer supported. "
+ "Use -i marcxml instead\n", prog);
+ exit(1);
+ break;
+ case 'O':
+ fprintf(stderr, "%s: OAI MARC no longer supported."
+ " Use MARCXML instead.\n", prog);
+ exit(1);
+ break;
+ case 'e':
+ fprintf(stderr, "%s: -e no longer supported. "
+ "Use -o marcxchange instead\n", prog);
+ exit(1);
+ break;
+ case 'X':
+ fprintf(stderr, "%s: -X no longer supported. "
+ "Use -o marcxml instead\n", prog);
+ exit(1);
+ break;
+ case 'I':
+ fprintf(stderr, "%s: -I no longer supported. "
+ "Use -o marc instead\n", prog);
+ exit(1);
+ break;
+ case 'n':
+ output_format = YAZ_MARC_CHECK;
+ break;
+ case 'p':
+ print_offset = 1;
+ break;
+ case 's':
+ split_fname = arg;
+ break;
+ case 'C':
+ split_chunk = atoi(arg);
+ /* dump() computes marc_no % split_chunk; 0 would divide by zero */
+ if (split_chunk < 1)
+ {
+ fprintf(stderr, "%s: bad chunk size: %s\n", prog, arg);
+ exit(1);
+ }
+ break;
case 0:
- inf = fopen (arg, "r");
- count = 0;
- if (!inf)
- {
- fprintf (stderr, "%s: cannot open %s:%s\n",
- prog, arg, strerror (errno));
- exit (1);
- }
- if (cfile)
- fprintf (cfile, "char *marc_records[] = {\n");
- while (1)
- {
- int len;
-
- r = fread (buf, 1, 5, inf);
- if (r < 5)
- break;
- len = atoi_n(buf, 5);
- if (len < 25 || len > 100000)
- break;
- len = len - 5;
- r = fread (buf + 5, 1, len, inf);
- if (r < len)
- break;
- r = marc_display_ex (buf, stdout, verbose);
- if (r <= 0)
- break;
- if (cfile)
- {
- char *p = buf;
- int i;
- if (count)
- fprintf (cfile, ",");
- fprintf (cfile, "{\n");
- for (i = 0; i < r; i++)
- {
- if ((i & 15) == 0)
- fprintf (cfile, " \"");
- fprintf (cfile, "\\x%02X", p[i] & 255);
-
- if (i < r - 1 && (i & 15) == 15)
- fprintf (cfile, "\"\n");
-
- }
- fprintf (cfile, "\"\n}");
- }
- count++;
- }
- if (cfile)
- fprintf (cfile, "};\n");
+ dump(arg, from, to, input_format, output_format,
+ write_using_libxml2,
+ print_offset, split_fname, split_chunk,
+ verbose, cfile, leader_spec);
break;
case 'v':
- verbose++;
+ verbose++;
break;
default:
- fprintf (stderr, "Usage: %s [-c cfile] [-v] file...\n", prog);
- exit (1);
+ usage(prog);
+ exit(1);
}
}
if (cfile)
- fclose (cfile);
+ fclose (cfile);
if (!no)
{
- fprintf (stderr, "Usage: %s [-v] file...\n", prog);
- exit (1);
+ usage(prog);
+ exit (1);
}
exit (0);
}
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+