Utility yaz-marcdump got option to display YAZ version (-V)
[yaz-moved-to-github.git] / util / marcdump.c
index 040a51b..43ff281 100644 (file)
@@ -1,8 +1,6 @@
-/*
- * Copyright (C) 1995-2006, Index Data ApS
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2009 Index Data
  * See the file LICENSE for details.
- *
- * $Id: marcdump.c,v 1.44 2006-12-07 11:08:05 adam Exp $
  */
 
 #define _FILE_OFFSET_BITS 64
 #if YAZ_HAVE_XML2
 #include <libxml/parser.h>
 #include <libxml/tree.h>
-
 #include <libxml/xpath.h>
 #include <libxml/xpathInternals.h>
 
+/* The xmlreader API is missing or unreliable in Libxml2 versions before 2.6.15, so it is only enabled from 2.6.15 on */
+#if LIBXML_VERSION < 20615
+#define USE_XMLREADER 0
+#else
+#define USE_XMLREADER 1
+#endif
+
+#if USE_XMLREADER
+#include <libxml/xmlreader.h>
+#endif
+
 #endif
 
 #include <stdio.h>
@@ -50,41 +58,139 @@ static char *prog;
 
 static void usage(const char *prog)
 {
-    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-X] [-e] "
-             "[-I] [-n] [-l pos=value] [-v] [-C chunk] [-s splitfname] file...\n",
+    fprintf (stderr, "Usage: %s [-i format] [-o format] [-f from] [-t to] "
+             "[-l pos=value] [-c cfile] [-s prefix] [-C size] [-n] "
+             "[-p] [-v] [-V] file...\n", /* synopsis goes to stderr; -x, -X, -e and -I are no longer advertised */
              prog);
 } 
 
+static void show_version(void) /* Handler for -V: print YAZ version information to stdout, then exit(0). */
+{
+    char vstr[20], sha1_str[41]; /* output buffers filled in by yaz_version() below */
+
+    yaz_version(vstr, sha1_str); /* presumably reports the version of the YAZ library loaded at run time -- confirm */
+    printf("YAZ version: %s %s\n", YAZ_VERSION, YAZ_VERSION_SHA1); /* values of the compile-time macros */
+    if (strcmp(sha1_str, YAZ_VERSION_SHA1)) /* nonzero => the reported SHA1 differs from the compiled-in one */
+        printf("YAZ DLL/SO: %s %s\n", vstr, sha1_str);
+    exit(0); /* never returns to the caller */
+}
+
+static int getbyte_stream(void *client_data) /* Byte-read callback handed to yaz_marc_read_line(); client_data is a FILE *. */
+{
+    FILE *f = (FILE*) client_data;
+
+    int c = fgetc(f);
+    if (c == EOF) /* end of file or a read error is reported to the caller as 0 ... */
+        return 0; /* ... so a literal NUL byte in the input cannot be told apart from end of input */
+    return c;
+}
+
+static void ungetbyte_stream(int c, void *client_data) /* Push-back callback paired with getbyte_stream(); client_data is a FILE *. */
+{
+    FILE *f = (FILE*) client_data;
+
+    if (c == 0) /* 0 is the value getbyte_stream() returns at end of input */
+        c = EOF; /* standard ungetc() leaves the stream unchanged when given EOF, so the end marker is not pushed back */
+    ungetc(c, f);
+}
+
+static void marcdump_read_line(yaz_marc_t mt, const char *fname) /* Read records from fname via yaz_marc_read_line() and print each one to stdout. */
+{
+    FILE *inf = fopen(fname, "rb");
+    if (!inf)
+    {
+        fprintf (stderr, "%s: cannot open %s:%s\n",
+                 prog, fname, strerror (errno));
+        exit(1); /* an unreadable input file terminates the whole program */
+    }
+    
+    while (yaz_marc_read_line(mt, getbyte_stream,
+                              ungetbyte_stream, inf) == 0) /* loop ends on the first nonzero return */
+    {
+        WRBUF wrbuf = wrbuf_alloc(); /* fresh output buffer for every record */
+        yaz_marc_write_mode(mt, wrbuf); /* presumably renders in the output format configured on mt -- confirm */
+        fputs(wrbuf_cstr(wrbuf), stdout);
+        wrbuf_destroy(wrbuf);
+    }
+    fclose(inf);
+}
+
 #if YAZ_HAVE_XML2
 static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
 {
-    xmlNodePtr ptr;
-    xmlDocPtr doc = xmlParseFile(fname);
-    if (!doc)
-        return;
+    WRBUF wrbuf = wrbuf_alloc(); /* one buffer shared by all records; rewound after each successful write */
+#if USE_XMLREADER
+    xmlTextReaderPtr reader = xmlReaderForFile(fname, 0 /* encoding */,
+                                               0 /* options */); /* NOTE(review): no xmlFreeTextReader(reader) is visible in this function -- confirm the reader is not leaked */
 
-    ptr = xmlDocGetRootElement(doc);
-    if (ptr)
+    if (reader) /* a failed open is silent: there is no else branch reporting it */
     {
-        int r;
-        WRBUF wrbuf = wrbuf_alloc();
-        r = yaz_marc_read_xml(mt, ptr);
-        if (r)
-            fprintf(stderr, "yaz_marc_read_xml failed\n");
-        else
+        int ret; /* last reader status; not examined after the loop */
+        while ((ret = xmlTextReaderRead(reader)) == 1) /* keep going for as long as the reader returns 1 */
         {
-            yaz_marc_write_mode(mt, wrbuf);
-            
-            fputs(wrbuf_buf(wrbuf), stdout);
+            int type = xmlTextReaderNodeType(reader);
+            if (type == XML_READER_TYPE_ELEMENT) /* only element nodes are considered */
+            {
+                const char *name = (const char *) 
+                    xmlTextReaderLocalName(reader); /* NOTE(review): libxml2 documents this result as caller-owned; it is not freed here and not NULL-checked before strcmp() -- confirm */
+                if (!strcmp(name, "record")) /* match is on the local name only; the namespace is not checked */
+                {
+                    xmlNodePtr ptr = xmlTextReaderExpand(reader); /* presumably materializes the record subtree as a node tree -- confirm */
+        
+                    int r = yaz_marc_read_xml(mt, ptr);
+                    if (r) /* nonzero is treated as failure; reading continues with the next node */
+                        fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    else
+                    {
+                        yaz_marc_write_mode(mt, wrbuf);
+                        
+                        fputs(wrbuf_cstr(wrbuf), stdout);
+                        wrbuf_rewind(wrbuf); /* empty the buffer for the next record */
+                    }
+                }
+            }
         }
-        wrbuf_free(wrbuf, 1);
     }
-    xmlFreeDoc(doc);
+#else /* !USE_XMLREADER: parse the whole document into memory and walk the tree */
+    xmlDocPtr doc = xmlParseFile(fname);
+    if (doc) /* a parse failure is silent: there is no else branch reporting it */
+    {
+        xmlNodePtr ptr = xmlDocGetRootElement(doc);
+        for (; ptr; ptr = ptr->next) /* walks siblings; moves down one level when a collection element is met */
+        {
+            if (ptr->type == XML_ELEMENT_NODE)
+            {
+                if (!strcmp((const char *) ptr->name, "collection"))
+                {
+                    ptr = ptr->children;
+                    continue; /* NOTE(review): 'continue' still runs the loop's ptr = ptr->next, so the first child of the collection is skipped, and ptr->next is read through NULL when the collection has no children */
+                }
+                if (!strcmp((const char *) ptr->name, "record"))
+                {
+                    int r = yaz_marc_read_xml(mt, ptr);
+                    if (r) /* nonzero is treated as failure; the walk continues with the next sibling */
+                        fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    else
+                    {
+                        yaz_marc_write_mode(mt, wrbuf);
+                        
+                        fputs(wrbuf_cstr(wrbuf), stdout);
+                        wrbuf_rewind(wrbuf); /* empty the buffer for the next record */
+                    }
+                }
+            }
+        }
+        xmlFreeDoc(doc);
+    }
+#endif
+    fputs(wrbuf_cstr(wrbuf), stdout); /* NOTE(review): wrbuf is either fresh or was rewound after every write above, so this presumably prints nothing -- confirm */
+    wrbuf_destroy(wrbuf);
 }
 #endif
 
 static void dump(const char *fname, const char *from, const char *to,
-                 int read_xml, int xml,
+                 int input_format, int output_format,
+                 int write_using_libxml2,
                  int print_offset, const char *split_fname, int split_chunk,
                  int verbose, FILE *cfile, const char *leader_spec)
 {
@@ -109,18 +215,22 @@ static void dump(const char *fname, const char *from, const char *to,
         }
         yaz_marc_iconv(mt, cd);
     }
-    yaz_marc_xml(mt, xml);
+    yaz_marc_xml(mt, output_format);
+    yaz_marc_enable_collection(mt);
+    yaz_marc_write_using_libxml2(mt, write_using_libxml2);
     yaz_marc_debug(mt, verbose);
 
-    if (read_xml)
+    if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_XCHANGE)
     {
 #if YAZ_HAVE_XML2
         marcdump_read_xml(mt, fname);
-#else
-        return;
 #endif
     }
-    else
+    else if (input_format == YAZ_MARC_LINE)
+    {
+        marcdump_read_line(mt, fname);
+    }
+    else if (input_format == YAZ_MARC_ISO2709)
     {
         FILE *inf = fopen(fname, "rb");
         int num = 1;
@@ -136,10 +246,10 @@ static void dump(const char *fname, const char *from, const char *to,
             fprintf (cfile, "char *marc_records[] = {\n");
         for(;; marc_no++)
         {
-            char *result = 0;
+            const char *result = 0;
             size_t len;
             size_t rlen;
-            int len_result;
+            size_t len_result;
             size_t r;
             char buf[100001];
             
@@ -190,6 +300,15 @@ static void dump(const char *fname, const char *from, const char *to,
             r = fread (buf + 5, 1, rlen, inf);
             if (r < rlen)
                 break;
+            while (buf[len-1] != ISO2709_RS)
+            {
+                if (len > sizeof(buf)-2)
+                    break;
+                r = fread (buf + len, 1, 1, inf);
+                if (r != 1)
+                    break;
+                len++;
+            }
             if (split_fname)
             {
                 char fname[256];
@@ -220,11 +339,15 @@ static void dump(const char *fname, const char *from, const char *to,
                     fclose(sf);
                 }
             }
-            len_result = (int) rlen;
+            len_result = rlen;
             r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
             if (r > 0 && result)
             {
-                fwrite (result, len_result, 1, stdout);
+                if (fwrite(result, len_result, 1, stdout) != 1)
+                {
+                    fprintf(stderr, "Write to stdout failed\n");
+                    break;
+                }
             }
             if (r > 0 && cfile)
             {
@@ -253,6 +376,12 @@ static void dump(const char *fname, const char *from, const char *to,
             fprintf (cfile, "};\n");
         fclose(inf);
     }
+    {
+        WRBUF wrbuf = wrbuf_alloc();
+        yaz_marc_write_trailer(mt, wrbuf);
+        fputs(wrbuf_cstr(wrbuf), stdout);
+        wrbuf_destroy(wrbuf);
+    }
     if (cd)
         yaz_iconv_close(cd);
     yaz_marc_destroy(mt);
@@ -265,14 +394,15 @@ int main (int argc, char **argv)
     char *arg;
     int verbose = 0;
     int no = 0;
-    int xml = 0;
+    int output_format = YAZ_MARC_LINE;
     FILE *cfile = 0;
     char *from = 0, *to = 0;
-    int read_xml = 0;
+    int input_format = YAZ_MARC_ISO2709;
     int split_chunk = 1;
     const char *split_fname = 0;
     const char *leader_spec = 0;
-    
+    int write_using_libxml2 = 0;
+
 #if HAVE_LOCALE_H
     setlocale(LC_CTYPE, "");
 #endif
@@ -283,11 +413,43 @@ int main (int argc, char **argv)
 #endif
 
     prog = *argv;
-    while ((r = options("C:npvc:xOeXIf:t:s:l:", argv, argc, &arg)) != -2)
+    while ((r = options("i:o:C:npc:xOeXIf:t:s:l:Vv", argv, argc, &arg)) != -2)
     {
         no++;
         switch (r)
         {
+        case 'i':
+            input_format = yaz_marc_decode_formatstr(arg);
+            if (input_format == -1)
+            {
+                fprintf(stderr, "%s: bad input format: %s\n", prog, arg);
+                exit(1);
+            }
+#if YAZ_HAVE_XML2
+#else
+            if (input_format == YAZ_MARC_MARCXML 
+                || input_format == YAZ_MARC_XCHANGE)
+            {
+                fprintf(stderr, "%s: Libxml2 support not enabled\n", prog);
+                exit(3);
+            }
+#endif
+            break;
+        case 'o':
+            /* dirty hack so we can make Libxml2 do the writing ..
+               rather than WRBUF */
+            if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0)
+            {
+                arg = arg + 4;
+                write_using_libxml2 = 1;
+            }
+            output_format = yaz_marc_decode_formatstr(arg);
+            if (output_format == -1)
+            {
+                fprintf(stderr, "%s: bad output format: %s\n", prog, arg);
+                exit(1);
+            }
+            break;
         case 'l':
             leader_spec = arg;
             break;
@@ -303,13 +465,9 @@ int main (int argc, char **argv)
             cfile = fopen(arg, "w");
             break;
         case 'x':
-#if YAZ_HAVE_XML2
-            read_xml = 1;
-#else
-            fprintf(stderr, "%s: -x not supported."
-                    " YAZ not compiled with Libxml2 support\n", prog);
-            exit(3);
-#endif
+            fprintf(stderr, "%s: -x no longer supported. "
+                    "Use -i marcxml instead\n", prog);
+            exit(1);
             break;
         case 'O':
             fprintf(stderr, "%s: OAI MARC no longer supported."
@@ -317,16 +475,22 @@ int main (int argc, char **argv)
             exit(1);
             break;
         case 'e':
-            xml = YAZ_MARC_XCHANGE;
+            fprintf(stderr, "%s: -e no longer supported. "
+                    "Use -o marcxchange instead\n", prog);
+            exit(1);
             break;
         case 'X':
-            xml = YAZ_MARC_MARCXML;
+            fprintf(stderr, "%s: -X no longer supported. "
+                    "Use -o marcxml instead\n", prog);
+            exit(1);
             break;
         case 'I':
-            xml = YAZ_MARC_ISO2709;
+            fprintf(stderr, "%s: -I no longer supported. "
+                    "Use -o marc instead\n", prog);
+            exit(1);
             break;
         case 'n':
-            xml = YAZ_MARC_CHECK;
+            output_format = YAZ_MARC_CHECK;
             break;
         case 'p':
             print_offset = 1;
@@ -338,16 +502,20 @@ int main (int argc, char **argv)
             split_chunk = atoi(arg);
             break;
         case 0:
-            dump(arg, from, to, read_xml, xml,
+            dump(arg, from, to, input_format, output_format,
+                 write_using_libxml2,
                  print_offset, split_fname, split_chunk,
                  verbose, cfile, leader_spec);
             break;
         case 'v':
             verbose++;
             break;
+        case 'V': 
+            show_version();
+            break;
         default:
             usage(prog);
-            exit (1);
+            exit(1);
         }
     }
     if (cfile)