Marcdump -c tries to make more readable strings
[yaz-moved-to-github.git] / util / marcdump.c
index ed2096a..f92204e 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2008 Index Data
+ * Copyright (C) 1995-2013 Index Data
  * See the file LICENSE for details.
  */
 
 
 static char *prog;
 
+static int no_errors = 0; /* errors counted while processing input;
+                           * when non-zero, main() exits with status 5 */
+/* Write the command-line synopsis to stderr.
+ *
+ * prog: program name substituted into the Usage line. This parameter
+ *       shadows the file-level static variable of the same name.
+ *
+ * The function only prints; the visible call sites in main() call
+ * exit(1) themselves afterwards.
+ */
 static void usage(const char *prog)
 {
-    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] "
-             "[-i format] [-o format] "
-             "[-n] [-l pos=value] [-v] [-C chunk] [-s splitfname] [-p] file...\n",
-             prog);
-} 
+    fprintf(stderr, "Usage: %s [-i format] [-o format] [-f from] [-t to] "
+            "[-l pos=value] [-c cfile] [-s prefix] [-C size] [-n] "
+            "[-p] [-v] [-V] file...\n",
+            prog);
+}
+/* Print YAZ version information to stdout and terminate the process.
+ *
+ * The first line always shows the compile-time macros YAZ_VERSION and
+ * YAZ_VERSION_SHA1. yaz_version() then fills vstr and sha1_str; when
+ * sha1_str differs from YAZ_VERSION_SHA1, a second line with those two
+ * values is printed under the DLL/SO label.
+ * NOTE(review): presumably yaz_version() reports the YAZ library that is
+ * actually loaded at run time, and the buffer sizes 20 and 41 match what
+ * it requires - confirm against the YAZ headers.
+ *
+ * Never returns: always ends with exit(0).
+ */
+static void show_version(void)
+{
+    char vstr[20], sha1_str[41];
+
+    yaz_version(vstr, sha1_str);
+    printf("YAZ version: %s %s\n", YAZ_VERSION, YAZ_VERSION_SHA1);
+    if (strcmp(sha1_str, YAZ_VERSION_SHA1)) /* non-zero: SHA1 strings differ */
+        printf("YAZ DLL/SO: %s %s\n", vstr, sha1_str);
+    exit(0);
+}
 
 static int getbyte_stream(void *client_data)
 {
@@ -88,11 +101,11 @@ static void marcdump_read_line(yaz_marc_t mt, const char *fname)
     FILE *inf = fopen(fname, "rb");
     if (!inf)
     {
-        fprintf (stderr, "%s: cannot open %s:%s\n",
-                 prog, fname, strerror (errno));
+        fprintf(stderr, "%s: cannot open %s:%s\n",
+                prog, fname, strerror(errno));
         exit(1);
     }
-    
+
     while (yaz_marc_read_line(mt, getbyte_stream,
                               ungetbyte_stream, inf) == 0)
     {
@@ -120,19 +133,27 @@ static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
             int type = xmlTextReaderNodeType(reader);
             if (type == XML_READER_TYPE_ELEMENT)
             {
-                const char *name = (const char *) 
+                const char *name = (const char *)
                     xmlTextReaderLocalName(reader);
-                if (!strcmp(name, "record"))
+                if (!strcmp(name, "record") || !strcmp(name, "r"))
                 {
                     xmlNodePtr ptr = xmlTextReaderExpand(reader);
-        
+
                     int r = yaz_marc_read_xml(mt, ptr);
                     if (r)
+                    {
+                        no_errors++;
                         fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    }
                     else
                     {
-                        yaz_marc_write_mode(mt, wrbuf);
-                        
+                        int write_rc = yaz_marc_write_mode(mt, wrbuf);
+                        if (write_rc)
+                        {
+                            yaz_log(YLOG_WARN, "yaz_marc_write_mode: "
+                                    "write error: %d", write_rc);
+                            no_errors++;
+                        }
                         fputs(wrbuf_cstr(wrbuf), stdout);
                         wrbuf_rewind(wrbuf);
                     }
@@ -154,15 +175,19 @@ static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
                     ptr = ptr->children;
                     continue;
                 }
-                if (!strcmp((const char *) ptr->name, "record"))
+                if (!strcmp((const char *) ptr->name, "record") ||
+                    !strcmp((const char *) ptr->name, "r"))
                 {
                     int r = yaz_marc_read_xml(mt, ptr);
                     if (r)
+                    {
+                        no_errors++;
                         fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    }
                     else
                     {
                         yaz_marc_write_mode(mt, wrbuf);
-                        
+
                         fputs(wrbuf_cstr(wrbuf), stdout);
                         wrbuf_rewind(wrbuf);
                     }
@@ -204,12 +229,12 @@ static void dump(const char *fname, const char *from, const char *to,
         }
         yaz_marc_iconv(mt, cd);
     }
-    yaz_marc_xml(mt, output_format);
     yaz_marc_enable_collection(mt);
+    yaz_marc_xml(mt, output_format);
     yaz_marc_write_using_libxml2(mt, write_using_libxml2);
     yaz_marc_debug(mt, verbose);
 
-    if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_XCHANGE)
+    if (input_format == YAZ_MARC_MARCXML || input_format == YAZ_MARC_TURBOMARC || input_format == YAZ_MARC_XCHANGE)
     {
 #if YAZ_HAVE_XML2
         marcdump_read_xml(mt, fname);
@@ -227,12 +252,12 @@ static void dump(const char *fname, const char *from, const char *to,
         int split_file_no = -1;
         if (!inf)
         {
-            fprintf (stderr, "%s: cannot open %s:%s\n",
-                     prog, fname, strerror (errno));
+            fprintf(stderr, "%s: cannot open %s:%s\n",
+                    prog, fname, strerror(errno));
             exit(1);
         }
         if (cfile)
-            fprintf (cfile, "char *marc_records[] = {\n");
+            fprintf(cfile, "char *marc_records[] = {\n");
         for(;; marc_no++)
         {
             const char *result = 0;
@@ -241,63 +266,83 @@ static void dump(const char *fname, const char *from, const char *to,
             size_t len_result;
             size_t r;
             char buf[100001];
-            
-            r = fread (buf, 1, 5, inf);
+
+            r = fread(buf, 1, 5, inf);
             if (r < 5)
             {
-                if (r && print_offset && verbose)
-                    printf ("<!-- Extra %ld bytes at end of file -->\n",
-                            (long) r);
+                if (r == 0) /* normal EOF, all good */
+                    break;
+                if (print_offset && verbose)
+                {
+                    printf("<!-- Extra %ld bytes at end of file -->\n",
+                           (long) r);
+                }
                 break;
             }
             while (*buf < '0' || *buf > '9')
             {
                 int i;
                 long off = ftell(inf) - 5;
-                if (verbose || print_offset)
-                    printf("<!-- Skipping bad byte %d (0x%02X) at offset "
-                           "%ld (0x%lx) -->\n", 
-                           *buf & 0xff, *buf & 0xff,
-                           off, off);
+                printf("<!-- Skipping bad byte %d (0x%02X) at offset "
+                       "%ld (0x%lx) -->\n",
+                       *buf & 0xff, *buf & 0xff,
+                       off, off);
                 for (i = 0; i<4; i++)
                     buf[i] = buf[i+1];
                 r = fread(buf+4, 1, 1, inf);
+                no_errors++;
                 if (r < 1)
                     break;
             }
             if (r < 1)
             {
                 if (verbose || print_offset)
-                    printf ("<!-- End of file with data -->\n");
+                    printf("<!-- End of file with data -->\n");
                 break;
             }
             if (print_offset)
             {
                 long off = ftell(inf) - 5;
-                printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
-                        num, off, off);
+                printf("<!-- Record %d offset %ld (0x%lx) -->\n",
+                       num, off, off);
             }
             len = atoi_n(buf, 5);
             if (len < 25 || len > 100000)
             {
                 long off = ftell(inf) - 5;
-                printf("Bad Length %ld read at offset %ld (%lx)\n",
+                printf("<!-- Bad Length %ld read at offset %ld (%lx) -->\n",
                        (long)len, (long) off, (long) off);
+                no_errors++;
                 break;
             }
             rlen = len - 5;
-            r = fread (buf + 5, 1, rlen, inf);
+            r = fread(buf + 5, 1, rlen, inf);
             if (r < rlen)
+            {
+                long off = ftell(inf);
+                printf("<!-- Premature EOF at offset %ld (%lx) -->\n",
+                       (long) off, (long) off);
+                no_errors++;
                 break;
+            }
             while (buf[len-1] != ISO2709_RS)
             {
                 if (len > sizeof(buf)-2)
+                {
+                    r = 0;
                     break;
-                r = fread (buf + len, 1, 1, inf);
+                }
+                r = fread(buf + len, 1, 1, inf);
                 if (r != 1)
                     break;
                 len++;
             }
+            if (r < 1)
+            {
+                printf("<!-- EOF while searching for RS -->\n");
+                no_errors++;
+                break;
+            }
             if (split_fname)
             {
                 char fname[256];
@@ -324,41 +369,52 @@ static void dump(const char *fname, const char *from, const char *to,
                         fprintf(stderr, "Could write content to %s\n",
                                 fname);
                         split_fname = 0;
+                        no_errors++;
                     }
                     fclose(sf);
                 }
             }
             len_result = rlen;
             r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
-            if (r > 0 && result)
+            if (r == -1)
+                no_errors++;
+            if (r > 0 && result && len_result)
             {
-                fwrite (result, len_result, 1, stdout);
+                if (fwrite(result, len_result, 1, stdout) != 1)
+                {
+                    fprintf(stderr, "Write to stdout failed\n");
+                    no_errors++;
+                    break;
+                }
             }
             if (r > 0 && cfile)
             {
                 char *p = buf;
                 size_t i;
                 if (marc_no)
-                    fprintf (cfile, ",");
-                fprintf (cfile, "\n");
+                    fprintf(cfile, ",");
+                fprintf(cfile, "\n");
                 for (i = 0; i < r; i++)
                 {
                     if ((i & 15) == 0)
-                        fprintf (cfile, "  \"");
-                    fprintf (cfile, "\\x%02X", p[i] & 255);
-                    
+                        fprintf(cfile, "  \"");
+                    if (p[i] < 32 || p[i] > 126)
+                        fprintf(cfile, "\" \"\\x%02X\" \"", p[i] & 255);
+                    else
+                        fputc(p[i], cfile);
+
                     if (i < r - 1 && (i & 15) == 15)
-                        fprintf (cfile, "\"\n");
-                    
+                        fprintf(cfile, "\"\n");
+
                 }
-                fprintf (cfile, "\"\n");
+                fprintf(cfile, "\"\n");
             }
             num++;
             if (verbose)
                 printf("\n");
         }
         if (cfile)
-            fprintf (cfile, "};\n");
+            fprintf(cfile, "};\n");
         fclose(inf);
     }
     {
@@ -398,7 +454,7 @@ int main (int argc, char **argv)
 #endif
 
     prog = *argv;
-    while ((r = options("i:o:C:npvc:xOeXIf:t:s:l:", argv, argc, &arg)) != -2)
+    while ((r = options("i:o:C:npc:xOeXIf:t:s:l:Vv", argv, argc, &arg)) != -2)
     {
         no++;
         switch (r)
@@ -412,7 +468,7 @@ int main (int argc, char **argv)
             }
 #if YAZ_HAVE_XML2
 #else
-            if (input_format == YAZ_MARC_MARCXML 
+            if (input_format == YAZ_MARC_MARCXML
                 || input_format == YAZ_MARC_XCHANGE)
             {
                 fprintf(stderr, "%s: Libxml2 support not enabled\n", prog);
@@ -425,8 +481,14 @@ int main (int argc, char **argv)
                rather than WRBUF */
             if (strlen(arg) > 4 && strncmp(arg, "xml,", 4) == 0)
             {
+                /* Only supported for Libxml2 2.6.0 or later */
+#if LIBXML_VERSION >= 20600
                 arg = arg + 4;
                 write_using_libxml2 = 1;
+#else
+                fprintf(stderr, "%s: output using Libxml2 unsupported\n", prog);
+                exit(4);
+#endif
             }
             output_format = yaz_marc_decode_formatstr(arg);
             if (output_format == -1)
@@ -446,7 +508,7 @@ int main (int argc, char **argv)
             break;
         case 'c':
             if (cfile)
-                fclose (cfile);
+                fclose(cfile);
             cfile = fopen(arg, "w");
             break;
         case 'x':
@@ -495,23 +557,29 @@ int main (int argc, char **argv)
         case 'v':
             verbose++;
             break;
+        case 'V':
+            show_version();
+            break;
         default:
             usage(prog);
             exit(1);
         }
     }
     if (cfile)
-        fclose (cfile);
+        fclose(cfile);
     if (!no)
     {
         usage(prog);
-        exit (1);
+        exit(1);
     }
-    exit (0);
+    if (no_errors)
+        exit(5);
+    exit(0);
 }
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab