CCL: split-list deals with use attr YAZ-844
[yaz-moved-to-github.git] / util / marcdump.c
index adc6cba..adfc015 100644 (file)
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2012 Index Data
+ * Copyright (C) Index Data
  * See the file LICENSE for details.
  */
 
 #endif
 
 #include <yaz/marcdisp.h>
+#include <yaz/json.h>
 #include <yaz/yaz-util.h>
 #include <yaz/xmalloc.h>
 #include <yaz/options.h>
+#include <yaz/backtrace.h>
 
 #ifndef SEEK_SET
 #define SEEK_SET 0
@@ -56,6 +58,8 @@
 
 static char *prog;
 
+static int no_errors = 0;
+
 static void usage(const char *prog)
 {
     fprintf(stderr, "Usage: %s [-i format] [-o format] [-f from] [-t to] "
@@ -115,6 +119,53 @@ static void marcdump_read_line(yaz_marc_t mt, const char *fname)
     fclose(inf);
 }
 
+/*
+ * Read all of fname as one JSON document, convert it to a MARC record in mt
+ * via yaz_marc_read_json_node and print the result of yaz_marc_write_mode
+ * to stdout.
+ *
+ * mt:    MARC handle used for both reading and writing the record.
+ * fname: path of the JSON file to read.
+ *
+ * Exits the process with status 1 if fname cannot be opened. Every other
+ * failure (JSON syntax error, JSON that cannot be converted to MARC, MARC
+ * write error) is reported on stderr and counted in no_errors, in line with
+ * the other input readers in this file.
+ */
+static void marcdump_read_json(yaz_marc_t mt, const char *fname)
+{
+    FILE *inf = fopen(fname, "rb");
+    if (!inf)
+    {
+        fprintf(stderr, "%s: cannot open %s:%s\n",
+                prog, fname, strerror(errno));
+        exit(1);
+    }
+    else
+    {
+        const char *errmsg;
+        size_t errpos;
+        WRBUF w = wrbuf_alloc();
+        struct json_node *n;
+        int c;
+
+        /* json_parse2 takes a C string, so collect the whole file first */
+        while ((c = getc(inf)) != EOF)
+            wrbuf_putc(w, c);
+        n = json_parse2(wrbuf_cstr(w), &errmsg, &errpos);
+        if (n)
+        {
+            int r = yaz_marc_read_json_node(mt, n);
+            if (r == 0)
+            {
+                int write_rc;
+
+                /* w is reused as the output buffer from here on */
+                wrbuf_rewind(w);
+                write_rc = yaz_marc_write_mode(mt, w);
+                if (write_rc)
+                {
+                    fprintf(stderr, "%s: MARC write failed ret=%d\n", fname,
+                            write_rc);
+                    no_errors++;
+                }
+                fputs(wrbuf_cstr(w), stdout);
+                wrbuf_rewind(w);
+            }
+            else
+            {
+                fprintf(stderr, "%s: JSON MARC parsing failed ret=%d\n", fname,
+                        r);
+                no_errors++;
+            }
+            /* release the parsed tree; it is not used past this point */
+            json_remove_node(n);
+        }
+        else
+        {
+            fprintf(stderr, "%s: JSON parse error: %s . pos=%ld\n", fname,
+                    errmsg, (long) errpos);
+            no_errors++;
+        }
+        wrbuf_destroy(w);
+        fclose(inf);
+    }
+}
+
+
 #if YAZ_HAVE_XML2
 static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
 {
@@ -131,27 +182,34 @@ static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
             int type = xmlTextReaderNodeType(reader);
             if (type == XML_READER_TYPE_ELEMENT)
             {
-                const char *name = (const char *)
-                    xmlTextReaderLocalName(reader);
+                char *name = (char *) xmlTextReaderLocalName(reader);
                 if (!strcmp(name, "record") || !strcmp(name, "r"))
                 {
                     xmlNodePtr ptr = xmlTextReaderExpand(reader);
 
                     int r = yaz_marc_read_xml(mt, ptr);
                     if (r)
+                    {
+                        no_errors++;
                         fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    }
                     else
                     {
                         int write_rc = yaz_marc_write_mode(mt, wrbuf);
                         if (write_rc)
-                            yaz_log(YLOG_WARN, "yaz_marc_write_mode: write error: %d", write_rc);
-
+                        {
+                            yaz_log(YLOG_WARN, "yaz_marc_write_mode: "
+                                    "write error: %d", write_rc);
+                            no_errors++;
+                        }
                         fputs(wrbuf_cstr(wrbuf), stdout);
                         wrbuf_rewind(wrbuf);
                     }
                 }
+                xmlFree(name);
             }
         }
+        xmlFreeTextReader(reader);
     }
 #else
     xmlDocPtr doc = xmlParseFile(fname);
@@ -172,7 +230,10 @@ static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
                 {
                     int r = yaz_marc_read_xml(mt, ptr);
                     if (r)
+                    {
+                        no_errors++;
                         fprintf(stderr, "yaz_marc_read_xml failed\n");
+                    }
                     else
                     {
                         yaz_marc_write_mode(mt, wrbuf);
@@ -233,6 +294,10 @@ static void dump(const char *fname, const char *from, const char *to,
     {
         marcdump_read_line(mt, fname);
     }
+    else if (input_format == YAZ_MARC_JSON)
+    {
+        marcdump_read_json(mt, fname);
+    }
     else if (input_format == YAZ_MARC_ISO2709)
     {
         FILE *inf = fopen(fname, "rb");
@@ -255,27 +320,32 @@ static void dump(const char *fname, const char *from, const char *to,
             size_t len_result;
             size_t r;
             char buf[100001];
+            yaz_iconv_t cd1 = 0;
 
             r = fread(buf, 1, 5, inf);
             if (r < 5)
             {
-                if (r && print_offset && verbose)
+                if (r == 0) /* normal EOF, all good */
+                    break;
+                if (print_offset && verbose)
+                {
                     printf("<!-- Extra %ld bytes at end of file -->\n",
                            (long) r);
+                }
                 break;
             }
             while (*buf < '0' || *buf > '9')
             {
                 int i;
                 long off = ftell(inf) - 5;
-                if (verbose || print_offset)
-                    printf("<!-- Skipping bad byte %d (0x%02X) at offset "
-                           "%ld (0x%lx) -->\n",
-                           *buf & 0xff, *buf & 0xff,
-                           off, off);
+                printf("<!-- Skipping bad byte %d (0x%02X) at offset "
+                       "%ld (0x%lx) -->\n",
+                       *buf & 0xff, *buf & 0xff,
+                       off, off);
                 for (i = 0; i<4; i++)
                     buf[i] = buf[i+1];
                 r = fread(buf+4, 1, 1, inf);
+                no_errors++;
                 if (r < 1)
                     break;
             }
@@ -295,23 +365,39 @@ static void dump(const char *fname, const char *from, const char *to,
             if (len < 25 || len > 100000)
             {
                 long off = ftell(inf) - 5;
-                printf("Bad Length %ld read at offset %ld (%lx)\n",
+                printf("<!-- Bad Length %ld read at offset %ld (%lx) -->\n",
                        (long)len, (long) off, (long) off);
+                no_errors++;
                 break;
             }
             rlen = len - 5;
             r = fread(buf + 5, 1, rlen, inf);
             if (r < rlen)
+            {
+                long off = ftell(inf);
+                printf("<!-- Premature EOF at offset %ld (%lx) -->\n",
+                       (long) off, (long) off);
+                no_errors++;
                 break;
+            }
             while (buf[len-1] != ISO2709_RS)
             {
                 if (len > sizeof(buf)-2)
+                {
+                    r = 0;
                     break;
+                }
                 r = fread(buf + len, 1, 1, inf);
                 if (r != 1)
                     break;
                 len++;
             }
+            if (r < 1)
+            {
+                printf("<!-- EOF while searching for RS -->\n");
+                no_errors++;
+                break;
+            }
             if (split_fname)
             {
                 char fname[256];
@@ -338,17 +424,35 @@ static void dump(const char *fname, const char *from, const char *to,
                         fprintf(stderr, "Could write content to %s\n",
                                 fname);
                         split_fname = 0;
+                        no_errors++;
                     }
                     fclose(sf);
                 }
             }
             len_result = rlen;
+
+            if (yaz_marc_check_marc21_coding(from, buf, 26))
+            {
+                cd1 = yaz_iconv_open(to, "utf-8");
+                if (cd1)
+                    yaz_marc_iconv(mt, cd1);
+            }
             r = yaz_marc_decode_buf(mt, buf, -1, &result, &len_result);
+
+            if (cd1)
+            {
+                yaz_iconv_close(cd1);
+                yaz_marc_iconv(mt, cd);
+            }
+
+            if (r == -1)
+                no_errors++;
             if (r > 0 && result && len_result)
             {
                 if (fwrite(result, len_result, 1, stdout) != 1)
                 {
                     fprintf(stderr, "Write to stdout failed\n");
+                    no_errors++;
                     break;
                 }
             }
@@ -363,7 +467,10 @@ static void dump(const char *fname, const char *from, const char *to,
                 {
                     if ((i & 15) == 0)
                         fprintf(cfile, "  \"");
-                    fprintf(cfile, "\\x%02X", p[i] & 255);
+                    if (p[i] < 32 || p[i] > 126)
+                        fprintf(cfile, "\" \"\\x%02X\" \"", p[i] & 255);
+                    else
+                        fputc(p[i], cfile);
 
                     if (i < r - 1 && (i & 15) == 15)
                         fprintf(cfile, "\"\n");
@@ -416,6 +523,7 @@ int main (int argc, char **argv)
 #endif
 
     prog = *argv;
+    yaz_enable_panic_backtrace(prog);
     while ((r = options("i:o:C:npc:xOeXIf:t:s:l:Vv", argv, argc, &arg)) != -2)
     {
         no++;
@@ -534,6 +642,8 @@ int main (int argc, char **argv)
         usage(prog);
         exit(1);
     }
+    if (no_errors)
+        exit(5);
     exit(0);
 }
 /*