Expanded tabs in all source files. Added vim/emacs local variables
[yaz-moved-to-github.git] / util / marcdump.c
index 2b254cf..0c38835 100644 (file)
@@ -1,18 +1,42 @@
 /*
- * Copyright (C) 1994, Index Data I/S 
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: marcdump.c,v $
- * Revision 1.1  1995-04-10 10:28:47  quinn
- * Added copy of CCL and MARC display
+ * Copyright (C) 1995-2005, Index Data ApS
+ * See the file LICENSE for details.
  *
+ * $Id: marcdump.c,v 1.32 2005-06-25 15:46:07 adam Exp $
  */
 
+#define _FILE_OFFSET_BITS 64
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#if HAVE_XML2
+#include <libxml/parser.h>
+#include <libxml/tree.h>
+
+#include <libxml/xpath.h>
+#include <libxml/xpathInternals.h>
+
+#endif
+
 #include <stdio.h>
 #include <stdlib.h>
+#include <string.h>
 #include <errno.h>
-#include <marcdisp.h>
+#include <assert.h>
+
+#if HAVE_LOCALE_H
+#include <locale.h>
+#endif
+#if HAVE_LANGINFO_H
+#include <langinfo.h>
+#endif
+
+#include <yaz/marcdisp.h>
+#include <yaz/yaz-util.h>
+#include <yaz/xmalloc.h>
+#include <yaz/options.h>
 
 #ifndef SEEK_SET
 #define SEEK_SET 0
 #ifndef SEEK_END
 #define SEEK_END 2
 #endif
+
+/*
+ * Print a one-line command synopsis for this program to stderr.
+ *
+ * prog: program name shown at the start of the synopsis.
+ *
+ * The synopsis includes -p, which the option string in main accepts and
+ * handles.  -2 is accepted there as well, but it only enables the libxml2
+ * XPath self-test, so it is left out of the user-facing synopsis.
+ */
+static void usage(const char *prog)
+{
+    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-e] [-I] [-p] [-v] file...\n",
+             prog);
+}
+
+#if HAVE_XML2
+/*
+ * Write a summary of an XPath node set to output.
+ *
+ * nodes:  node set to describe; a NULL set is reported as zero nodes.
+ * output: destination stream; must not be NULL (asserted).
+ *
+ * A "Result (N nodes):" header is printed first, then one line per node.
+ * Namespace declarations and element nodes have their own formats; any
+ * other node is printed with its name and numeric type.
+ */
+void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) {
+    int size;
+    int i;
+
+    assert(output);
+    if (nodes)
+        size = nodes->nodeNr;
+    else
+        size = 0;
+
+    fprintf(output, "Result (%d nodes):\n", size);
+    for (i = 0; i < size; i++) {
+        xmlNodePtr cur;
+
+        assert(nodes->nodeTab[i]);
+        cur = nodes->nodeTab[i];
+
+        switch (cur->type)
+        {
+        case XML_NAMESPACE_DECL:
+        {
+            /* The entry is an xmlNs stored in a node slot; its next field
+             * is read here as the node the declaration is reported for. */
+            xmlNsPtr ns = (xmlNsPtr) cur;
+            xmlNodePtr owner = (xmlNodePtr) ns->next;
+
+            if (owner->ns)
+                fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n",
+                        ns->prefix, ns->href, owner->ns->href, owner->name);
+            else
+                fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n",
+                        ns->prefix, ns->href, owner->name);
+            break;
+        }
+        case XML_ELEMENT_NODE:
+            /* Elements bound to a namespace are shown as href:name. */
+            if (cur->ns)
+                fprintf(output, "= element node \"%s:%s\"\n",
+                        cur->ns->href, cur->name);
+            else
+                fprintf(output, "= element node \"%s\"\n",
+                        cur->name);
+            break;
+        default:
+            fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type);
+            break;
+        }
+    }
+}
+#endif
+
 int main (int argc, char **argv)
 {
-    FILE *inf;
-    long file_size;
-    char *buf;
     int r;
+    int libxml_dom_test = 0;
+    int print_offset = 0;
+    char *arg;
+    int verbose = 0;
+    FILE *inf;
+    char buf[100001];
+    char *prog = *argv;
+    int no = 0;
+    int xml = 0;
+    FILE *cfile = 0;
+    char *from = 0, *to = 0;
+    int num = 1;
+    
+#if HAVE_LOCALE_H
+    setlocale(LC_CTYPE, "");
+#endif
+#if HAVE_LANGINFO_H
+#ifdef CODESET
+    to = nl_langinfo(CODESET);
+#endif
+#endif
 
-    if (argc < 2)
-    {
-        fprintf (stderr, "usage\n%s <file>\n", *argv);
-       exit (1);
-    }
-    inf = fopen (argv[1], "r");
-    if (!inf)
-    {
-        fprintf (stderr, "%s: cannot open %s:%s\n",
-                *argv, argv[1], strerror (errno));
-        exit (1);
-    }
-    if (fseek (inf, 0L, SEEK_END))
-    {
-        fprintf (stderr, "%s: cannot seek in %s:%s\n",
-                *argv, argv[1], strerror (errno));
-        exit (1);
-    }
-    file_size = ftell (inf);    
-    if (fseek (inf, 0L, SEEK_SET))
+    while ((r = options("pvc:xOeXIf:t:2", argv, argc, &arg)) != -2)
     {
-        fprintf (stderr, "%s: cannot seek in %s:%s\n",
-                *argv, argv[1], strerror (errno));
-        exit (1);
-    }
-    buf = malloc (file_size);
-    if (!buf)
-    {
-        fprintf (stderr, "%s: cannot malloc: %s\n",
-                *argv, strerror (errno));
-        exit (1);
+        int count;
+        no++;
+        switch (r)
+        {
+        case 'f':
+            from = arg;
+            break;
+        case 't':
+            to = arg;
+            break;
+        case 'c':
+            if (cfile)
+                fclose (cfile);
+            cfile = fopen(arg, "w");
+            break;
+        case 'x':
+            xml = YAZ_MARC_SIMPLEXML;
+            break;
+        case 'O':
+            xml = YAZ_MARC_OAIMARC;
+            break;
+        case 'e':
+            xml = YAZ_MARC_XCHANGE;
+            break;
+        case 'X':
+            xml = YAZ_MARC_MARCXML;
+            break;
+        case 'I':
+            xml = YAZ_MARC_ISO2709;
+            break;
+        case 'p':
+            print_offset = 1;
+            break;
+        case '2':
+            libxml_dom_test = 1;
+            break;
+        case 0:
+            inf = fopen(arg, "rb");
+            count = 0;
+            if (!inf)
+            {
+                fprintf (stderr, "%s: cannot open %s:%s\n",
+                         prog, arg, strerror (errno));
+                exit(1);
+            }
+            if (cfile)
+                fprintf (cfile, "char *marc_records[] = {\n");
+            if (1)
+            {
+                yaz_marc_t mt = yaz_marc_create();
+                yaz_iconv_t cd = 0;
+
+                if (from && to)
+                {
+                    cd = yaz_iconv_open(to, from);
+                    if (!cd)
+                    {
+                        fprintf(stderr, "conversion from %s to %s "
+                                "unsupported\n", from, to);
+                        exit(2);
+                    }
+                    yaz_marc_iconv(mt, cd);
+                }
+                yaz_marc_xml(mt, xml);
+                yaz_marc_debug(mt, verbose);
+                while (1)
+                {
+                    int len;
+                    char *result = 0;
+                    int rlen;
+                    
+                    r = fread (buf, 1, 5, inf);
+                    if (r < 5)
+                    {
+                        if (r && print_offset && verbose)
+                            printf ("<!-- Extra %d bytes at end of file -->\n", r);
+                        break;
+                    }
+                    while (*buf < '0' || *buf > '9')
+                    {
+                        int i;
+                        long off = ftell(inf) - 5;
+                        if (verbose || print_offset)
+                            printf("<!-- Skipping bad byte %d (0x%02X) at offset "
+                                   "%ld (0x%lx) -->\n", 
+                                   *buf & 0xff, *buf & 0xff,
+                                   off, off);
+                        for (i = 0; i<4; i++)
+                            buf[i] = buf[i+1];
+                        r = fread(buf+4, 1, 1, inf);
+                        if (r < 1)
+                            break;
+                    }
+                    if (r < 1)
+                    {
+                        if (verbose || print_offset)
+                            printf ("<!-- End of file with data -->\n");
+                        break;
+                    }
+                    if (print_offset)
+                    {
+                        long off = ftell(inf) - 5;
+                        printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
+                                num, off, off);
+                    }
+                    len = atoi_n(buf, 5);
+                    if (len < 25 || len > 100000)
+                    {
+                        long off = ftell(inf) - 5;
+                        printf("Bad Length %d read at offset %ld (%lx)\n",
+                               len, (long) off, (long) off);
+                        break;
+                    }
+                    len = len - 5;
+                    r = fread (buf + 5, 1, len, inf);
+                    if (r < len)
+                        break;
+                    r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
+                    if (result)
+                        fwrite (result, rlen, 1, stdout);
+#if HAVE_XML2
+                    if (r > 0 && libxml_dom_test)
+                    {
+                        xmlDocPtr doc = xmlParseMemory(result, rlen);
+                        if (!doc)
+                            fprintf(stderr, "xmLParseMemory failed\n");
+                        else
+                        {
+                            int i;
+                            xmlXPathContextPtr xpathCtx; 
+                            xmlXPathObjectPtr xpathObj; 
+                            static const char *xpathExpr[] = {
+                                "/record/datafield[@tag='245']/subfield[@code='a']",
+                                "/record/datafield[@tag='100']/subfield",
+                                "/record/datafield[@tag='245']/subfield[@code='a']",
+                                "/record/datafield[@tag='650']/subfield",
+                                "/record/datafield[@tag='650']",
+                                0};
+                            
+                            xpathCtx = xmlXPathNewContext(doc);
+
+                            for (i = 0; xpathExpr[i]; i++) {
+                                xpathObj = xmlXPathEvalExpression(xpathExpr[i], xpathCtx);
+                                if(xpathObj == NULL) {
+                                    fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]);
+                                }
+                                else
+                                {
+                                    print_xpath_nodes(xpathObj->nodesetval, stdout);
+                                    xmlXPathFreeObject(xpathObj);
+                                }
+                            }
+                            xmlXPathFreeContext(xpathCtx); 
+                            xmlFreeDoc(doc);
+                        }
+                    }
+#endif
+                    if (r > 0 && cfile)
+                    {
+                        char *p = buf;
+                        int i;
+                        if (count)
+                            fprintf (cfile, ",");
+                        fprintf (cfile, "\n");
+                        for (i = 0; i < r; i++)
+                        {
+                            if ((i & 15) == 0)
+                                fprintf (cfile, "  \"");
+                            fprintf (cfile, "\\x%02X", p[i] & 255);
+                            
+                            if (i < r - 1 && (i & 15) == 15)
+                                fprintf (cfile, "\"\n");
+                            
+                        }
+                        fprintf (cfile, "\"\n");
+                    }
+                    num++;
+                    if (verbose)
+                        printf("\n");
+                }
+                count++;
+                if (cd)
+                    yaz_iconv_close(cd);
+                yaz_marc_destroy(mt);
+            }
+            if (cfile)
+                fprintf (cfile, "};\n");
+            fclose(inf);
+            break;
+        case 'v':
+            verbose++;
+            break;
+        default:
+            usage(prog);
+            exit (1);
+        }
     }
-    if (fread (buf, 1, file_size, inf) != file_size)
+    if (cfile)
+        fclose (cfile);
+    if (!no)
     {
-        fprintf (stderr, "%s: cannot read %s: %s\n",
-                *argv, argv[1], strerror (errno));
+        usage(prog);
         exit (1);
     }
-    while ((r = marc_display (buf, stdout)) > 0)
-        buf += r;
     exit (0);
 }
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+