X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=util%2Fmarcdump.c;h=82377b30d2b679afe3882e4492b175c2f16a6a77;hb=b414bc74229f18537e6742b081aad7c4679006c8;hp=e46beab02cea854d0894a44708b4c84f52505bf1;hpb=72d242591ea2f6374ffb3166383f3ffddc342215;p=yaz-moved-to-github.git diff --git a/util/marcdump.c b/util/marcdump.c index e46beab..82377b3 100644 --- a/util/marcdump.c +++ b/util/marcdump.c @@ -1,19 +1,28 @@ /* - * Copyright (c) 1995-2003, Index Data + * Copyright (C) 1995-2005, Index Data ApS * See the file LICENSE for details. - * Sebastian Hammer, Adam Dickmeiss * - * $Id: marcdump.c,v 1.21 2003-02-12 22:02:33 adam Exp $ + * $Id: marcdump.c,v 1.29 2005-04-20 13:04:04 adam Exp $ */ #if HAVE_CONFIG_H #include #endif +#if HAVE_XML2 +#include +#include + +#include +#include + +#endif + #include #include #include #include +#include #if HAVE_LOCALE_H #include @@ -36,13 +45,64 @@ static void usage(const char *prog) { - fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-v] file...\n", + fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-e] [-I] [-v] file...\n", prog); } +#if HAVE_XML2 +void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) { + xmlNodePtr cur; + int size; + int i; + + assert(output); + size = (nodes) ? nodes->nodeNr : 0; + + fprintf(output, "Result (%d nodes):\n", size); + for(i = 0; i < size; ++i) { + assert(nodes->nodeTab[i]); + + if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL) + { + xmlNsPtr ns; + + ns = (xmlNsPtr)nodes->nodeTab[i]; + cur = (xmlNodePtr)ns->next; + if(cur->ns) { + fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", + ns->prefix, ns->href, cur->ns->href, cur->name); + } else { + fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", + ns->prefix, ns->href, cur->name); + } + } + else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE) + { + cur = nodes->nodeTab[i]; + if(cur->ns) { + fprintf(output, "= element node \"%s:%s\"\n", + cur->ns->href, cur->name); + } + else + { + fprintf(output, "= element node \"%s\"\n", + cur->name); + } + } + else + { + cur = nodes->nodeTab[i]; + fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type); + } + } +} +#endif + int main (int argc, char **argv) { int r; + int libxml_dom_test = 0; + int print_offset = 0; char *arg; int verbose = 0; FILE *inf; @@ -52,16 +112,18 @@ int main (int argc, char **argv) int xml = 0; FILE *cfile = 0; char *from = 0, *to = 0; - + int num = 1; #if HAVE_LOCALE_H setlocale(LC_CTYPE, ""); #endif #if HAVE_LANGINFO_H +#ifdef CODESET to = nl_langinfo(CODESET); #endif +#endif - while ((r = options("vc:xOXf:t:", argv, argc, &arg)) != -2) + while ((r = options("pvc:xOeXIf:t:2", argv, argc, &arg)) != -2) { int count; no++; @@ -84,11 +146,23 @@ int main (int argc, char **argv) case 'O': xml = YAZ_MARC_OAIMARC; break; + case 'e': + xml = YAZ_MARC_XCHANGE; + break; case 'X': xml = YAZ_MARC_MARCXML; break; + case 'I': + xml = YAZ_MARC_ISO2709; + break; + case 'p': + print_offset = 1; + break; + case '2': + libxml_dom_test = 1; + break; case 0: - inf = fopen (arg, "r"); + inf = fopen (arg, "rb"); count = 0; if (!inf) { @@ -112,54 +186,103 @@ int main (int argc, char **argv) "unsupported\n", from, to); exit(2); } + yaz_marc_iconv(mt, cd); } yaz_marc_xml(mt, xml); yaz_marc_debug(mt, verbose); while (1) { int len; - char *result; + char *result = 0; int rlen; r = fread (buf, 1, 5, inf); if (r < 5) + { + if (r && print_offset && verbose) + printf ("Extra %d bytes at end of file", r); break; + } + while (*buf < '0' || *buf > '9') + { + int i; + long off = ftell(inf) - 5; + if (verbose || print_offset) + printf("Skipping bad byte %d (0x%02X) at offset " + "%ld (0x%lx)\n", + *buf & 0xff, *buf & 0xff, + off, off); + for (i = 0; i<4; i++) + buf[i] = buf[i+1]; + r = fread(buf+4, 1, 1, inf); + if (r < 1) + break; + } + if (r < 1) + { + if (verbose || print_offset) + printf ("End of file with extra garbage\n"); + break; + } + if (print_offset) + { + long off = ftell(inf) - 5; + printf ("Record %d offset %ld (0x%lx)\n", num, + off, off); + } len = atoi_n(buf, 5); if (len < 25 || len > 100000) + { + long off = ftell(inf) - 5; + printf("Bad Length %d read at offset %ld (%lx)\n", + len, (long) off, (long) off); break; + } len = len - 5; r = fread (buf + 5, 1, len, inf); if (r < len) break; r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen); - if (r <= 0) - break; - if (!cd) - fwrite (result, rlen, 1, stdout); - else - { - char outbuf[12]; - size_t inbytesleft = rlen; - const char *inp = result; - - while (inbytesleft) - { - size_t outbytesleft = sizeof(outbuf); - char *outp = outbuf; - size_t r = yaz_iconv (cd, (char**) &inp, - &inbytesleft, - &outp, &outbytesleft); - if (r == (size_t) (-1)) - { - int e = yaz_iconv_error(cd); - if (e != YAZ_ICONV_E2BIG) - break; - } - fwrite (outbuf, outp - outbuf, 1, stdout); - } - } + if (result) + fwrite (result, rlen, 1, stdout); +#if HAVE_XML2 + if (r > 0 && libxml_dom_test) + { + xmlDocPtr doc = xmlParseMemory(result, rlen); + if (!doc) + fprintf(stderr, "xmLParseMemory failed\n"); + else + { + int i; + xmlXPathContextPtr xpathCtx; + xmlXPathObjectPtr xpathObj; + static const char *xpathExpr[] = { + "/record/datafield[@tag='245']/subfield[@code='a']", + "/record/datafield[@tag='100']/subfield", + "/record/datafield[@tag='245']/subfield[@code='a']", + "/record/datafield[@tag='650']/subfield", + "/record/datafield[@tag='650']", + 0}; + + xpathCtx = xmlXPathNewContext(doc); - if (cfile) + for (i = 0; xpathExpr[i]; i++) { + xpathObj = xmlXPathEvalExpression(xpathExpr[i], xpathCtx); + if(xpathObj == NULL) { + fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]); + } + else + { + print_xpath_nodes(xpathObj->nodesetval, stdout); + xmlXPathFreeObject(xpathObj); + } + } + xmlXPathFreeContext(xpathCtx); + xmlFreeDoc(doc); + } + } +#endif + if (r > 0 && cfile) { char *p = buf; int i; @@ -178,13 +301,16 @@ int main (int argc, char **argv) } fprintf (cfile, "\"\n"); } + num++; } count++; if (cd) yaz_iconv_close(cd); + yaz_marc_destroy(mt); } if (cfile) fprintf (cfile, "};\n"); + fclose(inf); break; case 'v': verbose++;