yaz-marcdump skips non-digit chars between recs and warns about it
[yaz-moved-to-github.git] / util / marcdump.c
1 /*
2  * Copyright (C) 1995-2005, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdump.c,v 1.29 2005-04-20 13:04:04 adam Exp $
6  */
7
8 #if HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #if HAVE_XML2
13 #include <libxml/parser.h>
14 #include <libxml/tree.h>
15
16 #include <libxml/xpath.h>
17 #include <libxml/xpathInternals.h>
18
19 #endif
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <errno.h>
25 #include <assert.h>
26
27 #if HAVE_LOCALE_H
28 #include <locale.h>
29 #endif
30 #if HAVE_LANGINFO_H
31 #include <langinfo.h>
32 #endif
33
34 #include <yaz/marcdisp.h>
35 #include <yaz/yaz-util.h>
36 #include <yaz/xmalloc.h>
37 #include <yaz/options.h>
38
39 #ifndef SEEK_SET
40 #define SEEK_SET 0
41 #endif
42 #ifndef SEEK_END
43 #define SEEK_END 2
44 #endif
45
46 static void usage(const char *prog)
47 {
48     fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-e] [-I] [-v] file...\n",
49              prog);
50
51
52 #if HAVE_XML2
53 void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) {
54     xmlNodePtr cur;
55     int size;
56     int i;
57     
58     assert(output);
59     size = (nodes) ? nodes->nodeNr : 0;
60     
61     fprintf(output, "Result (%d nodes):\n", size);
62     for(i = 0; i < size; ++i) {
63         assert(nodes->nodeTab[i]);
64         
65         if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL)
66         {
67             xmlNsPtr ns;
68             
69             ns = (xmlNsPtr)nodes->nodeTab[i];
70             cur = (xmlNodePtr)ns->next;
71             if(cur->ns) { 
72                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", 
73                     ns->prefix, ns->href, cur->ns->href, cur->name);
74             } else {
75                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", 
76                     ns->prefix, ns->href, cur->name);
77             }
78         } 
79         else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE)
80         {
81             cur = nodes->nodeTab[i];        
82             if(cur->ns) { 
83                 fprintf(output, "= element node \"%s:%s\"\n", 
84                     cur->ns->href, cur->name);
85             } 
86             else
87             {
88                 fprintf(output, "= element node \"%s\"\n", 
89                     cur->name);
90             }
91         }
92         else
93         {
94             cur = nodes->nodeTab[i];    
95             fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type);
96         }
97     }
98 }
99 #endif
100
101 int main (int argc, char **argv)
102 {
103     int r;
104     int libxml_dom_test = 0;
105     int print_offset = 0;
106     char *arg;
107     int verbose = 0;
108     FILE *inf;
109     char buf[100001];
110     char *prog = *argv;
111     int no = 0;
112     int xml = 0;
113     FILE *cfile = 0;
114     char *from = 0, *to = 0;
115     int num = 1;
116     
117 #if HAVE_LOCALE_H
118     setlocale(LC_CTYPE, "");
119 #endif
120 #if HAVE_LANGINFO_H
121 #ifdef CODESET
122     to = nl_langinfo(CODESET);
123 #endif
124 #endif
125
126     while ((r = options("pvc:xOeXIf:t:2", argv, argc, &arg)) != -2)
127     {
128         int count;
129         no++;
130         switch (r)
131         {
132         case 'f':
133             from = arg;
134             break;
135         case 't':
136             to = arg;
137             break;
138         case 'c':
139             if (cfile)
140                 fclose (cfile);
141             cfile = fopen (arg, "w");
142             break;
143         case 'x':
144             xml = YAZ_MARC_SIMPLEXML;
145             break;
146         case 'O':
147             xml = YAZ_MARC_OAIMARC;
148             break;
149         case 'e':
150             xml = YAZ_MARC_XCHANGE;
151             break;
152         case 'X':
153             xml = YAZ_MARC_MARCXML;
154             break;
155         case 'I':
156             xml = YAZ_MARC_ISO2709;
157             break;
158         case 'p':
159             print_offset = 1;
160             break;
161         case '2':
162             libxml_dom_test = 1;
163             break;
164         case 0:
165             inf = fopen (arg, "rb");
166             count = 0;
167             if (!inf)
168             {
169                 fprintf (stderr, "%s: cannot open %s:%s\n",
170                          prog, arg, strerror (errno));
171                 exit(1);
172             }
173             if (cfile)
174                 fprintf (cfile, "char *marc_records[] = {\n");
175             if (1)
176             {
177                 yaz_marc_t mt = yaz_marc_create();
178                 yaz_iconv_t cd = 0;
179
180                 if (from && to)
181                 {
182                     cd = yaz_iconv_open(to, from);
183                     if (!cd)
184                     {
185                         fprintf(stderr, "conversion from %s to %s "
186                                 "unsupported\n", from, to);
187                         exit(2);
188                     }
189                     yaz_marc_iconv(mt, cd);
190                 }
191                 yaz_marc_xml(mt, xml);
192                 yaz_marc_debug(mt, verbose);
193                 while (1)
194                 {
195                     int len;
196                     char *result = 0;
197                     int rlen;
198                     
199                     r = fread (buf, 1, 5, inf);
200                     if (r < 5)
201                     {
202                         if (r && print_offset && verbose)
203                             printf ("Extra %d bytes at end of file", r);
204                         break;
205                     }
206                     while (*buf < '0' || *buf > '9')
207                     {
208                         int i;
209                         long off = ftell(inf) - 5;
210                         if (verbose || print_offset)
211                             printf("Skipping bad byte %d (0x%02X) at offset "
212                                    "%ld (0x%lx)\n", 
213                                    *buf & 0xff, *buf & 0xff,
214                                    off, off);
215                         for (i = 0; i<4; i++)
216                             buf[i] = buf[i+1];
217                         r = fread(buf+4, 1, 1, inf);
218                         if (r < 1)
219                             break;
220                     }
221                     if (r < 1)
222                     {
223                         if (verbose || print_offset)
224                             printf ("End of file with extra garbage\n");
225                         break;
226                     }
227                     if (print_offset)
228                     {
229                         long off = ftell(inf) - 5;
230                         printf ("Record %d offset %ld (0x%lx)\n", num, 
231                                 off, off);
232                     }
233                     len = atoi_n(buf, 5);
234                     if (len < 25 || len > 100000)
235                     {
236                         long off = ftell(inf) - 5;
237                         printf("Bad Length %d read at offset %ld (%lx)\n",
238                                len, (long) off, (long) off);
239                         break;
240                     }
241                     len = len - 5;
242                     r = fread (buf + 5, 1, len, inf);
243                     if (r < len)
244                         break;
245                     r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
246                     if (result)
247                         fwrite (result, rlen, 1, stdout);
248 #if HAVE_XML2
249                     if (r > 0 && libxml_dom_test)
250                     {
251                         xmlDocPtr doc = xmlParseMemory(result, rlen);
252                         if (!doc)
253                             fprintf(stderr, "xmLParseMemory failed\n");
254                         else
255                         {
256                             int i;
257                             xmlXPathContextPtr xpathCtx; 
258                             xmlXPathObjectPtr xpathObj; 
259                             static const char *xpathExpr[] = {
260                                 "/record/datafield[@tag='245']/subfield[@code='a']",
261                                 "/record/datafield[@tag='100']/subfield",
262                                 "/record/datafield[@tag='245']/subfield[@code='a']",
263                                 "/record/datafield[@tag='650']/subfield",
264                                 "/record/datafield[@tag='650']",
265                                 0};
266                             
267                             xpathCtx = xmlXPathNewContext(doc);
268
269                             for (i = 0; xpathExpr[i]; i++) {
270                                 xpathObj = xmlXPathEvalExpression(xpathExpr[i], xpathCtx);
271                                 if(xpathObj == NULL) {
272                                     fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]);
273                                 }
274                                 else
275                                 {
276                                     print_xpath_nodes(xpathObj->nodesetval, stdout);
277                                     xmlXPathFreeObject(xpathObj);
278                                 }
279                             }
280                             xmlXPathFreeContext(xpathCtx); 
281                             xmlFreeDoc(doc);
282                         }
283                     }
284 #endif
285                     if (r > 0 && cfile)
286                     {
287                         char *p = buf;
288                         int i;
289                         if (count)
290                             fprintf (cfile, ",");
291                         fprintf (cfile, "\n");
292                         for (i = 0; i < r; i++)
293                         {
294                             if ((i & 15) == 0)
295                                 fprintf (cfile, "  \"");
296                             fprintf (cfile, "\\x%02X", p[i] & 255);
297                             
298                             if (i < r - 1 && (i & 15) == 15)
299                                 fprintf (cfile, "\"\n");
300                             
301                         }
302                         fprintf (cfile, "\"\n");
303                     }
304                     num++;
305                 }
306                 count++;
307                 if (cd)
308                     yaz_iconv_close(cd);
309                 yaz_marc_destroy(mt);
310             }
311             if (cfile)
312                 fprintf (cfile, "};\n");
313             fclose(inf);
314             break;
315         case 'v':
316             verbose++;
317             break;
318         default:
319             usage(prog);
320             exit (1);
321         }
322     }
323     if (cfile)
324         fclose (cfile);
325     if (!no)
326     {
327         usage(prog);
328         exit (1);
329     }
330     exit (0);
331 }