Added utility program yaz-xmlquery.
[yaz-moved-to-github.git] / util / marcdump.c
1 /*
2  * Copyright (C) 1995-2005, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdump.c,v 1.35 2005-12-18 15:58:02 adam Exp $
6  */
7
8 #define _FILE_OFFSET_BITS 64
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #if HAVE_XML2
15 #include <libxml/parser.h>
16 #include <libxml/tree.h>
17
18 #include <libxml/xpath.h>
19 #include <libxml/xpathInternals.h>
20
21 #endif
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <errno.h>
27 #include <assert.h>
28
29 #if HAVE_LOCALE_H
30 #include <locale.h>
31 #endif
32 #if HAVE_LANGINFO_H
33 #include <langinfo.h>
34 #endif
35
36 #include <yaz/marcdisp.h>
37 #include <yaz/yaz-util.h>
38 #include <yaz/xmalloc.h>
39 #include <yaz/options.h>
40
41 #ifndef SEEK_SET
42 #define SEEK_SET 0
43 #endif
44 #ifndef SEEK_END
45 #define SEEK_END 2
46 #endif
47
/*
 * Print a one-line command synopsis to stderr.
 *
 * prog - program name to show in the synopsis (callers pass argv[0]).
 *
 * The function only prints; callers decide whether to exit afterwards.
 * The synopsis lists every option letter accepted by the option string
 * that main() hands to options().
 */
static void usage(const char *prog)
{
    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-s prefix] "
             "[-x] [-O] [-X] [-e] [-I] [-p] [-2] [-v] file...\n",
             prog);
}
53
54 #if HAVE_XML2
55 void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) {
56     xmlNodePtr cur;
57     int size;
58     int i;
59     
60     assert(output);
61     size = (nodes) ? nodes->nodeNr : 0;
62     
63     fprintf(output, "Result (%d nodes):\n", size);
64     for(i = 0; i < size; ++i) {
65         assert(nodes->nodeTab[i]);
66         
67         if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL)
68         {
69             xmlNsPtr ns;
70             
71             ns = (xmlNsPtr)nodes->nodeTab[i];
72             cur = (xmlNodePtr)ns->next;
73             if(cur->ns) { 
74                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", 
75                     ns->prefix, ns->href, cur->ns->href, cur->name);
76             } else {
77                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", 
78                     ns->prefix, ns->href, cur->name);
79             }
80         } 
81         else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE)
82         {
83             cur = nodes->nodeTab[i];        
84             if(cur->ns) { 
85                 fprintf(output, "= element node \"%s:%s\"\n", 
86                     cur->ns->href, cur->name);
87             } 
88             else
89             {
90                 fprintf(output, "= element node \"%s\"\n", 
91                     cur->name);
92             }
93         }
94         else
95         {
96             cur = nodes->nodeTab[i];    
97             fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type);
98         }
99     }
100 }
101 #endif
102
103 int main (int argc, char **argv)
104 {
105     int r;
106     int libxml_dom_test = 0;
107     int print_offset = 0;
108     char *arg;
109     int verbose = 0;
110     FILE *inf;
111     char buf[100001];
112     char *prog = *argv;
113     int no = 0;
114     int xml = 0;
115     FILE *cfile = 0;
116     char *from = 0, *to = 0;
117     int num = 1;
118     const char *split_fname = 0;
119     
120 #if HAVE_LOCALE_H
121     setlocale(LC_CTYPE, "");
122 #endif
123 #if HAVE_LANGINFO_H
124 #ifdef CODESET
125     to = nl_langinfo(CODESET);
126 #endif
127 #endif
128
129     while ((r = options("pvc:xOeXIf:t:2s:", argv, argc, &arg)) != -2)
130     {
131         int count;
132         no++;
133         switch (r)
134         {
135         case 'f':
136             from = arg;
137             break;
138         case 't':
139             to = arg;
140             break;
141         case 'c':
142             if (cfile)
143                 fclose (cfile);
144             cfile = fopen(arg, "w");
145             break;
146         case 'x':
147             xml = YAZ_MARC_SIMPLEXML;
148             break;
149         case 'O':
150             xml = YAZ_MARC_OAIMARC;
151             break;
152         case 'e':
153             xml = YAZ_MARC_XCHANGE;
154             break;
155         case 'X':
156             xml = YAZ_MARC_MARCXML;
157             break;
158         case 'I':
159             xml = YAZ_MARC_ISO2709;
160             break;
161         case 'p':
162             print_offset = 1;
163             break;
164         case '2':
165             libxml_dom_test = 1;
166             break;
167         case 's':
168             split_fname = arg;
169             break;
170         case 0:
171             inf = fopen(arg, "rb");
172             count = 0;
173             if (!inf)
174             {
175                 fprintf (stderr, "%s: cannot open %s:%s\n",
176                          prog, arg, strerror (errno));
177                 exit(1);
178             }
179             if (cfile)
180                 fprintf (cfile, "char *marc_records[] = {\n");
181             if (1)
182             {
183                 yaz_marc_t mt = yaz_marc_create();
184                 yaz_iconv_t cd = 0;
185                 int marc_no = 0;
186
187                 if (from && to)
188                 {
189                     cd = yaz_iconv_open(to, from);
190                     if (!cd)
191                     {
192                         fprintf(stderr, "conversion from %s to %s "
193                                 "unsupported\n", from, to);
194                         exit(2);
195                     }
196                     yaz_marc_iconv(mt, cd);
197                 }
198                 yaz_marc_xml(mt, xml);
199                 yaz_marc_debug(mt, verbose);
200                 for(;; marc_no++)
201                 {
202                     int len;
203                     char *result = 0;
204                     int rlen;
205                     
206                     r = fread (buf, 1, 5, inf);
207                     if (r < 5)
208                     {
209                         if (r && print_offset && verbose)
210                             printf ("<!-- Extra %d bytes at end of file -->\n", r);
211                         break;
212                     }
213                     while (*buf < '0' || *buf > '9')
214                     {
215                         int i;
216                         long off = ftell(inf) - 5;
217                         if (verbose || print_offset)
218                             printf("<!-- Skipping bad byte %d (0x%02X) at offset "
219                                    "%ld (0x%lx) -->\n", 
220                                    *buf & 0xff, *buf & 0xff,
221                                    off, off);
222                         for (i = 0; i<4; i++)
223                             buf[i] = buf[i+1];
224                         r = fread(buf+4, 1, 1, inf);
225                         if (r < 1)
226                             break;
227                     }
228                     if (r < 1)
229                     {
230                         if (verbose || print_offset)
231                             printf ("<!-- End of file with data -->\n");
232                         break;
233                     }
234                     if (print_offset)
235                     {
236                         long off = ftell(inf) - 5;
237                         printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
238                                 num, off, off);
239                     }
240                     len = atoi_n(buf, 5);
241                     if (len < 25 || len > 100000)
242                     {
243                         long off = ftell(inf) - 5;
244                         printf("Bad Length %d read at offset %ld (%lx)\n",
245                                len, (long) off, (long) off);
246                         break;
247                     }
248                     rlen = len - 5;
249                     r = fread (buf + 5, 1, rlen, inf);
250                     if (r < rlen)
251                         break;
252                     if (split_fname)
253                     {
254                         char fname[256];
255                         FILE *sf;
256                         sprintf(fname, "%.200s%07d", split_fname, marc_no);
257                         sf = fopen(fname, "wb");
258                         if (!sf)
259                         {
260                             fprintf(stderr, "Could not open %s\n", fname);
261                             split_fname = 0;
262                         }
263                         else
264                         {
265                             if (fwrite(buf, 1, len, sf) != len)
266                             {
267                                 fprintf(stderr, "Could write content to %s\n",
268                                         fname);
269                                 split_fname = 0;
270                             }
271                             fclose(sf);
272                         }
273                     }
274                     r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
275                     if (result)
276                         fwrite (result, rlen, 1, stdout);
277 #if HAVE_XML2
278                     if (r > 0 && libxml_dom_test)
279                     {
280                         xmlDocPtr doc = xmlParseMemory(result, rlen);
281                         if (!doc)
282                             fprintf(stderr, "xmLParseMemory failed\n");
283                         else
284                         {
285                             int i;
286                             xmlXPathContextPtr xpathCtx; 
287                             xmlXPathObjectPtr xpathObj; 
288                             static const char *xpathExpr[] = {
289                                 "/record/datafield[@tag='245']/subfield[@code='a']",
290                                 "/record/datafield[@tag='100']/subfield",
291                                 "/record/datafield[@tag='245']/subfield[@code='a']",
292                                 "/record/datafield[@tag='650']/subfield",
293                                 "/record/datafield[@tag='650']",
294                                 0};
295                             
296                             xpathCtx = xmlXPathNewContext(doc);
297
298                             for (i = 0; xpathExpr[i]; i++) {
299                                 xpathObj = xmlXPathEvalExpression(BAD_CAST xpathExpr[i], xpathCtx);
300                                 if(xpathObj == NULL) {
301                                     fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]);
302                                 }
303                                 else
304                                 {
305                                     print_xpath_nodes(xpathObj->nodesetval, stdout);
306                                     xmlXPathFreeObject(xpathObj);
307                                 }
308                             }
309                             xmlXPathFreeContext(xpathCtx); 
310                             xmlFreeDoc(doc);
311                         }
312                     }
313 #endif
314                     if (r > 0 && cfile)
315                     {
316                         char *p = buf;
317                         int i;
318                         if (count)
319                             fprintf (cfile, ",");
320                         fprintf (cfile, "\n");
321                         for (i = 0; i < r; i++)
322                         {
323                             if ((i & 15) == 0)
324                                 fprintf (cfile, "  \"");
325                             fprintf (cfile, "\\x%02X", p[i] & 255);
326                             
327                             if (i < r - 1 && (i & 15) == 15)
328                                 fprintf (cfile, "\"\n");
329                             
330                         }
331                         fprintf (cfile, "\"\n");
332                     }
333                     num++;
334                     if (verbose)
335                         printf("\n");
336                 }
337                 count++;
338                 if (cd)
339                     yaz_iconv_close(cd);
340                 yaz_marc_destroy(mt);
341             }
342             if (cfile)
343                 fprintf (cfile, "};\n");
344             fclose(inf);
345             break;
346         case 'v':
347             verbose++;
348             break;
349         default:
350             usage(prog);
351             exit (1);
352         }
353     }
354     if (cfile)
355         fclose (cfile);
356     if (!no)
357     {
358         usage(prog);
359         exit (1);
360     }
361     exit (0);
362 }
363 /*
364  * Local variables:
365  * c-basic-offset: 4
366  * indent-tabs-mode: nil
367  * End:
368  * vim: shiftwidth=4 tabstop=8 expandtab
369  */
370