LFS mode for yaz-marcdump so that it can read large ISO2709 files
[yaz-moved-to-github.git] / util / marcdump.c
1 /*
2  * Copyright (C) 1995-2005, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdump.c,v 1.31 2005-06-09 18:47:07 adam Exp $
6  */
7
8 #define _FILE_OFFSET_BITS 64
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #if HAVE_XML2
15 #include <libxml/parser.h>
16 #include <libxml/tree.h>
17
18 #include <libxml/xpath.h>
19 #include <libxml/xpathInternals.h>
20
21 #endif
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <errno.h>
27 #include <assert.h>
28
29 #if HAVE_LOCALE_H
30 #include <locale.h>
31 #endif
32 #if HAVE_LANGINFO_H
33 #include <langinfo.h>
34 #endif
35
36 #include <yaz/marcdisp.h>
37 #include <yaz/yaz-util.h>
38 #include <yaz/xmalloc.h>
39 #include <yaz/options.h>
40
41 #ifndef SEEK_SET
42 #define SEEK_SET 0
43 #endif
44 #ifndef SEEK_END
45 #define SEEK_END 2
46 #endif
47
/*
 * Print a one-line command synopsis to stderr.
 *
 * prog: program name substituted into the synopsis (main() passes argv[0]).
 *
 * The synopsis lists the user-facing options accepted by main(), including
 * -p (print record offsets), which was previously missing from the text.
 */
static void usage(const char *prog)
{
    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-e] [-I] [-p] [-v] file...\n",
             prog);
}
53
54 #if HAVE_XML2
55 void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) {
56     xmlNodePtr cur;
57     int size;
58     int i;
59     
60     assert(output);
61     size = (nodes) ? nodes->nodeNr : 0;
62     
63     fprintf(output, "Result (%d nodes):\n", size);
64     for(i = 0; i < size; ++i) {
65         assert(nodes->nodeTab[i]);
66         
67         if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL)
68         {
69             xmlNsPtr ns;
70             
71             ns = (xmlNsPtr)nodes->nodeTab[i];
72             cur = (xmlNodePtr)ns->next;
73             if(cur->ns) { 
74                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", 
75                     ns->prefix, ns->href, cur->ns->href, cur->name);
76             } else {
77                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", 
78                     ns->prefix, ns->href, cur->name);
79             }
80         } 
81         else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE)
82         {
83             cur = nodes->nodeTab[i];        
84             if(cur->ns) { 
85                 fprintf(output, "= element node \"%s:%s\"\n", 
86                     cur->ns->href, cur->name);
87             } 
88             else
89             {
90                 fprintf(output, "= element node \"%s\"\n", 
91                     cur->name);
92             }
93         }
94         else
95         {
96             cur = nodes->nodeTab[i];    
97             fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type);
98         }
99     }
100 }
101 #endif
102
103 int main (int argc, char **argv)
104 {
105     int r;
106     int libxml_dom_test = 0;
107     int print_offset = 0;
108     char *arg;
109     int verbose = 0;
110     FILE *inf;
111     char buf[100001];
112     char *prog = *argv;
113     int no = 0;
114     int xml = 0;
115     FILE *cfile = 0;
116     char *from = 0, *to = 0;
117     int num = 1;
118     
119 #if HAVE_LOCALE_H
120     setlocale(LC_CTYPE, "");
121 #endif
122 #if HAVE_LANGINFO_H
123 #ifdef CODESET
124     to = nl_langinfo(CODESET);
125 #endif
126 #endif
127
128     while ((r = options("pvc:xOeXIf:t:2", argv, argc, &arg)) != -2)
129     {
130         int count;
131         no++;
132         switch (r)
133         {
134         case 'f':
135             from = arg;
136             break;
137         case 't':
138             to = arg;
139             break;
140         case 'c':
141             if (cfile)
142                 fclose (cfile);
143             cfile = fopen(arg, "w");
144             break;
145         case 'x':
146             xml = YAZ_MARC_SIMPLEXML;
147             break;
148         case 'O':
149             xml = YAZ_MARC_OAIMARC;
150             break;
151         case 'e':
152             xml = YAZ_MARC_XCHANGE;
153             break;
154         case 'X':
155             xml = YAZ_MARC_MARCXML;
156             break;
157         case 'I':
158             xml = YAZ_MARC_ISO2709;
159             break;
160         case 'p':
161             print_offset = 1;
162             break;
163         case '2':
164             libxml_dom_test = 1;
165             break;
166         case 0:
167             inf = fopen(arg, "rb");
168             count = 0;
169             if (!inf)
170             {
171                 fprintf (stderr, "%s: cannot open %s:%s\n",
172                          prog, arg, strerror (errno));
173                 exit(1);
174             }
175             if (cfile)
176                 fprintf (cfile, "char *marc_records[] = {\n");
177             if (1)
178             {
179                 yaz_marc_t mt = yaz_marc_create();
180                 yaz_iconv_t cd = 0;
181
182                 if (from && to)
183                 {
184                     cd = yaz_iconv_open(to, from);
185                     if (!cd)
186                     {
187                         fprintf(stderr, "conversion from %s to %s "
188                                 "unsupported\n", from, to);
189                         exit(2);
190                     }
191                     yaz_marc_iconv(mt, cd);
192                 }
193                 yaz_marc_xml(mt, xml);
194                 yaz_marc_debug(mt, verbose);
195                 while (1)
196                 {
197                     int len;
198                     char *result = 0;
199                     int rlen;
200                     
201                     r = fread (buf, 1, 5, inf);
202                     if (r < 5)
203                     {
204                         if (r && print_offset && verbose)
205                             printf ("<!-- Extra %d bytes at end of file -->\n", r);
206                         break;
207                     }
208                     while (*buf < '0' || *buf > '9')
209                     {
210                         int i;
211                         long off = ftell(inf) - 5;
212                         if (verbose || print_offset)
213                             printf("<!-- Skipping bad byte %d (0x%02X) at offset "
214                                    "%ld (0x%lx) -->\n", 
215                                    *buf & 0xff, *buf & 0xff,
216                                    off, off);
217                         for (i = 0; i<4; i++)
218                             buf[i] = buf[i+1];
219                         r = fread(buf+4, 1, 1, inf);
220                         if (r < 1)
221                             break;
222                     }
223                     if (r < 1)
224                     {
225                         if (verbose || print_offset)
226                             printf ("<!-- End of file with data -->\n");
227                         break;
228                     }
229                     if (print_offset)
230                     {
231                         long off = ftell(inf) - 5;
232                         printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
233                                 num, off, off);
234                     }
235                     len = atoi_n(buf, 5);
236                     if (len < 25 || len > 100000)
237                     {
238                         long off = ftell(inf) - 5;
239                         printf("Bad Length %d read at offset %ld (%lx)\n",
240                                len, (long) off, (long) off);
241                         break;
242                     }
243                     len = len - 5;
244                     r = fread (buf + 5, 1, len, inf);
245                     if (r < len)
246                         break;
247                     r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
248                     if (result)
249                         fwrite (result, rlen, 1, stdout);
250 #if HAVE_XML2
251                     if (r > 0 && libxml_dom_test)
252                     {
253                         xmlDocPtr doc = xmlParseMemory(result, rlen);
254                         if (!doc)
255                             fprintf(stderr, "xmLParseMemory failed\n");
256                         else
257                         {
258                             int i;
259                             xmlXPathContextPtr xpathCtx; 
260                             xmlXPathObjectPtr xpathObj; 
261                             static const char *xpathExpr[] = {
262                                 "/record/datafield[@tag='245']/subfield[@code='a']",
263                                 "/record/datafield[@tag='100']/subfield",
264                                 "/record/datafield[@tag='245']/subfield[@code='a']",
265                                 "/record/datafield[@tag='650']/subfield",
266                                 "/record/datafield[@tag='650']",
267                                 0};
268                             
269                             xpathCtx = xmlXPathNewContext(doc);
270
271                             for (i = 0; xpathExpr[i]; i++) {
272                                 xpathObj = xmlXPathEvalExpression(xpathExpr[i], xpathCtx);
273                                 if(xpathObj == NULL) {
274                                     fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]);
275                                 }
276                                 else
277                                 {
278                                     print_xpath_nodes(xpathObj->nodesetval, stdout);
279                                     xmlXPathFreeObject(xpathObj);
280                                 }
281                             }
282                             xmlXPathFreeContext(xpathCtx); 
283                             xmlFreeDoc(doc);
284                         }
285                     }
286 #endif
287                     if (r > 0 && cfile)
288                     {
289                         char *p = buf;
290                         int i;
291                         if (count)
292                             fprintf (cfile, ",");
293                         fprintf (cfile, "\n");
294                         for (i = 0; i < r; i++)
295                         {
296                             if ((i & 15) == 0)
297                                 fprintf (cfile, "  \"");
298                             fprintf (cfile, "\\x%02X", p[i] & 255);
299                             
300                             if (i < r - 1 && (i & 15) == 15)
301                                 fprintf (cfile, "\"\n");
302                             
303                         }
304                         fprintf (cfile, "\"\n");
305                     }
306                     num++;
307                     if (verbose)
308                         printf("\n");
309                 }
310                 count++;
311                 if (cd)
312                     yaz_iconv_close(cd);
313                 yaz_marc_destroy(mt);
314             }
315             if (cfile)
316                 fprintf (cfile, "};\n");
317             fclose(inf);
318             break;
319         case 'v':
320             verbose++;
321             break;
322         default:
323             usage(prog);
324             exit (1);
325         }
326     }
327     if (cfile)
328         fclose (cfile);
329     if (!no)
330     {
331         usage(prog);
332         exit (1);
333     }
334     exit (0);
335 }