yaz-marcdump may dump raw records separately using -s splitfname option
[yaz-moved-to-github.git] / util / marcdump.c
1 /*
2  * Copyright (C) 1995-2005, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdump.c,v 1.34 2005-12-17 20:22:01 adam Exp $
6  */
7
8 #define _FILE_OFFSET_BITS 64
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #if HAVE_XML2
15 #include <libxml/parser.h>
16 #include <libxml/tree.h>
17
18 #include <libxml/xpath.h>
19 #include <libxml/xpathInternals.h>
20
21 #endif
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <errno.h>
27 #include <assert.h>
28
29 #if HAVE_LOCALE_H
30 #include <locale.h>
31 #endif
32 #if HAVE_LANGINFO_H
33 #include <langinfo.h>
34 #endif
35
36 #include <yaz/marcdisp.h>
37 #include <yaz/yaz-util.h>
38 #include <yaz/xmalloc.h>
39 #include <yaz/options.h>
40
41 #ifndef SEEK_SET
42 #define SEEK_SET 0
43 #endif
44 #ifndef SEEK_END
45 #define SEEK_END 2
46 #endif
47
/*
 * usage: print a one-line synopsis of the command-line options to stderr.
 *
 * prog is the program name shown at the start of the synopsis.
 *
 * The synopsis lists the user-facing options parsed by main, including
 * -p (print record offsets) and -s splitfname (write each raw record to
 * its own file).
 */
static void usage(const char *prog)
{
    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] "
             "[-e] [-I] [-p] [-v] [-s splitfname] file...\n",
             prog);
}

54 #if HAVE_XML2
55 void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) {
56     xmlNodePtr cur;
57     int size;
58     int i;
59     
60     assert(output);
61     size = (nodes) ? nodes->nodeNr : 0;
62     
63     fprintf(output, "Result (%d nodes):\n", size);
64     for(i = 0; i < size; ++i) {
65         assert(nodes->nodeTab[i]);
66         
67         if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL)
68         {
69             xmlNsPtr ns;
70             
71             ns = (xmlNsPtr)nodes->nodeTab[i];
72             cur = (xmlNodePtr)ns->next;
73             if(cur->ns) { 
74                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", 
75                     ns->prefix, ns->href, cur->ns->href, cur->name);
76             } else {
77                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", 
78                     ns->prefix, ns->href, cur->name);
79             }
80         } 
81         else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE)
82         {
83             cur = nodes->nodeTab[i];        
84             if(cur->ns) { 
85                 fprintf(output, "= element node \"%s:%s\"\n", 
86                     cur->ns->href, cur->name);
87             } 
88             else
89             {
90                 fprintf(output, "= element node \"%s\"\n", 
91                     cur->name);
92             }
93         }
94         else
95         {
96             cur = nodes->nodeTab[i];    
97             fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type);
98         }
99     }
100 }
101 #endif
102
103 int main (int argc, char **argv)
104 {
105     int r;
106     int libxml_dom_test = 0;
107     int print_offset = 0;
108     char *arg;
109     int verbose = 0;
110     FILE *inf;
111     char buf[100001];
112     char *prog = *argv;
113     int no = 0;
114     int xml = 0;
115     FILE *cfile = 0;
116     char *from = 0, *to = 0;
117     int num = 1;
118     const char *split_fname = 0;
119     
120 #if HAVE_LOCALE_H
121     setlocale(LC_CTYPE, "");
122 #endif
123 #if HAVE_LANGINFO_H
124 #ifdef CODESET
125     to = nl_langinfo(CODESET);
126 #endif
127 #endif
128
129     while ((r = options("pvc:xOeXIf:t:2s:", argv, argc, &arg)) != -2)
130     {
131         int count;
132         no++;
133         switch (r)
134         {
135         case 'f':
136             from = arg;
137             break;
138         case 't':
139             to = arg;
140             break;
141         case 'c':
142             if (cfile)
143                 fclose (cfile);
144             cfile = fopen(arg, "w");
145             break;
146         case 'x':
147             xml = YAZ_MARC_SIMPLEXML;
148             break;
149         case 'O':
150             xml = YAZ_MARC_OAIMARC;
151             break;
152         case 'e':
153             xml = YAZ_MARC_XCHANGE;
154             break;
155         case 'X':
156             xml = YAZ_MARC_MARCXML;
157             break;
158         case 'I':
159             xml = YAZ_MARC_ISO2709;
160             break;
161         case 'p':
162             print_offset = 1;
163             break;
164         case '2':
165             libxml_dom_test = 1;
166             break;
167         case 's':
168             split_fname = arg;
169             break;
170         case 0:
171             inf = fopen(arg, "rb");
172             count = 0;
173             if (!inf)
174             {
175                 fprintf (stderr, "%s: cannot open %s:%s\n",
176                          prog, arg, strerror (errno));
177                 exit(1);
178             }
179             if (cfile)
180                 fprintf (cfile, "char *marc_records[] = {\n");
181             if (1)
182             {
183                 yaz_marc_t mt = yaz_marc_create();
184                 yaz_iconv_t cd = 0;
185                 int marc_no = 0;
186
187                 if (from && to)
188                 {
189                     cd = yaz_iconv_open(to, from);
190                     if (!cd)
191                     {
192                         fprintf(stderr, "conversion from %s to %s "
193                                 "unsupported\n", from, to);
194                         exit(2);
195                     }
196                     yaz_marc_iconv(mt, cd);
197                 }
198                 yaz_marc_xml(mt, xml);
199                 yaz_marc_debug(mt, verbose);
200                 for(;; marc_no++)
201                 {
202                     int len;
203                     char *result = 0;
204                     int rlen;
205                     
206                     r = fread (buf, 1, 5, inf);
207                     if (r < 5)
208                     {
209                         if (r && print_offset && verbose)
210                             printf ("<!-- Extra %d bytes at end of file -->\n", r);
211                         break;
212                     }
213                     while (*buf < '0' || *buf > '9')
214                     {
215                         int i;
216                         long off = ftell(inf) - 5;
217                         if (verbose || print_offset)
218                             printf("<!-- Skipping bad byte %d (0x%02X) at offset "
219                                    "%ld (0x%lx) -->\n", 
220                                    *buf & 0xff, *buf & 0xff,
221                                    off, off);
222                         for (i = 0; i<4; i++)
223                             buf[i] = buf[i+1];
224                         r = fread(buf+4, 1, 1, inf);
225                         if (r < 1)
226                             break;
227                     }
228                     if (r < 1)
229                     {
230                         if (verbose || print_offset)
231                             printf ("<!-- End of file with data -->\n");
232                         break;
233                     }
234                     if (print_offset)
235                     {
236                         long off = ftell(inf) - 5;
237                         printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
238                                 num, off, off);
239                     }
240                     len = atoi_n(buf, 5);
241                     if (len < 25 || len > 100000)
242                     {
243                         long off = ftell(inf) - 5;
244                         printf("Bad Length %d read at offset %ld (%lx)\n",
245                                len, (long) off, (long) off);
246                         break;
247                     }
248                     rlen = len - 5;
249                     r = fread (buf + 5, 1, rlen, inf);
250                     if (r < rlen)
251                         break;
252                     if (split_fname)
253                     {
254                         char fname[256];
255                         sprintf(fname, "%.200s%07d", split_fname, marc_no);
256                         FILE *sf = fopen(fname, "wb");
257                         if (!sf)
258                         {
259                             fprintf(stderr, "Could not open %s\n", fname);
260                             split_fname = 0;
261                         }
262                         else
263                         {
264                             if (fwrite(buf, 1, len, sf) != len)
265                             {
266                                 fprintf(stderr, "Could write content to %s\n",
267                                         fname);
268                                 split_fname = 0;
269                             }
270                             fclose(sf);
271                         }
272                     }
273                     r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
274                     if (result)
275                         fwrite (result, rlen, 1, stdout);
276 #if HAVE_XML2
277                     if (r > 0 && libxml_dom_test)
278                     {
279                         xmlDocPtr doc = xmlParseMemory(result, rlen);
280                         if (!doc)
281                             fprintf(stderr, "xmLParseMemory failed\n");
282                         else
283                         {
284                             int i;
285                             xmlXPathContextPtr xpathCtx; 
286                             xmlXPathObjectPtr xpathObj; 
287                             static const char *xpathExpr[] = {
288                                 "/record/datafield[@tag='245']/subfield[@code='a']",
289                                 "/record/datafield[@tag='100']/subfield",
290                                 "/record/datafield[@tag='245']/subfield[@code='a']",
291                                 "/record/datafield[@tag='650']/subfield",
292                                 "/record/datafield[@tag='650']",
293                                 0};
294                             
295                             xpathCtx = xmlXPathNewContext(doc);
296
297                             for (i = 0; xpathExpr[i]; i++) {
298                                 xpathObj = xmlXPathEvalExpression(BAD_CAST xpathExpr[i], xpathCtx);
299                                 if(xpathObj == NULL) {
300                                     fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]);
301                                 }
302                                 else
303                                 {
304                                     print_xpath_nodes(xpathObj->nodesetval, stdout);
305                                     xmlXPathFreeObject(xpathObj);
306                                 }
307                             }
308                             xmlXPathFreeContext(xpathCtx); 
309                             xmlFreeDoc(doc);
310                         }
311                     }
312 #endif
313                     if (r > 0 && cfile)
314                     {
315                         char *p = buf;
316                         int i;
317                         if (count)
318                             fprintf (cfile, ",");
319                         fprintf (cfile, "\n");
320                         for (i = 0; i < r; i++)
321                         {
322                             if ((i & 15) == 0)
323                                 fprintf (cfile, "  \"");
324                             fprintf (cfile, "\\x%02X", p[i] & 255);
325                             
326                             if (i < r - 1 && (i & 15) == 15)
327                                 fprintf (cfile, "\"\n");
328                             
329                         }
330                         fprintf (cfile, "\"\n");
331                     }
332                     num++;
333                     if (verbose)
334                         printf("\n");
335                 }
336                 count++;
337                 if (cd)
338                     yaz_iconv_close(cd);
339                 yaz_marc_destroy(mt);
340             }
341             if (cfile)
342                 fprintf (cfile, "};\n");
343             fclose(inf);
344             break;
345         case 'v':
346             verbose++;
347             break;
348         default:
349             usage(prog);
350             exit (1);
351         }
352     }
353     if (cfile)
354         fclose (cfile);
355     if (!no)
356     {
357         usage(prog);
358         exit (1);
359     }
360     exit (0);
361 }
362 /*
363  * Local variables:
364  * c-basic-offset: 4
365  * indent-tabs-mode: nil
366  * End:
367  * vim: shiftwidth=4 tabstop=8 expandtab
368  */
369