New option for yaz-marcdump -lpos=value which allows setting a portion
[yaz-moved-to-github.git] / util / marcdump.c
1 /*
2  * Copyright (C) 1995-2006, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdump.c,v 1.42 2006-08-28 14:18:23 adam Exp $
6  */
7
8 #define _FILE_OFFSET_BITS 64
9
10 #if HAVE_CONFIG_H
11 #include <config.h>
12 #endif
13
14 #if YAZ_HAVE_XML2
15 #include <libxml/parser.h>
16 #include <libxml/tree.h>
17
18 #include <libxml/xpath.h>
19 #include <libxml/xpathInternals.h>
20
21 #endif
22
23 #include <stdio.h>
24 #include <stdlib.h>
25 #include <string.h>
26 #include <errno.h>
27 #include <assert.h>
28
29 #if HAVE_LOCALE_H
30 #include <locale.h>
31 #endif
32 #if HAVE_LANGINFO_H
33 #include <langinfo.h>
34 #endif
35
36 #include <yaz/marcdisp.h>
37 #include <yaz/yaz-util.h>
38 #include <yaz/xmalloc.h>
39 #include <yaz/options.h>
40
41 #ifndef SEEK_SET
42 #define SEEK_SET 0
43 #endif
44 #ifndef SEEK_END
45 #define SEEK_END 2
46 #endif
47
48
49 static char *prog; /* program name; set from argv[0] in main() and used in error messages */
50
/*
 * Print a one-line usage summary to stderr.
 *
 * prog: program name inserted at the start of the message.
 *
 * The function only prints; it does not terminate the process, so callers
 * decide which exit status to use afterwards.
 */
static void usage(const char *prog)
{
    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-X] [-e] "
             "[-I] [-l pos=value] [-v] [-s splitfname] file...\n",
             prog);
}
57
58 #if YAZ_HAVE_XML2
59 static void marcdump_read_xml(yaz_marc_t mt, const char *fname)
60 {
61     xmlNodePtr ptr;
62     xmlDocPtr doc = xmlParseFile(fname);
63     if (!doc)
64         return;
65
66     ptr = xmlDocGetRootElement(doc);
67     if (ptr)
68     {
69         int r;
70         WRBUF wrbuf = wrbuf_alloc();
71         r = yaz_marc_read_xml(mt, ptr);
72         if (r)
73             fprintf(stderr, "yaz_marc_read_xml failed\n");
74         
75         yaz_marc_write_mode(mt, wrbuf);
76
77         fputs(wrbuf_buf(wrbuf), stdout);
78
79         wrbuf_free(wrbuf, 1);
80     }
81     xmlFreeDoc(doc);
82 }
83 #endif
84
85 static void dump(const char *fname, const char *from, const char *to,
86                  int read_xml, int xml,
87                  int print_offset, const char *split_fname, int verbose,
88                  FILE *cfile, const char *leader_spec)
89 {
90     yaz_marc_t mt = yaz_marc_create();
91     yaz_iconv_t cd = 0;
92
93     if (yaz_marc_leader_spec(mt, leader_spec))
94     {
95         fprintf(stderr, "bad leader spec: %s\n", leader_spec);
96         yaz_marc_destroy(mt);
97         exit(2);
98     }
99     if (from && to)
100     {
101         cd = yaz_iconv_open(to, from);
102         if (!cd)
103         {
104             fprintf(stderr, "conversion from %s to %s "
105                     "unsupported\n", from, to);
106             yaz_marc_destroy(mt);
107             exit(2);
108         }
109         yaz_marc_iconv(mt, cd);
110     }
111     yaz_marc_xml(mt, xml);
112     yaz_marc_debug(mt, verbose);
113
114     if (read_xml)
115     {
116 #if YAZ_HAVE_XML2
117         marcdump_read_xml(mt, fname);
118 #else
119         return;
120 #endif
121     }
122     else
123     {
124         FILE *inf = fopen(fname, "rb");
125         int count = 0;
126         int num = 1;
127         if (!inf)
128         {
129             fprintf (stderr, "%s: cannot open %s:%s\n",
130                      prog, fname, strerror (errno));
131             exit(1);
132         }
133         if (cfile)
134             fprintf (cfile, "char *marc_records[] = {\n");
135         if (1)
136         {
137             int marc_no = 0;
138             for(;; marc_no++)
139             {
140                 size_t len;
141                 char *result = 0;
142                 size_t rlen;
143                 size_t r;
144                 char buf[100001];
145                 
146                 r = fread (buf, 1, 5, inf);
147                 if (r < 5)
148                 {
149                     if (r && print_offset && verbose)
150                         printf ("<!-- Extra %ld bytes at end of file -->\n",
151                                 (long) r);
152                     break;
153                 }
154                 while (*buf < '0' || *buf > '9')
155                 {
156                     int i;
157                     long off = ftell(inf) - 5;
158                     if (verbose || print_offset)
159                         printf("<!-- Skipping bad byte %d (0x%02X) at offset "
160                                "%ld (0x%lx) -->\n", 
161                                *buf & 0xff, *buf & 0xff,
162                                off, off);
163                     for (i = 0; i<4; i++)
164                         buf[i] = buf[i+1];
165                     r = fread(buf+4, 1, 1, inf);
166                     if (r < 1)
167                         break;
168                 }
169                 if (r < 1)
170                 {
171                     if (verbose || print_offset)
172                         printf ("<!-- End of file with data -->\n");
173                     break;
174                 }
175                 if (print_offset)
176                 {
177                     long off = ftell(inf) - 5;
178                     printf ("<!-- Record %d offset %ld (0x%lx) -->\n",
179                             num, off, off);
180                 }
181                 len = atoi_n(buf, 5);
182                 if (len < 25 || len > 100000)
183                 {
184                     long off = ftell(inf) - 5;
185                     printf("Bad Length %d read at offset %ld (%lx)\n",
186                            len, (long) off, (long) off);
187                     break;
188                 }
189                 rlen = len - 5;
190                 r = fread (buf + 5, 1, rlen, inf);
191                 if (r < rlen)
192                     break;
193                 if (split_fname)
194                 {
195                     char fname[256];
196                     FILE *sf;
197                     sprintf(fname, "%.200s%07d", split_fname, marc_no);
198                     sf = fopen(fname, "wb");
199                     if (!sf)
200                     {
201                         fprintf(stderr, "Could not open %s\n", fname);
202                         split_fname = 0;
203                     }
204                     else
205                     {
206                         if (fwrite(buf, 1, len, sf) != len)
207                         {
208                             fprintf(stderr, "Could write content to %s\n",
209                                     fname);
210                             split_fname = 0;
211                         }
212                         fclose(sf);
213                     }
214                 }
215                 {
216                     int rlentmp = (int) rlen;
217                     r = yaz_marc_decode_buf(mt, buf, -1, &result, &rlentmp);
218                     rlen = (size_t) rlentmp;
219                 }
220                 if (r > 0 && result)
221                 {
222                     fwrite (result, rlen, 1, stdout);
223                 }
224                 if (r > 0 && cfile)
225                 {
226                     char *p = buf;
227                     size_t i;
228                     if (count)
229                         fprintf (cfile, ",");
230                     fprintf (cfile, "\n");
231                     for (i = 0; i < r; i++)
232                     {
233                         if ((i & 15) == 0)
234                             fprintf (cfile, "  \"");
235                         fprintf (cfile, "\\x%02X", p[i] & 255);
236                         
237                         if (i < r - 1 && (i & 15) == 15)
238                             fprintf (cfile, "\"\n");
239                         
240                     }
241                     fprintf (cfile, "\"\n");
242                 }
243                 num++;
244                 if (verbose)
245                     printf("\n");
246             }
247             count++;
248         }
249         if (cfile)
250             fprintf (cfile, "};\n");
251         fclose(inf);
252     }
253     if (cd)
254         yaz_iconv_close(cd);
255     yaz_marc_destroy(mt);
256 }
257
258 int main (int argc, char **argv)
259 {
260     int r;
261     int print_offset = 0;
262     char *arg;
263     int verbose = 0;
264     int no = 0;
265     int xml = 0;
266     FILE *cfile = 0;
267     char *from = 0, *to = 0;
268     int read_xml = 0;
269     const char *split_fname = 0;
270     const char *leader_spec = 0;
271     
272 #if HAVE_LOCALE_H
273     setlocale(LC_CTYPE, "");
274 #endif
275 #if HAVE_LANGINFO_H
276 #ifdef CODESET
277     to = nl_langinfo(CODESET);
278 #endif
279 #endif
280
281     prog = *argv;
282     while ((r = options("pvc:xOeXIf:t:s:l:", argv, argc, &arg)) != -2)
283     {
284         no++;
285         switch (r)
286         {
287         case 'l':
288             leader_spec = arg;
289             break;
290         case 'f':
291             from = arg;
292             break;
293         case 't':
294             to = arg;
295             break;
296         case 'c':
297             if (cfile)
298                 fclose (cfile);
299             cfile = fopen(arg, "w");
300             break;
301         case 'x':
302 #if YAZ_HAVE_XML2
303             read_xml = 1;
304 #else
305             fprintf(stderr, "%s: -x not supported."
306                     " YAZ not compiled with Libxml2 support\n", prog);
307             exit(3);
308 #endif
309             break;
310         case 'O':
311             fprintf(stderr, "%s: OAI MARC no longer supported."
312                     " Use MARCXML instead.\n", prog);
313             exit(1);
314             break;
315         case 'e':
316             xml = YAZ_MARC_XCHANGE;
317             break;
318         case 'X':
319             xml = YAZ_MARC_MARCXML;
320             break;
321         case 'I':
322             xml = YAZ_MARC_ISO2709;
323             break;
324         case 'p':
325             print_offset = 1;
326             break;
327         case 's':
328             split_fname = arg;
329             break;
330         case 0:
331             dump(arg, from, to, read_xml, xml,
332                  print_offset, split_fname, verbose, cfile, leader_spec);
333             break;
334         case 'v':
335             verbose++;
336             break;
337         default:
338             usage(prog);
339             exit (1);
340         }
341     }
342     if (cfile)
343         fclose (cfile);
344     if (!no)
345     {
346         usage(prog);
347         exit (1);
348     }
349     exit (0);
350 }
351 /*
352  * Local variables:
353  * c-basic-offset: 4
354  * indent-tabs-mode: nil
355  * End:
356  * vim: shiftwidth=4 tabstop=8 expandtab
357  */
358