Added MarcXchange support.
[yaz-moved-to-github.git] / util / marcdump.c
1 /*
2  * Copyright (C) 1995-2005, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdump.c,v 1.27 2005-02-08 13:51:31 adam Exp $
6  */
7
8 #if HAVE_CONFIG_H
9 #include <config.h>
10 #endif
11
12 #if HAVE_XML2
13 #include <libxml/parser.h>
14 #include <libxml/tree.h>
15
16 #include <libxml/xpath.h>
17 #include <libxml/xpathInternals.h>
18
19 #endif
20
21 #include <stdio.h>
22 #include <stdlib.h>
23 #include <string.h>
24 #include <errno.h>
25 #include <assert.h>
26
27 #if HAVE_LOCALE_H
28 #include <locale.h>
29 #endif
30 #if HAVE_LANGINFO_H
31 #include <langinfo.h>
32 #endif
33
34 #include <yaz/marcdisp.h>
35 #include <yaz/yaz-util.h>
36 #include <yaz/xmalloc.h>
37 #include <yaz/options.h>
38
39 #ifndef SEEK_SET
40 #define SEEK_SET 0
41 #endif
42 #ifndef SEEK_END
43 #define SEEK_END 2
44 #endif
45
/*
 * Print a one-line usage summary to stderr.
 *
 * prog: program name substituted into the message.
 *
 * The function only prints; callers decide whether to exit afterwards.
 */
static void usage(const char *prog)
{
    fprintf (stderr, "Usage: %s [-c cfile] [-f from] [-t to] [-x] [-O] [-X] [-e] [-I] [-v] file...\n",
             prog);
}
51
52 #if HAVE_XML2
53 void print_xpath_nodes(xmlNodeSetPtr nodes, FILE* output) {
54     xmlNodePtr cur;
55     int size;
56     int i;
57     
58     assert(output);
59     size = (nodes) ? nodes->nodeNr : 0;
60     
61     fprintf(output, "Result (%d nodes):\n", size);
62     for(i = 0; i < size; ++i) {
63         assert(nodes->nodeTab[i]);
64         
65         if(nodes->nodeTab[i]->type == XML_NAMESPACE_DECL)
66         {
67             xmlNsPtr ns;
68             
69             ns = (xmlNsPtr)nodes->nodeTab[i];
70             cur = (xmlNodePtr)ns->next;
71             if(cur->ns) { 
72                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s:%s\n", 
73                     ns->prefix, ns->href, cur->ns->href, cur->name);
74             } else {
75                 fprintf(output, "= namespace \"%s\"=\"%s\" for node %s\n", 
76                     ns->prefix, ns->href, cur->name);
77             }
78         } 
79         else if(nodes->nodeTab[i]->type == XML_ELEMENT_NODE)
80         {
81             cur = nodes->nodeTab[i];        
82             if(cur->ns) { 
83                 fprintf(output, "= element node \"%s:%s\"\n", 
84                     cur->ns->href, cur->name);
85             } 
86             else
87             {
88                 fprintf(output, "= element node \"%s\"\n", 
89                     cur->name);
90             }
91         }
92         else
93         {
94             cur = nodes->nodeTab[i];    
95             fprintf(output, "= node \"%s\": type %d\n", cur->name, cur->type);
96         }
97     }
98 }
99 #endif
100
101 int main (int argc, char **argv)
102 {
103     int r;
104     int libxml_dom_test = 0;
105     int print_offset = 0;
106     char *arg;
107     int verbose = 0;
108     FILE *inf;
109     char buf[100001];
110     char *prog = *argv;
111     int no = 0;
112     int xml = 0;
113     FILE *cfile = 0;
114     char *from = 0, *to = 0;
115     int num = 1;
116     
117 #if HAVE_LOCALE_H
118     setlocale(LC_CTYPE, "");
119 #endif
120 #if HAVE_LANGINFO_H
121 #ifdef CODESET
122     to = nl_langinfo(CODESET);
123 #endif
124 #endif
125
126     while ((r = options("pvc:xOeXIf:t:2", argv, argc, &arg)) != -2)
127     {
128         int count;
129         no++;
130         switch (r)
131         {
132         case 'f':
133             from = arg;
134             break;
135         case 't':
136             to = arg;
137             break;
138         case 'c':
139             if (cfile)
140                 fclose (cfile);
141             cfile = fopen (arg, "w");
142             break;
143         case 'x':
144             xml = YAZ_MARC_SIMPLEXML;
145             break;
146         case 'O':
147             xml = YAZ_MARC_OAIMARC;
148             break;
149         case 'e':
150             xml = YAZ_MARC_XCHANGE;
151             break;
152         case 'X':
153             xml = YAZ_MARC_MARCXML;
154             break;
155         case 'I':
156             xml = YAZ_MARC_ISO2709;
157             break;
158         case 'p':
159             print_offset = 1;
160             break;
161         case '2':
162             libxml_dom_test = 1;
163             break;
164         case 0:
165             inf = fopen (arg, "rb");
166             count = 0;
167             if (!inf)
168             {
169                 fprintf (stderr, "%s: cannot open %s:%s\n",
170                          prog, arg, strerror (errno));
171                 exit(1);
172             }
173             if (cfile)
174                 fprintf (cfile, "char *marc_records[] = {\n");
175             if (1)
176             {
177                 yaz_marc_t mt = yaz_marc_create();
178                 yaz_iconv_t cd = 0;
179
180                 if (from && to)
181                 {
182                     cd = yaz_iconv_open(to, from);
183                     if (!cd)
184                     {
185                         fprintf(stderr, "conversion from %s to %s "
186                                 "unsupported\n", from, to);
187                         exit(2);
188                     }
189                     yaz_marc_iconv(mt, cd);
190                 }
191                 yaz_marc_xml(mt, xml);
192                 yaz_marc_debug(mt, verbose);
193                 while (1)
194                 {
195                     int len;
196                     char *result;
197                     int rlen;
198                     
199                     r = fread (buf, 1, 5, inf);
200                     if (r < 5)
201                     {
202                         if (r && print_offset)
203                             printf ("Extra %d bytes", r);
204                         break;
205                     }
206                     if (print_offset)
207                     {
208                         long off = ftell(inf);
209                         printf ("Record %d offset %ld\n", num, (long) off);
210                     }
211                     len = atoi_n(buf, 5);
212                     if (len < 25 || len > 100000)
213                         break;
214                     len = len - 5;
215                     r = fread (buf + 5, 1, len, inf);
216                     if (r < len)
217                         break;
218                     r = yaz_marc_decode_buf (mt, buf, -1, &result, &rlen);
219                     if (r <= 0)
220                         break;
221                     fwrite (result, rlen, 1, stdout);
222 #if HAVE_XML2
223                     if (libxml_dom_test)
224                     {
225                         xmlDocPtr doc = xmlParseMemory(result, rlen);
226                         if (!doc)
227                             fprintf(stderr, "xmLParseMemory failed\n");
228                         else
229                         {
230                             int i;
231                             xmlXPathContextPtr xpathCtx; 
232                             xmlXPathObjectPtr xpathObj; 
233                             static const char *xpathExpr[] = {
234                                 "/record/datafield[@tag='245']/subfield[@code='a']",
235                                 "/record/datafield[@tag='100']/subfield",
236                                 "/record/datafield[@tag='245']/subfield[@code='a']",
237                                 "/record/datafield[@tag='650']/subfield",
238                                 "/record/datafield[@tag='650']",
239                                 0};
240                             
241                             xpathCtx = xmlXPathNewContext(doc);
242
243                             for (i = 0; xpathExpr[i]; i++) {
244                                 xpathObj = xmlXPathEvalExpression(xpathExpr[i], xpathCtx);
245                                 if(xpathObj == NULL) {
246                                     fprintf(stderr,"Error: unable to evaluate xpath expression \"%s\"\n", xpathExpr[i]);
247                                 }
248                                 else
249                                 {
250                                     print_xpath_nodes(xpathObj->nodesetval, stdout);
251                                     xmlXPathFreeObject(xpathObj);
252                                 }
253                             }
254                             xmlXPathFreeContext(xpathCtx); 
255                             xmlFreeDoc(doc);
256                         }
257                     }
258 #endif
259                     if (cfile)
260                     {
261                         char *p = buf;
262                         int i;
263                         if (count)
264                             fprintf (cfile, ",");
265                         fprintf (cfile, "\n");
266                         for (i = 0; i < r; i++)
267                         {
268                             if ((i & 15) == 0)
269                                 fprintf (cfile, "  \"");
270                             fprintf (cfile, "\\x%02X", p[i] & 255);
271                             
272                             if (i < r - 1 && (i & 15) == 15)
273                                 fprintf (cfile, "\"\n");
274                             
275                         }
276                         fprintf (cfile, "\"\n");
277                     }
278                     num++;
279                 }
280                 count++;
281                 if (cd)
282                     yaz_iconv_close(cd);
283                 yaz_marc_destroy(mt);
284             }
285             if (cfile)
286                 fprintf (cfile, "};\n");
287             fclose(inf);
288             break;
289         case 'v':
290             verbose++;
291             break;
292         default:
293             usage(prog);
294             exit (1);
295         }
296     }
297     if (cfile)
298         fclose (cfile);
299     if (!no)
300     {
301         usage(prog);
302         exit (1);
303     }
304     exit (0);
305 }