Fixed bug #2352: yaz-marcdump crashes for certain records.
[yaz-moved-to-github.git] / src / marcdisp.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2008 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marcdisp.c
8  * \brief Implements MARC conversion utilities
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
29
30 #if YAZ_HAVE_XML2
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
33 #endif
34
/** \brief tracks whether XML records are wrapped in a collection element */
enum yaz_collection_state {
    no_collection,     /* default: each record carries its own xmlns */
    collection_first,  /* collection enabled, opening tag not written yet */
    collection_second  /* opening tag written; the trailer closes it */
};
40    
/** \brief node types for yaz_marc_node

    The value is stored in yaz_marc_node.which and selects the active
    member of the node's union.
*/
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,     /* u.datafield is valid */
    YAZ_MARC_CONTROLFIELD,  /* u.controlfield is valid */
    YAZ_MARC_COMMENT,       /* u.comment is valid */
    YAZ_MARC_LEADER         /* u.leader is valid */
};
49
/** \brief represents a data field */
struct yaz_marc_datafield {
    char *tag;        /* field tag, NUL-terminated */
    char *indicator;  /* indicator characters; the XML writers emit one
                         indN attribute per character */
    struct yaz_marc_subfield *subfields;  /* head of subfield list, 0 if none */
};
56
/** \brief represents a control field */
struct yaz_marc_controlfield {
    char *tag;   /* field tag, NUL-terminated */
    char *data;  /* field content, NUL-terminated */
};
62
/** \brief a comment node

    NOTE(review): nothing in the visible part of this file refers to this
    struct; yaz_marc_node keeps its comment as a plain char pointer.
*/
struct yaz_marc_comment {
    char *comment;  /* comment text */
};
67
/** \brief MARC node

    One element of the singly linked list of record parts held by a
    yaz_marc_t handle.
*/
struct yaz_marc_node {
    enum YAZ_MARC_NODE_TYPE which;  /* selects the valid member of u */
    union {
        struct yaz_marc_datafield datafield;        /* YAZ_MARC_DATAFIELD */
        struct yaz_marc_controlfield controlfield;  /* YAZ_MARC_CONTROLFIELD */
        char *comment;                              /* YAZ_MARC_COMMENT */
        char *leader;                               /* YAZ_MARC_LEADER */
    } u;
    struct yaz_marc_node *next;  /* next node, 0 at the end of the list */
};
79
/** \brief represents a subfield */
struct yaz_marc_subfield {
    char *code_data;  /* subfield code immediately followed by its data in
                         one NUL-terminated string; the writers decide
                         where the code ends */
    struct yaz_marc_subfield *next;  /* next subfield, 0 at the end */
};
85
/** \brief the internals of a yaz_marc_t handle */
struct yaz_marc_t_ {
    WRBUF m_wr;               /* result buffer used by yaz_marc_decode_buf */
    NMEM nmem;                /* pool that holds all nodes and their strings */
    int xml;                  /* output mode (YAZ_MARC_LINE, YAZ_MARC_MARCXML,
                                 ...) dispatched on by yaz_marc_write_mode */
    int debug;                /* non-zero: add diagnostic comment nodes */
    int write_using_libxml2;  /* non-zero: build XML output through libxml2 */
    enum yaz_collection_state enable_collection;  /* collection wrapper state */
    yaz_iconv_t iconv_cd;     /* conversion handle passed to the wrbuf_iconv_*
                                 calls; initialised to 0 */
    char subfield_str[8];     /* written before each subfield in line mode */
    char endline_str[8];      /* written after each field in line mode */
    char *leader_spec;        /* passed to marc_exec_leader; released with
                                 xfree in yaz_marc_destroy */
    struct yaz_marc_node *nodes;           /* head of the node list */
    struct yaz_marc_node **nodes_pp;       /* where the next node is linked */
    struct yaz_marc_subfield **subfield_pp; /* where the next subfield is
                                               linked; 0 until a datafield
                                               has been added */
};
102
103 yaz_marc_t yaz_marc_create(void)
104 {
105     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
106     mt->xml = YAZ_MARC_LINE;
107     mt->debug = 0;
108     mt->write_using_libxml2 = 0;
109     mt->enable_collection = no_collection;
110     mt->m_wr = wrbuf_alloc();
111     mt->iconv_cd = 0;
112     mt->leader_spec = 0;
113     strcpy(mt->subfield_str, " $");
114     strcpy(mt->endline_str, "\n");
115
116     mt->nmem = nmem_create();
117     yaz_marc_reset(mt);
118     return mt;
119 }
120
121 void yaz_marc_destroy(yaz_marc_t mt)
122 {
123     if (!mt)
124         return ;
125     nmem_destroy(mt->nmem);
126     wrbuf_destroy(mt->m_wr);
127     xfree(mt->leader_spec);
128     xfree(mt);
129 }
130
131 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
132 {
133     return mt->nmem;
134 }
135
136 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 {
138     wrbuf_iconv_reset(wr, mt->iconv_cd);
139 }
140
/* Forward declaration: yaz_marc_add_leader calls this before the
   definition appears. */
static int marc_exec_leader(const char *leader_spec, char *leader,
                            size_t size);
143
144
145 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
146 {
147     struct yaz_marc_node *n = (struct yaz_marc_node *)
148         nmem_malloc(mt->nmem, sizeof(*n));
149     n->next = 0;
150     *mt->nodes_pp = n;
151     mt->nodes_pp = &n->next;
152     return n;
153 }
154
#if YAZ_HAVE_XML2
/** \brief adds a control field whose tag and data come from XML nodes
    \param mt handle
    \param ptr_tag XML node holding the tag text
    \param ptr_data XML node holding the field data text
*/
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                   const xmlNode *ptr_data)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_controlfield *cf = &node->u.controlfield;

    node->which = YAZ_MARC_CONTROLFIELD;
    cf->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    cf->data = nmem_text_node_cdata(ptr_data, mt->nmem);
}
#endif
165
166
167 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
168 {
169     struct yaz_marc_node *n = yaz_marc_add_node(mt);
170     n->which = YAZ_MARC_COMMENT;
171     n->u.comment = nmem_strdup(mt->nmem, comment);
172 }
173
174 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
175 {
176     va_list ap;
177     char buf[200];
178
179     va_start(ap, fmt);
180     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
181     yaz_marc_add_comment(mt, buf);
182     va_end (ap);
183 }
184
185 int yaz_marc_get_debug(yaz_marc_t mt)
186 {
187     return mt->debug;
188 }
189
190 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
191 {
192     struct yaz_marc_node *n = yaz_marc_add_node(mt);
193     n->which = YAZ_MARC_LEADER;
194     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
195     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
196 }
197
198 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
199                                const char *data, size_t data_len)
200 {
201     struct yaz_marc_node *n = yaz_marc_add_node(mt);
202     n->which = YAZ_MARC_CONTROLFIELD;
203     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
204     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
205     if (mt->debug)
206     {
207         size_t i;
208         char msg[80];
209
210         sprintf(msg, "controlfield:");
211         for (i = 0; i < 16 && i < data_len; i++)
212             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
213         if (i < data_len)
214             sprintf(msg + strlen(msg), " ..");
215         yaz_marc_add_comment(mt, msg);
216     }
217 }
218
219 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
220                             const char *indicator, size_t indicator_len)
221 {
222     struct yaz_marc_node *n = yaz_marc_add_node(mt);
223     n->which = YAZ_MARC_DATAFIELD;
224     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
225     n->u.datafield.indicator =
226         nmem_strdupn(mt->nmem, indicator, indicator_len);
227     n->u.datafield.subfields = 0;
228
229     /* make subfield_pp the current (last one) */
230     mt->subfield_pp = &n->u.datafield.subfields;
231 }
232
#if YAZ_HAVE_XML2
/** \brief adds a data field node whose tag comes from an XML node
    \param mt handle
    \param ptr_tag XML node holding the tag text
    \param indicator indicator characters
    \param indicator_len number of bytes to copy from indicator

    Later yaz_marc_add_subfield calls attach their subfields to this field.
*/
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                const char *indicator, size_t indicator_len)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *df = &node->u.datafield;

    node->which = YAZ_MARC_DATAFIELD;
    df->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    df->indicator = nmem_strdupn(mt->nmem, indicator, indicator_len);
    df->subfields = 0;

    /* subfields added from now on go onto this field's list */
    mt->subfield_pp = &df->subfields;
}
#endif
248
249 void yaz_marc_add_subfield(yaz_marc_t mt,
250                            const char *code_data, size_t code_data_len)
251 {
252     if (mt->debug)
253     {
254         size_t i;
255         char msg[80];
256
257         sprintf(msg, "subfield:");
258         for (i = 0; i < 16 && i < code_data_len; i++)
259             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
260         if (i < code_data_len)
261             sprintf(msg + strlen(msg), " ..");
262         yaz_marc_add_comment(mt, msg);
263     }
264
265     if (mt->subfield_pp)
266     {
267         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
268             nmem_malloc(mt->nmem, sizeof(*n));
269         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
270         n->next = 0;
271         /* mark subfield_pp to point to this one, so we append here next */
272         *mt->subfield_pp = n;
273         mt->subfield_pp = &n->next;
274     }
275 }
276
/** \brief converts a fixed-width digit field, rejecting non-digits
    \param buf start of the field
    \param size number of characters in the field
    \param val receives the value from atoi_n; untouched on failure
    \retval 1 all size characters were digits and *val was set
    \retval 0 a non-digit character was found
*/
int atoi_n_check(const char *buf, int size, int *val)
{
    /* ctype functions need unsigned char values */
    const unsigned char *bytes = (const unsigned char *) buf;
    int pos = 0;

    while (pos < size)
    {
        if (!isdigit(bytes[pos]))
            return 0;
        pos++;
    }
    *val = atoi_n(buf, size);
    return 1;
}
286
287 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
288                          int *indicator_length,
289                          int *identifier_length,
290                          int *base_address,
291                          int *length_data_entry,
292                          int *length_starting,
293                          int *length_implementation)
294 {
295     char leader[24];
296
297     memcpy(leader, leader_c, 24);
298
299     if (!atoi_n_check(leader+10, 1, indicator_length))
300     {
301         yaz_marc_cprintf(mt, 
302                          "Indicator length at offset 10 should hold a digit."
303                          " Assuming 2");
304         leader[10] = '2';
305         *indicator_length = 2;
306     }
307     if (!atoi_n_check(leader+11, 1, identifier_length))
308     {
309         yaz_marc_cprintf(mt, 
310                          "Identifier length at offset 11 should hold a digit."
311                          " Assuming 2");
312         leader[11] = '2';
313         *identifier_length = 2;
314     }
315     if (!atoi_n_check(leader+12, 5, base_address))
316     {
317         yaz_marc_cprintf(mt, 
318                          "Base address at offsets 12..16 should hold a number."
319                          " Assuming 0");
320         *base_address = 0;
321     }
322     if (!atoi_n_check(leader+20, 1, length_data_entry))
323     {
324         yaz_marc_cprintf(mt, 
325                          "Length data entry at offset 20 should hold a digit."
326                          " Assuming 4");
327         *length_data_entry = 4;
328         leader[20] = '4';
329     }
330     if (!atoi_n_check(leader+21, 1, length_starting))
331     {
332         yaz_marc_cprintf(mt,
333                          "Length starting at offset 21 should hold a digit."
334                          " Assuming 5");
335         *length_starting = 5;
336         leader[21] = '5';
337     }
338     if (!atoi_n_check(leader+22, 1, length_implementation))
339     {
340         yaz_marc_cprintf(mt, 
341                          "Length implementation at offset 22 should hold a digit."
342                          " Assuming 0");
343         *length_implementation = 0;
344         leader[22] = '0';
345     }
346
347     if (mt->debug)
348     {
349         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
350         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
351         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
352         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
353         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
354         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
355     }
356     yaz_marc_add_leader(mt, leader, 24);
357 }
358
359 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
360 {
361     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
362     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
363 }
364
365 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
366 {
367     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
368     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
369 }
370
371 /* try to guess how many bytes the identifier really is! */
372 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
373 {
374     if (mt->iconv_cd)
375     {
376         size_t i;
377         for (i = 1; i<5; i++)
378         {
379             char outbuf[12];
380             size_t outbytesleft = sizeof(outbuf);
381             char *outp = outbuf;
382             const char *inp = buf;
383
384             size_t inbytesleft = i;
385             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
386                                  &outp, &outbytesleft);
387             if (r != (size_t) (-1))
388                 return i;  /* got a complete sequence */
389         }
390         return 1; /* giving up */
391     }
392     return 1; /* we don't know */
393 }
394                               
395 void yaz_marc_reset(yaz_marc_t mt)
396 {
397     nmem_reset(mt->nmem);
398     mt->nodes = 0;
399     mt->nodes_pp = &mt->nodes;
400     mt->subfield_pp = 0;
401 }
402
403 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
404 {
405     struct yaz_marc_node *n;
406     int identifier_length;
407     const char *leader = 0;
408
409     for (n = mt->nodes; n; n = n->next)
410         if (n->which == YAZ_MARC_LEADER)
411         {
412             leader = n->u.leader;
413             break;
414         }
415     
416     if (!leader)
417         return -1;
418     if (!atoi_n_check(leader+11, 1, &identifier_length))
419         return -1;
420
421     for (n = mt->nodes; n; n = n->next)
422     {
423         switch(n->which)
424         {
425         case YAZ_MARC_COMMENT:
426             wrbuf_iconv_write(wr, mt->iconv_cd, 
427                               n->u.comment, strlen(n->u.comment));
428             wrbuf_puts(wr, ")\n");
429             break;
430         default:
431             break;
432         }
433     }
434     return 0;
435 }
436
437
438 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
439 {
440     struct yaz_marc_node *n;
441     int identifier_length;
442     const char *leader = 0;
443
444     for (n = mt->nodes; n; n = n->next)
445         if (n->which == YAZ_MARC_LEADER)
446         {
447             leader = n->u.leader;
448             break;
449         }
450     
451     if (!leader)
452         return -1;
453     if (!atoi_n_check(leader+11, 1, &identifier_length))
454         return -1;
455
456     for (n = mt->nodes; n; n = n->next)
457     {
458         struct yaz_marc_subfield *s;
459         switch(n->which)
460         {
461         case YAZ_MARC_DATAFIELD:
462             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
463                          n->u.datafield.indicator);
464             for (s = n->u.datafield.subfields; s; s = s->next)
465             {
466                 /* if identifier length is 2 (most MARCs),
467                    the code is a single character .. However we've
468                    seen multibyte codes, so see how big it really is */
469                 size_t using_code_len = 
470                     (identifier_length > 2) ? identifier_length - 1
471                     :
472                     cdata_one_character(mt, s->code_data);
473                 
474                 wrbuf_puts (wr, mt->subfield_str); 
475                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
476                                   using_code_len);
477                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
478                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
479                                  s->code_data + using_code_len);
480                 marc_iconv_reset(mt, wr);
481             }
482             wrbuf_puts (wr, mt->endline_str);
483             break;
484         case YAZ_MARC_CONTROLFIELD:
485             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
486             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
487             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
488             marc_iconv_reset(mt, wr);
489             wrbuf_puts (wr, mt->endline_str);
490             break;
491         case YAZ_MARC_COMMENT:
492             wrbuf_puts(wr, "(");
493             wrbuf_iconv_write(wr, mt->iconv_cd, 
494                               n->u.comment, strlen(n->u.comment));
495             marc_iconv_reset(mt, wr);
496             wrbuf_puts(wr, ")\n");
497             break;
498         case YAZ_MARC_LEADER:
499             wrbuf_printf(wr, "%s\n", n->u.leader);
500         }
501     }
502     wrbuf_puts(wr, "\n");
503     return 0;
504 }
505
506 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
507 {
508     if (mt->enable_collection == collection_second)
509     {
510         switch(mt->xml)
511         {
512         case YAZ_MARC_MARCXML:
513             wrbuf_printf(wr, "</collection>\n");
514             break;
515         case YAZ_MARC_XCHANGE:
516             wrbuf_printf(wr, "</collection>\n");
517             break;
518         }
519     }
520     return 0;
521 }
522
/** \brief requests that XML records be wrapped in a collection element
    \param mt handle

    The XML writer emits the opening tag with the next record it writes;
    yaz_marc_write_trailer emits the closing tag.
*/
void yaz_marc_enable_collection(yaz_marc_t mt)
{
    mt->enable_collection = collection_first;
}
527
528 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
529 {
530     switch(mt->xml)
531     {
532     case YAZ_MARC_LINE:
533         return yaz_marc_write_line(mt, wr);
534     case YAZ_MARC_MARCXML:
535         return yaz_marc_write_marcxml(mt, wr);
536     case YAZ_MARC_XCHANGE:
537         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
538     case YAZ_MARC_ISO2709:
539         return yaz_marc_write_iso2709(mt, wr);
540     case YAZ_MARC_CHECK:
541         return yaz_marc_write_check(mt, wr);
542     }
543     return -1;
544 }
545
546 /** \brief common MARC XML/Xchange writer
547     \param mt handle
548     \param wr WRBUF output
549     \param ns XMLNS for the elements
550     \param format record format (e.g. "MARC21")
551     \param type record type (e.g. "Bibliographic")
552 */
553 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
554                                       const char *ns, 
555                                       const char *format,
556                                       const char *type)
557 {
558     struct yaz_marc_node *n;
559     int identifier_length;
560     const char *leader = 0;
561
562     for (n = mt->nodes; n; n = n->next)
563         if (n->which == YAZ_MARC_LEADER)
564         {
565             leader = n->u.leader;
566             break;
567         }
568     
569     if (!leader)
570         return -1;
571     if (!atoi_n_check(leader+11, 1, &identifier_length))
572         return -1;
573     
574     if (mt->enable_collection != no_collection)
575     {
576         if (mt->enable_collection == collection_first)
577             wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
578         mt->enable_collection = collection_second;
579         wrbuf_printf(wr, "<record");
580     }
581     else
582     {
583         wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
584     }
585     if (format)
586         wrbuf_printf(wr, " format=\"%.80s\"", format);
587     if (type)
588         wrbuf_printf(wr, " type=\"%.80s\"", type);
589     wrbuf_printf(wr, ">\n");
590     for (n = mt->nodes; n; n = n->next)
591     {
592         struct yaz_marc_subfield *s;
593
594         switch(n->which)
595         {
596         case YAZ_MARC_DATAFIELD:
597             wrbuf_printf(wr, "  <datafield tag=\"");
598             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
599                                     strlen(n->u.datafield.tag));
600             wrbuf_printf(wr, "\"");
601             if (n->u.datafield.indicator)
602             {
603                 int i;
604                 for (i = 0; n->u.datafield.indicator[i]; i++)
605                 {
606                     wrbuf_printf(wr, " ind%d=\"", i+1);
607                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
608                                           n->u.datafield.indicator+i, 1);
609                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
610                 }
611             }
612             wrbuf_printf(wr, ">\n");
613             for (s = n->u.datafield.subfields; s; s = s->next)
614             {
615                 /* if identifier length is 2 (most MARCs),
616                    the code is a single character .. However we've
617                    seen multibyte codes, so see how big it really is */
618                 size_t using_code_len = 
619                     (identifier_length > 2) ? identifier_length - 1
620                     :
621                     cdata_one_character(mt, s->code_data);
622                 
623                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
624                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
625                                         s->code_data, using_code_len);
626                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
627                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
628                                         s->code_data + using_code_len,
629                                         strlen(s->code_data + using_code_len));
630                 marc_iconv_reset(mt, wr);
631                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
632                 wrbuf_puts(wr, "\n");
633             }
634             wrbuf_printf(wr, "  </datafield>\n");
635             break;
636         case YAZ_MARC_CONTROLFIELD:
637             wrbuf_printf(wr, "  <controlfield tag=\"");
638             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
639                                     strlen(n->u.controlfield.tag));
640             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
641             wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
642                                     n->u.controlfield.data,
643                                     strlen(n->u.controlfield.data));
644
645             marc_iconv_reset(mt, wr);
646             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
647             wrbuf_puts(wr, "\n");
648             break;
649         case YAZ_MARC_COMMENT:
650             wrbuf_printf(wr, "<!-- ");
651             wrbuf_puts(wr, n->u.comment);
652             wrbuf_printf(wr, " -->\n");
653             break;
654         case YAZ_MARC_LEADER:
655             wrbuf_printf(wr, "  <leader>");
656             wrbuf_iconv_write_cdata(wr, 
657                                     0 /* no charset conversion for leader */,
658                                     n->u.leader, strlen(n->u.leader));
659             wrbuf_printf(wr, "</leader>\n");
660         }
661     }
662     wrbuf_puts(wr, "</record>\n");
663     return 0;
664 }
665
666 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
667                                      const char *ns, 
668                                      const char *format,
669                                      const char *type)
670 {
671     if (mt->write_using_libxml2)
672     {
673 #if YAZ_HAVE_XML2
674         int ret;
675         xmlNode *root_ptr;
676
677         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
678         if (ret == 0)
679         {
680             xmlChar *buf_out;
681             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
682             int len_out;
683
684             xmlDocSetRootElement(doc, root_ptr);
685             xmlDocDumpMemory(doc, &buf_out, &len_out);
686
687             wrbuf_write(wr, (const char *) buf_out, len_out);
688             wrbuf_puts(wr, "");
689             xmlFree(buf_out);
690             xmlFreeDoc(doc);
691         }
692         return ret;
693 #else
694         return -1;
695 #endif
696     }
697     else
698         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
699 }
700
701 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
702 {
703     /* set leader 09 to 'a' for UNICODE */
704     /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
705     if (!mt->leader_spec)
706         yaz_marc_modify_leader(mt, 9, "a");
707     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
708                                      0, 0);
709 }
710
711 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
712                                const char *format,
713                                const char *type)
714 {
715     return yaz_marc_write_marcxml_ns(mt, wr,
716                                      "http://www.bs.dk/standards/MarcXchange",
717                                      0, 0);
718 }
719
720
#if YAZ_HAVE_XML2
/** \brief builds a libxml2 tree for the record
    \param mt handle
    \param root_ptr receives the new record element
    \param ns XMLNS for the elements
    \param format value for the format attribute, or 0 to omit it
    \param type value for the type attribute, or 0 to omit it
    \retval 0 success
    \retval -1 no leader node, or leader offset 11 is not a digit; nothing
    is allocated and *root_ptr is left untouched in that case
*/
int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
                       const char *ns,
                       const char *format,
                       const char *type)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;
    xmlNode *record_ptr;
    xmlNsPtr ns_record;
    WRBUF wr_cdata = 0;

    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    /* scratch buffer for character-converted text */
    wr_cdata = wrbuf_alloc();

    record_ptr = xmlNewNode(0, BAD_CAST "record");
    *root_ptr = record_ptr;

    ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
    xmlSetNs(record_ptr, ns_record);

    if (format)
        xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        xmlNode *ptr;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
            if (n->u.datafield.indicator)
            {
                int i;
                /* one indN attribute per indicator character */
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    char ind_str[6];
                    char ind_val[2];

                    sprintf(ind_str, "ind%d", i+1);
                    ind_val[0] = n->u.datafield.indicator[i];
                    ind_val[1] = '\0';
                    xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
                }
            }
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                xmlNode *ptr_subfield;
                /* if identifier length is 2 (most MARCs),
                   the code is a single character .. However we've
                   seen multibyte codes, so see how big it really is */
                size_t code_data_len = strlen(s->code_data);
                size_t using_code_len =
                    (identifier_length > 2) ?
                    (size_t) (identifier_length - 1)
                    :
                    cdata_one_character(mt, s->code_data);

                /* a subfield shorter than the announced code length must
                   not make us read beyond its terminating NUL */
                if (using_code_len > code_data_len)
                    using_code_len = code_data_len;

                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
                                 s->code_data + using_code_len);
                marc_iconv_reset(mt, wr_cdata);
                ptr_subfield = xmlNewTextChild(
                    ptr, ns_record,
                    BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));

                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
                                  s->code_data, using_code_len);
                xmlNewProp(ptr_subfield, BAD_CAST "code",
                           BAD_CAST wrbuf_cstr(wr_cdata));
            }
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_rewind(wr_cdata);
            wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr_cdata);

            ptr = xmlNewTextChild(record_ptr, ns_record,
                                  BAD_CAST "controlfield",
                                  BAD_CAST wrbuf_cstr(wr_cdata));

            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
            break;
        case YAZ_MARC_COMMENT:
            ptr = xmlNewComment(BAD_CAST n->u.comment);
            xmlAddChild(record_ptr, ptr);
            break;
        case YAZ_MARC_LEADER:
            xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
                            BAD_CAST n->u.leader);
            break;
        }
    }
    wrbuf_destroy(wr_cdata);
    return 0;
}
#endif
833
/** \brief writes the record in ISO2709 format
    \param mt handle
    \param wr WRBUF output
    \retval 0 success
    \retval -1 no leader node, or one of leader offsets 10, 11, 20, 21, 22
    is not a digit

    Works in two passes over the node list: the first builds the directory
    and measures each field's converted length, the second writes the
    field data itself. Comment and leader nodes contribute no field data.
*/
int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int indicator_length;
    int identifier_length;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int data_offset = 0;
    const char *leader = 0;
    WRBUF wr_dir, wr_head, wr_data_tmp;
    int base_address;
    
    /* no break here: if several leader nodes exist the last one is used */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;
    
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+10, 1, &indicator_length))
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;
    if (!atoi_n_check(leader+20, 1, &length_data_entry))
        return -1;
    if (!atoi_n_check(leader+21, 1, &length_starting))
        return -1;
    if (!atoi_n_check(leader+22, 1, &length_implementation))
        return -1;

    /* pass 1: build the directory; wr_data_tmp only serves to measure the
       converted length of each field */
    wr_data_tmp = wrbuf_alloc();
    wr_dir = wrbuf_alloc();
    for (n = mt->nodes; n; n = n->next)
    {
        int data_length = 0;
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
            data_length += indicator_length;
            wrbuf_rewind(wr_data_tmp);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* write dummy IDFS + content */
                wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
                wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr_data_tmp);
            }
            /* write dummy FS (makes MARC-8 to become ASCII) */
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);

            wrbuf_rewind(wr_data_tmp);
            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
                             n->u.controlfield.data);
            marc_iconv_reset(mt, wr_data_tmp);
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
        /* comment and leader nodes leave data_length at 0 and so get no
           directory entry */
        if (data_length)
        {
            wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
            wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
            data_offset += data_length;
        }
    }
    /* mark end of directory */
    wrbuf_putc(wr_dir, ISO2709_FS);

    /* base address of data (comes after leader+directory) */
    base_address = 24 + wrbuf_len(wr_dir);

    wr_head = wrbuf_alloc();

    /* write record length */
    wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+5, 7);
    /* base address of data */
    wrbuf_printf(wr_head, "%05d", base_address);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+17, 7);
    
    /* exactly 24 header bytes are copied, then the directory */
    wrbuf_write(wr, wrbuf_buf(wr_head), 24);
    wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
    wrbuf_destroy(wr_head);
    wrbuf_destroy(wr_dir);
    wrbuf_destroy(wr_data_tmp);

    /* pass 2: write the field data with the real separators */
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr, "%.*s", indicator_length,
                         n->u.datafield.indicator);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                wrbuf_putc(wr, ISO2709_IDFS);
                wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
    }
    /* record separator ends the record */
    wrbuf_printf(wr, "%c", ISO2709_RS);
    return 0;
}
966
967
968 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
969 {
970     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
971     if (r <= 0)
972         return r;
973     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
974     if (s != 0)
975         return -1; /* error */
976     return r; /* OK, return length > 0 */
977 }
978
979 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
980                          const char **result, size_t *rsize)
981 {
982     int r;
983
984     wrbuf_rewind(mt->m_wr);
985     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
986     if (result)
987         *result = wrbuf_cstr(mt->m_wr);
988     if (rsize)
989         *rsize = wrbuf_len(mt->m_wr);
990     return r;
991 }
992
993 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
994 {
995     if (mt)
996         mt->xml = xmlmode;
997 }
998
999 void yaz_marc_debug(yaz_marc_t mt, int level)
1000 {
1001     if (mt)
1002         mt->debug = level;
1003 }
1004
1005 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1006 {
1007     mt->iconv_cd = cd;
1008 }
1009
1010 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1011 {
1012     return mt->iconv_cd;
1013 }
1014
1015 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1016 {
1017     struct yaz_marc_node *n;
1018     char *leader = 0;
1019     for (n = mt->nodes; n; n = n->next)
1020         if (n->which == YAZ_MARC_LEADER)
1021         {
1022             leader = n->u.leader;
1023             memcpy(leader+off, str, strlen(str));
1024             break;
1025         }
1026 }
1027
1028 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1029 {
1030     xfree(mt->leader_spec);
1031     mt->leader_spec = 0;
1032     if (leader_spec)
1033     {
1034         char dummy_leader[24];
1035         if (marc_exec_leader(leader_spec, dummy_leader, 24))
1036             return -1;
1037         mt->leader_spec = xstrdup(leader_spec);
1038     }
1039     return 0;
1040 }
1041
/** \brief applies a leader specification to a leader buffer
 *
 * The specification is a comma-separated list of items of the form
 * pos=value, where pos is a decimal offset into leader and value is
 * either
 *  - a quoted string ('abc'): the characters between the quotes are
 *    copied to leader starting at pos, or
 *  - something starting with a digit: converted with atoi() and stored
 *    as a single byte at leader[pos].
 * At most 20 characters of each value are read. Parsing stops at the
 * first item that is not followed by a comma.
 *
 * Items are applied as they are parsed, so leader may have been partly
 * modified when an error is reported for a later item.
 *
 * \param leader_spec specification string; NULL is accepted and is a no-op
 * \param leader buffer to modify
 * \param size number of bytes available in leader
 * \retval 0 OK
 * \retval -1 malformed item, offset outside buffer, or value does not fit
 */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *item = leader_spec;

    if (!item)
        return 0;

    for (;;)
    {
        char value[21];
        int offset;
        int consumed = 0;
        int converted = sscanf(item, "%d=%20[^,]%n", &offset, value,
                               &consumed);

        /* need both offset and value; shortest valid item is 3 chars */
        if (converted < 2 || consumed < 3)
            return -1;
        if (offset < 0 || (size_t) offset >= size)
            return -1;

        if (value[0] == '\'')
        {
            /* quoted string: copy what is between the two quotes */
            const char *closing = strchr(value + 1, '\'');
            size_t count;

            if (!closing)
                return -1;
            count = closing - value - 1;
            if (count + offset > size)
                return -1;
            memcpy(leader + offset, value + 1, count);
        }
        else if (value[0] >= '0' && value[0] <= '9')
        {
            /* numeric: the number itself becomes the byte value */
            int byte_value = atoi(value);
            leader[offset] = byte_value;
        }
        else
        {
            return -1;
        }

        item += consumed;
        if (*item != ',')
            return 0;
        item++;
    }
}
1084
1085 int yaz_marc_decode_formatstr(const char *arg)
1086 {
1087     int mode = -1; 
1088     if (!strcmp(arg, "marc"))
1089         mode = YAZ_MARC_ISO2709;
1090     if (!strcmp(arg, "marcxml"))
1091         mode = YAZ_MARC_MARCXML;
1092     if (!strcmp(arg, "marcxchange"))
1093         mode = YAZ_MARC_XCHANGE;
1094     if (!strcmp(arg, "line"))
1095         mode = YAZ_MARC_LINE;
1096     return mode;
1097 }
1098
1099 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1100 {
1101     mt->write_using_libxml2 = enable;
1102 }
1103
1104 /*
1105  * Local variables:
1106  * c-basic-offset: 4
1107  * indent-tabs-mode: nil
1108  * End:
1109  * vim: shiftwidth=4 tabstop=8 expandtab
1110  */
1111