Fixed bug in conversion to XML from MARC: Non-XML characters in control
[yaz-moved-to-github.git] / src / marcdisp.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdisp.c,v 1.51 2007-09-20 17:22:45 adam Exp $
6  */
7
8 /**
9  * \file marcdisp.c
10  * \brief Implements MARC conversion utilities
11  */
12
13 #if HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16
17 #ifdef WIN32
18 #include <windows.h>
19 #endif
20
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
28 #include <yaz/nmem_xml.h>
29 #include <yaz/snprintf.h>
30
31 #if YAZ_HAVE_XML2
32 #include <libxml/parser.h>
33 #include <libxml/tree.h>
34 #endif
35
/** \brief node types for yaz_marc_node
 *
 * Selects which member of the union in struct yaz_marc_node is valid.
 */
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,    /* u.datafield: tag, indicators and subfields */
    YAZ_MARC_CONTROLFIELD, /* u.controlfield: tag and data */
    YAZ_MARC_COMMENT,      /* u.comment: free-text diagnostic */
    YAZ_MARC_LEADER        /* u.leader: record leader */
};
44
/** \brief represents a data field */
struct yaz_marc_datafield {
    char *tag;        /* field tag, allocated from the handle's NMEM */
    char *indicator;  /* indicator characters; writers emit one indN per char */
    struct yaz_marc_subfield *subfields; /* head of subfield list, 0 if none */
};
51
/** \brief represents a control field */
struct yaz_marc_controlfield {
    char *tag;   /* field tag, allocated from the handle's NMEM */
    char *data;  /* NUL-terminated field content */
};
57
/** \brief a comment node
 *
 * NOTE(review): struct yaz_marc_node below stores its comment as a plain
 * char * and does not reference this type; confirm whether it is still
 * needed.
 */
struct yaz_marc_comment {
    char *comment;  /* comment text */
};
62
/** \brief MARC node
 *
 * One element of the singly linked list of record parts kept in the
 * handle (leader, fields and comments, in insertion order).
 */
struct yaz_marc_node {
    enum YAZ_MARC_NODE_TYPE which;  /* selects the valid member of u */
    union {
        struct yaz_marc_datafield datafield;        /* YAZ_MARC_DATAFIELD */
        struct yaz_marc_controlfield controlfield;  /* YAZ_MARC_CONTROLFIELD */
        char *comment;                              /* YAZ_MARC_COMMENT */
        char *leader;                               /* YAZ_MARC_LEADER */
    } u;
    struct yaz_marc_node *next;  /* following node, 0 at end of list */
};
74
/** \brief represents a subfield */
struct yaz_marc_subfield {
    char *code_data;  /* subfield code immediately followed by its data */
    struct yaz_marc_subfield *next;  /* following subfield, 0 at end */
};
80
/** \brief the internals of a yaz_marc_t handle */
struct yaz_marc_t_ {
    WRBUF m_wr;               /* result buffer used by yaz_marc_decode_buf */
    NMEM nmem;                /* memory for nodes; cleared by yaz_marc_reset */
    int xml;                  /* output mode (YAZ_MARC_LINE, ...), see
                                 yaz_marc_write_mode */
    int debug;                /* non-zero: add diagnostic comment nodes */
    int write_using_libxml2;  /* non-zero: XML writers go through libxml2 */
    yaz_iconv_t iconv_cd;     /* character set conversion, 0 for none */
    char subfield_str[8];     /* written before each subfield in line mode */
    char endline_str[8];      /* written after each field in line mode */
    char *leader_spec;        /* xstrdup'ed leader modification spec or 0 */
    struct yaz_marc_node *nodes;          /* head of the node list */
    struct yaz_marc_node **nodes_pp;      /* where the next node is linked */
    struct yaz_marc_subfield **subfield_pp; /* where the next subfield is
                                               linked; 0 until a datafield
                                               has been added */
};
96
97 yaz_marc_t yaz_marc_create(void)
98 {
99     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
100     mt->xml = YAZ_MARC_LINE;
101     mt->debug = 0;
102     mt->write_using_libxml2 = 0;
103     mt->m_wr = wrbuf_alloc();
104     mt->iconv_cd = 0;
105     mt->leader_spec = 0;
106     strcpy(mt->subfield_str, " $");
107     strcpy(mt->endline_str, "\n");
108
109     mt->nmem = nmem_create();
110     yaz_marc_reset(mt);
111     return mt;
112 }
113
114 void yaz_marc_destroy(yaz_marc_t mt)
115 {
116     if (!mt)
117         return ;
118     nmem_destroy(mt->nmem);
119     wrbuf_destroy(mt->m_wr);
120     xfree(mt->leader_spec);
121     xfree(mt);
122 }
123
124 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
125 {
126     return mt->nmem;
127 }
128
129 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
130 {
131     wrbuf_iconv_reset(wr, mt->iconv_cd);
132 }
133
134 static int marc_exec_leader(const char *leader_spec, char *leader,
135                             size_t size);
136
137
138 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
139 {
140     struct yaz_marc_node *n = (struct yaz_marc_node *)
141         nmem_malloc(mt->nmem, sizeof(*n));
142     n->next = 0;
143     *mt->nodes_pp = n;
144     mt->nodes_pp = &n->next;
145     return n;
146 }
147
148 #if YAZ_HAVE_XML2
149 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
150                                    const xmlNode *ptr_data)
151 {
152     struct yaz_marc_node *n = yaz_marc_add_node(mt);
153     n->which = YAZ_MARC_CONTROLFIELD;
154     n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
155     n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
156 }
157 #endif
158
159
160 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
161 {
162     struct yaz_marc_node *n = yaz_marc_add_node(mt);
163     n->which = YAZ_MARC_COMMENT;
164     n->u.comment = nmem_strdup(mt->nmem, comment);
165 }
166
167 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
168 {
169     va_list ap;
170     char buf[200];
171
172     va_start(ap, fmt);
173     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
174     yaz_marc_add_comment(mt, buf);
175     va_end (ap);
176 }
177
178 int yaz_marc_get_debug(yaz_marc_t mt)
179 {
180     return mt->debug;
181 }
182
183 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
184 {
185     struct yaz_marc_node *n = yaz_marc_add_node(mt);
186     n->which = YAZ_MARC_LEADER;
187     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
188     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
189 }
190
191 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
192                                const char *data, size_t data_len)
193 {
194     struct yaz_marc_node *n = yaz_marc_add_node(mt);
195     n->which = YAZ_MARC_CONTROLFIELD;
196     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
197     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
198     if (mt->debug)
199     {
200         size_t i;
201         char msg[80];
202
203         sprintf(msg, "controlfield:");
204         for (i = 0; i < 16 && i < data_len; i++)
205             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
206         if (i < data_len)
207             sprintf(msg + strlen(msg), " ..");
208         yaz_marc_add_comment(mt, msg);
209     }
210 }
211
212 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
213                             const char *indicator, size_t indicator_len)
214 {
215     struct yaz_marc_node *n = yaz_marc_add_node(mt);
216     n->which = YAZ_MARC_DATAFIELD;
217     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
218     n->u.datafield.indicator =
219         nmem_strdupn(mt->nmem, indicator, indicator_len);
220     n->u.datafield.subfields = 0;
221
222     /* make subfield_pp the current (last one) */
223     mt->subfield_pp = &n->u.datafield.subfields;
224 }
225
226 #if YAZ_HAVE_XML2
227 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
228                                 const char *indicator, size_t indicator_len)
229 {
230     struct yaz_marc_node *n = yaz_marc_add_node(mt);
231     n->which = YAZ_MARC_DATAFIELD;
232     n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
233     n->u.datafield.indicator =
234         nmem_strdupn(mt->nmem, indicator, indicator_len);
235     n->u.datafield.subfields = 0;
236
237     /* make subfield_pp the current (last one) */
238     mt->subfield_pp = &n->u.datafield.subfields;
239 }
240 #endif
241
242 void yaz_marc_add_subfield(yaz_marc_t mt,
243                            const char *code_data, size_t code_data_len)
244 {
245     if (mt->debug)
246     {
247         size_t i;
248         char msg[80];
249
250         sprintf(msg, "subfield:");
251         for (i = 0; i < 16 && i < code_data_len; i++)
252             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
253         if (i < code_data_len)
254             sprintf(msg + strlen(msg), " ..");
255         yaz_marc_add_comment(mt, msg);
256     }
257
258     if (mt->subfield_pp)
259     {
260         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
261             nmem_malloc(mt->nmem, sizeof(*n));
262         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
263         n->next = 0;
264         /* mark subfield_pp to point to this one, so we append here next */
265         *mt->subfield_pp = n;
266         mt->subfield_pp = &n->next;
267     }
268 }
269
/** \brief converts a fixed-width digit sequence to an integer
 *  \param buf input characters (need not be NUL-terminated)
 *  \param size number of characters to examine
 *  \param val receives the result of atoi_n(buf, size) on success
 *  \retval 1 all characters were digits; *val is set
 *  \retval 0 a non-digit was found; *val is left untouched
 */
int atoi_n_check(const char *buf, int size, int *val)
{
    const unsigned char *bytes = (const unsigned char *) buf;
    int idx = 0;

    while (idx < size)
    {
        if (!isdigit(bytes[idx]))
            return 0;
        idx++;
    }
    *val = atoi_n(buf, size);
    return 1;
}
279
280 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
281                          int *indicator_length,
282                          int *identifier_length,
283                          int *base_address,
284                          int *length_data_entry,
285                          int *length_starting,
286                          int *length_implementation)
287 {
288     char leader[24];
289
290     memcpy(leader, leader_c, 24);
291
292     if (!atoi_n_check(leader+10, 1, indicator_length))
293     {
294         yaz_marc_cprintf(mt, 
295                          "Indicator length at offset 10 should hold a digit."
296                          " Assuming 2");
297         leader[10] = '2';
298         *indicator_length = 2;
299     }
300     if (!atoi_n_check(leader+11, 1, identifier_length))
301     {
302         yaz_marc_cprintf(mt, 
303                          "Identifier length at offset 11 should hold a digit."
304                          " Assuming 2");
305         leader[11] = '2';
306         *identifier_length = 2;
307     }
308     if (!atoi_n_check(leader+12, 5, base_address))
309     {
310         yaz_marc_cprintf(mt, 
311                          "Base address at offsets 12..16 should hold a number."
312                          " Assuming 0");
313         *base_address = 0;
314     }
315     if (!atoi_n_check(leader+20, 1, length_data_entry))
316     {
317         yaz_marc_cprintf(mt, 
318                          "Length data entry at offset 20 should hold a digit."
319                          " Assuming 4");
320         *length_data_entry = 4;
321         leader[20] = '4';
322     }
323     if (!atoi_n_check(leader+21, 1, length_starting))
324     {
325         yaz_marc_cprintf(mt,
326                          "Length starting at offset 21 should hold a digit."
327                          " Assuming 5");
328         *length_starting = 5;
329         leader[21] = '5';
330     }
331     if (!atoi_n_check(leader+22, 1, length_implementation))
332     {
333         yaz_marc_cprintf(mt, 
334                          "Length implementation at offset 22 should hold a digit."
335                          " Assuming 0");
336         *length_implementation = 0;
337         leader[22] = '0';
338     }
339
340     if (mt->debug)
341     {
342         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
343         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
344         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
345         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
346         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
347         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
348     }
349     yaz_marc_add_leader(mt, leader, 24);
350 }
351
352 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
353 {
354     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
355     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
356 }
357
358 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
359 {
360     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
361     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
362 }
363
364 /* try to guess how many bytes the identifier really is! */
365 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
366 {
367     if (mt->iconv_cd)
368     {
369         size_t i;
370         for (i = 1; i<5; i++)
371         {
372             char outbuf[12];
373             size_t outbytesleft = sizeof(outbuf);
374             char *outp = outbuf;
375             const char *inp = buf;
376
377             size_t inbytesleft = i;
378             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
379                                  &outp, &outbytesleft);
380             if (r != (size_t) (-1))
381                 return i;  /* got a complete sequence */
382         }
383         return 1; /* giving up */
384     }
385     return 1; /* we don't know */
386 }
387                               
388 void yaz_marc_reset(yaz_marc_t mt)
389 {
390     nmem_reset(mt->nmem);
391     mt->nodes = 0;
392     mt->nodes_pp = &mt->nodes;
393     mt->subfield_pp = 0;
394 }
395
396 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
397 {
398     struct yaz_marc_node *n;
399     int identifier_length;
400     const char *leader = 0;
401
402     for (n = mt->nodes; n; n = n->next)
403         if (n->which == YAZ_MARC_LEADER)
404         {
405             leader = n->u.leader;
406             break;
407         }
408     
409     if (!leader)
410         return -1;
411     if (!atoi_n_check(leader+11, 1, &identifier_length))
412         return -1;
413
414     for (n = mt->nodes; n; n = n->next)
415     {
416         switch(n->which)
417         {
418         case YAZ_MARC_COMMENT:
419             wrbuf_iconv_write(wr, mt->iconv_cd, 
420                               n->u.comment, strlen(n->u.comment));
421             wrbuf_puts(wr, ")\n");
422             break;
423         default:
424             break;
425         }
426     }
427     return 0;
428 }
429
430
431 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
432 {
433     struct yaz_marc_node *n;
434     int identifier_length;
435     const char *leader = 0;
436
437     for (n = mt->nodes; n; n = n->next)
438         if (n->which == YAZ_MARC_LEADER)
439         {
440             leader = n->u.leader;
441             break;
442         }
443     
444     if (!leader)
445         return -1;
446     if (!atoi_n_check(leader+11, 1, &identifier_length))
447         return -1;
448
449     for (n = mt->nodes; n; n = n->next)
450     {
451         struct yaz_marc_subfield *s;
452         switch(n->which)
453         {
454         case YAZ_MARC_DATAFIELD:
455             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
456                          n->u.datafield.indicator);
457             for (s = n->u.datafield.subfields; s; s = s->next)
458             {
459                 /* if identifier length is 2 (most MARCs),
460                    the code is a single character .. However we've
461                    seen multibyte codes, so see how big it really is */
462                 size_t using_code_len = 
463                     (identifier_length != 2) ? identifier_length - 1
464                     :
465                     cdata_one_character(mt, s->code_data);
466                 
467                 wrbuf_puts (wr, mt->subfield_str); 
468                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
469                                   using_code_len);
470                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
471                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
472                                  s->code_data + using_code_len);
473                 marc_iconv_reset(mt, wr);
474             }
475             wrbuf_puts (wr, mt->endline_str);
476             break;
477         case YAZ_MARC_CONTROLFIELD:
478             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
479             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
480             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
481             marc_iconv_reset(mt, wr);
482             wrbuf_puts (wr, mt->endline_str);
483             break;
484         case YAZ_MARC_COMMENT:
485             wrbuf_puts(wr, "(");
486             wrbuf_iconv_write(wr, mt->iconv_cd, 
487                               n->u.comment, strlen(n->u.comment));
488             marc_iconv_reset(mt, wr);
489             wrbuf_puts(wr, ")\n");
490             break;
491         case YAZ_MARC_LEADER:
492             wrbuf_printf(wr, "%s\n", n->u.leader);
493         }
494     }
495     wrbuf_puts(wr, "\n");
496     return 0;
497 }
498
499 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
500 {
501     switch(mt->xml)
502     {
503     case YAZ_MARC_LINE:
504         return yaz_marc_write_line(mt, wr);
505     case YAZ_MARC_MARCXML:
506         return yaz_marc_write_marcxml(mt, wr);
507     case YAZ_MARC_XCHANGE:
508         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
509     case YAZ_MARC_ISO2709:
510         return yaz_marc_write_iso2709(mt, wr);
511     case YAZ_MARC_CHECK:
512         return yaz_marc_write_check(mt, wr);
513     }
514     return -1;
515 }
516
/** \brief common MARC XML/Xchange writer
    \param mt handle
    \param wr WRBUF output
    \param ns XMLNS for the elements
    \param format record format (e.g. "MARC21")
    \param type record type (e.g. "Bibliographic")
    \retval 0 record written
    \retval -1 no leader node present, or leader offset 11 is not a digit

    Produces the XML by hand into wr (no libxml2). Field content goes
    through wrbuf_iconv_write_cdata; ns, format, type and comment text are
    written as given.
*/
static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
                                      const char *ns, 
                                      const char *format,
                                      const char *type)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;

    /* locate the first leader node */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }
    
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    /* root element; format and type attributes are optional and limited
       to 80 characters each */
    wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
    if (format)
        wrbuf_printf(wr, " format=\"%.80s\"", format);
    if (type)
        wrbuf_printf(wr, " type=\"%.80s\"", type);
    wrbuf_printf(wr, ">\n");
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr, "  <datafield tag=\"");
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
                                    strlen(n->u.datafield.tag));
            wrbuf_printf(wr, "\"");
            if (n->u.datafield.indicator)
            {
                /* one attribute ind1, ind2, ... per indicator character */
                int i;
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    wrbuf_printf(wr, " ind%d=\"", i+1);
                    wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                          n->u.datafield.indicator+i, 1);
                    wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
                }
            }
            wrbuf_printf(wr, ">\n");
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* if identifier length is 2 (most MARCs),
                   the code is a single character .. However we've
                   seen multibyte codes, so see how big it really is */
                size_t using_code_len = 
                    (identifier_length != 2) ? identifier_length - 1
                    :
                    cdata_one_character(mt, s->code_data);
                
                /* code_data holds the code first, then the data */
                wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                        s->code_data, using_code_len);
                wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
                wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                        s->code_data + using_code_len,
                                        strlen(s->code_data + using_code_len));
                /* reset conversion state before the closing tag */
                marc_iconv_reset(mt, wr);
                wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
                wrbuf_puts(wr, "\n");
            }
            wrbuf_printf(wr, "  </datafield>\n");
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr, "  <controlfield tag=\"");
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
                                    strlen(n->u.controlfield.tag));
            wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
            wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
                                    n->u.controlfield.data,
                                    strlen(n->u.controlfield.data));

            /* reset conversion state before the closing tag */
            marc_iconv_reset(mt, wr);
            wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
            wrbuf_puts(wr, "\n");
            break;
        case YAZ_MARC_COMMENT:
            /* NOTE(review): the comment text is written verbatim; a
               comment containing "--" would presumably yield ill-formed
               XML - confirm what callers can put here */
            wrbuf_printf(wr, "<!-- ");
            wrbuf_puts(wr, n->u.comment);
            wrbuf_printf(wr, " -->\n");
            break;
        case YAZ_MARC_LEADER:
            wrbuf_printf(wr, "  <leader>");
            wrbuf_iconv_write_cdata(wr, 
                                    0 /* no charset conversion for leader */,
                                    n->u.leader, strlen(n->u.leader));
            wrbuf_printf(wr, "</leader>\n");
        }
    }
    wrbuf_puts(wr, "</record>\n");
    return 0;
}
626
627 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
628                                      const char *ns, 
629                                      const char *format,
630                                      const char *type)
631 {
632     if (mt->write_using_libxml2)
633     {
634 #if YAZ_HAVE_XML2
635         int ret;
636         xmlNode *root_ptr;
637
638         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
639         if (ret == 0)
640         {
641             xmlChar *buf_out;
642             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
643             int len_out;
644
645             xmlDocSetRootElement(doc, root_ptr);
646             xmlDocDumpMemory(doc, &buf_out, &len_out);
647
648             wrbuf_write(wr, (const char *) buf_out, len_out);
649             wrbuf_puts(wr, "");
650             xmlFree(buf_out);
651             xmlFreeDoc(doc);
652         }
653         return ret;
654 #else
655         return -1;
656 #endif
657     }
658     else
659         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
660 }
661
662 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
663 {
664     if (!mt->leader_spec)
665         yaz_marc_modify_leader(mt, 9, "a");
666     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
667                                      0, 0);
668 }
669
670 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
671                                const char *format,
672                                const char *type)
673 {
674     return yaz_marc_write_marcxml_ns(mt, wr,
675                                      "http://www.bs.dk/standards/MarcXchange",
676                                      0, 0);
677 }
678
679
/** \brief builds a libxml2 tree for the record
 *  \param mt handle
 *  \param root_ptr receives the new "record" element
 *  \param ns XMLNS for the elements
 *  \param format record format, added as attribute when non-zero
 *  \param type record type, added as attribute when non-zero
 *  \retval 0 tree built and stored in *root_ptr
 *  \retval -1 no leader node present, leader offset 11 is not a digit, or
 *             YAZ was built without libxml2
 */
int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
                       const char *ns, 
                       const char *format,
                       const char *type)
{
#if YAZ_HAVE_XML2
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;
    xmlNode *record_ptr;
    xmlNsPtr ns_record;
    WRBUF wr_cdata = 0;

    /* locate the first leader node */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }
    
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    /* scratch buffer holding character-set converted text */
    wr_cdata = wrbuf_alloc();

    record_ptr = xmlNewNode(0, BAD_CAST "record");
    *root_ptr = record_ptr;

    ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
    xmlSetNs(record_ptr, ns_record);

    if (format)
        xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        xmlNode *ptr;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
            /* the tag is handed to libxml2 as is (no iconv conversion) */
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
            if (n->u.datafield.indicator)
            {
                /* one attribute ind1, ind2, ... per indicator character */
                int i;
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    char ind_str[6];
                    char ind_val[2];

                    sprintf(ind_str, "ind%d", i+1);
                    ind_val[0] = n->u.datafield.indicator[i];
                    ind_val[1] = '\0';
                    xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
                }
            }
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                xmlNode *ptr_subfield;
                /* if identifier length is 2 (most MARCs),
                   the code is a single character .. However we've
                   seen multibyte codes, so see how big it really is */
                size_t using_code_len = 
                    (identifier_length != 2) ? identifier_length - 1
                    :
                    cdata_one_character(mt, s->code_data);

                /* element content: the data following the code */
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
                                 s->code_data + using_code_len);
                marc_iconv_reset(mt, wr_cdata);
                ptr_subfield = xmlNewTextChild(
                    ptr, ns_record, 
                    BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));

                /* code attribute: the leading using_code_len bytes */
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
                                  s->code_data, using_code_len);
                xmlNewProp(ptr_subfield, BAD_CAST "code",
                           BAD_CAST wrbuf_cstr(wr_cdata));
            }
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_rewind(wr_cdata);
            wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr_cdata);
            
            ptr = xmlNewTextChild(record_ptr, ns_record,
                                  BAD_CAST "controlfield",
                                  BAD_CAST wrbuf_cstr(wr_cdata));
            
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
            break;
        case YAZ_MARC_COMMENT:
            ptr = xmlNewComment(BAD_CAST n->u.comment);
            xmlAddChild(record_ptr, ptr);
            break;
        case YAZ_MARC_LEADER:
            /* leader is added without character set conversion */
            xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
                            BAD_CAST n->u.leader);
            break;
        }
    }
    wrbuf_destroy(wr_cdata);
    return 0;
#else
    return -1;
#endif
}
794
/** \brief writes the record in ISO2709 format
 *  \param mt handle
 *  \param wr WRBUF output
 *  \retval 0 record written
 *  \retval -1 no leader node present, or one of leader offsets 10, 11, 20,
 *             21, 22 is not a digit
 *
 * Works in two passes over the nodes: the first converts every field into
 * a scratch buffer only to learn its length and builds the directory; the
 * second writes the field data itself after the leader and directory.
 * Comment nodes are not part of the output.
 */
int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int indicator_length;
    int identifier_length;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int data_offset = 0;
    const char *leader = 0;
    WRBUF wr_dir, wr_head, wr_data_tmp;
    int base_address;
    
    /* note: no break here, so with several leader nodes the last one is
       used (the other writers in this file use the first) */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;
    
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+10, 1, &indicator_length))
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;
    if (!atoi_n_check(leader+20, 1, &length_data_entry))
        return -1;
    if (!atoi_n_check(leader+21, 1, &length_starting))
        return -1;
    if (!atoi_n_check(leader+22, 1, &length_implementation))
        return -1;

    /* pass 1: build the directory (tag, field length, start offset) */
    wr_data_tmp = wrbuf_alloc();
    wr_dir = wrbuf_alloc();
    for (n = mt->nodes; n; n = n->next)
    {
        int data_length = 0;
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
            data_length += indicator_length;
            wrbuf_rewind(wr_data_tmp);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* write dummy IDFS + content */
                wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
                wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr_data_tmp);
            }
            /* write dummy FS (makes MARC-8 to become ASCII) */
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);

            wrbuf_rewind(wr_data_tmp);
            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
                             n->u.controlfield.data);
            marc_iconv_reset(mt, wr_data_tmp);
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
        /* only fields contribute a directory entry; widths come from the
           leader */
        if (data_length)
        {
            wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
            wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
            data_offset += data_length;
        }
    }
    /* mark end of directory */
    wrbuf_putc(wr_dir, ISO2709_FS);

    /* base address of data (comes after leader+directory) */
    base_address = 24 + wrbuf_len(wr_dir);

    wr_head = wrbuf_alloc();

    /* write record length */
    wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+5, 7);
    /* base address of data */
    wrbuf_printf(wr_head, "%05d", base_address);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+17, 7);
    
    /* emit the 24 byte leader followed by the directory */
    wrbuf_write(wr, wrbuf_buf(wr_head), 24);
    wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
    wrbuf_destroy(wr_head);
    wrbuf_destroy(wr_dir);
    wrbuf_destroy(wr_data_tmp);

    /* pass 2: the field data, with the real separators */
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr, "%.*s", indicator_length,
                         n->u.datafield.indicator);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                wrbuf_putc(wr, ISO2709_IDFS);
                wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
    }
    /* record terminator */
    wrbuf_printf(wr, "%c", ISO2709_RS);
    return 0;
}
927
928
929 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
930 {
931     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
932     if (r <= 0)
933         return r;
934     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
935     if (s != 0)
936         return -1; /* error */
937     return r; /* OK, return length > 0 */
938 }
939
940 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
941                          const char **result, size_t *rsize)
942 {
943     int r;
944
945     wrbuf_rewind(mt->m_wr);
946     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
947     if (result)
948         *result = wrbuf_cstr(mt->m_wr);
949     if (rsize)
950         *rsize = wrbuf_len(mt->m_wr);
951     return r;
952 }
953
954 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
955 {
956     if (mt)
957         mt->xml = xmlmode;
958 }
959
960 void yaz_marc_debug(yaz_marc_t mt, int level)
961 {
962     if (mt)
963         mt->debug = level;
964 }
965
966 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
967 {
968     mt->iconv_cd = cd;
969 }
970
971 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
972 {
973     struct yaz_marc_node *n;
974     char *leader = 0;
975     for (n = mt->nodes; n; n = n->next)
976         if (n->which == YAZ_MARC_LEADER)
977         {
978             leader = n->u.leader;
979             memcpy(leader+off, str, strlen(str));
980             break;
981         }
982 }
983
984 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
985 {
986     xfree(mt->leader_spec);
987     mt->leader_spec = 0;
988     if (leader_spec)
989     {
990         char dummy_leader[24];
991         if (marc_exec_leader(leader_spec, dummy_leader, 24))
992             return -1;
993         mt->leader_spec = xstrdup(leader_spec);
994     }
995     return 0;
996 }
997
/** \brief applies a leader modification spec to a leader buffer
 *  \param leader_spec comma separated list of pos=value items, or 0
 *  \param leader buffer to modify
 *  \param size number of bytes available in leader
 *  \retval 0 all items applied (also for a null spec)
 *  \retval -1 syntax error, position out of range, or value does not fit;
 *             items before the failing one have already been applied
 *
 * A value is either a quoted string ('abc'), copied to the position, or a
 * decimal number, stored as a single byte with that value. A value is read
 * up to the next comma and is limited to 20 characters.
 */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *spec = leader_spec;

    if (!spec)
        return 0;

    for (;;)
    {
        char value[21];
        int offset;
        int consumed = 0;
        int fields = sscanf(spec, "%d=%20[^,]%n", &offset, value, &consumed);

        if (fields < 2 || consumed < 3)
            return -1;
        if (offset < 0 || offset >= size)
            return -1;

        if (value[0] == '\'')
        {
            /* quoted string: copy what is between the quotes */
            const char *closing = strchr(value + 1, '\'');
            size_t len;

            if (!closing)
                return -1;
            len = closing - value - 1;
            if (len + offset > size)
                return -1;
            memcpy(leader + offset, value + 1, len);
        }
        else if (value[0] >= '0' && value[0] <= '9')
        {
            /* number: store it as one byte */
            leader[offset] = atoi(value);
        }
        else
            return -1;

        spec += consumed;
        if (*spec != ',')
            return 0;
        spec++;
    }
}
1040
1041 int yaz_marc_decode_formatstr(const char *arg)
1042 {
1043     int mode = -1; 
1044     if (!strcmp(arg, "marc"))
1045         mode = YAZ_MARC_ISO2709;
1046     if (!strcmp(arg, "marcxml"))
1047         mode = YAZ_MARC_MARCXML;
1048     if (!strcmp(arg, "marcxchange"))
1049         mode = YAZ_MARC_XCHANGE;
1050     if (!strcmp(arg, "line"))
1051         mode = YAZ_MARC_LINE;
1052     return mode;
1053 }
1054
1055 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1056 {
1057     mt->write_using_libxml2 = enable;
1058 }
1059
1060 /*
1061  * Local variables:
1062  * c-basic-offset: 4
1063  * indent-tabs-mode: nil
1064  * End:
1065  * vim: shiftwidth=4 tabstop=8 expandtab
1066  */
1067