Work in progress: refactor xml_write to handle two formats
[yaz-moved-to-github.git] / src / marcdisp.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marcdisp.c
8  * \brief Implements MARC conversion utilities
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
29
30 #if YAZ_HAVE_XML2
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
33 #endif
34
/** \brief tracks whether a collection wrapper is wanted and already opened */
enum yaz_collection_state {
    no_collection = 0,  /* records are written without a collection wrapper */
    collection_first,   /* wrapper enabled; opening tag not written yet */
    collection_second   /* wrapper enabled; opening tag already written */
};
40    
/** \brief node types for yaz_marc_node

    The value stored in yaz_marc_node.which selects the active member of
    the union yaz_marc_node.u.
*/
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,     /* u.datafield is in use */
    YAZ_MARC_CONTROLFIELD,  /* u.controlfield is in use */
    YAZ_MARC_COMMENT,       /* u.comment is in use */
    YAZ_MARC_LEADER         /* u.leader is in use */
};
49
/** \brief represents a data field */
struct yaz_marc_datafield {
    /* field tag */
    char *tag;
    /* indicator characters; writers walk this up to the terminating NUL */
    char *indicator;
    /* head of the subfield list; 0 while the field has no subfields */
    struct yaz_marc_subfield *subfields;
};
56
/** \brief represents a control field (a tag plus its data, no subfields) */
struct yaz_marc_controlfield {
    /* field tag */
    char *tag;
    /* field content, NUL-terminated */
    char *data;
};
62
/** \brief a comment node

    NOTE(review): struct yaz_marc_node keeps its comment as a plain char
    pointer, so this type looks unused in the visible part of the file.
*/
struct yaz_marc_comment {
    /* comment text, NUL-terminated */
    char *comment;
};
67
68 /** \brief MARC node */
69 struct yaz_marc_node {
70     enum YAZ_MARC_NODE_TYPE which;
71     union {
72         struct yaz_marc_datafield datafield;
73         struct yaz_marc_controlfield controlfield;
74         char *comment;
75         char *leader;
76     } u;
77     struct yaz_marc_node *next;
78 };
79
/** \brief represents a subfield */
struct yaz_marc_subfield {
    /* subfield code immediately followed by the subfield data */
    char *code_data;
    /* following subfield of the same data field, or 0 */
    struct yaz_marc_subfield *next;
};
85
86 /** \brief the internals of a yaz_marc_t handle */
87 struct yaz_marc_t_ {
88     WRBUF m_wr;
89     NMEM nmem;
90     int xml;
91     int debug;
92     int write_using_libxml2;
93     enum yaz_collection_state enable_collection;
94     yaz_iconv_t iconv_cd;
95     char subfield_str[8];
96     char endline_str[8];
97     char *leader_spec;
98     struct yaz_marc_node *nodes;
99     struct yaz_marc_node **nodes_pp;
100     struct yaz_marc_subfield **subfield_pp;
101 };
102
103 yaz_marc_t yaz_marc_create(void)
104 {
105     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
106     mt->xml = YAZ_MARC_LINE;
107     mt->debug = 0;
108     mt->write_using_libxml2 = 0;
109     mt->enable_collection = no_collection;
110     mt->m_wr = wrbuf_alloc();
111     mt->iconv_cd = 0;
112     mt->leader_spec = 0;
113     strcpy(mt->subfield_str, " $");
114     strcpy(mt->endline_str, "\n");
115
116     mt->nmem = nmem_create();
117     yaz_marc_reset(mt);
118     return mt;
119 }
120
121 void yaz_marc_destroy(yaz_marc_t mt)
122 {
123     if (!mt)
124         return ;
125     nmem_destroy(mt->nmem);
126     wrbuf_destroy(mt->m_wr);
127     xfree(mt->leader_spec);
128     xfree(mt);
129 }
130
131 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
132 {
133     return mt->nmem;
134 }
135
136 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 {
138     wrbuf_iconv_reset(wr, mt->iconv_cd);
139 }
140
141 static int marc_exec_leader(const char *leader_spec, char *leader,
142                             size_t size);
143
144
145 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
146 {
147     struct yaz_marc_node *n = (struct yaz_marc_node *)
148         nmem_malloc(mt->nmem, sizeof(*n));
149     n->next = 0;
150     *mt->nodes_pp = n;
151     mt->nodes_pp = &n->next;
152     return n;
153 }
154
155 #if YAZ_HAVE_XML2
156 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
157                                    const xmlNode *ptr_data)
158 {
159     struct yaz_marc_node *n = yaz_marc_add_node(mt);
160     n->which = YAZ_MARC_CONTROLFIELD;
161     n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
162     n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
163 }
164 #endif
165
166
167 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
168 {
169     struct yaz_marc_node *n = yaz_marc_add_node(mt);
170     n->which = YAZ_MARC_COMMENT;
171     n->u.comment = nmem_strdup(mt->nmem, comment);
172 }
173
174 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
175 {
176     va_list ap;
177     char buf[200];
178
179     va_start(ap, fmt);
180     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
181     yaz_marc_add_comment(mt, buf);
182     va_end (ap);
183 }
184
185 int yaz_marc_get_debug(yaz_marc_t mt)
186 {
187     return mt->debug;
188 }
189
190 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
191 {
192     struct yaz_marc_node *n = yaz_marc_add_node(mt);
193     n->which = YAZ_MARC_LEADER;
194     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
195     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
196 }
197
198 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
199                                const char *data, size_t data_len)
200 {
201     struct yaz_marc_node *n = yaz_marc_add_node(mt);
202     n->which = YAZ_MARC_CONTROLFIELD;
203     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
204     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
205     if (mt->debug)
206     {
207         size_t i;
208         char msg[80];
209
210         sprintf(msg, "controlfield:");
211         for (i = 0; i < 16 && i < data_len; i++)
212             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
213         if (i < data_len)
214             sprintf(msg + strlen(msg), " ..");
215         yaz_marc_add_comment(mt, msg);
216     }
217 }
218
219 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
220                             const char *indicator, size_t indicator_len)
221 {
222     struct yaz_marc_node *n = yaz_marc_add_node(mt);
223     n->which = YAZ_MARC_DATAFIELD;
224     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
225     n->u.datafield.indicator =
226         nmem_strdupn(mt->nmem, indicator, indicator_len);
227     n->u.datafield.subfields = 0;
228
229     /* make subfield_pp the current (last one) */
230     mt->subfield_pp = &n->u.datafield.subfields;
231 }
232
233 #if YAZ_HAVE_XML2
234 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
235                                 const char *indicator, size_t indicator_len)
236 {
237     struct yaz_marc_node *n = yaz_marc_add_node(mt);
238     n->which = YAZ_MARC_DATAFIELD;
239     n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
240     n->u.datafield.indicator =
241         nmem_strdupn(mt->nmem, indicator, indicator_len);
242     n->u.datafield.subfields = 0;
243
244     /* make subfield_pp the current (last one) */
245     mt->subfield_pp = &n->u.datafield.subfields;
246 }
247 #endif
248
249 void yaz_marc_add_subfield(yaz_marc_t mt,
250                            const char *code_data, size_t code_data_len)
251 {
252     if (mt->debug)
253     {
254         size_t i;
255         char msg[80];
256
257         sprintf(msg, "subfield:");
258         for (i = 0; i < 16 && i < code_data_len; i++)
259             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
260         if (i < code_data_len)
261             sprintf(msg + strlen(msg), " ..");
262         yaz_marc_add_comment(mt, msg);
263     }
264
265     if (mt->subfield_pp)
266     {
267         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
268             nmem_malloc(mt->nmem, sizeof(*n));
269         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
270         n->next = 0;
271         /* mark subfield_pp to point to this one, so we append here next */
272         *mt->subfield_pp = n;
273         mt->subfield_pp = &n->next;
274     }
275 }
276
277 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
278                          int *indicator_length,
279                          int *identifier_length,
280                          int *base_address,
281                          int *length_data_entry,
282                          int *length_starting,
283                          int *length_implementation)
284 {
285     char leader[24];
286
287     memcpy(leader, leader_c, 24);
288
289     if (!atoi_n_check(leader+10, 1, indicator_length))
290     {
291         yaz_marc_cprintf(mt, 
292                          "Indicator length at offset 10 should hold a digit."
293                          " Assuming 2");
294         leader[10] = '2';
295         *indicator_length = 2;
296     }
297     if (!atoi_n_check(leader+11, 1, identifier_length))
298     {
299         yaz_marc_cprintf(mt, 
300                          "Identifier length at offset 11 should hold a digit."
301                          " Assuming 2");
302         leader[11] = '2';
303         *identifier_length = 2;
304     }
305     if (!atoi_n_check(leader+12, 5, base_address))
306     {
307         yaz_marc_cprintf(mt, 
308                          "Base address at offsets 12..16 should hold a number."
309                          " Assuming 0");
310         *base_address = 0;
311     }
312     if (!atoi_n_check(leader+20, 1, length_data_entry))
313     {
314         yaz_marc_cprintf(mt, 
315                          "Length data entry at offset 20 should hold a digit."
316                          " Assuming 4");
317         *length_data_entry = 4;
318         leader[20] = '4';
319     }
320     if (!atoi_n_check(leader+21, 1, length_starting))
321     {
322         yaz_marc_cprintf(mt,
323                          "Length starting at offset 21 should hold a digit."
324                          " Assuming 5");
325         *length_starting = 5;
326         leader[21] = '5';
327     }
328     if (!atoi_n_check(leader+22, 1, length_implementation))
329     {
330         yaz_marc_cprintf(mt, 
331                          "Length implementation at offset 22 should hold a digit."
332                          " Assuming 0");
333         *length_implementation = 0;
334         leader[22] = '0';
335     }
336
337     if (mt->debug)
338     {
339         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
340         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
341         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
342         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
343         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
344         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
345     }
346     yaz_marc_add_leader(mt, leader, 24);
347 }
348
349 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
350 {
351     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
352     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
353 }
354
355 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
356 {
357     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
358     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
359 }
360
361 /* try to guess how many bytes the identifier really is! */
362 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
363 {
364     if (mt->iconv_cd)
365     {
366         size_t i;
367         for (i = 1; i<5; i++)
368         {
369             char outbuf[12];
370             size_t outbytesleft = sizeof(outbuf);
371             char *outp = outbuf;
372             const char *inp = buf;
373
374             size_t inbytesleft = i;
375             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
376                                  &outp, &outbytesleft);
377             if (r != (size_t) (-1))
378                 return i;  /* got a complete sequence */
379         }
380         return 1; /* giving up */
381     }
382     return 1; /* we don't know */
383 }
384                               
385 void yaz_marc_reset(yaz_marc_t mt)
386 {
387     nmem_reset(mt->nmem);
388     mt->nodes = 0;
389     mt->nodes_pp = &mt->nodes;
390     mt->subfield_pp = 0;
391 }
392
393 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
394 {
395     struct yaz_marc_node *n;
396     int identifier_length;
397     const char *leader = 0;
398
399     for (n = mt->nodes; n; n = n->next)
400         if (n->which == YAZ_MARC_LEADER)
401         {
402             leader = n->u.leader;
403             break;
404         }
405     
406     if (!leader)
407         return -1;
408     if (!atoi_n_check(leader+11, 1, &identifier_length))
409         return -1;
410
411     for (n = mt->nodes; n; n = n->next)
412     {
413         switch(n->which)
414         {
415         case YAZ_MARC_COMMENT:
416             wrbuf_iconv_write(wr, mt->iconv_cd, 
417                               n->u.comment, strlen(n->u.comment));
418             wrbuf_puts(wr, "\n");
419             break;
420         default:
421             break;
422         }
423     }
424     return 0;
425 }
426
427 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
428                                int identifier_length)
429 {
430     /* if identifier length is 2 (most MARCs) or less (probably an error),
431        the code is a single character .. However we've
432        seen multibyte codes, so see how big it really is */
433     if (identifier_length > 2)
434         return identifier_length - 1;
435     else
436         return cdata_one_character(mt, data);
437 }
438
439 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
440 {
441     struct yaz_marc_node *n;
442     int identifier_length;
443     const char *leader = 0;
444
445     for (n = mt->nodes; n; n = n->next)
446         if (n->which == YAZ_MARC_LEADER)
447         {
448             leader = n->u.leader;
449             break;
450         }
451     
452     if (!leader)
453         return -1;
454     if (!atoi_n_check(leader+11, 1, &identifier_length))
455         return -1;
456
457     for (n = mt->nodes; n; n = n->next)
458     {
459         struct yaz_marc_subfield *s;
460         switch(n->which)
461         {
462         case YAZ_MARC_DATAFIELD:
463             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
464                          n->u.datafield.indicator);
465             for (s = n->u.datafield.subfields; s; s = s->next)
466             {
467                 size_t using_code_len = get_subfield_len(mt, s->code_data,
468                                                          identifier_length);
469                 
470                 wrbuf_puts (wr, mt->subfield_str); 
471                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
472                                   using_code_len);
473                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
474                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
475                                  s->code_data + using_code_len);
476                 marc_iconv_reset(mt, wr);
477             }
478             wrbuf_puts (wr, mt->endline_str);
479             break;
480         case YAZ_MARC_CONTROLFIELD:
481             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
482             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
483             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
484             marc_iconv_reset(mt, wr);
485             wrbuf_puts (wr, mt->endline_str);
486             break;
487         case YAZ_MARC_COMMENT:
488             wrbuf_puts(wr, "(");
489             wrbuf_iconv_write(wr, mt->iconv_cd, 
490                               n->u.comment, strlen(n->u.comment));
491             marc_iconv_reset(mt, wr);
492             wrbuf_puts(wr, ")\n");
493             break;
494         case YAZ_MARC_LEADER:
495             wrbuf_printf(wr, "%s\n", n->u.leader);
496         }
497     }
498     wrbuf_puts(wr, "\n");
499     return 0;
500 }
501
502 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
503 {
504     if (mt->enable_collection == collection_second)
505     {
506         switch(mt->xml)
507         {
508         case YAZ_MARC_MARCXML:
509             wrbuf_printf(wr, "</collection>\n");
510             break;
511         case YAZ_MARC_XCHANGE:
512             wrbuf_printf(wr, "</collection>\n");
513             break;
514         }
515     }
516     return 0;
517 }
518
519 void yaz_marc_enable_collection(yaz_marc_t mt)
520 {
521     mt->enable_collection = collection_first;
522 }
523
524 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
525 {
526     switch(mt->xml)
527     {
528     case YAZ_MARC_LINE:
529         return yaz_marc_write_line(mt, wr);
530     case YAZ_MARC_MARCXML:
531         return yaz_marc_write_marcxml(mt, wr);
532     case YAZ_MARC_XCHANGE:
533         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
534     case YAZ_MARC_ISO2709:
535         return yaz_marc_write_iso2709(mt, wr);
536     case YAZ_MARC_CHECK:
537         return yaz_marc_write_check(mt, wr);
538     }
539     return -1;
540 }
541
542 /** \brief common MARC XML/Xchange writer
543     \param mt handle
544     \param wr WRBUF output
545     \param ns XMLNS for the elements
546     \param format record format (e.g. "MARC21")
547     \param type record type (e.g. "Bibliographic")
548 */
549 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
550                                       const char *ns, 
551                                       const char *format,
552                                       const char *type)
553 {
554     struct yaz_marc_node *n;
555     int identifier_length;
556     const char *leader = 0;
557
558     for (n = mt->nodes; n; n = n->next)
559         if (n->which == YAZ_MARC_LEADER)
560         {
561             leader = n->u.leader;
562             break;
563         }
564     
565     if (!leader)
566         return -1;
567     if (!atoi_n_check(leader+11, 1, &identifier_length))
568         return -1;
569     
570     if (mt->enable_collection != no_collection)
571     {
572         if (mt->enable_collection == collection_first)
573             wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
574         mt->enable_collection = collection_second;
575         wrbuf_printf(wr, "<record");
576     }
577     else
578     {
579         wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
580     }
581     if (format)
582         wrbuf_printf(wr, " format=\"%.80s\"", format);
583     if (type)
584         wrbuf_printf(wr, " type=\"%.80s\"", type);
585     wrbuf_printf(wr, ">\n");
586     for (n = mt->nodes; n; n = n->next)
587     {
588         struct yaz_marc_subfield *s;
589
590         switch(n->which)
591         {
592         case YAZ_MARC_DATAFIELD:
593             wrbuf_printf(wr, "  <datafield tag=\"");
594             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
595                                     strlen(n->u.datafield.tag));
596             wrbuf_printf(wr, "\"");
597             if (n->u.datafield.indicator)
598             {
599                 int i;
600                 for (i = 0; n->u.datafield.indicator[i]; i++)
601                 {
602                     wrbuf_printf(wr, " ind%d=\"", i+1);
603                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
604                                           n->u.datafield.indicator+i, 1);
605                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
606                 }
607             }
608             wrbuf_printf(wr, ">\n");
609             for (s = n->u.datafield.subfields; s; s = s->next)
610             {
611                 size_t using_code_len = get_subfield_len(mt, s->code_data,
612                                                          identifier_length);
613                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
614                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
615                                         s->code_data, using_code_len);
616                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
617                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
618                                         s->code_data + using_code_len,
619                                         strlen(s->code_data + using_code_len));
620                 marc_iconv_reset(mt, wr);
621                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
622                 wrbuf_puts(wr, "\n");
623             }
624             wrbuf_printf(wr, "  </datafield>\n");
625             break;
626         case YAZ_MARC_CONTROLFIELD:
627             wrbuf_printf(wr, "  <controlfield tag=\"");
628             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
629                                     strlen(n->u.controlfield.tag));
630             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
631             wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
632                                     n->u.controlfield.data,
633                                     strlen(n->u.controlfield.data));
634
635             marc_iconv_reset(mt, wr);
636             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
637             wrbuf_puts(wr, "\n");
638             break;
639         case YAZ_MARC_COMMENT:
640             wrbuf_printf(wr, "<!-- ");
641             wrbuf_puts(wr, n->u.comment);
642             wrbuf_printf(wr, " -->\n");
643             break;
644         case YAZ_MARC_LEADER:
645             wrbuf_printf(wr, "  <leader>");
646             wrbuf_iconv_write_cdata(wr, 
647                                     0 /* no charset conversion for leader */,
648                                     n->u.leader, strlen(n->u.leader));
649             wrbuf_printf(wr, "</leader>\n");
650         }
651     }
652     wrbuf_puts(wr, "</record>\n");
653     return 0;
654 }
655
656 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
657                                      const char *ns, 
658                                      const char *format,
659                                      const char *type)
660 {
661     if (mt->write_using_libxml2)
662     {
663 #if YAZ_HAVE_XML2
664         int ret;
665         xmlNode *root_ptr;
666
667         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
668         if (ret == 0)
669         {
670             xmlChar *buf_out;
671             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
672             int len_out;
673
674             xmlDocSetRootElement(doc, root_ptr);
675             xmlDocDumpMemory(doc, &buf_out, &len_out);
676
677             wrbuf_write(wr, (const char *) buf_out, len_out);
678             wrbuf_puts(wr, "");
679             xmlFree(buf_out);
680             xmlFreeDoc(doc);
681         }
682         return ret;
683 #else
684         return -1;
685 #endif
686     }
687     else
688         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
689 }
690
691 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
692 {
693     /* set leader 09 to 'a' for UNICODE */
694     /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
695     if (!mt->leader_spec)
696         yaz_marc_modify_leader(mt, 9, "a");
697     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
698                                      0, 0);
699 }
700
701 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
702                                const char *format,
703                                const char *type)
704 {
705     return yaz_marc_write_marcxml_ns(mt, wr,
706                                      "info:lc/xmlns/marcxchange-v1",
707                                      0, 0);
708 }
709
710 #if YAZ_HAVE_XML2
711
712 void addMarcDatafield(xmlNode *record_ptr, xmlNameSpace *ns_record, const char* datafield, int turbo, WRBUF wr_cdata) 
713 {
714     if (!turbo) {
715         ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
716         xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
717     }
718     else {
719         char *field = "datXXX";
720         sprintf(field +3,"%s",  n->u.datafield.tag);
721         ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "dat", 0);
722         xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);        
723     }
724     if (n->u.datafield.indicator)
725     {
726         int i;
727         for (i = 0; n->u.datafield.indicator[i]; i++)
728         {
729             char ind_str[6];
730             char ind_val[2];
731             
732             sprintf(ind_str, "ind%d", i+1);
733             ind_val[0] = n->u.datafield.indicator[i];
734             ind_val[1] = '\0';
735             xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
736         }
737     }
738     for (s = n->u.datafield.subfields; s; s = s->next)
739     {
740         xmlNode *ptr_subfield;
741         size_t using_code_len = get_subfield_len(mt, s->code_data,
742                                                  identifier_length);
743         wrbuf_rewind(wr_cdata);
744         wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
745                          s->code_data + using_code_len);
746         marc_iconv_reset(mt, wr_cdata);
747         ptr_subfield = xmlNewTextChild(
748             ptr, ns_record, 
749             BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
750         
751         wrbuf_rewind(wr_cdata);
752         wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
753                           s->code_data, using_code_len);
754         xmlNewProp(ptr_subfield, BAD_CAST "code",
755                    BAD_CAST wrbuf_cstr(wr_cdata));
756     }
757 }
758
759 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
760                        const char *ns, 
761                        const char *format,
762                        const char *type)
763 {
764     struct yaz_marc_node *n;
765     int identifier_length;
766     const char *leader = 0;
767     xmlNode *record_ptr;
768     xmlNsPtr ns_record;
769     WRBUF wr_cdata = 0;
770
771     for (n = mt->nodes; n; n = n->next)
772         if (n->which == YAZ_MARC_LEADER)
773         {
774             leader = n->u.leader;
775             break;
776         }
777     
778     if (!leader)
779         return -1;
780     if (!atoi_n_check(leader+11, 1, &identifier_length))
781         return -1;
782
783     wr_cdata = wrbuf_alloc();
784
785     record_ptr = xmlNewNode(0, BAD_CAST "record");
786     *root_ptr = record_ptr;
787
788     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
789     xmlSetNs(record_ptr, ns_record);
790
791     if (format)
792         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
793     if (type)
794         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
795     for (n = mt->nodes; n; n = n->next)
796     {
797         struct yaz_marc_subfield *s;
798         xmlNode *ptr;
799
800         switch(n->which)
801         {
802         case YAZ_MARC_DATAFIELD:
803             ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
804             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
805             if (n->u.datafield.indicator)
806             {
807                 int i;
808                 for (i = 0; n->u.datafield.indicator[i]; i++)
809                 {
810                     char ind_str[6];
811                     char ind_val[2];
812
813                     sprintf(ind_str, "ind%d", i+1);
814                     ind_val[0] = n->u.datafield.indicator[i];
815                     ind_val[1] = '\0';
816                     xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
817                 }
818             }
819             for (s = n->u.datafield.subfields; s; s = s->next)
820             {
821                 xmlNode *ptr_subfield;
822                 size_t using_code_len = get_subfield_len(mt, s->code_data,
823                                                          identifier_length);
824                 wrbuf_rewind(wr_cdata);
825                 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
826                                  s->code_data + using_code_len);
827                 marc_iconv_reset(mt, wr_cdata);
828                 ptr_subfield = xmlNewTextChild(
829                     ptr, ns_record, 
830                     BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
831
832                 wrbuf_rewind(wr_cdata);
833                 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
834                                   s->code_data, using_code_len);
835                 xmlNewProp(ptr_subfield, BAD_CAST "code",
836                            BAD_CAST wrbuf_cstr(wr_cdata));
837             }
838             break;
839         case YAZ_MARC_CONTROLFIELD:
840             wrbuf_rewind(wr_cdata);
841             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
842             marc_iconv_reset(mt, wr_cdata);
843             
844             ptr = xmlNewTextChild(record_ptr, ns_record,
845                                   BAD_CAST "controlfield",
846                                   BAD_CAST wrbuf_cstr(wr_cdata));
847             
848             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
849             break;
850         case YAZ_MARC_COMMENT:
851             ptr = xmlNewComment(BAD_CAST n->u.comment);
852             xmlAddChild(record_ptr, ptr);
853             break;
854         case YAZ_MARC_LEADER:
855             xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
856                             BAD_CAST n->u.leader);
857             break;
858         }
859     }
860     wrbuf_destroy(wr_cdata);
861     return 0;
862 }
863
864
865 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
866                        const char *ns, 
867                        const char *format,
868                        const char *type)
869 {
870     struct yaz_marc_node *n;
871     int identifier_length;
872     const char *leader = 0;
873     xmlNode *record_ptr;
874     xmlNsPtr ns_record;
875     WRBUF wr_cdata = 0;
876
877     for (n = mt->nodes; n; n = n->next)
878         if (n->which == YAZ_MARC_LEADER)
879         {
880             leader = n->u.leader;
881             break;
882         }
883     
884     if (!leader)
885         return -1;
886     if (!atoi_n_check(leader+11, 1, &identifier_length))
887         return -1;
888
889     wr_cdata = wrbuf_alloc();
890
891     record_ptr = xmlNewNode(0, BAD_CAST "record");
892     *root_ptr = record_ptr;
893
894     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
895     xmlSetNs(record_ptr, ns_record);
896
897     if (format)
898         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
899     if (type)
900         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
901     for (n = mt->nodes; n; n = n->next)
902     {
903         struct yaz_marc_subfield *s;
904         xmlNode *ptr;
905
906         switch(n->which)
907         {
908         case YAZ_MARC_DATAFIELD:
909
910             addMarcDatafield(record_ptr, ns_record, datafield, turbo, wr_cdata);
911             break;
912         case YAZ_MARC_CONTROLFIELD:
913             wrbuf_rewind(wr_cdata);
914             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
915             marc_iconv_reset(mt, wr_cdata);
916             
917             ptr = xmlNewTextChild(record_ptr, ns_record,
918                                   BAD_CAST "controlfield",
919                                   BAD_CAST wrbuf_cstr(wr_cdata));
920             
921             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
922             break;
923         case YAZ_MARC_COMMENT:
924             ptr = xmlNewComment(BAD_CAST n->u.comment);
925             xmlAddChild(record_ptr, ptr);
926             break;
927         case YAZ_MARC_LEADER:
928             xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
929                             BAD_CAST n->u.leader);
930             break;
931         }
932     }
933     wrbuf_destroy(wr_cdata);
934     return 0;
935 }
936
937
938 #endif
939
940 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
941 {
942     struct yaz_marc_node *n;
943     int indicator_length;
944     int identifier_length;
945     int length_data_entry;
946     int length_starting;
947     int length_implementation;
948     int data_offset = 0;
949     const char *leader = 0;
950     WRBUF wr_dir, wr_head, wr_data_tmp;
951     int base_address;
952     
953     for (n = mt->nodes; n; n = n->next)
954         if (n->which == YAZ_MARC_LEADER)
955             leader = n->u.leader;
956     
957     if (!leader)
958         return -1;
959     if (!atoi_n_check(leader+10, 1, &indicator_length))
960         return -1;
961     if (!atoi_n_check(leader+11, 1, &identifier_length))
962         return -1;
963     if (!atoi_n_check(leader+20, 1, &length_data_entry))
964         return -1;
965     if (!atoi_n_check(leader+21, 1, &length_starting))
966         return -1;
967     if (!atoi_n_check(leader+22, 1, &length_implementation))
968         return -1;
969
970     wr_data_tmp = wrbuf_alloc();
971     wr_dir = wrbuf_alloc();
972     for (n = mt->nodes; n; n = n->next)
973     {
974         int data_length = 0;
975         struct yaz_marc_subfield *s;
976
977         switch(n->which)
978         {
979         case YAZ_MARC_DATAFIELD:
980             wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
981             data_length += indicator_length;
982             wrbuf_rewind(wr_data_tmp);
983             for (s = n->u.datafield.subfields; s; s = s->next)
984             {
985                 /* write dummy IDFS + content */
986                 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
987                 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
988                 marc_iconv_reset(mt, wr_data_tmp);
989             }
990             /* write dummy FS (makes MARC-8 to become ASCII) */
991             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
992             marc_iconv_reset(mt, wr_data_tmp);
993             data_length += wrbuf_len(wr_data_tmp);
994             break;
995         case YAZ_MARC_CONTROLFIELD:
996             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
997
998             wrbuf_rewind(wr_data_tmp);
999             wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
1000                              n->u.controlfield.data);
1001             marc_iconv_reset(mt, wr_data_tmp);
1002             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1003             marc_iconv_reset(mt, wr_data_tmp);
1004             data_length += wrbuf_len(wr_data_tmp);
1005             break;
1006         case YAZ_MARC_COMMENT:
1007             break;
1008         case YAZ_MARC_LEADER:
1009             break;
1010         }
1011         if (data_length)
1012         {
1013             wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1014             wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1015             data_offset += data_length;
1016         }
1017     }
1018     /* mark end of directory */
1019     wrbuf_putc(wr_dir, ISO2709_FS);
1020
1021     /* base address of data (comes after leader+directory) */
1022     base_address = 24 + wrbuf_len(wr_dir);
1023
1024     wr_head = wrbuf_alloc();
1025
1026     /* write record length */
1027     wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1028     /* from "original" leader */
1029     wrbuf_write(wr_head, leader+5, 7);
1030     /* base address of data */
1031     wrbuf_printf(wr_head, "%05d", base_address);
1032     /* from "original" leader */
1033     wrbuf_write(wr_head, leader+17, 7);
1034     
1035     wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1036     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1037     wrbuf_destroy(wr_head);
1038     wrbuf_destroy(wr_dir);
1039     wrbuf_destroy(wr_data_tmp);
1040
1041     for (n = mt->nodes; n; n = n->next)
1042     {
1043         struct yaz_marc_subfield *s;
1044
1045         switch(n->which)
1046         {
1047         case YAZ_MARC_DATAFIELD:
1048             wrbuf_printf(wr, "%.*s", indicator_length,
1049                          n->u.datafield.indicator);
1050             for (s = n->u.datafield.subfields; s; s = s->next)
1051             {
1052                 wrbuf_putc(wr, ISO2709_IDFS);
1053                 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1054                 marc_iconv_reset(mt, wr);
1055             }
1056             wrbuf_putc(wr, ISO2709_FS);
1057             break;
1058         case YAZ_MARC_CONTROLFIELD:
1059             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1060             marc_iconv_reset(mt, wr);
1061             wrbuf_putc(wr, ISO2709_FS);
1062             break;
1063         case YAZ_MARC_COMMENT:
1064             break;
1065         case YAZ_MARC_LEADER:
1066             break;
1067         }
1068     }
1069     wrbuf_printf(wr, "%c", ISO2709_RS);
1070     return 0;
1071 }
1072
1073
1074 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1075 {
1076     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1077     if (r <= 0)
1078         return r;
1079     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1080     if (s != 0)
1081         return -1; /* error */
1082     return r; /* OK, return length > 0 */
1083 }
1084
1085 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1086                          const char **result, size_t *rsize)
1087 {
1088     int r;
1089
1090     wrbuf_rewind(mt->m_wr);
1091     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1092     if (result)
1093         *result = wrbuf_cstr(mt->m_wr);
1094     if (rsize)
1095         *rsize = wrbuf_len(mt->m_wr);
1096     return r;
1097 }
1098
1099 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1100 {
1101     if (mt)
1102         mt->xml = xmlmode;
1103 }
1104
1105 void yaz_marc_debug(yaz_marc_t mt, int level)
1106 {
1107     if (mt)
1108         mt->debug = level;
1109 }
1110
1111 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1112 {
1113     mt->iconv_cd = cd;
1114 }
1115
1116 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1117 {
1118     return mt->iconv_cd;
1119 }
1120
1121 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1122 {
1123     struct yaz_marc_node *n;
1124     char *leader = 0;
1125     for (n = mt->nodes; n; n = n->next)
1126         if (n->which == YAZ_MARC_LEADER)
1127         {
1128             leader = n->u.leader;
1129             memcpy(leader+off, str, strlen(str));
1130             break;
1131         }
1132 }
1133
1134 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1135 {
1136     xfree(mt->leader_spec);
1137     mt->leader_spec = 0;
1138     if (leader_spec)
1139     {
1140         char dummy_leader[24];
1141         if (marc_exec_leader(leader_spec, dummy_leader, 24))
1142             return -1;
1143         mt->leader_spec = xstrdup(leader_spec);
1144     }
1145     return 0;
1146 }
1147
/**
 * \brief Applies a leader specification to a leader buffer
 * \param leader_spec comma-separated list of pos=value items; a null
 *                    pointer is accepted and applies nothing
 * \param leader buffer that is modified
 * \param size number of bytes available in leader
 * \retval 0 all items applied
 * \retval -1 an item could not be parsed, its position is outside
 *            [0, size), or its quoted text would not fit
 *
 * A value is either text in single quotes, copied to the position without
 * the quotes, or a value starting with a digit, whose atoi() result is
 * stored as a single byte. At most 20 characters of a value are read.
 * Items preceding a bad one have already been applied when -1 is returned.
 */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *cursor = leader_spec;

    while (cursor)
    {
        char value[21];
        int offset;
        int consumed = 0;
        int matched = sscanf(cursor, "%d=%20[^,]%n", &offset, value,
                             &consumed);

        /* need both position and value; shortest item is 3 characters */
        if (matched < 2 || consumed < 3)
            return -1;
        if (offset < 0 || (size_t) offset >= size)
            return -1;

        if (value[0] == '\'')
        {
            /* quoted text: copy what lies between the quotes */
            const char *closing = strchr(value + 1, '\'');
            size_t text_len;

            if (!closing)
                return -1;
            text_len = closing - value - 1;
            if (text_len + offset > size)
                return -1;
            memcpy(leader + offset, value + 1, text_len);
        }
        else if (value[0] < '0' || value[0] > '9')
            return -1;
        else
        {
            /* numeric value is stored as one byte */
            leader[offset] = atoi(value);
        }

        cursor += consumed;
        if (*cursor != ',')
            return 0;
        /* step over the comma to the next item */
        cursor++;
    }
    return 0;
}
1190
1191 int yaz_marc_decode_formatstr(const char *arg)
1192 {
1193     int mode = -1; 
1194     if (!strcmp(arg, "marc"))
1195         mode = YAZ_MARC_ISO2709;
1196     if (!strcmp(arg, "marcxml"))
1197         mode = YAZ_MARC_MARCXML;
1198     if (!strcmp(arg, "marcxchange"))
1199         mode = YAZ_MARC_XCHANGE;
1200     if (!strcmp(arg, "line"))
1201         mode = YAZ_MARC_LINE;
1202     return mode;
1203 }
1204
1205 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1206 {
1207     mt->write_using_libxml2 = enable;
1208 }
1209
1210 /*
1211  * Local variables:
1212  * c-basic-offset: 4
1213  * c-file-style: "Stroustrup"
1214  * indent-tabs-mode: nil
1215  * End:
1216  * vim: shiftwidth=4 tabstop=8 expandtab
1217  */
1218