Support read/write MARCXML collections.
[yaz-moved-to-github.git] / src / marcdisp.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdisp.c,v 1.52 2007-12-17 20:59:30 adam Exp $
6  */
7
8 /**
9  * \file marcdisp.c
10  * \brief Implements MARC conversion utilities
11  */
12
13 #if HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16
17 #ifdef WIN32
18 #include <windows.h>
19 #endif
20
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
28 #include <yaz/nmem_xml.h>
29 #include <yaz/snprintf.h>
30
31 #if YAZ_HAVE_XML2
32 #include <libxml/parser.h>
33 #include <libxml/tree.h>
34 #endif
35
/** \brief progress of the optional collection wrapper in XML output */
enum yaz_collection_state {
    no_collection = 0,     /* records are written without a wrapper */
    collection_first = 1,  /* wrapper requested, opening tag not written yet */
    collection_second = 2  /* opening tag written; trailer must close it */
};
41    
/** \brief node types for yaz_marc_node

    Selects which member of the union in struct yaz_marc_node is valid.
*/
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,     /* u.datafield is valid */
    YAZ_MARC_CONTROLFIELD,  /* u.controlfield is valid */
    YAZ_MARC_COMMENT,       /* u.comment is valid */
    YAZ_MARC_LEADER         /* u.leader is valid */
};
50
/** \brief represents a data field */
struct yaz_marc_datafield {
    char *tag;                            /* field tag */
    char *indicator;                      /* indicators, one char each */
    struct yaz_marc_subfield *subfields;  /* head of subfield list, 0 if none */
};
57
/** \brief represents a control field (tag plus data, no subfields) */
struct yaz_marc_controlfield {
    char *tag;   /* field tag */
    char *data;  /* field content */
};
63
/** \brief a comment node
 
    NOTE(review): struct yaz_marc_node keeps its comment as a plain char *;
    no use of this struct is visible in this part of the file.
*/
struct yaz_marc_comment {
    char *comment;  /* comment text */
};
68
69 /** \brief MARC node */
70 struct yaz_marc_node {
71     enum YAZ_MARC_NODE_TYPE which;
72     union {
73         struct yaz_marc_datafield datafield;
74         struct yaz_marc_controlfield controlfield;
75         char *comment;
76         char *leader;
77     } u;
78     struct yaz_marc_node *next;
79 };
80
/** \brief represents a subfield of a data field */
struct yaz_marc_subfield {
    char *code_data;                 /* subfield code directly followed by data */
    struct yaz_marc_subfield *next;  /* following subfield, 0 at end of list */
};
86
87 /** \brief the internals of a yaz_marc_t handle */
88 struct yaz_marc_t_ {
89     WRBUF m_wr;
90     NMEM nmem;
91     int xml;
92     int debug;
93     int write_using_libxml2;
94     enum yaz_collection_state enable_collection;
95     yaz_iconv_t iconv_cd;
96     char subfield_str[8];
97     char endline_str[8];
98     char *leader_spec;
99     struct yaz_marc_node *nodes;
100     struct yaz_marc_node **nodes_pp;
101     struct yaz_marc_subfield **subfield_pp;
102 };
103
104 yaz_marc_t yaz_marc_create(void)
105 {
106     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107     mt->xml = YAZ_MARC_LINE;
108     mt->debug = 0;
109     mt->write_using_libxml2 = 0;
110     mt->enable_collection = no_collection;
111     mt->m_wr = wrbuf_alloc();
112     mt->iconv_cd = 0;
113     mt->leader_spec = 0;
114     strcpy(mt->subfield_str, " $");
115     strcpy(mt->endline_str, "\n");
116
117     mt->nmem = nmem_create();
118     yaz_marc_reset(mt);
119     return mt;
120 }
121
122 void yaz_marc_destroy(yaz_marc_t mt)
123 {
124     if (!mt)
125         return ;
126     nmem_destroy(mt->nmem);
127     wrbuf_destroy(mt->m_wr);
128     xfree(mt->leader_spec);
129     xfree(mt);
130 }
131
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
133 {
134     return mt->nmem;
135 }
136
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
138 {
139     wrbuf_iconv_reset(wr, mt->iconv_cd);
140 }
141
/* forward declaration: applied to each leader in yaz_marc_add_leader */
static int marc_exec_leader(const char *leader_spec,
                            char *leader, size_t size);
144
145
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
147 {
148     struct yaz_marc_node *n = (struct yaz_marc_node *)
149         nmem_malloc(mt->nmem, sizeof(*n));
150     n->next = 0;
151     *mt->nodes_pp = n;
152     mt->nodes_pp = &n->next;
153     return n;
154 }
155
156 #if YAZ_HAVE_XML2
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158                                    const xmlNode *ptr_data)
159 {
160     struct yaz_marc_node *n = yaz_marc_add_node(mt);
161     n->which = YAZ_MARC_CONTROLFIELD;
162     n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163     n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
164 }
165 #endif
166
167
168 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
169 {
170     struct yaz_marc_node *n = yaz_marc_add_node(mt);
171     n->which = YAZ_MARC_COMMENT;
172     n->u.comment = nmem_strdup(mt->nmem, comment);
173 }
174
175 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
176 {
177     va_list ap;
178     char buf[200];
179
180     va_start(ap, fmt);
181     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
182     yaz_marc_add_comment(mt, buf);
183     va_end (ap);
184 }
185
186 int yaz_marc_get_debug(yaz_marc_t mt)
187 {
188     return mt->debug;
189 }
190
191 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
192 {
193     struct yaz_marc_node *n = yaz_marc_add_node(mt);
194     n->which = YAZ_MARC_LEADER;
195     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
196     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
197 }
198
199 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
200                                const char *data, size_t data_len)
201 {
202     struct yaz_marc_node *n = yaz_marc_add_node(mt);
203     n->which = YAZ_MARC_CONTROLFIELD;
204     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
205     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
206     if (mt->debug)
207     {
208         size_t i;
209         char msg[80];
210
211         sprintf(msg, "controlfield:");
212         for (i = 0; i < 16 && i < data_len; i++)
213             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
214         if (i < data_len)
215             sprintf(msg + strlen(msg), " ..");
216         yaz_marc_add_comment(mt, msg);
217     }
218 }
219
220 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
221                             const char *indicator, size_t indicator_len)
222 {
223     struct yaz_marc_node *n = yaz_marc_add_node(mt);
224     n->which = YAZ_MARC_DATAFIELD;
225     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
226     n->u.datafield.indicator =
227         nmem_strdupn(mt->nmem, indicator, indicator_len);
228     n->u.datafield.subfields = 0;
229
230     /* make subfield_pp the current (last one) */
231     mt->subfield_pp = &n->u.datafield.subfields;
232 }
233
234 #if YAZ_HAVE_XML2
235 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
236                                 const char *indicator, size_t indicator_len)
237 {
238     struct yaz_marc_node *n = yaz_marc_add_node(mt);
239     n->which = YAZ_MARC_DATAFIELD;
240     n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
241     n->u.datafield.indicator =
242         nmem_strdupn(mt->nmem, indicator, indicator_len);
243     n->u.datafield.subfields = 0;
244
245     /* make subfield_pp the current (last one) */
246     mt->subfield_pp = &n->u.datafield.subfields;
247 }
248 #endif
249
250 void yaz_marc_add_subfield(yaz_marc_t mt,
251                            const char *code_data, size_t code_data_len)
252 {
253     if (mt->debug)
254     {
255         size_t i;
256         char msg[80];
257
258         sprintf(msg, "subfield:");
259         for (i = 0; i < 16 && i < code_data_len; i++)
260             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
261         if (i < code_data_len)
262             sprintf(msg + strlen(msg), " ..");
263         yaz_marc_add_comment(mt, msg);
264     }
265
266     if (mt->subfield_pp)
267     {
268         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
269             nmem_malloc(mt->nmem, sizeof(*n));
270         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
271         n->next = 0;
272         /* mark subfield_pp to point to this one, so we append here next */
273         *mt->subfield_pp = n;
274         mt->subfield_pp = &n->next;
275     }
276 }
277
/** \brief converts a fixed-width digit string, verifying it first
    \param buf start of the digits (need not be NUL-terminated)
    \param size number of characters to examine
    \param val receives the value of atoi_n(buf, size) on success
    \retval 1 all \a size characters are digits; \a val is set
    \retval 0 a non-digit was found; \a val is left untouched
*/
int atoi_n_check(const char *buf, int size, int *val)
{
    /* unsigned char view keeps isdigit well-defined for bytes >= 0x80 */
    const unsigned char *bytes = (const unsigned char *) buf;
    int pos = 0;

    while (pos < size)
    {
        if (!isdigit(bytes[pos]))
            return 0;
        pos++;
    }
    *val = atoi_n(buf, size);
    return 1;
}
287
288 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
289                          int *indicator_length,
290                          int *identifier_length,
291                          int *base_address,
292                          int *length_data_entry,
293                          int *length_starting,
294                          int *length_implementation)
295 {
296     char leader[24];
297
298     memcpy(leader, leader_c, 24);
299
300     if (!atoi_n_check(leader+10, 1, indicator_length))
301     {
302         yaz_marc_cprintf(mt, 
303                          "Indicator length at offset 10 should hold a digit."
304                          " Assuming 2");
305         leader[10] = '2';
306         *indicator_length = 2;
307     }
308     if (!atoi_n_check(leader+11, 1, identifier_length))
309     {
310         yaz_marc_cprintf(mt, 
311                          "Identifier length at offset 11 should hold a digit."
312                          " Assuming 2");
313         leader[11] = '2';
314         *identifier_length = 2;
315     }
316     if (!atoi_n_check(leader+12, 5, base_address))
317     {
318         yaz_marc_cprintf(mt, 
319                          "Base address at offsets 12..16 should hold a number."
320                          " Assuming 0");
321         *base_address = 0;
322     }
323     if (!atoi_n_check(leader+20, 1, length_data_entry))
324     {
325         yaz_marc_cprintf(mt, 
326                          "Length data entry at offset 20 should hold a digit."
327                          " Assuming 4");
328         *length_data_entry = 4;
329         leader[20] = '4';
330     }
331     if (!atoi_n_check(leader+21, 1, length_starting))
332     {
333         yaz_marc_cprintf(mt,
334                          "Length starting at offset 21 should hold a digit."
335                          " Assuming 5");
336         *length_starting = 5;
337         leader[21] = '5';
338     }
339     if (!atoi_n_check(leader+22, 1, length_implementation))
340     {
341         yaz_marc_cprintf(mt, 
342                          "Length implementation at offset 22 should hold a digit."
343                          " Assuming 0");
344         *length_implementation = 0;
345         leader[22] = '0';
346     }
347
348     if (mt->debug)
349     {
350         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
351         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
352         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
353         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
354         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
355         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
356     }
357     yaz_marc_add_leader(mt, leader, 24);
358 }
359
360 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
361 {
362     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
363     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
364 }
365
366 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
367 {
368     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
369     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
370 }
371
372 /* try to guess how many bytes the identifier really is! */
373 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
374 {
375     if (mt->iconv_cd)
376     {
377         size_t i;
378         for (i = 1; i<5; i++)
379         {
380             char outbuf[12];
381             size_t outbytesleft = sizeof(outbuf);
382             char *outp = outbuf;
383             const char *inp = buf;
384
385             size_t inbytesleft = i;
386             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
387                                  &outp, &outbytesleft);
388             if (r != (size_t) (-1))
389                 return i;  /* got a complete sequence */
390         }
391         return 1; /* giving up */
392     }
393     return 1; /* we don't know */
394 }
395                               
396 void yaz_marc_reset(yaz_marc_t mt)
397 {
398     nmem_reset(mt->nmem);
399     mt->nodes = 0;
400     mt->nodes_pp = &mt->nodes;
401     mt->subfield_pp = 0;
402 }
403
404 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
405 {
406     struct yaz_marc_node *n;
407     int identifier_length;
408     const char *leader = 0;
409
410     for (n = mt->nodes; n; n = n->next)
411         if (n->which == YAZ_MARC_LEADER)
412         {
413             leader = n->u.leader;
414             break;
415         }
416     
417     if (!leader)
418         return -1;
419     if (!atoi_n_check(leader+11, 1, &identifier_length))
420         return -1;
421
422     for (n = mt->nodes; n; n = n->next)
423     {
424         switch(n->which)
425         {
426         case YAZ_MARC_COMMENT:
427             wrbuf_iconv_write(wr, mt->iconv_cd, 
428                               n->u.comment, strlen(n->u.comment));
429             wrbuf_puts(wr, ")\n");
430             break;
431         default:
432             break;
433         }
434     }
435     return 0;
436 }
437
438
439 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
440 {
441     struct yaz_marc_node *n;
442     int identifier_length;
443     const char *leader = 0;
444
445     for (n = mt->nodes; n; n = n->next)
446         if (n->which == YAZ_MARC_LEADER)
447         {
448             leader = n->u.leader;
449             break;
450         }
451     
452     if (!leader)
453         return -1;
454     if (!atoi_n_check(leader+11, 1, &identifier_length))
455         return -1;
456
457     for (n = mt->nodes; n; n = n->next)
458     {
459         struct yaz_marc_subfield *s;
460         switch(n->which)
461         {
462         case YAZ_MARC_DATAFIELD:
463             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
464                          n->u.datafield.indicator);
465             for (s = n->u.datafield.subfields; s; s = s->next)
466             {
467                 /* if identifier length is 2 (most MARCs),
468                    the code is a single character .. However we've
469                    seen multibyte codes, so see how big it really is */
470                 size_t using_code_len = 
471                     (identifier_length != 2) ? identifier_length - 1
472                     :
473                     cdata_one_character(mt, s->code_data);
474                 
475                 wrbuf_puts (wr, mt->subfield_str); 
476                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
477                                   using_code_len);
478                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
479                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
480                                  s->code_data + using_code_len);
481                 marc_iconv_reset(mt, wr);
482             }
483             wrbuf_puts (wr, mt->endline_str);
484             break;
485         case YAZ_MARC_CONTROLFIELD:
486             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
487             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
488             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
489             marc_iconv_reset(mt, wr);
490             wrbuf_puts (wr, mt->endline_str);
491             break;
492         case YAZ_MARC_COMMENT:
493             wrbuf_puts(wr, "(");
494             wrbuf_iconv_write(wr, mt->iconv_cd, 
495                               n->u.comment, strlen(n->u.comment));
496             marc_iconv_reset(mt, wr);
497             wrbuf_puts(wr, ")\n");
498             break;
499         case YAZ_MARC_LEADER:
500             wrbuf_printf(wr, "%s\n", n->u.leader);
501         }
502     }
503     wrbuf_puts(wr, "\n");
504     return 0;
505 }
506
507 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
508 {
509     if (mt->enable_collection == collection_second)
510     {
511         switch(mt->xml)
512         {
513         case YAZ_MARC_MARCXML:
514             wrbuf_printf(wr, "</collection>\n");
515             break;
516         case YAZ_MARC_XCHANGE:
517             wrbuf_printf(wr, "</collection>\n");
518             break;
519         }
520     }
521     return 0;
522 }
523
524 void yaz_marc_enable_collection(yaz_marc_t mt)
525 {
526     mt->enable_collection = collection_first;
527 }
528
529 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
530 {
531     switch(mt->xml)
532     {
533     case YAZ_MARC_LINE:
534         return yaz_marc_write_line(mt, wr);
535     case YAZ_MARC_MARCXML:
536         return yaz_marc_write_marcxml(mt, wr);
537     case YAZ_MARC_XCHANGE:
538         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
539     case YAZ_MARC_ISO2709:
540         return yaz_marc_write_iso2709(mt, wr);
541     case YAZ_MARC_CHECK:
542         return yaz_marc_write_check(mt, wr);
543     }
544     return -1;
545 }
546
547 /** \brief common MARC XML/Xchange writer
548     \param mt handle
549     \param wr WRBUF output
550     \param ns XMLNS for the elements
551     \param format record format (e.g. "MARC21")
552     \param type record type (e.g. "Bibliographic")
553 */
554 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
555                                       const char *ns, 
556                                       const char *format,
557                                       const char *type)
558 {
559     struct yaz_marc_node *n;
560     int identifier_length;
561     const char *leader = 0;
562
563     for (n = mt->nodes; n; n = n->next)
564         if (n->which == YAZ_MARC_LEADER)
565         {
566             leader = n->u.leader;
567             break;
568         }
569     
570     if (!leader)
571         return -1;
572     if (!atoi_n_check(leader+11, 1, &identifier_length))
573         return -1;
574     
575     if (mt->enable_collection != no_collection)
576     {
577         if (mt->enable_collection == collection_first)
578             wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
579         mt->enable_collection = collection_second;
580         wrbuf_printf(wr, "<record");
581     }
582     else
583     {
584         wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
585     }
586     if (format)
587         wrbuf_printf(wr, " format=\"%.80s\"", format);
588     if (type)
589         wrbuf_printf(wr, " type=\"%.80s\"", type);
590     wrbuf_printf(wr, ">\n");
591     for (n = mt->nodes; n; n = n->next)
592     {
593         struct yaz_marc_subfield *s;
594
595         switch(n->which)
596         {
597         case YAZ_MARC_DATAFIELD:
598             wrbuf_printf(wr, "  <datafield tag=\"");
599             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
600                                     strlen(n->u.datafield.tag));
601             wrbuf_printf(wr, "\"");
602             if (n->u.datafield.indicator)
603             {
604                 int i;
605                 for (i = 0; n->u.datafield.indicator[i]; i++)
606                 {
607                     wrbuf_printf(wr, " ind%d=\"", i+1);
608                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
609                                           n->u.datafield.indicator+i, 1);
610                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
611                 }
612             }
613             wrbuf_printf(wr, ">\n");
614             for (s = n->u.datafield.subfields; s; s = s->next)
615             {
616                 /* if identifier length is 2 (most MARCs),
617                    the code is a single character .. However we've
618                    seen multibyte codes, so see how big it really is */
619                 size_t using_code_len = 
620                     (identifier_length != 2) ? identifier_length - 1
621                     :
622                     cdata_one_character(mt, s->code_data);
623                 
624                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
625                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
626                                         s->code_data, using_code_len);
627                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
628                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
629                                         s->code_data + using_code_len,
630                                         strlen(s->code_data + using_code_len));
631                 marc_iconv_reset(mt, wr);
632                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
633                 wrbuf_puts(wr, "\n");
634             }
635             wrbuf_printf(wr, "  </datafield>\n");
636             break;
637         case YAZ_MARC_CONTROLFIELD:
638             wrbuf_printf(wr, "  <controlfield tag=\"");
639             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
640                                     strlen(n->u.controlfield.tag));
641             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
642             wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
643                                     n->u.controlfield.data,
644                                     strlen(n->u.controlfield.data));
645
646             marc_iconv_reset(mt, wr);
647             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
648             wrbuf_puts(wr, "\n");
649             break;
650         case YAZ_MARC_COMMENT:
651             wrbuf_printf(wr, "<!-- ");
652             wrbuf_puts(wr, n->u.comment);
653             wrbuf_printf(wr, " -->\n");
654             break;
655         case YAZ_MARC_LEADER:
656             wrbuf_printf(wr, "  <leader>");
657             wrbuf_iconv_write_cdata(wr, 
658                                     0 /* no charset conversion for leader */,
659                                     n->u.leader, strlen(n->u.leader));
660             wrbuf_printf(wr, "</leader>\n");
661         }
662     }
663     wrbuf_puts(wr, "</record>\n");
664     return 0;
665 }
666
667 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
668                                      const char *ns, 
669                                      const char *format,
670                                      const char *type)
671 {
672     if (mt->write_using_libxml2)
673     {
674 #if YAZ_HAVE_XML2
675         int ret;
676         xmlNode *root_ptr;
677
678         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
679         if (ret == 0)
680         {
681             xmlChar *buf_out;
682             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
683             int len_out;
684
685             xmlDocSetRootElement(doc, root_ptr);
686             xmlDocDumpMemory(doc, &buf_out, &len_out);
687
688             wrbuf_write(wr, (const char *) buf_out, len_out);
689             wrbuf_puts(wr, "");
690             xmlFree(buf_out);
691             xmlFreeDoc(doc);
692         }
693         return ret;
694 #else
695         return -1;
696 #endif
697     }
698     else
699         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
700 }
701
702 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
703 {
704     if (!mt->leader_spec)
705         yaz_marc_modify_leader(mt, 9, "a");
706     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
707                                      0, 0);
708 }
709
710 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
711                                const char *format,
712                                const char *type)
713 {
714     return yaz_marc_write_marcxml_ns(mt, wr,
715                                      "http://www.bs.dk/standards/MarcXchange",
716                                      0, 0);
717 }
718
719
720 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
721                        const char *ns, 
722                        const char *format,
723                        const char *type)
724 {
725 #if YAZ_HAVE_XML2
726     struct yaz_marc_node *n;
727     int identifier_length;
728     const char *leader = 0;
729     xmlNode *record_ptr;
730     xmlNsPtr ns_record;
731     WRBUF wr_cdata = 0;
732
733     for (n = mt->nodes; n; n = n->next)
734         if (n->which == YAZ_MARC_LEADER)
735         {
736             leader = n->u.leader;
737             break;
738         }
739     
740     if (!leader)
741         return -1;
742     if (!atoi_n_check(leader+11, 1, &identifier_length))
743         return -1;
744
745     wr_cdata = wrbuf_alloc();
746
747     record_ptr = xmlNewNode(0, BAD_CAST "record");
748     *root_ptr = record_ptr;
749
750     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
751     xmlSetNs(record_ptr, ns_record);
752
753     if (format)
754         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
755     if (type)
756         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
757     for (n = mt->nodes; n; n = n->next)
758     {
759         struct yaz_marc_subfield *s;
760         xmlNode *ptr;
761
762         switch(n->which)
763         {
764         case YAZ_MARC_DATAFIELD:
765             ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
766             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
767             if (n->u.datafield.indicator)
768             {
769                 int i;
770                 for (i = 0; n->u.datafield.indicator[i]; i++)
771                 {
772                     char ind_str[6];
773                     char ind_val[2];
774
775                     sprintf(ind_str, "ind%d", i+1);
776                     ind_val[0] = n->u.datafield.indicator[i];
777                     ind_val[1] = '\0';
778                     xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
779                 }
780             }
781             for (s = n->u.datafield.subfields; s; s = s->next)
782             {
783                 xmlNode *ptr_subfield;
784                 /* if identifier length is 2 (most MARCs),
785                    the code is a single character .. However we've
786                    seen multibyte codes, so see how big it really is */
787                 size_t using_code_len = 
788                     (identifier_length != 2) ? identifier_length - 1
789                     :
790                     cdata_one_character(mt, s->code_data);
791
792                 wrbuf_rewind(wr_cdata);
793                 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
794                                  s->code_data + using_code_len);
795                 marc_iconv_reset(mt, wr_cdata);
796                 ptr_subfield = xmlNewTextChild(
797                     ptr, ns_record, 
798                     BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
799
800                 wrbuf_rewind(wr_cdata);
801                 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
802                                   s->code_data, using_code_len);
803                 xmlNewProp(ptr_subfield, BAD_CAST "code",
804                            BAD_CAST wrbuf_cstr(wr_cdata));
805             }
806             break;
807         case YAZ_MARC_CONTROLFIELD:
808             wrbuf_rewind(wr_cdata);
809             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
810             marc_iconv_reset(mt, wr_cdata);
811             
812             ptr = xmlNewTextChild(record_ptr, ns_record,
813                                   BAD_CAST "controlfield",
814                                   BAD_CAST wrbuf_cstr(wr_cdata));
815             
816             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
817             break;
818         case YAZ_MARC_COMMENT:
819             ptr = xmlNewComment(BAD_CAST n->u.comment);
820             xmlAddChild(record_ptr, ptr);
821             break;
822         case YAZ_MARC_LEADER:
823             xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
824                             BAD_CAST n->u.leader);
825             break;
826         }
827     }
828     wrbuf_destroy(wr_cdata);
829     return 0;
830 #else
831     return -1;
832 #endif
833 }
834
835 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
836 {
837     struct yaz_marc_node *n;
838     int indicator_length;
839     int identifier_length;
840     int length_data_entry;
841     int length_starting;
842     int length_implementation;
843     int data_offset = 0;
844     const char *leader = 0;
845     WRBUF wr_dir, wr_head, wr_data_tmp;
846     int base_address;
847     
848     for (n = mt->nodes; n; n = n->next)
849         if (n->which == YAZ_MARC_LEADER)
850             leader = n->u.leader;
851     
852     if (!leader)
853         return -1;
854     if (!atoi_n_check(leader+10, 1, &indicator_length))
855         return -1;
856     if (!atoi_n_check(leader+11, 1, &identifier_length))
857         return -1;
858     if (!atoi_n_check(leader+20, 1, &length_data_entry))
859         return -1;
860     if (!atoi_n_check(leader+21, 1, &length_starting))
861         return -1;
862     if (!atoi_n_check(leader+22, 1, &length_implementation))
863         return -1;
864
865     wr_data_tmp = wrbuf_alloc();
866     wr_dir = wrbuf_alloc();
867     for (n = mt->nodes; n; n = n->next)
868     {
869         int data_length = 0;
870         struct yaz_marc_subfield *s;
871
872         switch(n->which)
873         {
874         case YAZ_MARC_DATAFIELD:
875             wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
876             data_length += indicator_length;
877             wrbuf_rewind(wr_data_tmp);
878             for (s = n->u.datafield.subfields; s; s = s->next)
879             {
880                 /* write dummy IDFS + content */
881                 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
882                 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
883                 marc_iconv_reset(mt, wr_data_tmp);
884             }
885             /* write dummy FS (makes MARC-8 to become ASCII) */
886             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
887             marc_iconv_reset(mt, wr_data_tmp);
888             data_length += wrbuf_len(wr_data_tmp);
889             break;
890         case YAZ_MARC_CONTROLFIELD:
891             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
892
893             wrbuf_rewind(wr_data_tmp);
894             wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
895                              n->u.controlfield.data);
896             marc_iconv_reset(mt, wr_data_tmp);
897             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
898             marc_iconv_reset(mt, wr_data_tmp);
899             data_length += wrbuf_len(wr_data_tmp);
900             break;
901         case YAZ_MARC_COMMENT:
902             break;
903         case YAZ_MARC_LEADER:
904             break;
905         }
906         if (data_length)
907         {
908             wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
909             wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
910             data_offset += data_length;
911         }
912     }
913     /* mark end of directory */
914     wrbuf_putc(wr_dir, ISO2709_FS);
915
916     /* base address of data (comes after leader+directory) */
917     base_address = 24 + wrbuf_len(wr_dir);
918
919     wr_head = wrbuf_alloc();
920
921     /* write record length */
922     wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
923     /* from "original" leader */
924     wrbuf_write(wr_head, leader+5, 7);
925     /* base address of data */
926     wrbuf_printf(wr_head, "%05d", base_address);
927     /* from "original" leader */
928     wrbuf_write(wr_head, leader+17, 7);
929     
930     wrbuf_write(wr, wrbuf_buf(wr_head), 24);
931     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
932     wrbuf_destroy(wr_head);
933     wrbuf_destroy(wr_dir);
934     wrbuf_destroy(wr_data_tmp);
935
936     for (n = mt->nodes; n; n = n->next)
937     {
938         struct yaz_marc_subfield *s;
939
940         switch(n->which)
941         {
942         case YAZ_MARC_DATAFIELD:
943             wrbuf_printf(wr, "%.*s", indicator_length,
944                          n->u.datafield.indicator);
945             for (s = n->u.datafield.subfields; s; s = s->next)
946             {
947                 wrbuf_putc(wr, ISO2709_IDFS);
948                 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
949                 marc_iconv_reset(mt, wr);
950             }
951             wrbuf_putc(wr, ISO2709_FS);
952             break;
953         case YAZ_MARC_CONTROLFIELD:
954             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
955             marc_iconv_reset(mt, wr);
956             wrbuf_putc(wr, ISO2709_FS);
957             break;
958         case YAZ_MARC_COMMENT:
959             break;
960         case YAZ_MARC_LEADER:
961             break;
962         }
963     }
964     wrbuf_printf(wr, "%c", ISO2709_RS);
965     return 0;
966 }
967
968
969 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
970 {
971     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
972     if (r <= 0)
973         return r;
974     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
975     if (s != 0)
976         return -1; /* error */
977     return r; /* OK, return length > 0 */
978 }
979
980 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
981                          const char **result, size_t *rsize)
982 {
983     int r;
984
985     wrbuf_rewind(mt->m_wr);
986     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
987     if (result)
988         *result = wrbuf_cstr(mt->m_wr);
989     if (rsize)
990         *rsize = wrbuf_len(mt->m_wr);
991     return r;
992 }
993
994 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
995 {
996     if (mt)
997         mt->xml = xmlmode;
998 }
999
1000 void yaz_marc_debug(yaz_marc_t mt, int level)
1001 {
1002     if (mt)
1003         mt->debug = level;
1004 }
1005
1006 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1007 {
1008     mt->iconv_cd = cd;
1009 }
1010
1011 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1012 {
1013     struct yaz_marc_node *n;
1014     char *leader = 0;
1015     for (n = mt->nodes; n; n = n->next)
1016         if (n->which == YAZ_MARC_LEADER)
1017         {
1018             leader = n->u.leader;
1019             memcpy(leader+off, str, strlen(str));
1020             break;
1021         }
1022 }
1023
1024 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1025 {
1026     xfree(mt->leader_spec);
1027     mt->leader_spec = 0;
1028     if (leader_spec)
1029     {
1030         char dummy_leader[24];
1031         if (marc_exec_leader(leader_spec, dummy_leader, 24))
1032             return -1;
1033         mt->leader_spec = xstrdup(leader_spec);
1034     }
1035     return 0;
1036 }
1037
/** \brief applies a leader specification to a leader buffer
    \param leader_spec comma-separated list of pos=value items; a NULL
    pointer is accepted and leaves the leader untouched
    \param leader buffer that is modified
    \param size number of bytes available in leader
    \retval 0 OK
    \retval -1 specification is malformed or refers outside the leader

    Each item has one of two forms:
    - pos='chars' : the characters between the quotes are copied into the
      leader starting at pos
    - pos=number  : the decimal number (0..255) is stored as a single byte
      at pos

    A value is at most 20 characters long. Items are applied as they are
    parsed, so the leader may be partially modified when -1 is returned.
*/
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *cp = leader_spec;

    if (!cp)
        return 0;
    for (;;)
    {
        char val[21];
        int pos;
        int no_read = 0;
        int no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);

        /* need both position and value; shortest item is 3 chars ("0=x") */
        if (no < 2 || no_read < 3)
            return -1;
        if (pos < 0 || (size_t) pos >= size)
            return -1;

        if (*val == '\'')
        {
            const char *vp = strchr(val+1, '\'');
            size_t len;

            if (!vp)
                return -1; /* missing closing quote */
            len = vp-val-1;
            if (len + (size_t) pos > size)
                return -1;
            memcpy(leader + pos, val+1, len);
        }
        else if (*val >= '0' && *val <= '9')
        {
            /* strtol rather than atoi: atoi has undefined behaviour when
               the number does not fit in an int. strtol saturates at
               LONG_MAX, which the range check below rejects. val starts
               with a digit, so the result is never negative. */
            long ch = strtol(val, 0, 10);

            if (ch > 255)
                return -1; /* does not fit in one leader byte */
            leader[pos] = (char) ch;
        }
        else
            return -1;

        cp += no_read;
        if (*cp != ',')
            break;
        cp++; /* skip separator and parse the next item */
    }
    return 0;
}
1080
1081 int yaz_marc_decode_formatstr(const char *arg)
1082 {
1083     int mode = -1; 
1084     if (!strcmp(arg, "marc"))
1085         mode = YAZ_MARC_ISO2709;
1086     if (!strcmp(arg, "marcxml"))
1087         mode = YAZ_MARC_MARCXML;
1088     if (!strcmp(arg, "marcxchange"))
1089         mode = YAZ_MARC_XCHANGE;
1090     if (!strcmp(arg, "line"))
1091         mode = YAZ_MARC_LINE;
1092     return mode;
1093 }
1094
1095 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1096 {
1097     mt->write_using_libxml2 = enable;
1098 }
1099
1100 /*
1101  * Local variables:
1102  * c-basic-offset: 4
1103  * indent-tabs-mode: nil
1104  * End:
1105  * vim: shiftwidth=4 tabstop=8 expandtab
1106  */
1107