Added yaz_marc_get_iconv
[yaz-moved-to-github.git] / src / marcdisp.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdisp.c,v 1.54 2008-01-30 19:59:02 adam Exp $
6  */
7
8 /**
9  * \file marcdisp.c
10  * \brief Implements MARC conversion utilities
11  */
12
13 #if HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16
17 #ifdef WIN32
18 #include <windows.h>
19 #endif
20
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
28 #include <yaz/nmem_xml.h>
29 #include <yaz/snprintf.h>
30
31 #if YAZ_HAVE_XML2
32 #include <libxml/parser.h>
33 #include <libxml/tree.h>
34 #endif
35
/** \brief tracks whether XML records are wrapped in a collection element */
enum yaz_collection_state {
    no_collection = 0,     /* default: every record carries its own xmlns */
    collection_first = 1,  /* wrapping requested, <collection> not yet written */
    collection_second = 2  /* <collection> written; the trailer must close it */
};
41    
/** \brief node types for yaz_marc_node

    The value selects which member of the union in struct yaz_marc_node
    is valid.
*/
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,     /* u.datafield: tag, indicator and subfields */
    YAZ_MARC_CONTROLFIELD,  /* u.controlfield: tag and data */
    YAZ_MARC_COMMENT,       /* u.comment: free text */
    YAZ_MARC_LEADER         /* u.leader: the record leader */
};
50
/** \brief represents a data field */
struct yaz_marc_datafield {
    /* field tag, NUL-terminated */
    char *tag;
    /* indicator characters, NUL-terminated; the XML writers emit one
       ind<N> attribute per character */
    char *indicator;
    /* head of the subfield list; 0 while the field has no subfields */
    struct yaz_marc_subfield *subfields;
};
57
/** \brief represents a control field */
struct yaz_marc_controlfield {
    /* field tag, NUL-terminated */
    char *tag;
    /* field content, NUL-terminated */
    char *data;
};
63
/** \brief a comment node

    struct yaz_marc_node does not embed this type; it stores the comment
    text directly as a char pointer.
*/
struct yaz_marc_comment {
    /* comment text */
    char *comment;
};
68
69 /** \brief MARC node */
70 struct yaz_marc_node {
71     enum YAZ_MARC_NODE_TYPE which;
72     union {
73         struct yaz_marc_datafield datafield;
74         struct yaz_marc_controlfield controlfield;
75         char *comment;
76         char *leader;
77     } u;
78     struct yaz_marc_node *next;
79 };
80
/** \brief represents a subfield */
struct yaz_marc_subfield {
    /* subfield code immediately followed by the subfield data, kept as
       one NUL-terminated string; the writers split it when producing
       output */
    char *code_data;
    /* following subfield of the same data field, 0 at the end */
    struct yaz_marc_subfield *next;
};
86
87 /** \brief the internals of a yaz_marc_t handle */
88 struct yaz_marc_t_ {
89     WRBUF m_wr;
90     NMEM nmem;
91     int xml;
92     int debug;
93     int write_using_libxml2;
94     enum yaz_collection_state enable_collection;
95     yaz_iconv_t iconv_cd;
96     char subfield_str[8];
97     char endline_str[8];
98     char *leader_spec;
99     struct yaz_marc_node *nodes;
100     struct yaz_marc_node **nodes_pp;
101     struct yaz_marc_subfield **subfield_pp;
102 };
103
104 yaz_marc_t yaz_marc_create(void)
105 {
106     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107     mt->xml = YAZ_MARC_LINE;
108     mt->debug = 0;
109     mt->write_using_libxml2 = 0;
110     mt->enable_collection = no_collection;
111     mt->m_wr = wrbuf_alloc();
112     mt->iconv_cd = 0;
113     mt->leader_spec = 0;
114     strcpy(mt->subfield_str, " $");
115     strcpy(mt->endline_str, "\n");
116
117     mt->nmem = nmem_create();
118     yaz_marc_reset(mt);
119     return mt;
120 }
121
122 void yaz_marc_destroy(yaz_marc_t mt)
123 {
124     if (!mt)
125         return ;
126     nmem_destroy(mt->nmem);
127     wrbuf_destroy(mt->m_wr);
128     xfree(mt->leader_spec);
129     xfree(mt);
130 }
131
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
133 {
134     return mt->nmem;
135 }
136
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
138 {
139     wrbuf_iconv_reset(wr, mt->iconv_cd);
140 }
141
142 static int marc_exec_leader(const char *leader_spec, char *leader,
143                             size_t size);
144
145
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
147 {
148     struct yaz_marc_node *n = (struct yaz_marc_node *)
149         nmem_malloc(mt->nmem, sizeof(*n));
150     n->next = 0;
151     *mt->nodes_pp = n;
152     mt->nodes_pp = &n->next;
153     return n;
154 }
155
156 #if YAZ_HAVE_XML2
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158                                    const xmlNode *ptr_data)
159 {
160     struct yaz_marc_node *n = yaz_marc_add_node(mt);
161     n->which = YAZ_MARC_CONTROLFIELD;
162     n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163     n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
164 }
165 #endif
166
167
168 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
169 {
170     struct yaz_marc_node *n = yaz_marc_add_node(mt);
171     n->which = YAZ_MARC_COMMENT;
172     n->u.comment = nmem_strdup(mt->nmem, comment);
173 }
174
175 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
176 {
177     va_list ap;
178     char buf[200];
179
180     va_start(ap, fmt);
181     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
182     yaz_marc_add_comment(mt, buf);
183     va_end (ap);
184 }
185
186 int yaz_marc_get_debug(yaz_marc_t mt)
187 {
188     return mt->debug;
189 }
190
191 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
192 {
193     struct yaz_marc_node *n = yaz_marc_add_node(mt);
194     n->which = YAZ_MARC_LEADER;
195     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
196     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
197 }
198
199 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
200                                const char *data, size_t data_len)
201 {
202     struct yaz_marc_node *n = yaz_marc_add_node(mt);
203     n->which = YAZ_MARC_CONTROLFIELD;
204     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
205     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
206     if (mt->debug)
207     {
208         size_t i;
209         char msg[80];
210
211         sprintf(msg, "controlfield:");
212         for (i = 0; i < 16 && i < data_len; i++)
213             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
214         if (i < data_len)
215             sprintf(msg + strlen(msg), " ..");
216         yaz_marc_add_comment(mt, msg);
217     }
218 }
219
220 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
221                             const char *indicator, size_t indicator_len)
222 {
223     struct yaz_marc_node *n = yaz_marc_add_node(mt);
224     n->which = YAZ_MARC_DATAFIELD;
225     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
226     n->u.datafield.indicator =
227         nmem_strdupn(mt->nmem, indicator, indicator_len);
228     n->u.datafield.subfields = 0;
229
230     /* make subfield_pp the current (last one) */
231     mt->subfield_pp = &n->u.datafield.subfields;
232 }
233
234 #if YAZ_HAVE_XML2
235 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
236                                 const char *indicator, size_t indicator_len)
237 {
238     struct yaz_marc_node *n = yaz_marc_add_node(mt);
239     n->which = YAZ_MARC_DATAFIELD;
240     n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
241     n->u.datafield.indicator =
242         nmem_strdupn(mt->nmem, indicator, indicator_len);
243     n->u.datafield.subfields = 0;
244
245     /* make subfield_pp the current (last one) */
246     mt->subfield_pp = &n->u.datafield.subfields;
247 }
248 #endif
249
250 void yaz_marc_add_subfield(yaz_marc_t mt,
251                            const char *code_data, size_t code_data_len)
252 {
253     if (mt->debug)
254     {
255         size_t i;
256         char msg[80];
257
258         sprintf(msg, "subfield:");
259         for (i = 0; i < 16 && i < code_data_len; i++)
260             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
261         if (i < code_data_len)
262             sprintf(msg + strlen(msg), " ..");
263         yaz_marc_add_comment(mt, msg);
264     }
265
266     if (mt->subfield_pp)
267     {
268         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
269             nmem_malloc(mt->nmem, sizeof(*n));
270         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
271         n->next = 0;
272         /* mark subfield_pp to point to this one, so we append here next */
273         *mt->subfield_pp = n;
274         mt->subfield_pp = &n->next;
275     }
276 }
277
/** \brief converts a fixed-width run of digits, rejecting non-digits
    \param buf start of the digits
    \param size number of bytes to inspect
    \param val receives the result of atoi_n(buf, size) on success;
    left untouched on failure
    \retval 1 all size bytes are digits
    \retval 0 at least one byte is not a digit
*/
int atoi_n_check(const char *buf, int size, int *val)
{
    /* isdigit requires unsigned char values */
    const unsigned char *bytes = (const unsigned char *) buf;
    int pos = 0;

    while (pos < size)
    {
        if (!isdigit(bytes[pos]))
            return 0;
        pos++;
    }
    *val = atoi_n(buf, size);
    return 1;
}
287
288 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
289                          int *indicator_length,
290                          int *identifier_length,
291                          int *base_address,
292                          int *length_data_entry,
293                          int *length_starting,
294                          int *length_implementation)
295 {
296     char leader[24];
297
298     memcpy(leader, leader_c, 24);
299
300     if (!atoi_n_check(leader+10, 1, indicator_length))
301     {
302         yaz_marc_cprintf(mt, 
303                          "Indicator length at offset 10 should hold a digit."
304                          " Assuming 2");
305         leader[10] = '2';
306         *indicator_length = 2;
307     }
308     if (!atoi_n_check(leader+11, 1, identifier_length))
309     {
310         yaz_marc_cprintf(mt, 
311                          "Identifier length at offset 11 should hold a digit."
312                          " Assuming 2");
313         leader[11] = '2';
314         *identifier_length = 2;
315     }
316     if (!atoi_n_check(leader+12, 5, base_address))
317     {
318         yaz_marc_cprintf(mt, 
319                          "Base address at offsets 12..16 should hold a number."
320                          " Assuming 0");
321         *base_address = 0;
322     }
323     if (!atoi_n_check(leader+20, 1, length_data_entry))
324     {
325         yaz_marc_cprintf(mt, 
326                          "Length data entry at offset 20 should hold a digit."
327                          " Assuming 4");
328         *length_data_entry = 4;
329         leader[20] = '4';
330     }
331     if (!atoi_n_check(leader+21, 1, length_starting))
332     {
333         yaz_marc_cprintf(mt,
334                          "Length starting at offset 21 should hold a digit."
335                          " Assuming 5");
336         *length_starting = 5;
337         leader[21] = '5';
338     }
339     if (!atoi_n_check(leader+22, 1, length_implementation))
340     {
341         yaz_marc_cprintf(mt, 
342                          "Length implementation at offset 22 should hold a digit."
343                          " Assuming 0");
344         *length_implementation = 0;
345         leader[22] = '0';
346     }
347
348     if (mt->debug)
349     {
350         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
351         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
352         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
353         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
354         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
355         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
356     }
357     yaz_marc_add_leader(mt, leader, 24);
358 }
359
360 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
361 {
362     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
363     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
364 }
365
366 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
367 {
368     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
369     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
370 }
371
372 /* try to guess how many bytes the identifier really is! */
373 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
374 {
375     if (mt->iconv_cd)
376     {
377         size_t i;
378         for (i = 1; i<5; i++)
379         {
380             char outbuf[12];
381             size_t outbytesleft = sizeof(outbuf);
382             char *outp = outbuf;
383             const char *inp = buf;
384
385             size_t inbytesleft = i;
386             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
387                                  &outp, &outbytesleft);
388             if (r != (size_t) (-1))
389                 return i;  /* got a complete sequence */
390         }
391         return 1; /* giving up */
392     }
393     return 1; /* we don't know */
394 }
395                               
396 void yaz_marc_reset(yaz_marc_t mt)
397 {
398     nmem_reset(mt->nmem);
399     mt->nodes = 0;
400     mt->nodes_pp = &mt->nodes;
401     mt->subfield_pp = 0;
402 }
403
404 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
405 {
406     struct yaz_marc_node *n;
407     int identifier_length;
408     const char *leader = 0;
409
410     for (n = mt->nodes; n; n = n->next)
411         if (n->which == YAZ_MARC_LEADER)
412         {
413             leader = n->u.leader;
414             break;
415         }
416     
417     if (!leader)
418         return -1;
419     if (!atoi_n_check(leader+11, 1, &identifier_length))
420         return -1;
421
422     for (n = mt->nodes; n; n = n->next)
423     {
424         switch(n->which)
425         {
426         case YAZ_MARC_COMMENT:
427             wrbuf_iconv_write(wr, mt->iconv_cd, 
428                               n->u.comment, strlen(n->u.comment));
429             wrbuf_puts(wr, ")\n");
430             break;
431         default:
432             break;
433         }
434     }
435     return 0;
436 }
437
438
439 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
440 {
441     struct yaz_marc_node *n;
442     int identifier_length;
443     const char *leader = 0;
444
445     for (n = mt->nodes; n; n = n->next)
446         if (n->which == YAZ_MARC_LEADER)
447         {
448             leader = n->u.leader;
449             break;
450         }
451     
452     if (!leader)
453         return -1;
454     if (!atoi_n_check(leader+11, 1, &identifier_length))
455         return -1;
456
457     for (n = mt->nodes; n; n = n->next)
458     {
459         struct yaz_marc_subfield *s;
460         switch(n->which)
461         {
462         case YAZ_MARC_DATAFIELD:
463             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
464                          n->u.datafield.indicator);
465             for (s = n->u.datafield.subfields; s; s = s->next)
466             {
467                 /* if identifier length is 2 (most MARCs),
468                    the code is a single character .. However we've
469                    seen multibyte codes, so see how big it really is */
470                 size_t using_code_len = 
471                     (identifier_length != 2) ? identifier_length - 1
472                     :
473                     cdata_one_character(mt, s->code_data);
474                 
475                 wrbuf_puts (wr, mt->subfield_str); 
476                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
477                                   using_code_len);
478                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
479                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
480                                  s->code_data + using_code_len);
481                 marc_iconv_reset(mt, wr);
482             }
483             wrbuf_puts (wr, mt->endline_str);
484             break;
485         case YAZ_MARC_CONTROLFIELD:
486             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
487             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
488             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
489             marc_iconv_reset(mt, wr);
490             wrbuf_puts (wr, mt->endline_str);
491             break;
492         case YAZ_MARC_COMMENT:
493             wrbuf_puts(wr, "(");
494             wrbuf_iconv_write(wr, mt->iconv_cd, 
495                               n->u.comment, strlen(n->u.comment));
496             marc_iconv_reset(mt, wr);
497             wrbuf_puts(wr, ")\n");
498             break;
499         case YAZ_MARC_LEADER:
500             wrbuf_printf(wr, "%s\n", n->u.leader);
501         }
502     }
503     wrbuf_puts(wr, "\n");
504     return 0;
505 }
506
507 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
508 {
509     if (mt->enable_collection == collection_second)
510     {
511         switch(mt->xml)
512         {
513         case YAZ_MARC_MARCXML:
514             wrbuf_printf(wr, "</collection>\n");
515             break;
516         case YAZ_MARC_XCHANGE:
517             wrbuf_printf(wr, "</collection>\n");
518             break;
519         }
520     }
521     return 0;
522 }
523
524 void yaz_marc_enable_collection(yaz_marc_t mt)
525 {
526     mt->enable_collection = collection_first;
527 }
528
529 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
530 {
531     switch(mt->xml)
532     {
533     case YAZ_MARC_LINE:
534         return yaz_marc_write_line(mt, wr);
535     case YAZ_MARC_MARCXML:
536         return yaz_marc_write_marcxml(mt, wr);
537     case YAZ_MARC_XCHANGE:
538         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
539     case YAZ_MARC_ISO2709:
540         return yaz_marc_write_iso2709(mt, wr);
541     case YAZ_MARC_CHECK:
542         return yaz_marc_write_check(mt, wr);
543     }
544     return -1;
545 }
546
547 /** \brief common MARC XML/Xchange writer
548     \param mt handle
549     \param wr WRBUF output
550     \param ns XMLNS for the elements
551     \param format record format (e.g. "MARC21")
552     \param type record type (e.g. "Bibliographic")
553 */
554 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
555                                       const char *ns, 
556                                       const char *format,
557                                       const char *type)
558 {
559     struct yaz_marc_node *n;
560     int identifier_length;
561     const char *leader = 0;
562
563     for (n = mt->nodes; n; n = n->next)
564         if (n->which == YAZ_MARC_LEADER)
565         {
566             leader = n->u.leader;
567             break;
568         }
569     
570     if (!leader)
571         return -1;
572     if (!atoi_n_check(leader+11, 1, &identifier_length))
573         return -1;
574     
575     if (mt->enable_collection != no_collection)
576     {
577         if (mt->enable_collection == collection_first)
578             wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
579         mt->enable_collection = collection_second;
580         wrbuf_printf(wr, "<record");
581     }
582     else
583     {
584         wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
585     }
586     if (format)
587         wrbuf_printf(wr, " format=\"%.80s\"", format);
588     if (type)
589         wrbuf_printf(wr, " type=\"%.80s\"", type);
590     wrbuf_printf(wr, ">\n");
591     for (n = mt->nodes; n; n = n->next)
592     {
593         struct yaz_marc_subfield *s;
594
595         switch(n->which)
596         {
597         case YAZ_MARC_DATAFIELD:
598             wrbuf_printf(wr, "  <datafield tag=\"");
599             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
600                                     strlen(n->u.datafield.tag));
601             wrbuf_printf(wr, "\"");
602             if (n->u.datafield.indicator)
603             {
604                 int i;
605                 for (i = 0; n->u.datafield.indicator[i]; i++)
606                 {
607                     wrbuf_printf(wr, " ind%d=\"", i+1);
608                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
609                                           n->u.datafield.indicator+i, 1);
610                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
611                 }
612             }
613             wrbuf_printf(wr, ">\n");
614             for (s = n->u.datafield.subfields; s; s = s->next)
615             {
616                 /* if identifier length is 2 (most MARCs),
617                    the code is a single character .. However we've
618                    seen multibyte codes, so see how big it really is */
619                 size_t using_code_len = 
620                     (identifier_length != 2) ? identifier_length - 1
621                     :
622                     cdata_one_character(mt, s->code_data);
623                 
624                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
625                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
626                                         s->code_data, using_code_len);
627                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
628                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
629                                         s->code_data + using_code_len,
630                                         strlen(s->code_data + using_code_len));
631                 marc_iconv_reset(mt, wr);
632                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
633                 wrbuf_puts(wr, "\n");
634             }
635             wrbuf_printf(wr, "  </datafield>\n");
636             break;
637         case YAZ_MARC_CONTROLFIELD:
638             wrbuf_printf(wr, "  <controlfield tag=\"");
639             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
640                                     strlen(n->u.controlfield.tag));
641             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
642             wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
643                                     n->u.controlfield.data,
644                                     strlen(n->u.controlfield.data));
645
646             marc_iconv_reset(mt, wr);
647             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
648             wrbuf_puts(wr, "\n");
649             break;
650         case YAZ_MARC_COMMENT:
651             wrbuf_printf(wr, "<!-- ");
652             wrbuf_puts(wr, n->u.comment);
653             wrbuf_printf(wr, " -->\n");
654             break;
655         case YAZ_MARC_LEADER:
656             wrbuf_printf(wr, "  <leader>");
657             wrbuf_iconv_write_cdata(wr, 
658                                     0 /* no charset conversion for leader */,
659                                     n->u.leader, strlen(n->u.leader));
660             wrbuf_printf(wr, "</leader>\n");
661         }
662     }
663     wrbuf_puts(wr, "</record>\n");
664     return 0;
665 }
666
667 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
668                                      const char *ns, 
669                                      const char *format,
670                                      const char *type)
671 {
672     if (mt->write_using_libxml2)
673     {
674 #if YAZ_HAVE_XML2
675         int ret;
676         xmlNode *root_ptr;
677
678         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
679         if (ret == 0)
680         {
681             xmlChar *buf_out;
682             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
683             int len_out;
684
685             xmlDocSetRootElement(doc, root_ptr);
686             xmlDocDumpMemory(doc, &buf_out, &len_out);
687
688             wrbuf_write(wr, (const char *) buf_out, len_out);
689             wrbuf_puts(wr, "");
690             xmlFree(buf_out);
691             xmlFreeDoc(doc);
692         }
693         return ret;
694 #else
695         return -1;
696 #endif
697     }
698     else
699         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
700 }
701
702 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
703 {
704     /* set leader 09 to 'a' for UNICODE */
705     /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
706     if (!mt->leader_spec)
707         yaz_marc_modify_leader(mt, 9, "a");
708     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
709                                      0, 0);
710 }
711
712 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
713                                const char *format,
714                                const char *type)
715 {
716     return yaz_marc_write_marcxml_ns(mt, wr,
717                                      "http://www.bs.dk/standards/MarcXchange",
718                                      0, 0);
719 }
720
721
722 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
723                        const char *ns, 
724                        const char *format,
725                        const char *type)
726 {
727 #if YAZ_HAVE_XML2
728     struct yaz_marc_node *n;
729     int identifier_length;
730     const char *leader = 0;
731     xmlNode *record_ptr;
732     xmlNsPtr ns_record;
733     WRBUF wr_cdata = 0;
734
735     for (n = mt->nodes; n; n = n->next)
736         if (n->which == YAZ_MARC_LEADER)
737         {
738             leader = n->u.leader;
739             break;
740         }
741     
742     if (!leader)
743         return -1;
744     if (!atoi_n_check(leader+11, 1, &identifier_length))
745         return -1;
746
747     wr_cdata = wrbuf_alloc();
748
749     record_ptr = xmlNewNode(0, BAD_CAST "record");
750     *root_ptr = record_ptr;
751
752     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
753     xmlSetNs(record_ptr, ns_record);
754
755     if (format)
756         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
757     if (type)
758         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
759     for (n = mt->nodes; n; n = n->next)
760     {
761         struct yaz_marc_subfield *s;
762         xmlNode *ptr;
763
764         switch(n->which)
765         {
766         case YAZ_MARC_DATAFIELD:
767             ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
768             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
769             if (n->u.datafield.indicator)
770             {
771                 int i;
772                 for (i = 0; n->u.datafield.indicator[i]; i++)
773                 {
774                     char ind_str[6];
775                     char ind_val[2];
776
777                     sprintf(ind_str, "ind%d", i+1);
778                     ind_val[0] = n->u.datafield.indicator[i];
779                     ind_val[1] = '\0';
780                     xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
781                 }
782             }
783             for (s = n->u.datafield.subfields; s; s = s->next)
784             {
785                 xmlNode *ptr_subfield;
786                 /* if identifier length is 2 (most MARCs),
787                    the code is a single character .. However we've
788                    seen multibyte codes, so see how big it really is */
789                 size_t using_code_len = 
790                     (identifier_length != 2) ? identifier_length - 1
791                     :
792                     cdata_one_character(mt, s->code_data);
793
794                 wrbuf_rewind(wr_cdata);
795                 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
796                                  s->code_data + using_code_len);
797                 marc_iconv_reset(mt, wr_cdata);
798                 ptr_subfield = xmlNewTextChild(
799                     ptr, ns_record, 
800                     BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
801
802                 wrbuf_rewind(wr_cdata);
803                 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
804                                   s->code_data, using_code_len);
805                 xmlNewProp(ptr_subfield, BAD_CAST "code",
806                            BAD_CAST wrbuf_cstr(wr_cdata));
807             }
808             break;
809         case YAZ_MARC_CONTROLFIELD:
810             wrbuf_rewind(wr_cdata);
811             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
812             marc_iconv_reset(mt, wr_cdata);
813             
814             ptr = xmlNewTextChild(record_ptr, ns_record,
815                                   BAD_CAST "controlfield",
816                                   BAD_CAST wrbuf_cstr(wr_cdata));
817             
818             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
819             break;
820         case YAZ_MARC_COMMENT:
821             ptr = xmlNewComment(BAD_CAST n->u.comment);
822             xmlAddChild(record_ptr, ptr);
823             break;
824         case YAZ_MARC_LEADER:
825             xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
826                             BAD_CAST n->u.leader);
827             break;
828         }
829     }
830     wrbuf_destroy(wr_cdata);
831     return 0;
832 #else
833     return -1;
834 #endif
835 }
836
837 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
838 {
839     struct yaz_marc_node *n;
840     int indicator_length;
841     int identifier_length;
842     int length_data_entry;
843     int length_starting;
844     int length_implementation;
845     int data_offset = 0;
846     const char *leader = 0;
847     WRBUF wr_dir, wr_head, wr_data_tmp;
848     int base_address;
849     
850     for (n = mt->nodes; n; n = n->next)
851         if (n->which == YAZ_MARC_LEADER)
852             leader = n->u.leader;
853     
854     if (!leader)
855         return -1;
856     if (!atoi_n_check(leader+10, 1, &indicator_length))
857         return -1;
858     if (!atoi_n_check(leader+11, 1, &identifier_length))
859         return -1;
860     if (!atoi_n_check(leader+20, 1, &length_data_entry))
861         return -1;
862     if (!atoi_n_check(leader+21, 1, &length_starting))
863         return -1;
864     if (!atoi_n_check(leader+22, 1, &length_implementation))
865         return -1;
866
867     wr_data_tmp = wrbuf_alloc();
868     wr_dir = wrbuf_alloc();
869     for (n = mt->nodes; n; n = n->next)
870     {
871         int data_length = 0;
872         struct yaz_marc_subfield *s;
873
874         switch(n->which)
875         {
876         case YAZ_MARC_DATAFIELD:
877             wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
878             data_length += indicator_length;
879             wrbuf_rewind(wr_data_tmp);
880             for (s = n->u.datafield.subfields; s; s = s->next)
881             {
882                 /* write dummy IDFS + content */
883                 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
884                 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
885                 marc_iconv_reset(mt, wr_data_tmp);
886             }
887             /* write dummy FS (makes MARC-8 to become ASCII) */
888             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
889             marc_iconv_reset(mt, wr_data_tmp);
890             data_length += wrbuf_len(wr_data_tmp);
891             break;
892         case YAZ_MARC_CONTROLFIELD:
893             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
894
895             wrbuf_rewind(wr_data_tmp);
896             wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
897                              n->u.controlfield.data);
898             marc_iconv_reset(mt, wr_data_tmp);
899             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
900             marc_iconv_reset(mt, wr_data_tmp);
901             data_length += wrbuf_len(wr_data_tmp);
902             break;
903         case YAZ_MARC_COMMENT:
904             break;
905         case YAZ_MARC_LEADER:
906             break;
907         }
908         if (data_length)
909         {
910             wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
911             wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
912             data_offset += data_length;
913         }
914     }
915     /* mark end of directory */
916     wrbuf_putc(wr_dir, ISO2709_FS);
917
918     /* base address of data (comes after leader+directory) */
919     base_address = 24 + wrbuf_len(wr_dir);
920
921     wr_head = wrbuf_alloc();
922
923     /* write record length */
924     wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
925     /* from "original" leader */
926     wrbuf_write(wr_head, leader+5, 7);
927     /* base address of data */
928     wrbuf_printf(wr_head, "%05d", base_address);
929     /* from "original" leader */
930     wrbuf_write(wr_head, leader+17, 7);
931     
932     wrbuf_write(wr, wrbuf_buf(wr_head), 24);
933     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
934     wrbuf_destroy(wr_head);
935     wrbuf_destroy(wr_dir);
936     wrbuf_destroy(wr_data_tmp);
937
938     for (n = mt->nodes; n; n = n->next)
939     {
940         struct yaz_marc_subfield *s;
941
942         switch(n->which)
943         {
944         case YAZ_MARC_DATAFIELD:
945             wrbuf_printf(wr, "%.*s", indicator_length,
946                          n->u.datafield.indicator);
947             for (s = n->u.datafield.subfields; s; s = s->next)
948             {
949                 wrbuf_putc(wr, ISO2709_IDFS);
950                 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
951                 marc_iconv_reset(mt, wr);
952             }
953             wrbuf_putc(wr, ISO2709_FS);
954             break;
955         case YAZ_MARC_CONTROLFIELD:
956             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
957             marc_iconv_reset(mt, wr);
958             wrbuf_putc(wr, ISO2709_FS);
959             break;
960         case YAZ_MARC_COMMENT:
961             break;
962         case YAZ_MARC_LEADER:
963             break;
964         }
965     }
966     wrbuf_printf(wr, "%c", ISO2709_RS);
967     return 0;
968 }
969
970
971 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
972 {
973     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
974     if (r <= 0)
975         return r;
976     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
977     if (s != 0)
978         return -1; /* error */
979     return r; /* OK, return length > 0 */
980 }
981
982 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
983                          const char **result, size_t *rsize)
984 {
985     int r;
986
987     wrbuf_rewind(mt->m_wr);
988     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
989     if (result)
990         *result = wrbuf_cstr(mt->m_wr);
991     if (rsize)
992         *rsize = wrbuf_len(mt->m_wr);
993     return r;
994 }
995
996 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
997 {
998     if (mt)
999         mt->xml = xmlmode;
1000 }
1001
1002 void yaz_marc_debug(yaz_marc_t mt, int level)
1003 {
1004     if (mt)
1005         mt->debug = level;
1006 }
1007
1008 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1009 {
1010     mt->iconv_cd = cd;
1011 }
1012
1013 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1014 {
1015     return mt->iconv_cd;
1016 }
1017
1018 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1019 {
1020     struct yaz_marc_node *n;
1021     char *leader = 0;
1022     for (n = mt->nodes; n; n = n->next)
1023         if (n->which == YAZ_MARC_LEADER)
1024         {
1025             leader = n->u.leader;
1026             memcpy(leader+off, str, strlen(str));
1027             break;
1028         }
1029 }
1030
1031 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1032 {
1033     xfree(mt->leader_spec);
1034     mt->leader_spec = 0;
1035     if (leader_spec)
1036     {
1037         char dummy_leader[24];
1038         if (marc_exec_leader(leader_spec, dummy_leader, 24))
1039             return -1;
1040         mt->leader_spec = xstrdup(leader_spec);
1041     }
1042     return 0;
1043 }
1044
1045 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1046 {
1047     const char *cp = leader_spec;
1048     while (cp)
1049     {
1050         char val[21];
1051         int pos;
1052         int no_read = 0, no = 0;
1053
1054         no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1055         if (no < 2 || no_read < 3)
1056             return -1;
1057         if (pos < 0 || pos >= size)
1058             return -1;
1059
1060         if (*val == '\'')
1061         {
1062             const char *vp = strchr(val+1, '\'');
1063             size_t len;
1064             
1065             if (!vp)
1066                 return -1;
1067             len = vp-val-1;
1068             if (len + pos > size)
1069                 return -1;
1070             memcpy(leader + pos, val+1, len);
1071         }
1072         else if (*val >= '0' && *val <= '9')
1073         {
1074             int ch = atoi(val);
1075             leader[pos] = ch;
1076         }
1077         else
1078             return -1;
1079         cp += no_read;
1080         if (*cp != ',')
1081             break;
1082
1083         cp++;
1084     }
1085     return 0;
1086 }
1087
1088 int yaz_marc_decode_formatstr(const char *arg)
1089 {
1090     int mode = -1; 
1091     if (!strcmp(arg, "marc"))
1092         mode = YAZ_MARC_ISO2709;
1093     if (!strcmp(arg, "marcxml"))
1094         mode = YAZ_MARC_MARCXML;
1095     if (!strcmp(arg, "marcxchange"))
1096         mode = YAZ_MARC_XCHANGE;
1097     if (!strcmp(arg, "line"))
1098         mode = YAZ_MARC_LINE;
1099     return mode;
1100 }
1101
1102 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1103 {
1104     mt->write_using_libxml2 = enable;
1105 }
1106
1107 /*
1108  * Local variables:
1109  * c-basic-offset: 4
1110  * indent-tabs-mode: nil
1111  * End:
1112  * vim: shiftwidth=4 tabstop=8 expandtab
1113  */
1114