10927cd8d68761f12d62a766d20dcf6f16a0d980
[yaz-moved-to-github.git] / src / marcdisp.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marcdisp.c
8  * \brief Implements MARC conversion utilities
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
29
30 #if YAZ_HAVE_XML2
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
33 #endif
34
/** \brief progress of the optional collection wrapper around records */
enum yaz_collection_state {
    no_collection,      /* records are written without a wrapper */
    collection_first,   /* wrapper enabled, opening tag not yet written */
    collection_second   /* opening tag written, trailer closes it */
};
40    
/** \brief node types for yaz_marc_node
 *
 * The value stored in yaz_marc_node.which selects the active member of
 * the node's union.
 */
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,     /* u.datafield is valid */
    YAZ_MARC_CONTROLFIELD,  /* u.controlfield is valid */
    YAZ_MARC_COMMENT,       /* u.comment is valid */
    YAZ_MARC_LEADER         /* u.leader is valid */
};
49
/** \brief represents a data field */
struct yaz_marc_datafield {
    char *tag;                            /* field tag */
    char *indicator;                      /* indicator characters, NUL-terminated;
                                             may be 0 (writers test for it) */
    struct yaz_marc_subfield *subfields;  /* head of the subfield list, 0 if
                                             the field has no subfields */
};
56
/** \brief represents a control field */
struct yaz_marc_controlfield {
    char *tag;   /* field tag */
    char *data;  /* field content, NUL-terminated */
};
62
/** \brief a comment node
 *
 * NOTE(review): struct yaz_marc_node keeps its comment as a plain char *
 * and nothing in the code visible here refers to this struct - confirm
 * whether it is still needed.
 */
struct yaz_marc_comment {
    char *comment;  /* comment text */
};
67
68 /** \brief MARC node */
69 struct yaz_marc_node {
70     enum YAZ_MARC_NODE_TYPE which;
71     union {
72         struct yaz_marc_datafield datafield;
73         struct yaz_marc_controlfield controlfield;
74         char *comment;
75         char *leader;
76     } u;
77     struct yaz_marc_node *next;
78 };
79
/** \brief represents a subfield */
struct yaz_marc_subfield {
    char *code_data;                 /* subfield code immediately followed by
                                        the subfield data, NUL-terminated */
    struct yaz_marc_subfield *next;  /* following subfield, 0 at the end */
};
85
86 /** \brief the internals of a yaz_marc_t handle */
87 struct yaz_marc_t_ {
88     WRBUF m_wr;
89     NMEM nmem;
90     int xml;
91     int debug;
92     int write_using_libxml2;
93     int turbo_format;
94     enum yaz_collection_state enable_collection;
95     yaz_iconv_t iconv_cd;
96     char subfield_str[8];
97     char endline_str[8];
98     char *leader_spec;
99     struct yaz_marc_node *nodes;
100     struct yaz_marc_node **nodes_pp;
101     struct yaz_marc_subfield **subfield_pp;
102 };
103
104 yaz_marc_t yaz_marc_create(void)
105 {
106     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107     mt->xml = YAZ_MARC_LINE;
108     mt->debug = 0;
109     mt->write_using_libxml2 = 0;
110     mt->enable_collection = no_collection;
111     mt->m_wr = wrbuf_alloc();
112     mt->iconv_cd = 0;
113     mt->leader_spec = 0;
114     strcpy(mt->subfield_str, " $");
115     strcpy(mt->endline_str, "\n");
116
117     mt->nmem = nmem_create();
118     yaz_marc_reset(mt);
119     return mt;
120 }
121
122 void yaz_marc_destroy(yaz_marc_t mt)
123 {
124     if (!mt)
125         return ;
126     nmem_destroy(mt->nmem);
127     wrbuf_destroy(mt->m_wr);
128     xfree(mt->leader_spec);
129     xfree(mt);
130 }
131
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
133 {
134     return mt->nmem;
135 }
136
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
138 {
139     wrbuf_iconv_reset(wr, mt->iconv_cd);
140 }
141
142 static int marc_exec_leader(const char *leader_spec, char *leader,
143                             size_t size);
144
145
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
147 {
148     struct yaz_marc_node *n = (struct yaz_marc_node *)
149         nmem_malloc(mt->nmem, sizeof(*n));
150     n->next = 0;
151     *mt->nodes_pp = n;
152     mt->nodes_pp = &n->next;
153     return n;
154 }
155
#if YAZ_HAVE_XML2
/** \brief appends a control field whose tag and data come from XML nodes
    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param ptr_data XML node supplying the field data
*/
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                   const xmlNode *ptr_data)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_controlfield *cf = &node->u.controlfield;

    node->which = YAZ_MARC_CONTROLFIELD;
    cf->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    cf->data = nmem_text_node_cdata(ptr_data, mt->nmem);
}
#endif
166
167
168 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
169 {
170     struct yaz_marc_node *n = yaz_marc_add_node(mt);
171     n->which = YAZ_MARC_COMMENT;
172     n->u.comment = nmem_strdup(mt->nmem, comment);
173 }
174
175 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
176 {
177     va_list ap;
178     char buf[200];
179
180     va_start(ap, fmt);
181     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
182     yaz_marc_add_comment(mt, buf);
183     va_end (ap);
184 }
185
186 int yaz_marc_get_debug(yaz_marc_t mt)
187 {
188     return mt->debug;
189 }
190
191 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
192 {
193     struct yaz_marc_node *n = yaz_marc_add_node(mt);
194     n->which = YAZ_MARC_LEADER;
195     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
196     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
197 }
198
199 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
200                                const char *data, size_t data_len)
201 {
202     struct yaz_marc_node *n = yaz_marc_add_node(mt);
203     n->which = YAZ_MARC_CONTROLFIELD;
204     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
205     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
206     if (mt->debug)
207     {
208         size_t i;
209         char msg[80];
210
211         sprintf(msg, "controlfield:");
212         for (i = 0; i < 16 && i < data_len; i++)
213             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
214         if (i < data_len)
215             sprintf(msg + strlen(msg), " ..");
216         yaz_marc_add_comment(mt, msg);
217     }
218 }
219
220 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
221                             const char *indicator, size_t indicator_len)
222 {
223     struct yaz_marc_node *n = yaz_marc_add_node(mt);
224     n->which = YAZ_MARC_DATAFIELD;
225     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
226     n->u.datafield.indicator =
227         nmem_strdupn(mt->nmem, indicator, indicator_len);
228     n->u.datafield.subfields = 0;
229
230     /* make subfield_pp the current (last one) */
231     mt->subfield_pp = &n->u.datafield.subfields;
232 }
233
#if YAZ_HAVE_XML2
/** \brief appends a data field whose tag comes from an XML node
    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param indicator indicator characters
    \param indicator_len number of bytes taken from indicator

    Subfields added afterwards with yaz_marc_add_subfield are attached to
    this field.
*/
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                const char *indicator, size_t indicator_len)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *df = &node->u.datafield;

    node->which = YAZ_MARC_DATAFIELD;
    df->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    df->indicator = nmem_strdupn(mt->nmem, indicator, indicator_len);
    df->subfields = 0;

    /* subsequent subfields are linked into this field's list */
    mt->subfield_pp = &df->subfields;
}
#endif
249
250 void yaz_marc_add_subfield(yaz_marc_t mt,
251                            const char *code_data, size_t code_data_len)
252 {
253     if (mt->debug)
254     {
255         size_t i;
256         char msg[80];
257
258         sprintf(msg, "subfield:");
259         for (i = 0; i < 16 && i < code_data_len; i++)
260             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
261         if (i < code_data_len)
262             sprintf(msg + strlen(msg), " ..");
263         yaz_marc_add_comment(mt, msg);
264     }
265
266     if (mt->subfield_pp)
267     {
268         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
269             nmem_malloc(mt->nmem, sizeof(*n));
270         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
271         n->next = 0;
272         /* mark subfield_pp to point to this one, so we append here next */
273         *mt->subfield_pp = n;
274         mt->subfield_pp = &n->next;
275     }
276 }
277
278 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
279                          int *indicator_length,
280                          int *identifier_length,
281                          int *base_address,
282                          int *length_data_entry,
283                          int *length_starting,
284                          int *length_implementation)
285 {
286     char leader[24];
287
288     memcpy(leader, leader_c, 24);
289
290     if (!atoi_n_check(leader+10, 1, indicator_length))
291     {
292         yaz_marc_cprintf(mt, 
293                          "Indicator length at offset 10 should hold a digit."
294                          " Assuming 2");
295         leader[10] = '2';
296         *indicator_length = 2;
297     }
298     if (!atoi_n_check(leader+11, 1, identifier_length))
299     {
300         yaz_marc_cprintf(mt, 
301                          "Identifier length at offset 11 should hold a digit."
302                          " Assuming 2");
303         leader[11] = '2';
304         *identifier_length = 2;
305     }
306     if (!atoi_n_check(leader+12, 5, base_address))
307     {
308         yaz_marc_cprintf(mt, 
309                          "Base address at offsets 12..16 should hold a number."
310                          " Assuming 0");
311         *base_address = 0;
312     }
313     if (!atoi_n_check(leader+20, 1, length_data_entry))
314     {
315         yaz_marc_cprintf(mt, 
316                          "Length data entry at offset 20 should hold a digit."
317                          " Assuming 4");
318         *length_data_entry = 4;
319         leader[20] = '4';
320     }
321     if (!atoi_n_check(leader+21, 1, length_starting))
322     {
323         yaz_marc_cprintf(mt,
324                          "Length starting at offset 21 should hold a digit."
325                          " Assuming 5");
326         *length_starting = 5;
327         leader[21] = '5';
328     }
329     if (!atoi_n_check(leader+22, 1, length_implementation))
330     {
331         yaz_marc_cprintf(mt, 
332                          "Length implementation at offset 22 should hold a digit."
333                          " Assuming 0");
334         *length_implementation = 0;
335         leader[22] = '0';
336     }
337
338     if (mt->debug)
339     {
340         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
341         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
342         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
343         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
344         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
345         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
346     }
347     yaz_marc_add_leader(mt, leader, 24);
348 }
349
350 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
351 {
352     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
353     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
354 }
355
356 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
357 {
358     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
359     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
360 }
361
362 /* try to guess how many bytes the identifier really is! */
363 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
364 {
365     if (mt->iconv_cd)
366     {
367         size_t i;
368         for (i = 1; i<5; i++)
369         {
370             char outbuf[12];
371             size_t outbytesleft = sizeof(outbuf);
372             char *outp = outbuf;
373             const char *inp = buf;
374
375             size_t inbytesleft = i;
376             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
377                                  &outp, &outbytesleft);
378             if (r != (size_t) (-1))
379                 return i;  /* got a complete sequence */
380         }
381         return 1; /* giving up */
382     }
383     return 1; /* we don't know */
384 }
385                               
386 void yaz_marc_reset(yaz_marc_t mt)
387 {
388     nmem_reset(mt->nmem);
389     mt->nodes = 0;
390     mt->nodes_pp = &mt->nodes;
391     mt->subfield_pp = 0;
392 }
393
394 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
395 {
396     struct yaz_marc_node *n;
397     int identifier_length;
398     const char *leader = 0;
399
400     for (n = mt->nodes; n; n = n->next)
401         if (n->which == YAZ_MARC_LEADER)
402         {
403             leader = n->u.leader;
404             break;
405         }
406     
407     if (!leader)
408         return -1;
409     if (!atoi_n_check(leader+11, 1, &identifier_length))
410         return -1;
411
412     for (n = mt->nodes; n; n = n->next)
413     {
414         switch(n->which)
415         {
416         case YAZ_MARC_COMMENT:
417             wrbuf_iconv_write(wr, mt->iconv_cd, 
418                               n->u.comment, strlen(n->u.comment));
419             wrbuf_puts(wr, "\n");
420             break;
421         default:
422             break;
423         }
424     }
425     return 0;
426 }
427
428 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
429                                int identifier_length)
430 {
431     /* if identifier length is 2 (most MARCs) or less (probably an error),
432        the code is a single character .. However we've
433        seen multibyte codes, so see how big it really is */
434     if (identifier_length > 2)
435         return identifier_length - 1;
436     else
437         return cdata_one_character(mt, data);
438 }
439
440 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
441 {
442     struct yaz_marc_node *n;
443     int identifier_length;
444     const char *leader = 0;
445
446     for (n = mt->nodes; n; n = n->next)
447         if (n->which == YAZ_MARC_LEADER)
448         {
449             leader = n->u.leader;
450             break;
451         }
452     
453     if (!leader)
454         return -1;
455     if (!atoi_n_check(leader+11, 1, &identifier_length))
456         return -1;
457
458     for (n = mt->nodes; n; n = n->next)
459     {
460         struct yaz_marc_subfield *s;
461         switch(n->which)
462         {
463         case YAZ_MARC_DATAFIELD:
464             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
465                          n->u.datafield.indicator);
466             for (s = n->u.datafield.subfields; s; s = s->next)
467             {
468                 size_t using_code_len = get_subfield_len(mt, s->code_data,
469                                                          identifier_length);
470                 
471                 wrbuf_puts (wr, mt->subfield_str); 
472                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
473                                   using_code_len);
474                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
475                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
476                                  s->code_data + using_code_len);
477                 marc_iconv_reset(mt, wr);
478             }
479             wrbuf_puts (wr, mt->endline_str);
480             break;
481         case YAZ_MARC_CONTROLFIELD:
482             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
483             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
484             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
485             marc_iconv_reset(mt, wr);
486             wrbuf_puts (wr, mt->endline_str);
487             break;
488         case YAZ_MARC_COMMENT:
489             wrbuf_puts(wr, "(");
490             wrbuf_iconv_write(wr, mt->iconv_cd, 
491                               n->u.comment, strlen(n->u.comment));
492             marc_iconv_reset(mt, wr);
493             wrbuf_puts(wr, ")\n");
494             break;
495         case YAZ_MARC_LEADER:
496             wrbuf_printf(wr, "%s\n", n->u.leader);
497         }
498     }
499     wrbuf_puts(wr, "\n");
500     return 0;
501 }
502
503 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
504 {
505     if (mt->enable_collection == collection_second)
506     {
507         switch(mt->xml)
508         {
509         case YAZ_MARC_MARCXML:
510             wrbuf_printf(wr, "</collection>\n");
511             break;
512         case YAZ_MARC_XCHANGE:
513             wrbuf_printf(wr, "</collection>\n");
514             break;
515         }
516     }
517     return 0;
518 }
519
520 void yaz_marc_enable_collection(yaz_marc_t mt)
521 {
522     mt->enable_collection = collection_first;
523 }
524
525 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
526 {
527     switch(mt->xml)
528     {
529     case YAZ_MARC_LINE:
530         return yaz_marc_write_line(mt, wr);
531     case YAZ_MARC_MARCXML:
532     case YAZ_MARC_TMARCXML:
533         return yaz_marc_write_marcxml(mt, wr);
534     case YAZ_MARC_XCHANGE:
535         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
536     case YAZ_MARC_ISO2709:
537         return yaz_marc_write_iso2709(mt, wr);
538     case YAZ_MARC_CHECK:
539         return yaz_marc_write_check(mt, wr);
540     }
541     return -1;
542 }
543
544 /** \brief common MARC XML/Xchange writer
545     \param mt handle
546     \param wr WRBUF output
547     \param ns XMLNS for the elements
548     \param format record format (e.g. "MARC21")
549     \param type record type (e.g. "Bibliographic")
550 */
551 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
552                                       const char *ns, 
553                                       const char *format,
554                                       const char *type)
555 {
556     struct yaz_marc_node *n;
557     int identifier_length;
558     const char *leader = 0;
559
560     for (n = mt->nodes; n; n = n->next)
561         if (n->which == YAZ_MARC_LEADER)
562         {
563             leader = n->u.leader;
564             break;
565         }
566     
567     if (!leader)
568         return -1;
569     if (!atoi_n_check(leader+11, 1, &identifier_length))
570         return -1;
571     
572     if (mt->enable_collection != no_collection)
573     {
574         if (mt->enable_collection == collection_first)
575             wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
576         mt->enable_collection = collection_second;
577         wrbuf_printf(wr, "<record");
578     }
579     else
580     {
581         wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
582     }
583     if (format)
584         wrbuf_printf(wr, " format=\"%.80s\"", format);
585     if (type)
586         wrbuf_printf(wr, " type=\"%.80s\"", type);
587     wrbuf_printf(wr, ">\n");
588     for (n = mt->nodes; n; n = n->next)
589     {
590         struct yaz_marc_subfield *s;
591
592         switch(n->which)
593         {
594         case YAZ_MARC_DATAFIELD:
595             wrbuf_printf(wr, "  <datafield tag=\"");
596             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
597                                     strlen(n->u.datafield.tag));
598             wrbuf_printf(wr, "\"");
599             if (n->u.datafield.indicator)
600             {
601                 int i;
602                 for (i = 0; n->u.datafield.indicator[i]; i++)
603                 {
604                     wrbuf_printf(wr, " ind%d=\"", i+1);
605                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
606                                           n->u.datafield.indicator+i, 1);
607                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
608                 }
609             }
610             wrbuf_printf(wr, ">\n");
611             for (s = n->u.datafield.subfields; s; s = s->next)
612             {
613                 size_t using_code_len = get_subfield_len(mt, s->code_data,
614                                                          identifier_length);
615                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
616                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
617                                         s->code_data, using_code_len);
618                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
619                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
620                                         s->code_data + using_code_len,
621                                         strlen(s->code_data + using_code_len));
622                 marc_iconv_reset(mt, wr);
623                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
624                 wrbuf_puts(wr, "\n");
625             }
626             wrbuf_printf(wr, "  </datafield>\n");
627             break;
628         case YAZ_MARC_CONTROLFIELD:
629             wrbuf_printf(wr, "  <controlfield tag=\"");
630             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
631                                     strlen(n->u.controlfield.tag));
632             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
633             wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
634                                     n->u.controlfield.data,
635                                     strlen(n->u.controlfield.data));
636
637             marc_iconv_reset(mt, wr);
638             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
639             wrbuf_puts(wr, "\n");
640             break;
641         case YAZ_MARC_COMMENT:
642             wrbuf_printf(wr, "<!-- ");
643             wrbuf_puts(wr, n->u.comment);
644             wrbuf_printf(wr, " -->\n");
645             break;
646         case YAZ_MARC_LEADER:
647             wrbuf_printf(wr, "  <leader>");
648             wrbuf_iconv_write_cdata(wr, 
649                                     0 /* no charset conversion for leader */,
650                                     n->u.leader, strlen(n->u.leader));
651             wrbuf_printf(wr, "</leader>\n");
652         }
653     }
654     wrbuf_puts(wr, "</record>\n");
655     return 0;
656 }
657
658 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
659                                      const char *ns, 
660                                      const char *format,
661                                      const char *type)
662 {
663     if (mt->write_using_libxml2)
664     {
665 #if YAZ_HAVE_XML2
666         int ret;
667         xmlNode *root_ptr;
668
669         if (!mt->turbo_format)
670                 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
671         else
672                 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
673         if (ret == 0)
674         {
675             xmlChar *buf_out;
676             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
677             int len_out;
678
679             xmlDocSetRootElement(doc, root_ptr);
680             xmlDocDumpMemory(doc, &buf_out, &len_out);
681
682             wrbuf_write(wr, (const char *) buf_out, len_out);
683             wrbuf_puts(wr, "");
684             xmlFree(buf_out);
685             xmlFreeDoc(doc);
686         }
687         return ret;
688 #else
689         return -1;
690 #endif
691     }
692     else
693         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
694 }
695
696 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
697 {
698     /* set leader 09 to 'a' for UNICODE */
699     /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
700     if (!mt->leader_spec)
701         yaz_marc_modify_leader(mt, 9, "a");
702     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
703                                      0, 0);
704 }
705
706 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
707                                const char *format,
708                                const char *type)
709 {
710     return yaz_marc_write_marcxml_ns(mt, wr,
711                                      "info:lc/xmlns/marcxchange-v1",
712                                      0, 0);
713 }
714
715 #if YAZ_HAVE_XML2
716
717 void add_marc_datafield_xml2(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
718 {
719     xmlNode *ptr;
720     struct yaz_marc_subfield *s;
721     int turbo = mt->turbo_format;
722     if (!turbo) {
723         ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
724         xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
725     }
726     else {
727         //TODO consider if safe
728         char field[10];
729         field[0] = 'd';
730         strncpy(field + 1, n->u.datafield.tag, 3);
731         ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
732     }
733     if (n->u.datafield.indicator)
734     {
735         int i;
736         for (i = 0; n->u.datafield.indicator[i]; i++)
737         {
738             char ind_str[6];
739             char ind_val[2];
740             
741             sprintf(ind_str, "ind%d", i+1);
742             ind_val[0] = n->u.datafield.indicator[i];
743             ind_val[1] = '\0';
744             xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
745         }
746     }
747         WRBUF subfield_name = wrbuf_alloc();
748     for (s = n->u.datafield.subfields; s; s = s->next)
749     {
750         xmlNode *ptr_subfield;
751         size_t using_code_len = get_subfield_len(mt, s->code_data,
752                                                  identifier_length);
753         wrbuf_rewind(wr_cdata);
754         wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
755         marc_iconv_reset(mt, wr_cdata);
756         
757         if (!turbo) {
758                 ptr_subfield = xmlNewTextChild(
759                                 ptr, ns_record,
760                                 BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
761                 wrbuf_rewind(wr_cdata);
762                 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
763                 xmlNewProp(ptr_subfield, BAD_CAST "code",
764                                 BAD_CAST wrbuf_cstr(wr_cdata));
765         }
766         else { // Turbo format
767                 wrbuf_rewind(subfield_name);
768                 wrbuf_puts(subfield_name, "s");
769                 // TODO Map special codes to something possible for XML ELEMENT names
770                 if ((s->code_data[0] >= '0' && s->code_data[0] <= '9') ||
771                     (s->code_data[0] >= 'a' && s->code_data[0] <= 'z') ||
772                                 (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z'))
773                 {
774                         wrbuf_iconv_write(subfield_name, mt->iconv_cd,s->code_data, using_code_len);
775                 ptr_subfield = xmlNewTextChild(ptr, ns_record,
776                                 BAD_CAST wrbuf_cstr(subfield_name),
777                                 BAD_CAST wrbuf_cstr(wr_cdata));
778                 }
779                 else
780                         //TODO FIX
781                                 yaz_log(YLOG_WARN, "Dropping subfield: %s", s->code_data);
782         }
783     }
784         wrbuf_destroy(subfield_name);
785 }
786
787 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
788                        const char *ns, 
789                        const char *format,
790                        const char *type)
791 {
792     struct yaz_marc_node *n;
793     int identifier_length;
794     const char *leader = 0;
795     xmlNode *record_ptr;
796     xmlNsPtr ns_record;
797     WRBUF wr_cdata = 0;
798     int turbo = mt->turbo_format;
799     for (n = mt->nodes; n; n = n->next)
800         if (n->which == YAZ_MARC_LEADER)
801         {
802             leader = n->u.leader;
803             break;
804         }
805     
806     if (!leader)
807         return -1;
808     if (!atoi_n_check(leader+11, 1, &identifier_length))
809         return -1;
810
811     wr_cdata = wrbuf_alloc();
812
813     record_ptr = xmlNewNode(0, BAD_CAST "record");
814     *root_ptr = record_ptr;
815
816     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
817     xmlSetNs(record_ptr, ns_record);
818
819     if (format)
820         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
821     if (type)
822         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
823     for (n = mt->nodes; n; n = n->next)
824     {
825         struct yaz_marc_subfield *s;
826         xmlNode *ptr;
827
828         switch(n->which)
829         {
830         case YAZ_MARC_DATAFIELD:
831                 add_marc_datafield_xml2(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
832             break;
833         case YAZ_MARC_CONTROLFIELD:
834             wrbuf_rewind(wr_cdata);
835             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
836             marc_iconv_reset(mt, wr_cdata);
837             
838             if (!turbo) {
839                                 ptr = xmlNewTextChild(record_ptr, ns_record,
840                                                                           BAD_CAST "controlfield",
841                                                                           BAD_CAST wrbuf_cstr(wr_cdata));
842                                 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
843             }
844             else {
845                 // TODO required iconv?
846                 char field[10];
847                                 field[0] = 'c';
848                 strncpy(field + 1, n->u.controlfield.tag, 3);
849                 ptr = xmlNewTextChild(record_ptr, ns_record,
850                                                                           BAD_CAST field,
851                                                                           BAD_CAST wrbuf_cstr(wr_cdata));
852             }
853
854             break;
855         case YAZ_MARC_COMMENT:
856             ptr = xmlNewComment(BAD_CAST n->u.comment);
857             xmlAddChild(record_ptr, ptr);
858             break;
859         case YAZ_MARC_LEADER:
860                         {
861                                 char *field = "leader";
862                                 if (turbo)
863                                         field = "l";
864                                 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
865                                                                 BAD_CAST n->u.leader);
866                         }
867             break;
868         }
869     }
870     wrbuf_destroy(wr_cdata);
871     return 0;
872 }
873
874
875 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
876                        const char *ns, 
877                        const char *format,
878                        const char *type)
879 {
880     struct yaz_marc_node *n;
881     int identifier_length;
882     const char *leader = 0;
883     xmlNode *record_ptr;
884     xmlNsPtr ns_record;
885     WRBUF wr_cdata = 0;
886
887     for (n = mt->nodes; n; n = n->next)
888         if (n->which == YAZ_MARC_LEADER)
889         {
890             leader = n->u.leader;
891             break;
892         }
893     
894     if (!leader)
895         return -1;
896     if (!atoi_n_check(leader+11, 1, &identifier_length))
897         return -1;
898
899     wr_cdata = wrbuf_alloc();
900
901     record_ptr = xmlNewNode(0, BAD_CAST "record");
902     *root_ptr = record_ptr;
903
904     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
905     xmlSetNs(record_ptr, ns_record);
906
907     if (format)
908         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
909     if (type)
910         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
911     for (n = mt->nodes; n; n = n->next)
912     {
913         struct yaz_marc_subfield *s;
914         xmlNode *ptr;
915
916         switch(n->which)
917         {
918         case YAZ_MARC_DATAFIELD:
919             ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
920             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
921             if (n->u.datafield.indicator)
922             {
923                 int i;
924                 for (i = 0; n->u.datafield.indicator[i]; i++)
925                 {
926                     char ind_str[6];
927                     char ind_val[2];
928
929                     sprintf(ind_str, "ind%d", i+1);
930                     ind_val[0] = n->u.datafield.indicator[i];
931                     ind_val[1] = '\0';
932                     xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
933                 }
934             }
935             for (s = n->u.datafield.subfields; s; s = s->next)
936             {
937                 xmlNode *ptr_subfield;
938                 size_t using_code_len = get_subfield_len(mt, s->code_data,
939                                                          identifier_length);
940                 wrbuf_rewind(wr_cdata);
941                 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
942                                  s->code_data + using_code_len);
943                 marc_iconv_reset(mt, wr_cdata);
944                 ptr_subfield = xmlNewTextChild(
945                     ptr, ns_record,
946                     BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
947
948                 wrbuf_rewind(wr_cdata);
949                 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
950                                   s->code_data, using_code_len);
951                 xmlNewProp(ptr_subfield, BAD_CAST "code",
952                            BAD_CAST wrbuf_cstr(wr_cdata));
953             }
954             break;
955         case YAZ_MARC_CONTROLFIELD:
956             wrbuf_rewind(wr_cdata);
957             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
958             marc_iconv_reset(mt, wr_cdata);
959             
960             ptr = xmlNewTextChild(record_ptr, ns_record,
961                                   BAD_CAST "controlfield",
962                                   BAD_CAST wrbuf_cstr(wr_cdata));
963             
964             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
965             break;
966         case YAZ_MARC_COMMENT:
967             ptr = xmlNewComment(BAD_CAST n->u.comment);
968             xmlAddChild(record_ptr, ptr);
969             break;
970         case YAZ_MARC_LEADER:
971             xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
972                             BAD_CAST n->u.leader);
973             break;
974         }
975     }
976     wrbuf_destroy(wr_cdata);
977     return 0;
978 }
979
980
981
982
983 #endif
984
985 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
986 {
987     struct yaz_marc_node *n;
988     int indicator_length;
989     int identifier_length;
990     int length_data_entry;
991     int length_starting;
992     int length_implementation;
993     int data_offset = 0;
994     const char *leader = 0;
995     WRBUF wr_dir, wr_head, wr_data_tmp;
996     int base_address;
997     
998     for (n = mt->nodes; n; n = n->next)
999         if (n->which == YAZ_MARC_LEADER)
1000             leader = n->u.leader;
1001     
1002     if (!leader)
1003         return -1;
1004     if (!atoi_n_check(leader+10, 1, &indicator_length))
1005         return -1;
1006     if (!atoi_n_check(leader+11, 1, &identifier_length))
1007         return -1;
1008     if (!atoi_n_check(leader+20, 1, &length_data_entry))
1009         return -1;
1010     if (!atoi_n_check(leader+21, 1, &length_starting))
1011         return -1;
1012     if (!atoi_n_check(leader+22, 1, &length_implementation))
1013         return -1;
1014
1015     wr_data_tmp = wrbuf_alloc();
1016     wr_dir = wrbuf_alloc();
1017     for (n = mt->nodes; n; n = n->next)
1018     {
1019         int data_length = 0;
1020         struct yaz_marc_subfield *s;
1021
1022         switch(n->which)
1023         {
1024         case YAZ_MARC_DATAFIELD:
1025             wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1026             data_length += indicator_length;
1027             wrbuf_rewind(wr_data_tmp);
1028             for (s = n->u.datafield.subfields; s; s = s->next)
1029             {
1030                 /* write dummy IDFS + content */
1031                 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1032                 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1033                 marc_iconv_reset(mt, wr_data_tmp);
1034             }
1035             /* write dummy FS (makes MARC-8 to become ASCII) */
1036             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1037             marc_iconv_reset(mt, wr_data_tmp);
1038             data_length += wrbuf_len(wr_data_tmp);
1039             break;
1040         case YAZ_MARC_CONTROLFIELD:
1041             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1042
1043             wrbuf_rewind(wr_data_tmp);
1044             wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
1045                              n->u.controlfield.data);
1046             marc_iconv_reset(mt, wr_data_tmp);
1047             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1048             marc_iconv_reset(mt, wr_data_tmp);
1049             data_length += wrbuf_len(wr_data_tmp);
1050             break;
1051         case YAZ_MARC_COMMENT:
1052             break;
1053         case YAZ_MARC_LEADER:
1054             break;
1055         }
1056         if (data_length)
1057         {
1058             wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1059             wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1060             data_offset += data_length;
1061         }
1062     }
1063     /* mark end of directory */
1064     wrbuf_putc(wr_dir, ISO2709_FS);
1065
1066     /* base address of data (comes after leader+directory) */
1067     base_address = 24 + wrbuf_len(wr_dir);
1068
1069     wr_head = wrbuf_alloc();
1070
1071     /* write record length */
1072     wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1073     /* from "original" leader */
1074     wrbuf_write(wr_head, leader+5, 7);
1075     /* base address of data */
1076     wrbuf_printf(wr_head, "%05d", base_address);
1077     /* from "original" leader */
1078     wrbuf_write(wr_head, leader+17, 7);
1079     
1080     wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1081     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1082     wrbuf_destroy(wr_head);
1083     wrbuf_destroy(wr_dir);
1084     wrbuf_destroy(wr_data_tmp);
1085
1086     for (n = mt->nodes; n; n = n->next)
1087     {
1088         struct yaz_marc_subfield *s;
1089
1090         switch(n->which)
1091         {
1092         case YAZ_MARC_DATAFIELD:
1093             wrbuf_printf(wr, "%.*s", indicator_length,
1094                          n->u.datafield.indicator);
1095             for (s = n->u.datafield.subfields; s; s = s->next)
1096             {
1097                 wrbuf_putc(wr, ISO2709_IDFS);
1098                 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1099                 marc_iconv_reset(mt, wr);
1100             }
1101             wrbuf_putc(wr, ISO2709_FS);
1102             break;
1103         case YAZ_MARC_CONTROLFIELD:
1104             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1105             marc_iconv_reset(mt, wr);
1106             wrbuf_putc(wr, ISO2709_FS);
1107             break;
1108         case YAZ_MARC_COMMENT:
1109             break;
1110         case YAZ_MARC_LEADER:
1111             break;
1112         }
1113     }
1114     wrbuf_printf(wr, "%c", ISO2709_RS);
1115     return 0;
1116 }
1117
1118
1119 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1120 {
1121     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1122     if (r <= 0)
1123         return r;
1124     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1125     if (s != 0)
1126         return -1; /* error */
1127     return r; /* OK, return length > 0 */
1128 }
1129
1130 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1131                          const char **result, size_t *rsize)
1132 {
1133     int r;
1134
1135     wrbuf_rewind(mt->m_wr);
1136     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1137     if (result)
1138         *result = wrbuf_cstr(mt->m_wr);
1139     if (rsize)
1140         *rsize = wrbuf_len(mt->m_wr);
1141     return r;
1142 }
1143
1144 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1145 {
1146     if (mt)
1147         mt->xml = xmlmode;
1148 }
1149
1150 void yaz_marc_debug(yaz_marc_t mt, int level)
1151 {
1152     if (mt)
1153         mt->debug = level;
1154 }
1155
1156 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1157 {
1158     mt->iconv_cd = cd;
1159 }
1160
1161 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1162 {
1163     return mt->iconv_cd;
1164 }
1165
1166 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1167 {
1168     struct yaz_marc_node *n;
1169     char *leader = 0;
1170     for (n = mt->nodes; n; n = n->next)
1171         if (n->which == YAZ_MARC_LEADER)
1172         {
1173             leader = n->u.leader;
1174             memcpy(leader+off, str, strlen(str));
1175             break;
1176         }
1177 }
1178
1179 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1180 {
1181     xfree(mt->leader_spec);
1182     mt->leader_spec = 0;
1183     if (leader_spec)
1184     {
1185         char dummy_leader[24];
1186         if (marc_exec_leader(leader_spec, dummy_leader, 24))
1187             return -1;
1188         mt->leader_spec = xstrdup(leader_spec);
1189     }
1190     return 0;
1191 }
1192
/** \brief applies a leader specification to a leader buffer
    \param leader_spec comma separated list of pos=value items, where
           value is either a quoted string ('abc') copied to the leader
           at pos, or a decimal number stored as a single byte at pos;
           NULL is accepted and does nothing
    \param leader buffer that is modified
    \param size number of bytes available in leader
    \retval 0 specification applied (parsing stops quietly at the first
              item that is not followed by a comma)
    \retval -1 malformed item, position outside the buffer, unterminated
               quote, or quoted string that does not fit
*/
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *spec;

    /* the loop increment steps over the comma between two items */
    for (spec = leader_spec; spec; spec++)
    {
        char value[21];
        int offset;
        int consumed = 0;
        int matched = sscanf(spec, "%d=%20[^,]%n", &offset, value, &consumed);

        if (matched < 2 || consumed < 3)
            return -1;
        if (offset < 0 || (size_t) offset >= size)
            return -1;

        if (value[0] == '\'')
        {
            /* quoted string: copy the bytes between the quotes */
            const char *closing = strchr(value + 1, '\'');
            size_t length;

            if (!closing)
                return -1;
            length = closing - value - 1;
            if (length + offset > size)
                return -1;
            memcpy(leader + offset, value + 1, length);
        }
        else if (value[0] >= '0' && value[0] <= '9')
        {
            /* number: stored as one byte */
            leader[offset] = atoi(value);
        }
        else
        {
            return -1;
        }

        spec += consumed;
        if (*spec != ',')
            break;
    }
    return 0;
}
1235
1236 int yaz_marc_decode_formatstr(const char *arg)
1237 {
1238     int mode = -1; 
1239     if (!strcmp(arg, "marc"))
1240         mode = YAZ_MARC_ISO2709;
1241     if (!strcmp(arg, "marcxml"))
1242         mode = YAZ_MARC_MARCXML;
1243     if (!strcmp(arg, "tmarcxml"))
1244         mode = YAZ_MARC_TMARCXML;
1245     if (!strcmp(arg, "marcxchange"))
1246         mode = YAZ_MARC_XCHANGE;
1247     if (!strcmp(arg, "line"))
1248         mode = YAZ_MARC_LINE;
1249     return mode;
1250 }
1251
1252 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1253 {
1254     mt->write_using_libxml2 = enable;
1255 }
1256
1257 void yaz_marc_write_turbo_format(yaz_marc_t mt, int enable)
1258 {
1259     mt->turbo_format = enable;
1260 }
1261
1262
1263 /*
1264  * Local variables:
1265  * c-basic-offset: 4
1266  * c-file-style: "Stroustrup"
1267  * indent-tabs-mode: nil
1268  * End:
1269  * vim: shiftwidth=4 tabstop=8 expandtab
1270  */
1271