Doc MARC utils. Remove yaz_display_OPAC
[yaz-moved-to-github.git] / src / marcdisp.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2010 Index Data
3  * See the file LICENSE for details.
4  */
5
6 /**
7  * \file marcdisp.c
8  * \brief Implements MARC conversion utilities
9  */
10
11 #if HAVE_CONFIG_H
12 #include <config.h>
13 #endif
14
15 #ifdef WIN32
16 #include <windows.h>
17 #endif
18
19 #include <stdarg.h>
20 #include <stdio.h>
21 #include <stdlib.h>
22 #include <string.h>
23 #include <ctype.h>
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
29
30 #if YAZ_HAVE_XML2
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
33 #endif
34
/** \brief tracks how far the optional collection wrapper has been written */
enum yaz_collection_state {
    no_collection,      /* no collection wrapper (state set at creation) */
    collection_first,   /* wrapper enabled; opening tag not written yet */
    collection_second   /* opening tag written; trailer closes it */
};
40    
/** \brief node types for yaz_marc_node

    The value is stored in the \c which member of a node and tells
    which member of the node's union is in use.
*/
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,     /* data field: tag, indicator, subfields */
    YAZ_MARC_CONTROLFIELD,  /* control field: tag and data */
    YAZ_MARC_COMMENT,       /* free-text comment */
    YAZ_MARC_LEADER         /* record leader */
};
49
/** \brief represents a data field */
struct yaz_marc_datafield {
    /* field tag */
    char *tag;
    /* indicator characters; the XML writers emit one indN attribute
       per character */
    char *indicator;
    /* head of the subfield list; 0 when the field has no subfields */
    struct yaz_marc_subfield *subfields;
};
56
/** \brief represents a control field (a tag plus one data string) */
struct yaz_marc_controlfield {
    /* field tag */
    char *tag;
    /* field content */
    char *data;
};
62
/** \brief a comment node

    Note: struct yaz_marc_node in this file keeps its comment text
    directly as a char pointer rather than through this struct.
*/
struct yaz_marc_comment {
    /* comment text */
    char *comment;
};
67
68 /** \brief MARC node */
69 struct yaz_marc_node {
70     enum YAZ_MARC_NODE_TYPE which;
71     union {
72         struct yaz_marc_datafield datafield;
73         struct yaz_marc_controlfield controlfield;
74         char *comment;
75         char *leader;
76     } u;
77     struct yaz_marc_node *next;
78 };
79
/** \brief represents a subfield

    The subfield code and the subfield data share one string: the
    writers treat the leading bytes as the code and the remainder as
    the data.
*/
struct yaz_marc_subfield {
    /* code immediately followed by data */
    char *code_data;
    /* following subfield of the same data field; 0 at the end */
    struct yaz_marc_subfield *next;
};
85
86 /** \brief the internals of a yaz_marc_t handle */
87 struct yaz_marc_t_ {
88     WRBUF m_wr;
89     NMEM nmem;
90     int xml;
91     int debug;
92     int write_using_libxml2;
93     enum yaz_collection_state enable_collection;
94     yaz_iconv_t iconv_cd;
95     char subfield_str[8];
96     char endline_str[8];
97     char *leader_spec;
98     struct yaz_marc_node *nodes;
99     struct yaz_marc_node **nodes_pp;
100     struct yaz_marc_subfield **subfield_pp;
101 };
102
103 yaz_marc_t yaz_marc_create(void)
104 {
105     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
106     mt->xml = YAZ_MARC_LINE;
107     mt->debug = 0;
108     mt->write_using_libxml2 = 0;
109     mt->enable_collection = no_collection;
110     mt->m_wr = wrbuf_alloc();
111     mt->iconv_cd = 0;
112     mt->leader_spec = 0;
113     strcpy(mt->subfield_str, " $");
114     strcpy(mt->endline_str, "\n");
115
116     mt->nmem = nmem_create();
117     yaz_marc_reset(mt);
118     return mt;
119 }
120
121 void yaz_marc_destroy(yaz_marc_t mt)
122 {
123     if (!mt)
124         return ;
125     nmem_destroy(mt->nmem);
126     wrbuf_destroy(mt->m_wr);
127     xfree(mt->leader_spec);
128     xfree(mt);
129 }
130
131 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
132 {
133     return mt->nmem;
134 }
135
136 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 {
138     wrbuf_iconv_reset(wr, mt->iconv_cd);
139 }
140
/* Forward declaration; the definition appears later in this file.
   yaz_marc_add_leader calls it with the handle's leader_spec and the
   freshly copied leader. */
static int marc_exec_leader(const char *leader_spec, char *leader,
                            size_t size);
143
144
145 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
146 {
147     struct yaz_marc_node *n = (struct yaz_marc_node *)
148         nmem_malloc(mt->nmem, sizeof(*n));
149     n->next = 0;
150     *mt->nodes_pp = n;
151     mt->nodes_pp = &n->next;
152     return n;
153 }
154
#if YAZ_HAVE_XML2
/** \brief appends a control field whose tag and data come from XML nodes
    \param mt handle
    \param ptr_tag XML node providing the tag text
    \param ptr_data XML node providing the field data text

    Both texts are obtained with nmem_text_node_cdata using the
    handle's memory pool.
*/
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                   const xmlNode *ptr_data)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_controlfield *cf = &node->u.controlfield;

    node->which = YAZ_MARC_CONTROLFIELD;
    cf->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    cf->data = nmem_text_node_cdata(ptr_data, mt->nmem);
}
#endif
165
166
167 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
168 {
169     struct yaz_marc_node *n = yaz_marc_add_node(mt);
170     n->which = YAZ_MARC_COMMENT;
171     n->u.comment = nmem_strdup(mt->nmem, comment);
172 }
173
174 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
175 {
176     va_list ap;
177     char buf[200];
178
179     va_start(ap, fmt);
180     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
181     yaz_marc_add_comment(mt, buf);
182     va_end (ap);
183 }
184
185 int yaz_marc_get_debug(yaz_marc_t mt)
186 {
187     return mt->debug;
188 }
189
190 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
191 {
192     struct yaz_marc_node *n = yaz_marc_add_node(mt);
193     n->which = YAZ_MARC_LEADER;
194     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
195     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
196 }
197
198 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
199                                const char *data, size_t data_len)
200 {
201     struct yaz_marc_node *n = yaz_marc_add_node(mt);
202     n->which = YAZ_MARC_CONTROLFIELD;
203     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
204     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
205     if (mt->debug)
206     {
207         size_t i;
208         char msg[80];
209
210         sprintf(msg, "controlfield:");
211         for (i = 0; i < 16 && i < data_len; i++)
212             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
213         if (i < data_len)
214             sprintf(msg + strlen(msg), " ..");
215         yaz_marc_add_comment(mt, msg);
216     }
217 }
218
219 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
220                             const char *indicator, size_t indicator_len)
221 {
222     struct yaz_marc_node *n = yaz_marc_add_node(mt);
223     n->which = YAZ_MARC_DATAFIELD;
224     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
225     n->u.datafield.indicator =
226         nmem_strdupn(mt->nmem, indicator, indicator_len);
227     n->u.datafield.subfields = 0;
228
229     /* make subfield_pp the current (last one) */
230     mt->subfield_pp = &n->u.datafield.subfields;
231 }
232
#if YAZ_HAVE_XML2
/** \brief appends a data field whose tag comes from an XML node
    \param mt handle
    \param ptr_tag XML node providing the tag text
    \param indicator indicator characters
    \param indicator_len number of bytes to copy from indicator

    Subfields added afterwards with yaz_marc_add_subfield are attached
    to this field.
*/
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                const char *indicator, size_t indicator_len)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *df = &node->u.datafield;

    node->which = YAZ_MARC_DATAFIELD;
    df->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    df->indicator = nmem_strdupn(mt->nmem, indicator, indicator_len);
    df->subfields = 0;

    /* following subfields are appended to this field's list */
    mt->subfield_pp = &df->subfields;
}
#endif
248
249 void yaz_marc_add_subfield(yaz_marc_t mt,
250                            const char *code_data, size_t code_data_len)
251 {
252     if (mt->debug)
253     {
254         size_t i;
255         char msg[80];
256
257         sprintf(msg, "subfield:");
258         for (i = 0; i < 16 && i < code_data_len; i++)
259             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
260         if (i < code_data_len)
261             sprintf(msg + strlen(msg), " ..");
262         yaz_marc_add_comment(mt, msg);
263     }
264
265     if (mt->subfield_pp)
266     {
267         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
268             nmem_malloc(mt->nmem, sizeof(*n));
269         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
270         n->next = 0;
271         /* mark subfield_pp to point to this one, so we append here next */
272         *mt->subfield_pp = n;
273         mt->subfield_pp = &n->next;
274     }
275 }
276
277 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
278                          int *indicator_length,
279                          int *identifier_length,
280                          int *base_address,
281                          int *length_data_entry,
282                          int *length_starting,
283                          int *length_implementation)
284 {
285     char leader[24];
286
287     memcpy(leader, leader_c, 24);
288
289     if (!atoi_n_check(leader+10, 1, indicator_length))
290     {
291         yaz_marc_cprintf(mt, 
292                          "Indicator length at offset 10 should hold a digit."
293                          " Assuming 2");
294         leader[10] = '2';
295         *indicator_length = 2;
296     }
297     if (!atoi_n_check(leader+11, 1, identifier_length))
298     {
299         yaz_marc_cprintf(mt, 
300                          "Identifier length at offset 11 should hold a digit."
301                          " Assuming 2");
302         leader[11] = '2';
303         *identifier_length = 2;
304     }
305     if (!atoi_n_check(leader+12, 5, base_address))
306     {
307         yaz_marc_cprintf(mt, 
308                          "Base address at offsets 12..16 should hold a number."
309                          " Assuming 0");
310         *base_address = 0;
311     }
312     if (!atoi_n_check(leader+20, 1, length_data_entry))
313     {
314         yaz_marc_cprintf(mt, 
315                          "Length data entry at offset 20 should hold a digit."
316                          " Assuming 4");
317         *length_data_entry = 4;
318         leader[20] = '4';
319     }
320     if (!atoi_n_check(leader+21, 1, length_starting))
321     {
322         yaz_marc_cprintf(mt,
323                          "Length starting at offset 21 should hold a digit."
324                          " Assuming 5");
325         *length_starting = 5;
326         leader[21] = '5';
327     }
328     if (!atoi_n_check(leader+22, 1, length_implementation))
329     {
330         yaz_marc_cprintf(mt, 
331                          "Length implementation at offset 22 should hold a digit."
332                          " Assuming 0");
333         *length_implementation = 0;
334         leader[22] = '0';
335     }
336
337     if (mt->debug)
338     {
339         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
340         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
341         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
342         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
343         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
344         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
345     }
346     yaz_marc_add_leader(mt, leader, 24);
347 }
348
349 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
350 {
351     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
352     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
353 }
354
355 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
356 {
357     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
358     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
359 }
360
361 /* try to guess how many bytes the identifier really is! */
362 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
363 {
364     if (mt->iconv_cd)
365     {
366         size_t i;
367         for (i = 1; i<5; i++)
368         {
369             char outbuf[12];
370             size_t outbytesleft = sizeof(outbuf);
371             char *outp = outbuf;
372             const char *inp = buf;
373
374             size_t inbytesleft = i;
375             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
376                                  &outp, &outbytesleft);
377             if (r != (size_t) (-1))
378                 return i;  /* got a complete sequence */
379         }
380         return 1; /* giving up */
381     }
382     return 1; /* we don't know */
383 }
384                               
385 void yaz_marc_reset(yaz_marc_t mt)
386 {
387     nmem_reset(mt->nmem);
388     mt->nodes = 0;
389     mt->nodes_pp = &mt->nodes;
390     mt->subfield_pp = 0;
391 }
392
393 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
394 {
395     struct yaz_marc_node *n;
396     int identifier_length;
397     const char *leader = 0;
398
399     for (n = mt->nodes; n; n = n->next)
400         if (n->which == YAZ_MARC_LEADER)
401         {
402             leader = n->u.leader;
403             break;
404         }
405     
406     if (!leader)
407         return -1;
408     if (!atoi_n_check(leader+11, 1, &identifier_length))
409         return -1;
410
411     for (n = mt->nodes; n; n = n->next)
412     {
413         switch(n->which)
414         {
415         case YAZ_MARC_COMMENT:
416             wrbuf_iconv_write(wr, mt->iconv_cd, 
417                               n->u.comment, strlen(n->u.comment));
418             wrbuf_puts(wr, "\n");
419             break;
420         default:
421             break;
422         }
423     }
424     return 0;
425 }
426
427 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
428                                int identifier_length)
429 {
430     /* if identifier length is 2 (most MARCs) or less (probably an error),
431        the code is a single character .. However we've
432        seen multibyte codes, so see how big it really is */
433     if (identifier_length > 2)
434         return identifier_length - 1;
435     else
436         return cdata_one_character(mt, data);
437 }
438
439 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
440 {
441     struct yaz_marc_node *n;
442     int identifier_length;
443     const char *leader = 0;
444
445     for (n = mt->nodes; n; n = n->next)
446         if (n->which == YAZ_MARC_LEADER)
447         {
448             leader = n->u.leader;
449             break;
450         }
451     
452     if (!leader)
453         return -1;
454     if (!atoi_n_check(leader+11, 1, &identifier_length))
455         return -1;
456
457     for (n = mt->nodes; n; n = n->next)
458     {
459         struct yaz_marc_subfield *s;
460         switch(n->which)
461         {
462         case YAZ_MARC_DATAFIELD:
463             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
464                          n->u.datafield.indicator);
465             for (s = n->u.datafield.subfields; s; s = s->next)
466             {
467                 size_t using_code_len = get_subfield_len(mt, s->code_data,
468                                                          identifier_length);
469                 
470                 wrbuf_puts (wr, mt->subfield_str); 
471                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
472                                   using_code_len);
473                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
474                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
475                                  s->code_data + using_code_len);
476                 marc_iconv_reset(mt, wr);
477             }
478             wrbuf_puts (wr, mt->endline_str);
479             break;
480         case YAZ_MARC_CONTROLFIELD:
481             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
482             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
483             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
484             marc_iconv_reset(mt, wr);
485             wrbuf_puts (wr, mt->endline_str);
486             break;
487         case YAZ_MARC_COMMENT:
488             wrbuf_puts(wr, "(");
489             wrbuf_iconv_write(wr, mt->iconv_cd, 
490                               n->u.comment, strlen(n->u.comment));
491             marc_iconv_reset(mt, wr);
492             wrbuf_puts(wr, ")\n");
493             break;
494         case YAZ_MARC_LEADER:
495             wrbuf_printf(wr, "%s\n", n->u.leader);
496         }
497     }
498     wrbuf_puts(wr, "\n");
499     return 0;
500 }
501
502 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
503 {
504     if (mt->enable_collection == collection_second)
505     {
506         switch(mt->xml)
507         {
508         case YAZ_MARC_MARCXML:
509             wrbuf_printf(wr, "</collection>\n");
510             break;
511         case YAZ_MARC_XCHANGE:
512             wrbuf_printf(wr, "</collection>\n");
513             break;
514         }
515     }
516     return 0;
517 }
518
519 void yaz_marc_enable_collection(yaz_marc_t mt)
520 {
521     mt->enable_collection = collection_first;
522 }
523
524 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
525 {
526     switch(mt->xml)
527     {
528     case YAZ_MARC_LINE:
529         return yaz_marc_write_line(mt, wr);
530     case YAZ_MARC_MARCXML:
531         return yaz_marc_write_marcxml(mt, wr);
532     case YAZ_MARC_XCHANGE:
533         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
534     case YAZ_MARC_ISO2709:
535         return yaz_marc_write_iso2709(mt, wr);
536     case YAZ_MARC_CHECK:
537         return yaz_marc_write_check(mt, wr);
538     }
539     return -1;
540 }
541
542 /** \brief common MARC XML/Xchange writer
543     \param mt handle
544     \param wr WRBUF output
545     \param ns XMLNS for the elements
546     \param format record format (e.g. "MARC21")
547     \param type record type (e.g. "Bibliographic")
548 */
549 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
550                                       const char *ns, 
551                                       const char *format,
552                                       const char *type)
553 {
554     struct yaz_marc_node *n;
555     int identifier_length;
556     const char *leader = 0;
557
558     for (n = mt->nodes; n; n = n->next)
559         if (n->which == YAZ_MARC_LEADER)
560         {
561             leader = n->u.leader;
562             break;
563         }
564     
565     if (!leader)
566         return -1;
567     if (!atoi_n_check(leader+11, 1, &identifier_length))
568         return -1;
569     
570     if (mt->enable_collection != no_collection)
571     {
572         if (mt->enable_collection == collection_first)
573             wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
574         mt->enable_collection = collection_second;
575         wrbuf_printf(wr, "<record");
576     }
577     else
578     {
579         wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
580     }
581     if (format)
582         wrbuf_printf(wr, " format=\"%.80s\"", format);
583     if (type)
584         wrbuf_printf(wr, " type=\"%.80s\"", type);
585     wrbuf_printf(wr, ">\n");
586     for (n = mt->nodes; n; n = n->next)
587     {
588         struct yaz_marc_subfield *s;
589
590         switch(n->which)
591         {
592         case YAZ_MARC_DATAFIELD:
593             wrbuf_printf(wr, "  <datafield tag=\"");
594             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
595                                     strlen(n->u.datafield.tag));
596             wrbuf_printf(wr, "\"");
597             if (n->u.datafield.indicator)
598             {
599                 int i;
600                 for (i = 0; n->u.datafield.indicator[i]; i++)
601                 {
602                     wrbuf_printf(wr, " ind%d=\"", i+1);
603                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
604                                           n->u.datafield.indicator+i, 1);
605                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
606                 }
607             }
608             wrbuf_printf(wr, ">\n");
609             for (s = n->u.datafield.subfields; s; s = s->next)
610             {
611                 size_t using_code_len = get_subfield_len(mt, s->code_data,
612                                                          identifier_length);
613                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
614                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
615                                         s->code_data, using_code_len);
616                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
617                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
618                                         s->code_data + using_code_len,
619                                         strlen(s->code_data + using_code_len));
620                 marc_iconv_reset(mt, wr);
621                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
622                 wrbuf_puts(wr, "\n");
623             }
624             wrbuf_printf(wr, "  </datafield>\n");
625             break;
626         case YAZ_MARC_CONTROLFIELD:
627             wrbuf_printf(wr, "  <controlfield tag=\"");
628             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
629                                     strlen(n->u.controlfield.tag));
630             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
631             wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
632                                     n->u.controlfield.data,
633                                     strlen(n->u.controlfield.data));
634
635             marc_iconv_reset(mt, wr);
636             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
637             wrbuf_puts(wr, "\n");
638             break;
639         case YAZ_MARC_COMMENT:
640             wrbuf_printf(wr, "<!-- ");
641             wrbuf_puts(wr, n->u.comment);
642             wrbuf_printf(wr, " -->\n");
643             break;
644         case YAZ_MARC_LEADER:
645             wrbuf_printf(wr, "  <leader>");
646             wrbuf_iconv_write_cdata(wr, 
647                                     0 /* no charset conversion for leader */,
648                                     n->u.leader, strlen(n->u.leader));
649             wrbuf_printf(wr, "</leader>\n");
650         }
651     }
652     wrbuf_puts(wr, "</record>\n");
653     return 0;
654 }
655
656 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
657                                      const char *ns, 
658                                      const char *format,
659                                      const char *type)
660 {
661     if (mt->write_using_libxml2)
662     {
663 #if YAZ_HAVE_XML2
664         int ret;
665         xmlNode *root_ptr;
666
667         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
668         if (ret == 0)
669         {
670             xmlChar *buf_out;
671             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
672             int len_out;
673
674             xmlDocSetRootElement(doc, root_ptr);
675             xmlDocDumpMemory(doc, &buf_out, &len_out);
676
677             wrbuf_write(wr, (const char *) buf_out, len_out);
678             wrbuf_puts(wr, "");
679             xmlFree(buf_out);
680             xmlFreeDoc(doc);
681         }
682         return ret;
683 #else
684         return -1;
685 #endif
686     }
687     else
688         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
689 }
690
691 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
692 {
693     /* set leader 09 to 'a' for UNICODE */
694     /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
695     if (!mt->leader_spec)
696         yaz_marc_modify_leader(mt, 9, "a");
697     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
698                                      0, 0);
699 }
700
701 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
702                                const char *format,
703                                const char *type)
704 {
705     return yaz_marc_write_marcxml_ns(mt, wr,
706                                      "info:lc/xmlns/marcxchange-v1",
707                                      0, 0);
708 }
709
710
#if YAZ_HAVE_XML2
/** \brief builds a libxml2 tree for the current record
    \param mt handle
    \param root_ptr receives the new record element (set only when the
    leader checks pass)
    \param ns namespace URI for the record and its children
    \param format record format; 0 to omit the attribute
    \param type record type; 0 to omit the attribute
    \retval 0 OK
    \retval -1 no leader node, or leader offset 11 is not a digit

    Field and subfield content is passed through the handle's
    conversion descriptor before being stored in the tree; the leader,
    tags, indicators and comments are stored as they are.
*/
int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
                       const char *ns, 
                       const char *format,
                       const char *type)
{
    struct yaz_marc_node *n;
    int identifier_length;
    const char *leader = 0;
    xmlNode *record_ptr;
    xmlNsPtr ns_record;
    WRBUF wr_cdata = 0;

    /* locate the first leader node */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
        {
            leader = n->u.leader;
            break;
        }
    
    if (!leader)
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;

    /* scratch buffer for converted text, reused for every value */
    wr_cdata = wrbuf_alloc();

    record_ptr = xmlNewNode(0, BAD_CAST "record");
    *root_ptr = record_ptr;

    ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
    xmlSetNs(record_ptr, ns_record);

    if (format)
        xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
    if (type)
        xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;
        xmlNode *ptr;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
            if (n->u.datafield.indicator)
            {
                int i;
                /* one indN attribute per indicator character */
                for (i = 0; n->u.datafield.indicator[i]; i++)
                {
                    /* room for "ind" + any int + terminator; the
                       former 6-byte buffer overflowed from ind100 on */
                    char ind_str[16];
                    char ind_val[2];

                    sprintf(ind_str, "ind%d", i+1);
                    ind_val[0] = n->u.datafield.indicator[i];
                    ind_val[1] = '\0';
                    xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
                }
            }
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                xmlNode *ptr_subfield;
                size_t using_code_len = get_subfield_len(mt, s->code_data,
                                                         identifier_length);
                /* element content: everything after the code */
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
                                 s->code_data + using_code_len);
                marc_iconv_reset(mt, wr_cdata);
                ptr_subfield = xmlNewTextChild(
                    ptr, ns_record, 
                    BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));

                /* code attribute: the leading using_code_len bytes */
                wrbuf_rewind(wr_cdata);
                wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
                                  s->code_data, using_code_len);
                xmlNewProp(ptr_subfield, BAD_CAST "code",
                           BAD_CAST wrbuf_cstr(wr_cdata));
            }
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_rewind(wr_cdata);
            wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr_cdata);
            
            ptr = xmlNewTextChild(record_ptr, ns_record,
                                  BAD_CAST "controlfield",
                                  BAD_CAST wrbuf_cstr(wr_cdata));
            
            xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
            break;
        case YAZ_MARC_COMMENT:
            ptr = xmlNewComment(BAD_CAST n->u.comment);
            xmlAddChild(record_ptr, ptr);
            break;
        case YAZ_MARC_LEADER:
            xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
                            BAD_CAST n->u.leader);
            break;
        }
    }
    wrbuf_destroy(wr_cdata);
    return 0;
}
#endif
817
818 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
819 {
820     struct yaz_marc_node *n;
821     int indicator_length;
822     int identifier_length;
823     int length_data_entry;
824     int length_starting;
825     int length_implementation;
826     int data_offset = 0;
827     const char *leader = 0;
828     WRBUF wr_dir, wr_head, wr_data_tmp;
829     int base_address;
830     
831     for (n = mt->nodes; n; n = n->next)
832         if (n->which == YAZ_MARC_LEADER)
833             leader = n->u.leader;
834     
835     if (!leader)
836         return -1;
837     if (!atoi_n_check(leader+10, 1, &indicator_length))
838         return -1;
839     if (!atoi_n_check(leader+11, 1, &identifier_length))
840         return -1;
841     if (!atoi_n_check(leader+20, 1, &length_data_entry))
842         return -1;
843     if (!atoi_n_check(leader+21, 1, &length_starting))
844         return -1;
845     if (!atoi_n_check(leader+22, 1, &length_implementation))
846         return -1;
847
848     wr_data_tmp = wrbuf_alloc();
849     wr_dir = wrbuf_alloc();
850     for (n = mt->nodes; n; n = n->next)
851     {
852         int data_length = 0;
853         struct yaz_marc_subfield *s;
854
855         switch(n->which)
856         {
857         case YAZ_MARC_DATAFIELD:
858             wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
859             data_length += indicator_length;
860             wrbuf_rewind(wr_data_tmp);
861             for (s = n->u.datafield.subfields; s; s = s->next)
862             {
863                 /* write dummy IDFS + content */
864                 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
865                 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
866                 marc_iconv_reset(mt, wr_data_tmp);
867             }
868             /* write dummy FS (makes MARC-8 to become ASCII) */
869             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
870             marc_iconv_reset(mt, wr_data_tmp);
871             data_length += wrbuf_len(wr_data_tmp);
872             break;
873         case YAZ_MARC_CONTROLFIELD:
874             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
875
876             wrbuf_rewind(wr_data_tmp);
877             wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
878                              n->u.controlfield.data);
879             marc_iconv_reset(mt, wr_data_tmp);
880             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
881             marc_iconv_reset(mt, wr_data_tmp);
882             data_length += wrbuf_len(wr_data_tmp);
883             break;
884         case YAZ_MARC_COMMENT:
885             break;
886         case YAZ_MARC_LEADER:
887             break;
888         }
889         if (data_length)
890         {
891             wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
892             wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
893             data_offset += data_length;
894         }
895     }
896     /* mark end of directory */
897     wrbuf_putc(wr_dir, ISO2709_FS);
898
899     /* base address of data (comes after leader+directory) */
900     base_address = 24 + wrbuf_len(wr_dir);
901
902     wr_head = wrbuf_alloc();
903
904     /* write record length */
905     wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
906     /* from "original" leader */
907     wrbuf_write(wr_head, leader+5, 7);
908     /* base address of data */
909     wrbuf_printf(wr_head, "%05d", base_address);
910     /* from "original" leader */
911     wrbuf_write(wr_head, leader+17, 7);
912     
913     wrbuf_write(wr, wrbuf_buf(wr_head), 24);
914     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
915     wrbuf_destroy(wr_head);
916     wrbuf_destroy(wr_dir);
917     wrbuf_destroy(wr_data_tmp);
918
919     for (n = mt->nodes; n; n = n->next)
920     {
921         struct yaz_marc_subfield *s;
922
923         switch(n->which)
924         {
925         case YAZ_MARC_DATAFIELD:
926             wrbuf_printf(wr, "%.*s", indicator_length,
927                          n->u.datafield.indicator);
928             for (s = n->u.datafield.subfields; s; s = s->next)
929             {
930                 wrbuf_putc(wr, ISO2709_IDFS);
931                 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
932                 marc_iconv_reset(mt, wr);
933             }
934             wrbuf_putc(wr, ISO2709_FS);
935             break;
936         case YAZ_MARC_CONTROLFIELD:
937             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
938             marc_iconv_reset(mt, wr);
939             wrbuf_putc(wr, ISO2709_FS);
940             break;
941         case YAZ_MARC_COMMENT:
942             break;
943         case YAZ_MARC_LEADER:
944             break;
945         }
946     }
947     wrbuf_printf(wr, "%c", ISO2709_RS);
948     return 0;
949 }
950
951
952 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
953 {
954     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
955     if (r <= 0)
956         return r;
957     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
958     if (s != 0)
959         return -1; /* error */
960     return r; /* OK, return length > 0 */
961 }
962
963 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
964                          const char **result, size_t *rsize)
965 {
966     int r;
967
968     wrbuf_rewind(mt->m_wr);
969     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
970     if (result)
971         *result = wrbuf_cstr(mt->m_wr);
972     if (rsize)
973         *rsize = wrbuf_len(mt->m_wr);
974     return r;
975 }
976
977 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
978 {
979     if (mt)
980         mt->xml = xmlmode;
981 }
982
983 void yaz_marc_debug(yaz_marc_t mt, int level)
984 {
985     if (mt)
986         mt->debug = level;
987 }
988
989 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
990 {
991     mt->iconv_cd = cd;
992 }
993
994 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
995 {
996     return mt->iconv_cd;
997 }
998
999 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1000 {
1001     struct yaz_marc_node *n;
1002     char *leader = 0;
1003     for (n = mt->nodes; n; n = n->next)
1004         if (n->which == YAZ_MARC_LEADER)
1005         {
1006             leader = n->u.leader;
1007             memcpy(leader+off, str, strlen(str));
1008             break;
1009         }
1010 }
1011
1012 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1013 {
1014     xfree(mt->leader_spec);
1015     mt->leader_spec = 0;
1016     if (leader_spec)
1017     {
1018         char dummy_leader[24];
1019         if (marc_exec_leader(leader_spec, dummy_leader, 24))
1020             return -1;
1021         mt->leader_spec = xstrdup(leader_spec);
1022     }
1023     return 0;
1024 }
1025
/**
 * \brief applies a leader specification to a leader buffer
 * \param leader_spec comma-separated list of pos=value entries; a NULL
 *                    specification is accepted and changes nothing
 * \param leader buffer that is modified in place
 * \param size number of bytes available in leader
 * \returns 0 if every entry was applied; -1 on the first bad entry
 *
 * Each entry is a decimal position followed by '=' and a value of at
 * most 20 characters that contains no comma. A value is either
 *  - a single-quoted literal, whose characters are copied to leader
 *    starting at the position (the literal must fit within size), or
 *  - text starting with a digit, converted with atoi() and stored as a
 *    single byte at the position.
 *
 * Entries are applied as they are parsed, so on failure the entries
 * before the bad one have already been written.
 */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *spec = leader_spec;

    if (!spec)
        return 0;

    for (;;)
    {
        char value[21];
        int offset;
        int consumed = 0;
        int matched = sscanf(spec, "%d=%20[^,]%n", &offset, value, &consumed);

        /* need both position and value; shortest entry is 3 chars */
        if (matched < 2 || consumed < 3)
            return -1;
        if (offset < 0 || (size_t) offset >= size)
            return -1;

        if (value[0] == '\'')
        {
            const char *closing = strchr(value + 1, '\'');
            size_t literal_len;

            if (!closing)
                return -1;
            /* characters strictly between the two quotes */
            literal_len = closing - value - 1;
            if (literal_len + offset > size)
                return -1;
            memcpy(leader + offset, value + 1, literal_len);
        }
        else if (value[0] >= '0' && value[0] <= '9')
        {
            /* numeric value is stored as one raw byte */
            leader[offset] = (char) atoi(value);
        }
        else
        {
            return -1;
        }

        spec += consumed;
        if (*spec != ',')
            return 0;
        spec++; /* skip the separator and parse the next entry */
    }
}
1068
1069 int yaz_marc_decode_formatstr(const char *arg)
1070 {
1071     int mode = -1; 
1072     if (!strcmp(arg, "marc"))
1073         mode = YAZ_MARC_ISO2709;
1074     if (!strcmp(arg, "marcxml"))
1075         mode = YAZ_MARC_MARCXML;
1076     if (!strcmp(arg, "marcxchange"))
1077         mode = YAZ_MARC_XCHANGE;
1078     if (!strcmp(arg, "line"))
1079         mode = YAZ_MARC_LINE;
1080     return mode;
1081 }
1082
1083 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1084 {
1085     mt->write_using_libxml2 = enable;
1086 }
1087
1088 /*
1089  * Local variables:
1090  * c-basic-offset: 4
1091  * c-file-style: "Stroustrup"
1092  * indent-tabs-mode: nil
1093  * End:
1094  * vim: shiftwidth=4 tabstop=8 expandtab
1095  */
1096