33ec6f9283bb078c1ef226f51dd1dafc3062ff39
[yaz-moved-to-github.git] / src / marcdisp.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdisp.c,v 1.49 2007-03-20 21:37:32 adam Exp $
6  */
7
8 /**
9  * \file marcdisp.c
10  * \brief Implements MARC conversion utilities
11  */
12
13 #if HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16
17 #ifdef WIN32
18 #include <windows.h>
19 #endif
20
21 #include <stdarg.h>
22 #include <stdio.h>
23 #include <string.h>
24 #include <ctype.h>
25 #include <yaz/marcdisp.h>
26 #include <yaz/wrbuf.h>
27 #include <yaz/yaz-util.h>
28 #include <yaz/nmem_xml.h>
29 #include <yaz/snprintf.h>
30
31 #if YAZ_HAVE_XML2
32 #include <libxml/parser.h>
33 #include <libxml/tree.h>
34 #endif
35
/** \brief node types for yaz_marc_node
 *
 * Selects which member of the union in struct yaz_marc_node is valid.
 */
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,    /**< data field: tag, indicators, subfields */
    YAZ_MARC_CONTROLFIELD, /**< control field: tag and data */
    YAZ_MARC_COMMENT,      /**< comment text */
    YAZ_MARC_LEADER        /**< record leader */
};
44
/** \brief represents a data field */
struct yaz_marc_datafield {
    /** field tag */
    char *tag;
    /** indicator characters; writers emit one attribute/char per position */
    char *indicator;
    /** head of the singly linked list of subfields (0 when empty) */
    struct yaz_marc_subfield *subfields;
};
51
/** \brief represents a control field */
struct yaz_marc_controlfield {
    /** field tag */
    char *tag;
    /** field content, stored as a 0-terminated string */
    char *data;
};
57
/** \brief a comment node
 *
 * NOTE(review): nothing else in this file references this struct; the
 * node union stores the comment text directly as a char pointer.
 */
struct yaz_marc_comment {
    /** comment text */
    char *comment;
};
62
63 /** \brief MARC node */
64 struct yaz_marc_node {
65     enum YAZ_MARC_NODE_TYPE which;
66     union {
67         struct yaz_marc_datafield datafield;
68         struct yaz_marc_controlfield controlfield;
69         char *comment;
70         char *leader;
71     } u;
72     struct yaz_marc_node *next;
73 };
74
/** \brief represents a subfield */
struct yaz_marc_subfield {
    /** subfield code immediately followed by the subfield data */
    char *code_data;
    /** next subfield of the same data field, 0 for the last one */
    struct yaz_marc_subfield *next;
};
80
81 /** \brief the internals of a yaz_marc_t handle */
82 struct yaz_marc_t_ {
83     WRBUF m_wr;
84     NMEM nmem;
85     int xml;
86     int debug;
87     int write_using_libxml2;
88     yaz_iconv_t iconv_cd;
89     char subfield_str[8];
90     char endline_str[8];
91     char *leader_spec;
92     struct yaz_marc_node *nodes;
93     struct yaz_marc_node **nodes_pp;
94     struct yaz_marc_subfield **subfield_pp;
95 };
96
97 yaz_marc_t yaz_marc_create(void)
98 {
99     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
100     mt->xml = YAZ_MARC_LINE;
101     mt->debug = 0;
102     mt->write_using_libxml2 = 0;
103     mt->m_wr = wrbuf_alloc();
104     mt->iconv_cd = 0;
105     mt->leader_spec = 0;
106     strcpy(mt->subfield_str, " $");
107     strcpy(mt->endline_str, "\n");
108
109     mt->nmem = nmem_create();
110     yaz_marc_reset(mt);
111     return mt;
112 }
113
114 void yaz_marc_destroy(yaz_marc_t mt)
115 {
116     if (!mt)
117         return ;
118     nmem_destroy(mt->nmem);
119     wrbuf_destroy(mt->m_wr);
120     xfree(mt->leader_spec);
121     xfree(mt);
122 }
123
124 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
125 {
126     return mt->nmem;
127 }
128
129 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
130 {
131     wrbuf_iconv_reset(wr, mt->iconv_cd);
132 }
133
134 static int marc_exec_leader(const char *leader_spec, char *leader,
135                             size_t size);
136
137
138 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
139 {
140     struct yaz_marc_node *n = nmem_malloc(mt->nmem, sizeof(*n));
141     n->next = 0;
142     *mt->nodes_pp = n;
143     mt->nodes_pp = &n->next;
144     return n;
145 }
146
147 #if YAZ_HAVE_XML2
148 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
149                                    const xmlNode *ptr_data)
150 {
151     struct yaz_marc_node *n = yaz_marc_add_node(mt);
152     n->which = YAZ_MARC_CONTROLFIELD;
153     n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
154     n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
155 }
156 #endif
157
158
159 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
160 {
161     struct yaz_marc_node *n = yaz_marc_add_node(mt);
162     n->which = YAZ_MARC_COMMENT;
163     n->u.comment = nmem_strdup(mt->nmem, comment);
164 }
165
166 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
167 {
168     va_list ap;
169     char buf[200];
170
171     va_start(ap, fmt);
172     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
173     yaz_marc_add_comment(mt, buf);
174     va_end (ap);
175 }
176
177 int yaz_marc_get_debug(yaz_marc_t mt)
178 {
179     return mt->debug;
180 }
181
182 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
183 {
184     struct yaz_marc_node *n = yaz_marc_add_node(mt);
185     n->which = YAZ_MARC_LEADER;
186     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
187     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
188 }
189
190 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
191                                const char *data, size_t data_len)
192 {
193     struct yaz_marc_node *n = yaz_marc_add_node(mt);
194     n->which = YAZ_MARC_CONTROLFIELD;
195     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
196     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
197     if (mt->debug)
198     {
199         size_t i;
200         char msg[80];
201
202         sprintf(msg, "controlfield:");
203         for (i = 0; i < 16 && i < data_len; i++)
204             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
205         if (i < data_len)
206             sprintf(msg + strlen(msg), " ..");
207         yaz_marc_add_comment(mt, msg);
208     }
209 }
210
211 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
212                             const char *indicator, size_t indicator_len)
213 {
214     struct yaz_marc_node *n = yaz_marc_add_node(mt);
215     n->which = YAZ_MARC_DATAFIELD;
216     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
217     n->u.datafield.indicator =
218         nmem_strdupn(mt->nmem, indicator, indicator_len);
219     n->u.datafield.subfields = 0;
220
221     /* make subfield_pp the current (last one) */
222     mt->subfield_pp = &n->u.datafield.subfields;
223 }
224
225 #if YAZ_HAVE_XML2
226 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
227                                 const char *indicator, size_t indicator_len)
228 {
229     struct yaz_marc_node *n = yaz_marc_add_node(mt);
230     n->which = YAZ_MARC_DATAFIELD;
231     n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
232     n->u.datafield.indicator =
233         nmem_strdupn(mt->nmem, indicator, indicator_len);
234     n->u.datafield.subfields = 0;
235
236     /* make subfield_pp the current (last one) */
237     mt->subfield_pp = &n->u.datafield.subfields;
238 }
239 #endif
240
241 void yaz_marc_add_subfield(yaz_marc_t mt,
242                            const char *code_data, size_t code_data_len)
243 {
244     if (mt->debug)
245     {
246         size_t i;
247         char msg[80];
248
249         sprintf(msg, "subfield:");
250         for (i = 0; i < 16 && i < code_data_len; i++)
251             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
252         if (i < code_data_len)
253             sprintf(msg + strlen(msg), " ..");
254         yaz_marc_add_comment(mt, msg);
255     }
256
257     if (mt->subfield_pp)
258     {
259         struct yaz_marc_subfield *n = nmem_malloc(mt->nmem, sizeof(*n));
260         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
261         n->next = 0;
262         /* mark subfield_pp to point to this one, so we append here next */
263         *mt->subfield_pp = n;
264         mt->subfield_pp = &n->next;
265     }
266 }
267
/* Converts the first 'size' bytes of buf to an integer, but only if all
   of them are decimal digits.
   Returns 1 and stores the number in *val on success; returns 0 and
   leaves *val untouched otherwise. */
int atoi_n_check(const char *buf, int size, int *val)
{
    int idx = 0;

    while (idx < size)
    {
        /* cast: isdigit must not be handed a negative char value */
        if (!isdigit((unsigned char) buf[idx]))
            return 0;
        idx++;
    }
    *val = atoi_n(buf, size);
    return 1;
}
277
278 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
279                          int *indicator_length,
280                          int *identifier_length,
281                          int *base_address,
282                          int *length_data_entry,
283                          int *length_starting,
284                          int *length_implementation)
285 {
286     char leader[24];
287
288     memcpy(leader, leader_c, 24);
289
290     if (!atoi_n_check(leader+10, 1, indicator_length))
291     {
292         yaz_marc_cprintf(mt, 
293                          "Indicator length at offset 10 should hold a digit."
294                          " Assuming 2");
295         leader[10] = '2';
296         *indicator_length = 2;
297     }
298     if (!atoi_n_check(leader+11, 1, identifier_length))
299     {
300         yaz_marc_cprintf(mt, 
301                          "Identifier length at offset 11 should hold a digit."
302                          " Assuming 2");
303         leader[11] = '2';
304         *identifier_length = 2;
305     }
306     if (!atoi_n_check(leader+12, 5, base_address))
307     {
308         yaz_marc_cprintf(mt, 
309                          "Base address at offsets 12..16 should hold a number."
310                          " Assuming 0");
311         *base_address = 0;
312     }
313     if (!atoi_n_check(leader+20, 1, length_data_entry))
314     {
315         yaz_marc_cprintf(mt, 
316                          "Length data entry at offset 20 should hold a digit."
317                          " Assuming 4");
318         *length_data_entry = 4;
319         leader[20] = '4';
320     }
321     if (!atoi_n_check(leader+21, 1, length_starting))
322     {
323         yaz_marc_cprintf(mt,
324                          "Length starting at offset 21 should hold a digit."
325                          " Assuming 5");
326         *length_starting = 5;
327         leader[21] = '5';
328     }
329     if (!atoi_n_check(leader+22, 1, length_implementation))
330     {
331         yaz_marc_cprintf(mt, 
332                          "Length implementation at offset 22 should hold a digit."
333                          " Assuming 0");
334         *length_implementation = 0;
335         leader[22] = '0';
336     }
337
338     if (mt->debug)
339     {
340         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
341         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
342         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
343         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
344         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
345         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
346     }
347     yaz_marc_add_leader(mt, leader, 24);
348 }
349
350 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
351 {
352     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
353     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
354 }
355
356 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
357 {
358     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
359     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
360 }
361
362 /* try to guess how many bytes the identifier really is! */
363 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
364 {
365     if (mt->iconv_cd)
366     {
367         size_t i;
368         for (i = 1; i<5; i++)
369         {
370             char outbuf[12];
371             size_t outbytesleft = sizeof(outbuf);
372             char *outp = outbuf;
373             const char *inp = buf;
374
375             size_t inbytesleft = i;
376             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
377                                  &outp, &outbytesleft);
378             if (r != (size_t) (-1))
379                 return i;  /* got a complete sequence */
380         }
381         return 1; /* giving up */
382     }
383     return 1; /* we don't know */
384 }
385                               
386 void yaz_marc_reset(yaz_marc_t mt)
387 {
388     nmem_reset(mt->nmem);
389     mt->nodes = 0;
390     mt->nodes_pp = &mt->nodes;
391     mt->subfield_pp = 0;
392 }
393
394 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
395 {
396     struct yaz_marc_node *n;
397     int identifier_length;
398     const char *leader = 0;
399
400     for (n = mt->nodes; n; n = n->next)
401         if (n->which == YAZ_MARC_LEADER)
402         {
403             leader = n->u.leader;
404             break;
405         }
406     
407     if (!leader)
408         return -1;
409     if (!atoi_n_check(leader+11, 1, &identifier_length))
410         return -1;
411
412     for (n = mt->nodes; n; n = n->next)
413     {
414         switch(n->which)
415         {
416         case YAZ_MARC_COMMENT:
417             wrbuf_iconv_write(wr, mt->iconv_cd, 
418                               n->u.comment, strlen(n->u.comment));
419             wrbuf_puts(wr, ")\n");
420             break;
421         default:
422             break;
423         }
424     }
425     return 0;
426 }
427
428
429 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
430 {
431     struct yaz_marc_node *n;
432     int identifier_length;
433     const char *leader = 0;
434
435     for (n = mt->nodes; n; n = n->next)
436         if (n->which == YAZ_MARC_LEADER)
437         {
438             leader = n->u.leader;
439             break;
440         }
441     
442     if (!leader)
443         return -1;
444     if (!atoi_n_check(leader+11, 1, &identifier_length))
445         return -1;
446
447     for (n = mt->nodes; n; n = n->next)
448     {
449         struct yaz_marc_subfield *s;
450         switch(n->which)
451         {
452         case YAZ_MARC_DATAFIELD:
453             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
454                          n->u.datafield.indicator);
455             for (s = n->u.datafield.subfields; s; s = s->next)
456             {
457                 /* if identifier length is 2 (most MARCs),
458                    the code is a single character .. However we've
459                    seen multibyte codes, so see how big it really is */
460                 size_t using_code_len = 
461                     (identifier_length != 2) ? identifier_length - 1
462                     :
463                     cdata_one_character(mt, s->code_data);
464                 
465                 wrbuf_puts (wr, mt->subfield_str); 
466                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
467                                   using_code_len);
468                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
469                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
470                                  s->code_data + using_code_len);
471                 marc_iconv_reset(mt, wr);
472             }
473             wrbuf_puts (wr, mt->endline_str);
474             break;
475         case YAZ_MARC_CONTROLFIELD:
476             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
477             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
478             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
479             marc_iconv_reset(mt, wr);
480             wrbuf_puts (wr, mt->endline_str);
481             break;
482         case YAZ_MARC_COMMENT:
483             wrbuf_puts(wr, "(");
484             wrbuf_iconv_write(wr, mt->iconv_cd, 
485                               n->u.comment, strlen(n->u.comment));
486             marc_iconv_reset(mt, wr);
487             wrbuf_puts(wr, ")\n");
488             break;
489         case YAZ_MARC_LEADER:
490             wrbuf_printf(wr, "%s\n", n->u.leader);
491         }
492     }
493     wrbuf_puts(wr, "\n");
494     return 0;
495 }
496
497 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
498 {
499     switch(mt->xml)
500     {
501     case YAZ_MARC_LINE:
502         return yaz_marc_write_line(mt, wr);
503     case YAZ_MARC_MARCXML:
504         return yaz_marc_write_marcxml(mt, wr);
505     case YAZ_MARC_XCHANGE:
506         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
507     case YAZ_MARC_ISO2709:
508         return yaz_marc_write_iso2709(mt, wr);
509     case YAZ_MARC_CHECK:
510         return yaz_marc_write_check(mt, wr);
511     }
512     return -1;
513 }
514
515 /** \brief common MARC XML/Xchange writer
516     \param mt handle
517     \param wr WRBUF output
518     \param ns XMLNS for the elements
519     \param format record format (e.g. "MARC21")
520     \param type record type (e.g. "Bibliographic")
521 */
522 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
523                                       const char *ns, 
524                                       const char *format,
525                                       const char *type)
526 {
527     struct yaz_marc_node *n;
528     int identifier_length;
529     const char *leader = 0;
530
531     for (n = mt->nodes; n; n = n->next)
532         if (n->which == YAZ_MARC_LEADER)
533         {
534             leader = n->u.leader;
535             break;
536         }
537     
538     if (!leader)
539         return -1;
540     if (!atoi_n_check(leader+11, 1, &identifier_length))
541         return -1;
542
543     wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
544     if (format)
545         wrbuf_printf(wr, " format=\"%.80s\"", format);
546     if (type)
547         wrbuf_printf(wr, " type=\"%.80s\"", type);
548     wrbuf_printf(wr, ">\n");
549     for (n = mt->nodes; n; n = n->next)
550     {
551         struct yaz_marc_subfield *s;
552
553         switch(n->which)
554         {
555         case YAZ_MARC_DATAFIELD:
556             wrbuf_printf(wr, "  <datafield tag=\"");
557             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
558                                     strlen(n->u.datafield.tag));
559             wrbuf_printf(wr, "\"");
560             if (n->u.datafield.indicator)
561             {
562                 int i;
563                 for (i = 0; n->u.datafield.indicator[i]; i++)
564                 {
565                     wrbuf_printf(wr, " ind%d=\"", i+1);
566                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
567                                           n->u.datafield.indicator+i, 1);
568                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
569                 }
570             }
571             wrbuf_printf(wr, ">\n");
572             for (s = n->u.datafield.subfields; s; s = s->next)
573             {
574                 /* if identifier length is 2 (most MARCs),
575                    the code is a single character .. However we've
576                    seen multibyte codes, so see how big it really is */
577                 size_t using_code_len = 
578                     (identifier_length != 2) ? identifier_length - 1
579                     :
580                     cdata_one_character(mt, s->code_data);
581                 
582                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
583                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
584                                         s->code_data, using_code_len);
585                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
586                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
587                                         s->code_data + using_code_len,
588                                         strlen(s->code_data + using_code_len));
589                 marc_iconv_reset(mt, wr);
590                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
591                 wrbuf_puts(wr, "\n");
592             }
593             wrbuf_printf(wr, "  </datafield>\n");
594             break;
595         case YAZ_MARC_CONTROLFIELD:
596             wrbuf_printf(wr, "  <controlfield tag=\"");
597             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
598                                     strlen(n->u.controlfield.tag));
599             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
600             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
601
602             marc_iconv_reset(mt, wr);
603             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
604             wrbuf_puts(wr, "\n");
605             break;
606         case YAZ_MARC_COMMENT:
607             wrbuf_printf(wr, "<!-- ");
608             wrbuf_puts(wr, n->u.comment);
609             wrbuf_printf(wr, " -->\n");
610             break;
611         case YAZ_MARC_LEADER:
612             wrbuf_printf(wr, "  <leader>");
613             wrbuf_iconv_write_cdata(wr, 
614                                     0 /* no charset conversion for leader */,
615                                     n->u.leader, strlen(n->u.leader));
616             wrbuf_printf(wr, "</leader>\n");
617         }
618     }
619     wrbuf_puts(wr, "</record>\n");
620     return 0;
621 }
622
623 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
624                                      const char *ns, 
625                                      const char *format,
626                                      const char *type)
627 {
628     if (mt->write_using_libxml2)
629     {
630 #if YAZ_HAVE_XML2
631         int ret;
632         xmlNode *root_ptr;
633
634         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
635         if (ret == 0)
636         {
637             xmlChar *buf_out;
638             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
639             int len_out;
640
641             xmlDocSetRootElement(doc, root_ptr);
642             xmlDocDumpMemory(doc, &buf_out, &len_out);
643
644             wrbuf_write(wr, (const char *) buf_out, len_out);
645             wrbuf_puts(wr, "");
646             xmlFree(buf_out);
647             xmlFreeDoc(doc);
648         }
649         return ret;
650 #else
651         return -1;
652 #endif
653     }
654     else
655         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
656 }
657
658 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
659 {
660     if (!mt->leader_spec)
661         yaz_marc_modify_leader(mt, 9, "a");
662     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
663                                      0, 0);
664 }
665
666 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
667                                const char *format,
668                                const char *type)
669 {
670     return yaz_marc_write_marcxml_ns(mt, wr,
671                                      "http://www.bs.dk/standards/MarcXchange",
672                                      0, 0);
673 }
674
675
676 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
677                        const char *ns, 
678                        const char *format,
679                        const char *type)
680 {
681 #if YAZ_HAVE_XML2
682     struct yaz_marc_node *n;
683     int identifier_length;
684     const char *leader = 0;
685     xmlNode *record_ptr;
686     xmlNsPtr ns_record;
687     WRBUF wr_cdata = 0;
688
689     for (n = mt->nodes; n; n = n->next)
690         if (n->which == YAZ_MARC_LEADER)
691         {
692             leader = n->u.leader;
693             break;
694         }
695     
696     if (!leader)
697         return -1;
698     if (!atoi_n_check(leader+11, 1, &identifier_length))
699         return -1;
700
701     wr_cdata = wrbuf_alloc();
702
703     record_ptr = xmlNewNode(0, BAD_CAST "record");
704     *root_ptr = record_ptr;
705
706     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
707     xmlSetNs(record_ptr, ns_record);
708
709     if (format)
710         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
711     if (type)
712         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
713     for (n = mt->nodes; n; n = n->next)
714     {
715         struct yaz_marc_subfield *s;
716         xmlNode *ptr;
717
718         switch(n->which)
719         {
720         case YAZ_MARC_DATAFIELD:
721             ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
722             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
723             if (n->u.datafield.indicator)
724             {
725                 int i;
726                 for (i = 0; n->u.datafield.indicator[i]; i++)
727                 {
728                     char ind_str[6];
729                     char ind_val[2];
730
731                     sprintf(ind_str, "ind%d", i+1);
732                     ind_val[0] = n->u.datafield.indicator[i];
733                     ind_val[1] = '\0';
734                     xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
735                 }
736             }
737             for (s = n->u.datafield.subfields; s; s = s->next)
738             {
739                 xmlNode *ptr_subfield;
740                 /* if identifier length is 2 (most MARCs),
741                    the code is a single character .. However we've
742                    seen multibyte codes, so see how big it really is */
743                 size_t using_code_len = 
744                     (identifier_length != 2) ? identifier_length - 1
745                     :
746                     cdata_one_character(mt, s->code_data);
747
748                 wrbuf_rewind(wr_cdata);
749                 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
750                                  s->code_data + using_code_len);
751                 marc_iconv_reset(mt, wr_cdata);
752                 ptr_subfield = xmlNewTextChild(
753                     ptr, ns_record, 
754                     BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
755
756                 wrbuf_rewind(wr_cdata);
757                 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
758                                   s->code_data, using_code_len);
759                 xmlNewProp(ptr_subfield, BAD_CAST "code",
760                            BAD_CAST wrbuf_cstr(wr_cdata));
761             }
762             break;
763         case YAZ_MARC_CONTROLFIELD:
764             wrbuf_rewind(wr_cdata);
765             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
766             marc_iconv_reset(mt, wr_cdata);
767             
768             ptr = xmlNewTextChild(record_ptr, ns_record,
769                                   BAD_CAST "controlfield",
770                                   BAD_CAST wrbuf_cstr(wr_cdata));
771             
772             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
773             break;
774         case YAZ_MARC_COMMENT:
775             ptr = xmlNewComment(BAD_CAST n->u.comment);
776             xmlAddChild(record_ptr, ptr);
777             break;
778         case YAZ_MARC_LEADER:
779             xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
780                             BAD_CAST n->u.leader);
781             break;
782         }
783     }
784     wrbuf_destroy(wr_cdata);
785     return 0;
786 #else
787     return -1;
788 #endif
789 }
790
791 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
792 {
793     struct yaz_marc_node *n;
794     int indicator_length;
795     int identifier_length;
796     int length_data_entry;
797     int length_starting;
798     int length_implementation;
799     int data_offset = 0;
800     const char *leader = 0;
801     WRBUF wr_dir, wr_head, wr_data_tmp;
802     int base_address;
803     
804     for (n = mt->nodes; n; n = n->next)
805         if (n->which == YAZ_MARC_LEADER)
806             leader = n->u.leader;
807     
808     if (!leader)
809         return -1;
810     if (!atoi_n_check(leader+10, 1, &indicator_length))
811         return -1;
812     if (!atoi_n_check(leader+11, 1, &identifier_length))
813         return -1;
814     if (!atoi_n_check(leader+20, 1, &length_data_entry))
815         return -1;
816     if (!atoi_n_check(leader+21, 1, &length_starting))
817         return -1;
818     if (!atoi_n_check(leader+22, 1, &length_implementation))
819         return -1;
820
821     wr_data_tmp = wrbuf_alloc();
822     wr_dir = wrbuf_alloc();
823     for (n = mt->nodes; n; n = n->next)
824     {
825         int data_length = 0;
826         struct yaz_marc_subfield *s;
827
828         switch(n->which)
829         {
830         case YAZ_MARC_DATAFIELD:
831             wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
832             data_length += indicator_length;
833             wrbuf_rewind(wr_data_tmp);
834             for (s = n->u.datafield.subfields; s; s = s->next)
835             {
836                 /* write dummy IDFS + content */
837                 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
838                 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
839                 marc_iconv_reset(mt, wr_data_tmp);
840             }
841             /* write dummy FS (makes MARC-8 to become ASCII) */
842             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
843             marc_iconv_reset(mt, wr_data_tmp);
844             data_length += wrbuf_len(wr_data_tmp);
845             break;
846         case YAZ_MARC_CONTROLFIELD:
847             wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
848
849             wrbuf_rewind(wr_data_tmp);
850             wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, 
851                              n->u.controlfield.data);
852             marc_iconv_reset(mt, wr_data_tmp);
853             wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
854             marc_iconv_reset(mt, wr_data_tmp);
855             data_length += wrbuf_len(wr_data_tmp);
856             break;
857         case YAZ_MARC_COMMENT:
858             break;
859         case YAZ_MARC_LEADER:
860             break;
861         }
862         if (data_length)
863         {
864             wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
865             wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
866             data_offset += data_length;
867         }
868     }
869     /* mark end of directory */
870     wrbuf_putc(wr_dir, ISO2709_FS);
871
872     /* base address of data (comes after leader+directory) */
873     base_address = 24 + wrbuf_len(wr_dir);
874
875     wr_head = wrbuf_alloc();
876
877     /* write record length */
878     wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
879     /* from "original" leader */
880     wrbuf_write(wr_head, leader+5, 7);
881     /* base address of data */
882     wrbuf_printf(wr_head, "%05d", base_address);
883     /* from "original" leader */
884     wrbuf_write(wr_head, leader+17, 7);
885     
886     wrbuf_write(wr, wrbuf_buf(wr_head), 24);
887     wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
888     wrbuf_destroy(wr_head);
889     wrbuf_destroy(wr_dir);
890     wrbuf_destroy(wr_data_tmp);
891
892     for (n = mt->nodes; n; n = n->next)
893     {
894         struct yaz_marc_subfield *s;
895
896         switch(n->which)
897         {
898         case YAZ_MARC_DATAFIELD:
899             wrbuf_printf(wr, "%.*s", indicator_length,
900                          n->u.datafield.indicator);
901             for (s = n->u.datafield.subfields; s; s = s->next)
902             {
903                 wrbuf_putc(wr, ISO2709_IDFS);
904                 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
905                 marc_iconv_reset(mt, wr);
906             }
907             wrbuf_putc(wr, ISO2709_FS);
908             break;
909         case YAZ_MARC_CONTROLFIELD:
910             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
911             marc_iconv_reset(mt, wr);
912             wrbuf_putc(wr, ISO2709_FS);
913             break;
914         case YAZ_MARC_COMMENT:
915             break;
916         case YAZ_MARC_LEADER:
917             break;
918         }
919     }
920     wrbuf_printf(wr, "%c", ISO2709_RS);
921     return 0;
922 }
923
924
925 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
926 {
927     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
928     if (r <= 0)
929         return r;
930     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
931     if (s != 0)
932         return -1; /* error */
933     return r; /* OK, return length > 0 */
934 }
935
936 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
937                          const char **result, size_t *rsize)
938 {
939     int r;
940
941     wrbuf_rewind(mt->m_wr);
942     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
943     if (result)
944         *result = wrbuf_cstr(mt->m_wr);
945     if (rsize)
946         *rsize = wrbuf_len(mt->m_wr);
947     return r;
948 }
949
950 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
951 {
952     if (mt)
953         mt->xml = xmlmode;
954 }
955
956 void yaz_marc_debug(yaz_marc_t mt, int level)
957 {
958     if (mt)
959         mt->debug = level;
960 }
961
962 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
963 {
964     mt->iconv_cd = cd;
965 }
966
967 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
968 {
969     struct yaz_marc_node *n;
970     char *leader = 0;
971     for (n = mt->nodes; n; n = n->next)
972         if (n->which == YAZ_MARC_LEADER)
973         {
974             leader = n->u.leader;
975             memcpy(leader+off, str, strlen(str));
976             break;
977         }
978 }
979
980 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
981 {
982     xfree(mt->leader_spec);
983     mt->leader_spec = 0;
984     if (leader_spec)
985     {
986         char dummy_leader[24];
987         if (marc_exec_leader(leader_spec, dummy_leader, 24))
988             return -1;
989         mt->leader_spec = xstrdup(leader_spec);
990     }
991     return 0;
992 }
993
/* Applies a leader spec to a leader buffer of 'size' bytes.
   The spec is a comma separated list of pos=value items, where value is
   either a quoted string ('abc': its characters are copied to pos..) or
   a decimal number (stored as a single byte value at pos). Values are
   limited to 20 characters and cannot contain a comma.
   Returns 0 on success (a null spec is a no-op) and -1 on a malformed
   item or a position/length outside the leader; items preceding the
   offending one have been applied by then. */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *cur;

    /* the increment steps over the ',' between two items */
    for (cur = leader_spec; cur; cur++)
    {
        char value[21];
        int offset;
        int consumed = 0;
        int fields = sscanf(cur, "%d=%20[^,]%n", &offset, value, &consumed);

        if (fields < 2 || consumed < 3)
            return -1;
        if (offset < 0 || (size_t) offset >= size)
            return -1;

        if (value[0] == '\'')
        {
            const char *end_quote = strchr(value + 1, '\'');
            size_t len;

            if (!end_quote)
                return -1;
            len = end_quote - value - 1;   /* characters between the quotes */
            if (len + offset > size)
                return -1;
            memcpy(leader + offset, value + 1, len);
        }
        else if (value[0] >= '0' && value[0] <= '9')
            leader[offset] = atoi(value);
        else
            return -1;

        cur += consumed;
        if (*cur != ',')
            break;
    }
    return 0;
}
1036
1037 int yaz_marc_decode_formatstr(const char *arg)
1038 {
1039     int mode = -1; 
1040     if (!strcmp(arg, "marc"))
1041         mode = YAZ_MARC_ISO2709;
1042     if (!strcmp(arg, "marcxml"))
1043         mode = YAZ_MARC_MARCXML;
1044     if (!strcmp(arg, "marcxchange"))
1045         mode = YAZ_MARC_XCHANGE;
1046     if (!strcmp(arg, "line"))
1047         mode = YAZ_MARC_LINE;
1048     return mode;
1049 }
1050
1051 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1052 {
1053     mt->write_using_libxml2 = enable;
1054 }
1055
1056 /*
1057  * Local variables:
1058  * c-basic-offset: 4
1059  * indent-tabs-mode: nil
1060  * End:
1061  * vim: shiftwidth=4 tabstop=8 expandtab
1062  */
1063