Code updates which makes things compile as C++. Mostly type casts were
[yaz-moved-to-github.git] / src / marcdisp.c
1 /*
2  * Copyright (C) 1995-2007, Index Data ApS
3  * See the file LICENSE for details.
4  *
5  * $Id: marcdisp.c,v 1.50 2007-05-06 20:12:20 adam Exp $
6  */
7
8 /**
9  * \file marcdisp.c
10  * \brief Implements MARC conversion utilities
11  */
12
#if HAVE_CONFIG_H
#include <config.h>
#endif

#ifdef WIN32
#include <windows.h>
#endif

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <ctype.h>
#include <yaz/marcdisp.h>
#include <yaz/wrbuf.h>
#include <yaz/yaz-util.h>
#include <yaz/nmem_xml.h>
#include <yaz/snprintf.h>

#if YAZ_HAVE_XML2
#include <libxml/parser.h>
#include <libxml/tree.h>
#endif
35
/** \brief node types for yaz_marc_node
 *
 * The value stored in yaz_marc_node.which selects the valid member of
 * the node's union.
 */
enum YAZ_MARC_NODE_TYPE
{
    YAZ_MARC_DATAFIELD,     /* u.datafield is valid */
    YAZ_MARC_CONTROLFIELD,  /* u.controlfield is valid */
    YAZ_MARC_COMMENT,       /* u.comment is valid */
    YAZ_MARC_LEADER         /* u.leader is valid */
};
44
/** \brief represents a data field */
struct yaz_marc_datafield {
    char *tag;       /* field tag */
    char *indicator; /* indicator characters as one string; the XML writers
                        emit one ind<N> attribute per character */
    struct yaz_marc_subfield *subfields; /* head of subfield list, 0 if none */
};
51
/** \brief represents a control field */
struct yaz_marc_controlfield {
    char *tag;  /* field tag */
    char *data; /* field content, NUL-terminated */
};
57
/** \brief a comment node
 *
 * NOTE(review): yaz_marc_node stores comments as a plain char pointer
 * (u.comment); this struct is not referenced elsewhere in this file.
 */
struct yaz_marc_comment {
    char *comment; /* comment text */
};
62
/** \brief MARC node
 *
 * One element of the singly linked record list kept in a yaz_marc_t
 * handle. The member of u that is valid is selected by which.
 */
struct yaz_marc_node {
    enum YAZ_MARC_NODE_TYPE which; /* discriminator for u */
    union {
        struct yaz_marc_datafield datafield;       /* YAZ_MARC_DATAFIELD */
        struct yaz_marc_controlfield controlfield; /* YAZ_MARC_CONTROLFIELD */
        char *comment;                             /* YAZ_MARC_COMMENT */
        char *leader;                              /* YAZ_MARC_LEADER */
    } u;
    struct yaz_marc_node *next; /* next node, 0 at end of list */
};
74
/** \brief represents a subfield */
struct yaz_marc_subfield {
    char *code_data; /* subfield code immediately followed by its data;
                        the writers split the two at output time */
    struct yaz_marc_subfield *next; /* next subfield, 0 at end of list */
};
80
/** \brief the internals of a yaz_marc_t handle */
struct yaz_marc_t_ {
    WRBUF m_wr;              /* result buffer used by yaz_marc_decode_buf */
    NMEM nmem;               /* memory for nodes, subfields and their strings;
                                reset by yaz_marc_reset */
    int xml;                 /* output mode (YAZ_MARC_LINE, YAZ_MARC_MARCXML,
                                ...) dispatched on in yaz_marc_write_mode */
    int debug;               /* non-zero: add diagnostic comment nodes */
    int write_using_libxml2; /* non-zero: build XML output through libxml2 */
    yaz_iconv_t iconv_cd;    /* character set conversion handle, 0 for none */
    char subfield_str[8];    /* subfield prefix in line mode, default " $" */
    char endline_str[8];     /* field terminator in line mode, default "\n" */
    char *leader_spec;       /* xstrdup'ed leader spec or 0 */
    struct yaz_marc_node *nodes;            /* head of the node list */
    struct yaz_marc_node **nodes_pp;        /* where the next node is linked */
    struct yaz_marc_subfield **subfield_pp; /* where the next subfield is
                                               linked; 0 before any data
                                               field has been added */
};
96
97 yaz_marc_t yaz_marc_create(void)
98 {
99     yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
100     mt->xml = YAZ_MARC_LINE;
101     mt->debug = 0;
102     mt->write_using_libxml2 = 0;
103     mt->m_wr = wrbuf_alloc();
104     mt->iconv_cd = 0;
105     mt->leader_spec = 0;
106     strcpy(mt->subfield_str, " $");
107     strcpy(mt->endline_str, "\n");
108
109     mt->nmem = nmem_create();
110     yaz_marc_reset(mt);
111     return mt;
112 }
113
114 void yaz_marc_destroy(yaz_marc_t mt)
115 {
116     if (!mt)
117         return ;
118     nmem_destroy(mt->nmem);
119     wrbuf_destroy(mt->m_wr);
120     xfree(mt->leader_spec);
121     xfree(mt);
122 }
123
124 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
125 {
126     return mt->nmem;
127 }
128
129 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
130 {
131     wrbuf_iconv_reset(wr, mt->iconv_cd);
132 }
133
/* Forward declaration: applies a leader spec to a leader buffer of the
   given size. Defined near the end of this file; returns 0 on success
   and -1 on a malformed spec. */
static int marc_exec_leader(const char *leader_spec, char *leader,
                            size_t size);
136
137
138 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
139 {
140     struct yaz_marc_node *n = (struct yaz_marc_node *)
141         nmem_malloc(mt->nmem, sizeof(*n));
142     n->next = 0;
143     *mt->nodes_pp = n;
144     mt->nodes_pp = &n->next;
145     return n;
146 }
147
#if YAZ_HAVE_XML2
/** \brief adds a control field whose tag and data come from XML nodes
    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param ptr_data XML node supplying the field data
*/
void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                   const xmlNode *ptr_data)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_controlfield *cf = &node->u.controlfield;

    node->which = YAZ_MARC_CONTROLFIELD;
    cf->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    cf->data = nmem_text_node_cdata(ptr_data, mt->nmem);
}
#endif
158
159
160 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
161 {
162     struct yaz_marc_node *n = yaz_marc_add_node(mt);
163     n->which = YAZ_MARC_COMMENT;
164     n->u.comment = nmem_strdup(mt->nmem, comment);
165 }
166
167 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
168 {
169     va_list ap;
170     char buf[200];
171
172     va_start(ap, fmt);
173     yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
174     yaz_marc_add_comment(mt, buf);
175     va_end (ap);
176 }
177
178 int yaz_marc_get_debug(yaz_marc_t mt)
179 {
180     return mt->debug;
181 }
182
183 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
184 {
185     struct yaz_marc_node *n = yaz_marc_add_node(mt);
186     n->which = YAZ_MARC_LEADER;
187     n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
188     marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
189 }
190
191 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
192                                const char *data, size_t data_len)
193 {
194     struct yaz_marc_node *n = yaz_marc_add_node(mt);
195     n->which = YAZ_MARC_CONTROLFIELD;
196     n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
197     n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
198     if (mt->debug)
199     {
200         size_t i;
201         char msg[80];
202
203         sprintf(msg, "controlfield:");
204         for (i = 0; i < 16 && i < data_len; i++)
205             sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
206         if (i < data_len)
207             sprintf(msg + strlen(msg), " ..");
208         yaz_marc_add_comment(mt, msg);
209     }
210 }
211
212 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
213                             const char *indicator, size_t indicator_len)
214 {
215     struct yaz_marc_node *n = yaz_marc_add_node(mt);
216     n->which = YAZ_MARC_DATAFIELD;
217     n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
218     n->u.datafield.indicator =
219         nmem_strdupn(mt->nmem, indicator, indicator_len);
220     n->u.datafield.subfields = 0;
221
222     /* make subfield_pp the current (last one) */
223     mt->subfield_pp = &n->u.datafield.subfields;
224 }
225
#if YAZ_HAVE_XML2
/** \brief appends a data field node whose tag comes from an XML node
    \param mt handle
    \param ptr_tag XML node supplying the tag text
    \param indicator indicator bytes (copied)
    \param indicator_len number of indicator bytes

    Subsequent yaz_marc_add_subfield calls append to this field.
*/
void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
                                const char *indicator, size_t indicator_len)
{
    struct yaz_marc_node *node = yaz_marc_add_node(mt);
    struct yaz_marc_datafield *df = &node->u.datafield;

    node->which = YAZ_MARC_DATAFIELD;
    df->tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
    df->indicator = nmem_strdupn(mt->nmem, indicator, indicator_len);
    df->subfields = 0;

    /* new subfields are linked onto this field from now on */
    mt->subfield_pp = &df->subfields;
}
#endif
241
242 void yaz_marc_add_subfield(yaz_marc_t mt,
243                            const char *code_data, size_t code_data_len)
244 {
245     if (mt->debug)
246     {
247         size_t i;
248         char msg[80];
249
250         sprintf(msg, "subfield:");
251         for (i = 0; i < 16 && i < code_data_len; i++)
252             sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
253         if (i < code_data_len)
254             sprintf(msg + strlen(msg), " ..");
255         yaz_marc_add_comment(mt, msg);
256     }
257
258     if (mt->subfield_pp)
259     {
260         struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
261             nmem_malloc(mt->nmem, sizeof(*n));
262         n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
263         n->next = 0;
264         /* mark subfield_pp to point to this one, so we append here next */
265         *mt->subfield_pp = n;
266         mt->subfield_pp = &n->next;
267     }
268 }
269
/** \brief converts a fixed-width digit string to an integer
    \param buf start of the digits
    \param size number of bytes to examine
    \param val receives the converted value (untouched on failure)
    \retval 1 all size bytes are decimal digits and *val was set
    \retval 0 at least one byte is not a digit
*/
int atoi_n_check(const char *buf, int size, int *val)
{
    /* isdigit requires an unsigned char value */
    const unsigned char *bytes = (const unsigned char *) buf;
    int idx = 0;

    while (idx < size)
    {
        if (!isdigit(bytes[idx]))
            return 0;
        idx++;
    }
    *val = atoi_n(buf, size);
    return 1;
}
279
280 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
281                          int *indicator_length,
282                          int *identifier_length,
283                          int *base_address,
284                          int *length_data_entry,
285                          int *length_starting,
286                          int *length_implementation)
287 {
288     char leader[24];
289
290     memcpy(leader, leader_c, 24);
291
292     if (!atoi_n_check(leader+10, 1, indicator_length))
293     {
294         yaz_marc_cprintf(mt, 
295                          "Indicator length at offset 10 should hold a digit."
296                          " Assuming 2");
297         leader[10] = '2';
298         *indicator_length = 2;
299     }
300     if (!atoi_n_check(leader+11, 1, identifier_length))
301     {
302         yaz_marc_cprintf(mt, 
303                          "Identifier length at offset 11 should hold a digit."
304                          " Assuming 2");
305         leader[11] = '2';
306         *identifier_length = 2;
307     }
308     if (!atoi_n_check(leader+12, 5, base_address))
309     {
310         yaz_marc_cprintf(mt, 
311                          "Base address at offsets 12..16 should hold a number."
312                          " Assuming 0");
313         *base_address = 0;
314     }
315     if (!atoi_n_check(leader+20, 1, length_data_entry))
316     {
317         yaz_marc_cprintf(mt, 
318                          "Length data entry at offset 20 should hold a digit."
319                          " Assuming 4");
320         *length_data_entry = 4;
321         leader[20] = '4';
322     }
323     if (!atoi_n_check(leader+21, 1, length_starting))
324     {
325         yaz_marc_cprintf(mt,
326                          "Length starting at offset 21 should hold a digit."
327                          " Assuming 5");
328         *length_starting = 5;
329         leader[21] = '5';
330     }
331     if (!atoi_n_check(leader+22, 1, length_implementation))
332     {
333         yaz_marc_cprintf(mt, 
334                          "Length implementation at offset 22 should hold a digit."
335                          " Assuming 0");
336         *length_implementation = 0;
337         leader[22] = '0';
338     }
339
340     if (mt->debug)
341     {
342         yaz_marc_cprintf(mt, "Indicator length      %5d", *indicator_length);
343         yaz_marc_cprintf(mt, "Identifier length     %5d", *identifier_length);
344         yaz_marc_cprintf(mt, "Base address          %5d", *base_address);
345         yaz_marc_cprintf(mt, "Length data entry     %5d", *length_data_entry);
346         yaz_marc_cprintf(mt, "Length starting       %5d", *length_starting);
347         yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
348     }
349     yaz_marc_add_leader(mt, leader, 24);
350 }
351
352 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
353 {
354     strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
355     mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
356 }
357
358 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
359 {
360     strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
361     mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
362 }
363
364 /* try to guess how many bytes the identifier really is! */
365 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
366 {
367     if (mt->iconv_cd)
368     {
369         size_t i;
370         for (i = 1; i<5; i++)
371         {
372             char outbuf[12];
373             size_t outbytesleft = sizeof(outbuf);
374             char *outp = outbuf;
375             const char *inp = buf;
376
377             size_t inbytesleft = i;
378             size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
379                                  &outp, &outbytesleft);
380             if (r != (size_t) (-1))
381                 return i;  /* got a complete sequence */
382         }
383         return 1; /* giving up */
384     }
385     return 1; /* we don't know */
386 }
387                               
388 void yaz_marc_reset(yaz_marc_t mt)
389 {
390     nmem_reset(mt->nmem);
391     mt->nodes = 0;
392     mt->nodes_pp = &mt->nodes;
393     mt->subfield_pp = 0;
394 }
395
396 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
397 {
398     struct yaz_marc_node *n;
399     int identifier_length;
400     const char *leader = 0;
401
402     for (n = mt->nodes; n; n = n->next)
403         if (n->which == YAZ_MARC_LEADER)
404         {
405             leader = n->u.leader;
406             break;
407         }
408     
409     if (!leader)
410         return -1;
411     if (!atoi_n_check(leader+11, 1, &identifier_length))
412         return -1;
413
414     for (n = mt->nodes; n; n = n->next)
415     {
416         switch(n->which)
417         {
418         case YAZ_MARC_COMMENT:
419             wrbuf_iconv_write(wr, mt->iconv_cd, 
420                               n->u.comment, strlen(n->u.comment));
421             wrbuf_puts(wr, ")\n");
422             break;
423         default:
424             break;
425         }
426     }
427     return 0;
428 }
429
430
431 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
432 {
433     struct yaz_marc_node *n;
434     int identifier_length;
435     const char *leader = 0;
436
437     for (n = mt->nodes; n; n = n->next)
438         if (n->which == YAZ_MARC_LEADER)
439         {
440             leader = n->u.leader;
441             break;
442         }
443     
444     if (!leader)
445         return -1;
446     if (!atoi_n_check(leader+11, 1, &identifier_length))
447         return -1;
448
449     for (n = mt->nodes; n; n = n->next)
450     {
451         struct yaz_marc_subfield *s;
452         switch(n->which)
453         {
454         case YAZ_MARC_DATAFIELD:
455             wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
456                          n->u.datafield.indicator);
457             for (s = n->u.datafield.subfields; s; s = s->next)
458             {
459                 /* if identifier length is 2 (most MARCs),
460                    the code is a single character .. However we've
461                    seen multibyte codes, so see how big it really is */
462                 size_t using_code_len = 
463                     (identifier_length != 2) ? identifier_length - 1
464                     :
465                     cdata_one_character(mt, s->code_data);
466                 
467                 wrbuf_puts (wr, mt->subfield_str); 
468                 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data, 
469                                   using_code_len);
470                 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
471                 wrbuf_iconv_puts(wr, mt->iconv_cd, 
472                                  s->code_data + using_code_len);
473                 marc_iconv_reset(mt, wr);
474             }
475             wrbuf_puts (wr, mt->endline_str);
476             break;
477         case YAZ_MARC_CONTROLFIELD:
478             wrbuf_printf(wr, "%s", n->u.controlfield.tag);
479             wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
480             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
481             marc_iconv_reset(mt, wr);
482             wrbuf_puts (wr, mt->endline_str);
483             break;
484         case YAZ_MARC_COMMENT:
485             wrbuf_puts(wr, "(");
486             wrbuf_iconv_write(wr, mt->iconv_cd, 
487                               n->u.comment, strlen(n->u.comment));
488             marc_iconv_reset(mt, wr);
489             wrbuf_puts(wr, ")\n");
490             break;
491         case YAZ_MARC_LEADER:
492             wrbuf_printf(wr, "%s\n", n->u.leader);
493         }
494     }
495     wrbuf_puts(wr, "\n");
496     return 0;
497 }
498
499 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
500 {
501     switch(mt->xml)
502     {
503     case YAZ_MARC_LINE:
504         return yaz_marc_write_line(mt, wr);
505     case YAZ_MARC_MARCXML:
506         return yaz_marc_write_marcxml(mt, wr);
507     case YAZ_MARC_XCHANGE:
508         return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
509     case YAZ_MARC_ISO2709:
510         return yaz_marc_write_iso2709(mt, wr);
511     case YAZ_MARC_CHECK:
512         return yaz_marc_write_check(mt, wr);
513     }
514     return -1;
515 }
516
517 /** \brief common MARC XML/Xchange writer
518     \param mt handle
519     \param wr WRBUF output
520     \param ns XMLNS for the elements
521     \param format record format (e.g. "MARC21")
522     \param type record type (e.g. "Bibliographic")
523 */
524 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
525                                       const char *ns, 
526                                       const char *format,
527                                       const char *type)
528 {
529     struct yaz_marc_node *n;
530     int identifier_length;
531     const char *leader = 0;
532
533     for (n = mt->nodes; n; n = n->next)
534         if (n->which == YAZ_MARC_LEADER)
535         {
536             leader = n->u.leader;
537             break;
538         }
539     
540     if (!leader)
541         return -1;
542     if (!atoi_n_check(leader+11, 1, &identifier_length))
543         return -1;
544
545     wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
546     if (format)
547         wrbuf_printf(wr, " format=\"%.80s\"", format);
548     if (type)
549         wrbuf_printf(wr, " type=\"%.80s\"", type);
550     wrbuf_printf(wr, ">\n");
551     for (n = mt->nodes; n; n = n->next)
552     {
553         struct yaz_marc_subfield *s;
554
555         switch(n->which)
556         {
557         case YAZ_MARC_DATAFIELD:
558             wrbuf_printf(wr, "  <datafield tag=\"");
559             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
560                                     strlen(n->u.datafield.tag));
561             wrbuf_printf(wr, "\"");
562             if (n->u.datafield.indicator)
563             {
564                 int i;
565                 for (i = 0; n->u.datafield.indicator[i]; i++)
566                 {
567                     wrbuf_printf(wr, " ind%d=\"", i+1);
568                     wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
569                                           n->u.datafield.indicator+i, 1);
570                     wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
571                 }
572             }
573             wrbuf_printf(wr, ">\n");
574             for (s = n->u.datafield.subfields; s; s = s->next)
575             {
576                 /* if identifier length is 2 (most MARCs),
577                    the code is a single character .. However we've
578                    seen multibyte codes, so see how big it really is */
579                 size_t using_code_len = 
580                     (identifier_length != 2) ? identifier_length - 1
581                     :
582                     cdata_one_character(mt, s->code_data);
583                 
584                 wrbuf_iconv_puts(wr, mt->iconv_cd, "    <subfield code=\"");
585                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
586                                         s->code_data, using_code_len);
587                 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
588                 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
589                                         s->code_data + using_code_len,
590                                         strlen(s->code_data + using_code_len));
591                 marc_iconv_reset(mt, wr);
592                 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
593                 wrbuf_puts(wr, "\n");
594             }
595             wrbuf_printf(wr, "  </datafield>\n");
596             break;
597         case YAZ_MARC_CONTROLFIELD:
598             wrbuf_printf(wr, "  <controlfield tag=\"");
599             wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
600                                     strlen(n->u.controlfield.tag));
601             wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
602             wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
603
604             marc_iconv_reset(mt, wr);
605             wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
606             wrbuf_puts(wr, "\n");
607             break;
608         case YAZ_MARC_COMMENT:
609             wrbuf_printf(wr, "<!-- ");
610             wrbuf_puts(wr, n->u.comment);
611             wrbuf_printf(wr, " -->\n");
612             break;
613         case YAZ_MARC_LEADER:
614             wrbuf_printf(wr, "  <leader>");
615             wrbuf_iconv_write_cdata(wr, 
616                                     0 /* no charset conversion for leader */,
617                                     n->u.leader, strlen(n->u.leader));
618             wrbuf_printf(wr, "</leader>\n");
619         }
620     }
621     wrbuf_puts(wr, "</record>\n");
622     return 0;
623 }
624
625 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
626                                      const char *ns, 
627                                      const char *format,
628                                      const char *type)
629 {
630     if (mt->write_using_libxml2)
631     {
632 #if YAZ_HAVE_XML2
633         int ret;
634         xmlNode *root_ptr;
635
636         ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
637         if (ret == 0)
638         {
639             xmlChar *buf_out;
640             xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
641             int len_out;
642
643             xmlDocSetRootElement(doc, root_ptr);
644             xmlDocDumpMemory(doc, &buf_out, &len_out);
645
646             wrbuf_write(wr, (const char *) buf_out, len_out);
647             wrbuf_puts(wr, "");
648             xmlFree(buf_out);
649             xmlFreeDoc(doc);
650         }
651         return ret;
652 #else
653         return -1;
654 #endif
655     }
656     else
657         return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
658 }
659
660 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
661 {
662     if (!mt->leader_spec)
663         yaz_marc_modify_leader(mt, 9, "a");
664     return yaz_marc_write_marcxml_ns(mt, wr, "http://www.loc.gov/MARC21/slim",
665                                      0, 0);
666 }
667
668 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
669                                const char *format,
670                                const char *type)
671 {
672     return yaz_marc_write_marcxml_ns(mt, wr,
673                                      "http://www.bs.dk/standards/MarcXchange",
674                                      0, 0);
675 }
676
677
678 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
679                        const char *ns, 
680                        const char *format,
681                        const char *type)
682 {
683 #if YAZ_HAVE_XML2
684     struct yaz_marc_node *n;
685     int identifier_length;
686     const char *leader = 0;
687     xmlNode *record_ptr;
688     xmlNsPtr ns_record;
689     WRBUF wr_cdata = 0;
690
691     for (n = mt->nodes; n; n = n->next)
692         if (n->which == YAZ_MARC_LEADER)
693         {
694             leader = n->u.leader;
695             break;
696         }
697     
698     if (!leader)
699         return -1;
700     if (!atoi_n_check(leader+11, 1, &identifier_length))
701         return -1;
702
703     wr_cdata = wrbuf_alloc();
704
705     record_ptr = xmlNewNode(0, BAD_CAST "record");
706     *root_ptr = record_ptr;
707
708     ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
709     xmlSetNs(record_ptr, ns_record);
710
711     if (format)
712         xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
713     if (type)
714         xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
715     for (n = mt->nodes; n; n = n->next)
716     {
717         struct yaz_marc_subfield *s;
718         xmlNode *ptr;
719
720         switch(n->which)
721         {
722         case YAZ_MARC_DATAFIELD:
723             ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
724             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
725             if (n->u.datafield.indicator)
726             {
727                 int i;
728                 for (i = 0; n->u.datafield.indicator[i]; i++)
729                 {
730                     char ind_str[6];
731                     char ind_val[2];
732
733                     sprintf(ind_str, "ind%d", i+1);
734                     ind_val[0] = n->u.datafield.indicator[i];
735                     ind_val[1] = '\0';
736                     xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
737                 }
738             }
739             for (s = n->u.datafield.subfields; s; s = s->next)
740             {
741                 xmlNode *ptr_subfield;
742                 /* if identifier length is 2 (most MARCs),
743                    the code is a single character .. However we've
744                    seen multibyte codes, so see how big it really is */
745                 size_t using_code_len = 
746                     (identifier_length != 2) ? identifier_length - 1
747                     :
748                     cdata_one_character(mt, s->code_data);
749
750                 wrbuf_rewind(wr_cdata);
751                 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
752                                  s->code_data + using_code_len);
753                 marc_iconv_reset(mt, wr_cdata);
754                 ptr_subfield = xmlNewTextChild(
755                     ptr, ns_record, 
756                     BAD_CAST "subfield",  BAD_CAST wrbuf_cstr(wr_cdata));
757
758                 wrbuf_rewind(wr_cdata);
759                 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
760                                   s->code_data, using_code_len);
761                 xmlNewProp(ptr_subfield, BAD_CAST "code",
762                            BAD_CAST wrbuf_cstr(wr_cdata));
763             }
764             break;
765         case YAZ_MARC_CONTROLFIELD:
766             wrbuf_rewind(wr_cdata);
767             wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
768             marc_iconv_reset(mt, wr_cdata);
769             
770             ptr = xmlNewTextChild(record_ptr, ns_record,
771                                   BAD_CAST "controlfield",
772                                   BAD_CAST wrbuf_cstr(wr_cdata));
773             
774             xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
775             break;
776         case YAZ_MARC_COMMENT:
777             ptr = xmlNewComment(BAD_CAST n->u.comment);
778             xmlAddChild(record_ptr, ptr);
779             break;
780         case YAZ_MARC_LEADER:
781             xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
782                             BAD_CAST n->u.leader);
783             break;
784         }
785     }
786     wrbuf_destroy(wr_cdata);
787     return 0;
788 #else
789     return -1;
790 #endif
791 }
792
/** \brief writes the record in ISO2709 format
    \param mt handle
    \param wr WRBUF output
    \retval 0 OK
    \retval -1 no leader node, or one of the leader digits at offsets
               10, 11, 20, 21, 22 is not a digit

    Works in two passes over the node list: the first builds the
    directory (tag, field length, field offset) by converting each field
    into a scratch buffer to learn its length; the second writes the
    field data itself after the rebuilt leader and the directory.
    Comment nodes are not written.
*/
int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
{
    struct yaz_marc_node *n;
    int indicator_length;
    int identifier_length;
    int length_data_entry;
    int length_starting;
    int length_implementation;
    int data_offset = 0;
    const char *leader = 0;
    WRBUF wr_dir, wr_head, wr_data_tmp;
    int base_address;

    /* NOTE(review): there is no break here, so the last leader node wins;
       the other writers in this file stop at the first one */
    for (n = mt->nodes; n; n = n->next)
        if (n->which == YAZ_MARC_LEADER)
            leader = n->u.leader;

    if (!leader)
        return -1;
    if (!atoi_n_check(leader+10, 1, &indicator_length))
        return -1;
    if (!atoi_n_check(leader+11, 1, &identifier_length))
        return -1;
    if (!atoi_n_check(leader+20, 1, &length_data_entry))
        return -1;
    if (!atoi_n_check(leader+21, 1, &length_starting))
        return -1;
    if (!atoi_n_check(leader+22, 1, &length_implementation))
        return -1;

    /* pass 1: build the directory in wr_dir; wr_data_tmp is a scratch
       buffer used only to measure the converted length of each field */
    wr_data_tmp = wrbuf_alloc();
    wr_dir = wrbuf_alloc();
    for (n = mt->nodes; n; n = n->next)
    {
        int data_length = 0;
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
            data_length += indicator_length;
            wrbuf_rewind(wr_data_tmp);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                /* write dummy IDFS + content */
                wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
                wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr_data_tmp);
            }
            /* write dummy FS (makes MARC-8 to become ASCII) */
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);

            wrbuf_rewind(wr_data_tmp);
            wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
                             n->u.controlfield.data);
            marc_iconv_reset(mt, wr_data_tmp);
            wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
            marc_iconv_reset(mt, wr_data_tmp);
            data_length += wrbuf_len(wr_data_tmp);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
        /* only field nodes produce a length; emit the rest of the
           directory entry (length, then starting offset), zero padded
           to the widths given by the leader */
        if (data_length)
        {
            wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
            wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
            data_offset += data_length;
        }
    }
    /* mark end of directory */
    wrbuf_putc(wr_dir, ISO2709_FS);

    /* base address of data (comes after leader+directory) */
    base_address = 24 + wrbuf_len(wr_dir);

    /* rebuild the 24 byte leader: 5 + 7 + 5 + 7 bytes */
    wr_head = wrbuf_alloc();

    /* write record length */
    wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+5, 7);
    /* base address of data */
    wrbuf_printf(wr_head, "%05d", base_address);
    /* from "original" leader */
    wrbuf_write(wr_head, leader+17, 7);

    wrbuf_write(wr, wrbuf_buf(wr_head), 24);
    wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
    wrbuf_destroy(wr_head);
    wrbuf_destroy(wr_dir);
    wrbuf_destroy(wr_data_tmp);

    /* pass 2: write the field data in the same order as the directory */
    for (n = mt->nodes; n; n = n->next)
    {
        struct yaz_marc_subfield *s;

        switch(n->which)
        {
        case YAZ_MARC_DATAFIELD:
            wrbuf_printf(wr, "%.*s", indicator_length,
                         n->u.datafield.indicator);
            for (s = n->u.datafield.subfields; s; s = s->next)
            {
                wrbuf_putc(wr, ISO2709_IDFS);
                wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
                marc_iconv_reset(mt, wr);
            }
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_CONTROLFIELD:
            wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
            marc_iconv_reset(mt, wr);
            wrbuf_putc(wr, ISO2709_FS);
            break;
        case YAZ_MARC_COMMENT:
            break;
        case YAZ_MARC_LEADER:
            break;
        }
    }
    /* record terminator */
    wrbuf_printf(wr, "%c", ISO2709_RS);
    return 0;
}
925
926
927 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
928 {
929     int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
930     if (r <= 0)
931         return r;
932     s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
933     if (s != 0)
934         return -1; /* error */
935     return r; /* OK, return length > 0 */
936 }
937
938 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
939                          const char **result, size_t *rsize)
940 {
941     int r;
942
943     wrbuf_rewind(mt->m_wr);
944     r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
945     if (result)
946         *result = wrbuf_cstr(mt->m_wr);
947     if (rsize)
948         *rsize = wrbuf_len(mt->m_wr);
949     return r;
950 }
951
952 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
953 {
954     if (mt)
955         mt->xml = xmlmode;
956 }
957
958 void yaz_marc_debug(yaz_marc_t mt, int level)
959 {
960     if (mt)
961         mt->debug = level;
962 }
963
964 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
965 {
966     mt->iconv_cd = cd;
967 }
968
969 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
970 {
971     struct yaz_marc_node *n;
972     char *leader = 0;
973     for (n = mt->nodes; n; n = n->next)
974         if (n->which == YAZ_MARC_LEADER)
975         {
976             leader = n->u.leader;
977             memcpy(leader+off, str, strlen(str));
978             break;
979         }
980 }
981
982 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
983 {
984     xfree(mt->leader_spec);
985     mt->leader_spec = 0;
986     if (leader_spec)
987     {
988         char dummy_leader[24];
989         if (marc_exec_leader(leader_spec, dummy_leader, 24))
990             return -1;
991         mt->leader_spec = xstrdup(leader_spec);
992     }
993     return 0;
994 }
995
/* Applies a leader spec to a leader buffer.

   leader_spec is a comma separated list of pos=value items, or 0 for
   "nothing to do". Each value is at most 20 characters and is either
     - a quoted string ('abc'), copied to leader[pos..]; or
     - a decimal number 0..255, stored as a single byte at leader[pos].
   A quoted string cannot contain a comma since the comma ends the item.

   leader must have room for size bytes; every write is checked against
   size. Returns 0 on success and -1 on a malformed spec, including
   positions outside the leader, values that are too long, numbers that
   do not fit in a byte, and trailing junk. Items preceding the
   offending one have already been applied when -1 is returned. */
static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
{
    const char *cp = leader_spec;
    while (cp)
    {
        char val[21];
        int pos;
        int no_read = 0, no = 0;

        no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
        if (no < 2 || no_read < 3)
            return -1;
        if (pos < 0 || (size_t) pos >= size)
            return -1;

        if (*val == '\'')
        {
            const char *vp = strchr(val+1, '\'');
            size_t len;

            if (!vp)
                return -1;
            len = vp-val-1;
            if (len + (size_t) pos > size)
                return -1;
            memcpy(leader + pos, val+1, len);
        }
        else if (*val >= '0' && *val <= '9')
        {
            char *endp;
            long ch = strtol(val, &endp, 10);

            /* whole value must be a number that fits in one byte */
            if (*endp != '\0' || ch > 255)
                return -1;
            leader[pos] = (char) ch;
        }
        else
            return -1;
        cp += no_read;
        if (*cp == '\0')
            break;
        /* anything but a separator here means the value exceeded the
           20 character limit and was cut short */
        if (*cp != ',')
            return -1;

        cp++;
    }
    return 0;
}
1038
1039 int yaz_marc_decode_formatstr(const char *arg)
1040 {
1041     int mode = -1; 
1042     if (!strcmp(arg, "marc"))
1043         mode = YAZ_MARC_ISO2709;
1044     if (!strcmp(arg, "marcxml"))
1045         mode = YAZ_MARC_MARCXML;
1046     if (!strcmp(arg, "marcxchange"))
1047         mode = YAZ_MARC_XCHANGE;
1048     if (!strcmp(arg, "line"))
1049         mode = YAZ_MARC_LINE;
1050     return mode;
1051 }
1052
1053 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1054 {
1055     mt->write_using_libxml2 = enable;
1056 }
1057
1058 /*
1059  * Local variables:
1060  * c-basic-offset: 4
1061  * indent-tabs-mode: nil
1062  * End:
1063  * vim: shiftwidth=4 tabstop=8 expandtab
1064  */
1065