1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2010 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
24 #include <yaz/marcdisp.h>
25 #include <yaz/wrbuf.h>
26 #include <yaz/yaz-util.h>
27 #include <yaz/nmem_xml.h>
28 #include <yaz/snprintf.h>
31 #include <libxml/parser.h>
32 #include <libxml/tree.h>
35 enum yaz_collection_state {
41 /** \brief node types for yaz_marc_node */
42 enum YAZ_MARC_NODE_TYPE
45 YAZ_MARC_CONTROLFIELD,
50 /** \brief represents a data field */
51 struct yaz_marc_datafield {
/* head of the singly linked list of subfields attached to this field */
54 struct yaz_marc_subfield *subfields;
57 /** \brief represents a control field */
58 struct yaz_marc_controlfield {
63 /** \brief a comment node */
64 struct yaz_marc_comment {
68 /** \brief MARC node */
69 struct yaz_marc_node {
/* discriminator: the add functions set it together with the matching
   payload member (datafield, controlfield, comment or leader) */
70 enum YAZ_MARC_NODE_TYPE which;
72 struct yaz_marc_datafield datafield;
73 struct yaz_marc_controlfield controlfield;
/* next node of the record; the writers walk this chain from mt->nodes */
77 struct yaz_marc_node *next;
80 /** \brief represents a subfield */
81 struct yaz_marc_subfield {
/* next subfield of the same data field; a null pointer ends the list */
83 struct yaz_marc_subfield *next;
86 /** \brief the internals of a yaz_marc_t handle */
/* non-zero makes yaz_marc_write_marcxml_ns build its output with libxml2 */
93 int write_using_libxml2;
/* controls the <collection> wrapper: set to collection_first by
   yaz_marc_enable_collection, moved to collection_second once the first
   record has been written, inspected by yaz_marc_write_trailer */
94 enum yaz_collection_state enable_collection;
/* head of the node list that makes up the current record */
99 struct yaz_marc_node *nodes;
/* append position of the node list: reset to &nodes by yaz_marc_reset and
   advanced to &n->next by yaz_marc_add_node */
100 struct yaz_marc_node **nodes_pp;
/* append position of the subfield list of the most recently added data
   field; advanced by yaz_marc_add_subfield */
101 struct yaz_marc_subfield **subfield_pp;
/* Creates a MARC handle.
   The handle storage comes from xmalloc. The defaults assigned below are:
   line output format, libxml2 writer disabled, no collection wrapper,
   a blank plus dollar sign as subfield prefix and a newline as line
   terminator. A WRBUF (m_wr) and an NMEM pool (nmem) are allocated and are
   released again by yaz_marc_destroy. */
104 yaz_marc_t yaz_marc_create(void)
106 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
107 mt->output_format = YAZ_MARC_LINE;
109 mt->write_using_libxml2 = 0;
110 mt->enable_collection = no_collection;
/* m_wr is the buffer yaz_marc_decode_buf hands its result out of */
111 mt->m_wr = wrbuf_alloc();
114 strcpy(mt->subfield_str, " $");
115 strcpy(mt->endline_str, "\n");
/* nodes, subfields and their strings are allocated from this pool */
117 mt->nmem = nmem_create();
/* Releases what the handle owns: the NMEM pool (and with it every node
   allocated from it), the m_wr output buffer and the leader_spec string. */
122 void yaz_marc_destroy(yaz_marc_t mt)
126 nmem_destroy(mt->nmem);
127 wrbuf_destroy(mt->m_wr);
128 xfree(mt->leader_spec);
132 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
/* Forwards to wrbuf_iconv_reset for wr using the converter of the handle.
   The writers call it after each converted field or subfield value. */
137 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
139 wrbuf_iconv_reset(wr, mt->iconv_cd);
142 static int marc_exec_leader(const char *leader_spec, char *leader,
/* Allocates a new node from the NMEM pool of the handle and moves the
   append position (nodes_pp) to the next link of that node, so the node
   added after this one is chained behind it.
   NOTE(review): presumably the node is also linked into the list and
   returned to the caller - confirm. */
146 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
148 struct yaz_marc_node *n = (struct yaz_marc_node *)
149 nmem_malloc(mt->nmem, sizeof(*n));
152 mt->nodes_pp = &n->next;
/* Appends a control field node whose tag and data both come from XML
   nodes. The two strings are produced by nmem_text_node_cdata using the
   NMEM pool of the handle.
   \param mt handle
   \param ptr_tag XML node supplying the tag text
   \param ptr_data XML node supplying the field data */
157 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
158 const xmlNode *ptr_data)
160 struct yaz_marc_node *n = yaz_marc_add_node(mt);
161 n->which = YAZ_MARC_CONTROLFIELD;
162 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
163 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/* Appends a control field node for the turbo XML reader: the tag is given
   as a string, the data as an XML node.
   \param mt handle
   \param tag tag string; the pointer is stored as is, without a copy
   \param ptr_data XML node supplying the field data */
166 void yaz_marc_add_controlfield_turbo_xml(yaz_marc_t mt, const char *tag,
167 const xmlNode *ptr_data)
169 struct yaz_marc_node *n = yaz_marc_add_node(mt);
170 n->which = YAZ_MARC_CONTROLFIELD;
/* NOTE(review): unlike yaz_marc_add_controlfield no duplicate is made
   here, so tag has to stay valid as long as the record is used - confirm
   that every caller guarantees this */
171 n->u.controlfield.tag = tag;
172 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
/* Appends a comment node. The text is duplicated into the NMEM pool of
   the handle, so the caller keeps ownership of comment. */
178 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
180 struct yaz_marc_node *n = yaz_marc_add_node(mt);
181 n->which = YAZ_MARC_COMMENT;
182 n->u.comment = nmem_strdup(mt->nmem, comment);
/* printf style front end for yaz_marc_add_comment: the message is
   formatted into a local buffer and then added as a comment node. */
185 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
/* output is limited to one byte less than the size of buf */
191 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
192 yaz_marc_add_comment(mt, buf);
196 int yaz_marc_get_debug(yaz_marc_t mt)
/* Appends a leader node holding a copy of the first leader_len bytes of
   leader and then applies the leader_spec of the handle to that copy.
   \param mt handle
   \param leader leader bytes
   \param leader_len number of bytes to copy */
201 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
203 struct yaz_marc_node *n = yaz_marc_add_node(mt);
204 n->which = YAZ_MARC_LEADER;
205 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
/* the result of marc_exec_leader is not examined here */
206 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
/* Appends a control field node; tag and the first data_len bytes of data
   are duplicated into the NMEM pool of the handle.
   \param mt handle
   \param tag tag string (zero terminated)
   \param data field data
   \param data_len number of data bytes */
209 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
210 const char *data, size_t data_len)
212 struct yaz_marc_node *n = yaz_marc_add_node(mt);
213 n->which = YAZ_MARC_CONTROLFIELD;
214 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
215 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
/* Build a diagnostic comment with a hex dump of at most the first 16
   data bytes and add it to the record.
   NOTE(review): presumably only done when debugging is enabled and the
   trailing dots only when the data was longer than 16 bytes - confirm. */
221 sprintf(msg, "controlfield:");
222 for (i = 0; i < 16 && i < data_len; i++)
223 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
225 sprintf(msg + strlen(msg), " ..");
226 yaz_marc_add_comment(mt, msg);
/* Appends a data field node with an empty subfield list. Tag and
   indicator are duplicated into the NMEM pool of the handle.
   \param mt handle
   \param tag tag string (zero terminated)
   \param indicator indicator characters
   \param indicator_len number of indicator characters to copy */
230 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
231 const char *indicator, size_t indicator_len)
233 struct yaz_marc_node *n = yaz_marc_add_node(mt);
234 n->which = YAZ_MARC_DATAFIELD;
235 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
236 n->u.datafield.indicator =
237 nmem_strdupn(mt->nmem, indicator, indicator_len);
238 n->u.datafield.subfields = 0;
240 /* make subfield_pp the current (last one) */
241 mt->subfield_pp = &n->u.datafield.subfields;
/* Same as yaz_marc_add_datafield except that the tag text is taken from
   an XML node through nmem_text_node_cdata.
   \param mt handle
   \param ptr_tag XML node supplying the tag text
   \param indicator indicator characters
   \param indicator_len number of indicator characters to copy */
245 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
246 const char *indicator, size_t indicator_len)
248 struct yaz_marc_node *n = yaz_marc_add_node(mt);
249 n->which = YAZ_MARC_DATAFIELD;
250 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
251 n->u.datafield.indicator =
252 nmem_strdupn(mt->nmem, indicator, indicator_len);
253 n->u.datafield.subfields = 0;
255 /* make subfield_pp the current (last one) */
256 mt->subfield_pp = &n->u.datafield.subfields;
/* Appends a data field node for the turbo XML reader. The indicator is
   left unset (null) and can be supplied afterwards with
   yaz_marc_datafield_set_indicators.
   \param mt handle
   \param tag_value tag string; the pointer is stored as is, without a copy
   NOTE(review): presumably the new node is what gets returned - confirm. */
259 struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value)
261 struct yaz_marc_node *n = yaz_marc_add_node(mt);
262 n->which = YAZ_MARC_DATAFIELD;
263 n->u.datafield.tag = tag_value;
264 n->u.datafield.indicator = 0;
265 n->u.datafield.subfields = 0;
267 /* make subfield_pp the current (last one) */
268 mt->subfield_pp = &n->u.datafield.subfields;
/* Stores the indicator string on a data field node. The pointer is kept
   as is (no copy), so the string has to outlive the node. */
272 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
274 n->u.datafield.indicator = indicator;
/* Appends a subfield to the data field added most recently.
   \param mt handle
   \param code_data subfield code immediately followed by the subfield data
   \param code_data_len number of bytes in code_data */
279 void yaz_marc_add_subfield(yaz_marc_t mt,
280 const char *code_data, size_t code_data_len)
/* Build a diagnostic comment with a hex dump of at most the first 16
   bytes, followed by two dots when more bytes exist.
   NOTE(review): presumably only done when debugging is enabled - confirm. */
287 sprintf(msg, "subfield:");
288 for (i = 0; i < 16 && i < code_data_len; i++)
289 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
290 if (i < code_data_len)
291 sprintf(msg + strlen(msg), " ..");
292 yaz_marc_add_comment(mt, msg);
/* the subfield itself and its string both live in the NMEM pool */
297 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
298 nmem_malloc(mt->nmem, sizeof(*n));
299 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
301 /* mark subfield_pp to point to this one, so we append here next */
302 *mt->subfield_pp = n;
303 mt->subfield_pp = &n->next;
/* Decodes the numeric parts of a 24 byte leader, stores them through the
   output pointers and adds the leader to the record.
   Each part is parsed with atoi_n_check; a part that does not parse gets
   a diagnostic and, where a value is assigned below, a fallback value.
   \param mt handle
   \param leader_c leader bytes; 24 bytes are copied from it
   \param indicator_length leader offset 10, fallback 2
   \param identifier_length leader offset 11, fallback 2
   \param length_data_entry leader offset 20, fallback 4
   \param length_starting leader offset 21, fallback 5
   \param length_implementation leader offset 22, fallback 0 */
307 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
308 int *indicator_length,
309 int *identifier_length,
311 int *length_data_entry,
312 int *length_starting,
313 int *length_implementation)
/* work on a local copy of the leader */
317 memcpy(leader, leader_c, 24);
319 if (!atoi_n_check(leader+10, 1, indicator_length))
322 "Indicator length at offset 10 should hold a digit."
325 *indicator_length = 2;
327 if (!atoi_n_check(leader+11, 1, identifier_length))
330 "Identifier length at offset 11 should hold a digit."
333 *identifier_length = 2;
/* base address is a five digit number at offsets 12..16 */
335 if (!atoi_n_check(leader+12, 5, base_address))
338 "Base address at offsets 12..16 should hold a number."
342 if (!atoi_n_check(leader+20, 1, length_data_entry))
345 "Length data entry at offset 20 should hold a digit."
347 *length_data_entry = 4;
350 if (!atoi_n_check(leader+21, 1, length_starting))
353 "Length starting at offset 21 should hold a digit."
355 *length_starting = 5;
358 if (!atoi_n_check(leader+22, 1, length_implementation))
361 "Length implementation at offset 22 should hold a digit."
363 *length_implementation = 0;
/* report the decoded values as comment nodes */
369 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
370 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
371 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
372 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
373 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
374 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
/* the local copy, not leader_c, becomes the leader node */
376 yaz_marc_add_leader(mt, leader, 24);
/* Sets the string written in front of every subfield in line mode.
   The copy is limited to the size of the destination buffer and the last
   byte is forced to zero, so an over-long s is silently truncated. */
379 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
381 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
382 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
/* Sets the string written at the end of every field in line mode.
   The copy is limited to the size of the destination buffer and the last
   byte is forced to zero, so an over-long s is silently truncated. */
385 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
387 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
388 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
391 /* try to guess how many bytes the identifier really is! */
/* Probes prefixes of buf that are 1, 2, 3 and 4 bytes long with the
   converter of the handle and returns the length of the first prefix that
   yaz_iconv converts without reporting an error. Falls back to 1 when no
   prefix converts.
   NOTE(review): the final fallback presumably covers the case where the
   handle has no converter - confirm. */
392 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
397 for (i = 1; i<5; i++)
400 size_t outbytesleft = sizeof(outbuf);
402 const char *inp = buf;
/* offer exactly i input bytes to the converter */
404 size_t inbytesleft = i;
405 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
406 &outp, &outbytesleft);
407 if (r != (size_t) (-1))
408 return i; /* got a complete sequence */
410 return 1; /* giving up */
412 return 1; /* we don't know */
/* Drops the current record: the NMEM pool is reset, which invalidates all
   nodes allocated from it, and the append position is put back at the
   head of the node list. */
415 void yaz_marc_reset(yaz_marc_t mt)
417 nmem_reset(mt->nmem);
419 mt->nodes_pp = &mt->nodes;
/* Check mode writer: looks up the leader, verifies that the identifier
   length at leader offset 11 is a digit and writes the comment nodes of
   the record, one per line, to wr.
   \param mt handle
   \param wr output buffer */
423 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
425 struct yaz_marc_node *n;
426 int identifier_length;
427 const char *leader = 0;
/* find the leader node of the record */
429 for (n = mt->nodes; n; n = n->next)
430 if (n->which == YAZ_MARC_LEADER)
432 leader = n->u.leader;
438 if (!atoi_n_check(leader+11, 1, &identifier_length))
441 for (n = mt->nodes; n; n = n->next)
445 case YAZ_MARC_COMMENT:
/* comment text goes through the character set converter */
446 wrbuf_iconv_write(wr, mt->iconv_cd,
447 n->u.comment, strlen(n->u.comment));
448 wrbuf_puts(wr, "\n");
/* Returns the number of bytes occupied by the subfield code at the start
   of data. For identifier lengths above 2 this is identifier_length - 1;
   otherwise the length is probed with cdata_one_character. */
457 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
458 int identifier_length)
460 /* if identifier length is 2 (most MARCs) or less (probably an error),
461 the code is a single character .. However we've
462 seen multibyte codes, so see how big it really is */
463 if (identifier_length > 2)
464 return identifier_length - 1;
466 return cdata_one_character(mt, data);
/* Line mode writer: one text line per node.
   Data fields are written as tag, indicator and then each subfield as
   subfield_str, code, blank, data. Control fields are written as tag,
   blank, data. Both end with endline_str. Comments end with a closing
   parenthesis and newline, the leader is written on its own line.
   \param mt handle
   \param wr output buffer */
469 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
471 struct yaz_marc_node *n;
472 int identifier_length;
473 const char *leader = 0;
/* find the leader; its offset 11 gives the identifier length */
475 for (n = mt->nodes; n; n = n->next)
476 if (n->which == YAZ_MARC_LEADER)
478 leader = n->u.leader;
484 if (!atoi_n_check(leader+11, 1, &identifier_length))
487 for (n = mt->nodes; n; n = n->next)
489 struct yaz_marc_subfield *s;
492 case YAZ_MARC_DATAFIELD:
/* tag and indicator are written without character set conversion */
493 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
494 n->u.datafield.indicator);
495 for (s = n->u.datafield.subfields; s; s = s->next)
/* split code_data into the code part and the data part */
497 size_t using_code_len = get_subfield_len(mt, s->code_data,
500 wrbuf_puts (wr, mt->subfield_str);
501 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
503 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
504 wrbuf_iconv_puts(wr, mt->iconv_cd,
505 s->code_data + using_code_len);
506 marc_iconv_reset(mt, wr);
508 wrbuf_puts (wr, mt->endline_str);
510 case YAZ_MARC_CONTROLFIELD:
511 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
512 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
513 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
514 marc_iconv_reset(mt, wr);
515 wrbuf_puts (wr, mt->endline_str);
517 case YAZ_MARC_COMMENT:
519 wrbuf_iconv_write(wr, mt->iconv_cd,
520 n->u.comment, strlen(n->u.comment));
521 marc_iconv_reset(mt, wr);
522 wrbuf_puts(wr, ")\n");
524 case YAZ_MARC_LEADER:
525 wrbuf_printf(wr, "%s\n", n->u.leader);
/* a final newline follows the record */
528 wrbuf_puts(wr, "\n");
/* Writes the closing collection tag when a collection has been opened,
   that is when enable_collection has reached collection_second, and the
   output format is one of the XML formats handled below.
   \param mt handle
   \param wr output buffer */
532 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
534 if (mt->enable_collection == collection_second)
536 switch(mt->output_format)
538 case YAZ_MARC_MARCXML:
539 case YAZ_MARC_TMARCXML:
540 wrbuf_printf(wr, "</collection>\n");
542 case YAZ_MARC_XCHANGE:
543 wrbuf_printf(wr, "</collection>\n");
/* Requests collection wrapping: the state is set to collection_first, so
   the next XML record written opens a collection element. */
550 void yaz_marc_enable_collection(yaz_marc_t mt)
552 mt->enable_collection = collection_first;
/* Dispatches to the writer that matches the output format of the handle
   and returns whatever that writer returns.
   \param mt handle
   \param wr output buffer */
555 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
557 switch(mt->output_format)
560 return yaz_marc_write_line(mt, wr);
/* MARCXML and turbo MARCXML share one writer entry point */
561 case YAZ_MARC_MARCXML:
562 case YAZ_MARC_TMARCXML:
563 return yaz_marc_write_marcxml(mt, wr);
564 case YAZ_MARC_XCHANGE:
565 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
566 case YAZ_MARC_ISO2709:
567 return yaz_marc_write_iso2709(mt, wr);
568 return yaz_marc_write_check(mt, wr);
574 /** \brief common MARC XML/Xchange writer
576 \param wr WRBUF output
577 \param ns XMLNS for the elements
578 \param format record format (e.g. "MARC21")
579 \param type record type (e.g. "Bibliographic")
// Writes the record as XML text directly into wr, without libxml2.
// When collection wrapping is enabled the first record also opens the
// collection element (which then carries the namespace) and the state
// moves to collection_second; otherwise the record element carries the
// namespace itself. Field values pass through the character set converter
// with XML escaping (wrbuf_iconv_write_cdata); the leader is escaped but
// not converted.
581 static int yaz_marc_write_marcxml_ns1(yaz_marc_t mt, WRBUF wr,
586 struct yaz_marc_node *n;
587 int identifier_length;
588 const char *leader = 0;
// find the leader; its offset 11 gives the identifier length
590 for (n = mt->nodes; n; n = n->next)
591 if (n->which == YAZ_MARC_LEADER)
593 leader = n->u.leader;
599 if (!atoi_n_check(leader+11, 1, &identifier_length))
602 if (mt->enable_collection != no_collection)
604 if (mt->enable_collection == collection_first)
605 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
606 mt->enable_collection = collection_second;
607 wrbuf_printf(wr, "<record");
611 wrbuf_printf(wr, "<record xmlns=\"%s\"", ns);
// format and type attribute values are cut at 80 characters
614 wrbuf_printf(wr, " format=\"%.80s\"", format);
616 wrbuf_printf(wr, " type=\"%.80s\"", type);
617 wrbuf_printf(wr, ">\n");
618 for (n = mt->nodes; n; n = n->next)
620 struct yaz_marc_subfield *s;
624 case YAZ_MARC_DATAFIELD:
625 wrbuf_printf(wr, " <datafield tag=\"");
626 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
627 strlen(n->u.datafield.tag));
628 wrbuf_printf(wr, "\"");
629 if (n->u.datafield.indicator)
// one attribute ind1, ind2, .. per indicator character
632 for (i = 0; n->u.datafield.indicator[i]; i++)
634 wrbuf_printf(wr, " ind%d=\"", i+1);
635 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
636 n->u.datafield.indicator+i, 1);
637 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
640 wrbuf_printf(wr, ">\n");
641 for (s = n->u.datafield.subfields; s; s = s->next)
// split code_data into the code part and the data part
643 size_t using_code_len = get_subfield_len(mt, s->code_data,
645 wrbuf_iconv_puts(wr, mt->iconv_cd, " <subfield code=\"");
646 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
647 s->code_data, using_code_len);
648 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
649 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
650 s->code_data + using_code_len,
651 strlen(s->code_data + using_code_len));
652 marc_iconv_reset(mt, wr);
653 wrbuf_iconv_puts(wr, mt->iconv_cd, "</subfield>");
654 wrbuf_puts(wr, "\n");
656 wrbuf_printf(wr, " </datafield>\n");
658 case YAZ_MARC_CONTROLFIELD:
659 wrbuf_printf(wr, " <controlfield tag=\"");
660 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
661 strlen(n->u.controlfield.tag));
662 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
663 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
664 n->u.controlfield.data,
665 strlen(n->u.controlfield.data));
667 marc_iconv_reset(mt, wr);
668 wrbuf_iconv_puts(wr, mt->iconv_cd, "</controlfield>");
669 wrbuf_puts(wr, "\n");
671 case YAZ_MARC_COMMENT:
// the comment text is written as is, neither converted nor escaped
672 wrbuf_printf(wr, "<!-- ");
673 wrbuf_puts(wr, n->u.comment);
674 wrbuf_printf(wr, " -->\n");
676 case YAZ_MARC_LEADER:
677 wrbuf_printf(wr, " <leader>");
678 wrbuf_iconv_write_cdata(wr,
679 0 /* no charset conversion for leader */,
680 n->u.leader, strlen(n->u.leader));
681 wrbuf_printf(wr, "</leader>\n");
684 wrbuf_puts(wr, "</record>\n");
/* Common entry point of the XML writers. With write_using_libxml2 set the
   record is first built as a libxml2 tree (plain MARCXML or turbo XML
   builder), placed in a new document and serialized into wr; otherwise the
   text based writer yaz_marc_write_marcxml_ns1 is used. */
688 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
693 if (mt->write_using_libxml2)
/* only MARCXML uses the plain builder; every other format takes the
   turbo builder, which itself checks the output format again */
699 if (yaz_marc_get_write_format(mt) == YAZ_MARC_MARCXML)
700 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
701 else // Check for Turbo XML
702 ret = yaz_marc_write_turbo_xml(mt, &root_ptr, ns, format, type);
706 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
709 xmlDocSetRootElement(doc, root_ptr);
710 xmlDocDumpMemory(doc, &buf_out, &len_out);
/* copy the serialized document into the caller supplied buffer */
712 wrbuf_write(wr, (const char *) buf_out, len_out);
723 return yaz_marc_write_marcxml_ns1(mt, wr, ns, format, type);
/* Writes the record as MARCXML, or as turbo MARCXML when that is the
   output format of the handle; the two differ in the namespace passed on
   to yaz_marc_write_marcxml_ns.
   \param mt handle
   \param wr output buffer */
726 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
728 /* set leader 09 to 'a' for UNICODE */
729 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
/* a leader_spec supplied by the user takes precedence over this default */
730 if (!mt->leader_spec)
731 yaz_marc_modify_leader(mt, 9, "a");
732 char *name_space = "http://www.loc.gov/MARC21/slim";
733 if (mt->output_format == YAZ_MARC_TMARCXML)
734 name_space = "http://www.indexdata.com/MARC21/turboxml";
735 return yaz_marc_write_marcxml_ns(mt, wr, name_space,
/* Writes the record as MarcXchange: delegates to
   yaz_marc_write_marcxml_ns with the marcxchange-v1 namespace. */
739 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
743 return yaz_marc_write_marcxml_ns(mt, wr,
744 "info:lc/xmlns/marcxchange-v1",
/* Adds one data field node to a libxml2 record element.
   In plain mode the field becomes a datafield element with tag and indN
   attributes and subfield children carrying a code attribute. In turbo
   mode (output format YAZ_MARC_TMARCXML) the tag and the subfield code
   are folded into element names and the indicators become iN child
   elements.
   \param mt handle
   \param n data field node to convert
   \param record_ptr parent record element
   \param ns_record namespace used for the created elements
   \param wr_cdata scratch buffer for converted text; rewound before use
   \param identifier_length identifier length taken from the leader */
750 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n, xmlNode *record_ptr, xmlNsPtr ns_record, WRBUF wr_cdata, int identifier_length)
753 struct yaz_marc_subfield *s;
754 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
756 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
757 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
760 //TODO consider if safe
/* NOTE(review): at most 3 tag bytes are copied behind the first byte of
   field; strncpy does not terminate the string when the tag has 3 or more
   bytes - confirm that field is sized and terminated accordingly */
763 strncpy(field + 1, n->u.datafield.tag, 3);
765 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
767 if (n->u.datafield.indicator)
770 for (i = 0; n->u.datafield.indicator[i]; i++)
775 ind_val[0] = n->u.datafield.indicator[i];
/* plain mode: attribute ind1, ind2, .. */
778 sprintf(ind_str, "ind%d", i+1);
779 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
/* turbo mode: child element i1, i2, .. */
782 sprintf(ind_str, "i%d", i+1);
783 xmlNewTextChild(ptr, ns_record, BAD_CAST ind_str, BAD_CAST ind_val);
/* scratch buffer for turbo element names; destroyed at the end */
787 WRBUF subfield_name = wrbuf_alloc();
788 for (s = n->u.datafield.subfields; s; s = s->next)
790 xmlNode *ptr_subfield;
791 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* convert the data part of the subfield into wr_cdata */
793 wrbuf_rewind(wr_cdata);
794 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
795 marc_iconv_reset(mt, wr_cdata);
798 ptr_subfield = xmlNewTextChild(
800 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* wr_cdata is reused for the converted subfield code */
801 wrbuf_rewind(wr_cdata);
802 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
803 xmlNewProp(ptr_subfield, BAD_CAST "code",
804 BAD_CAST wrbuf_cstr(wr_cdata));
806 else { // Turbo format
807 wrbuf_rewind(subfield_name);
808 wrbuf_puts(subfield_name, "s");
809 // TODO Map special codes to something possible for XML ELEMENT names
/* codes starting with an ASCII digit or letter are appended directly */
810 if ((s->code_data[0] >= '0' && s->code_data[0] <= '9') ||
811 (s->code_data[0] >= 'a' && s->code_data[0] <= 'z') ||
812 (s->code_data[0] >= 'A' && s->code_data[0] <= 'Z'))
814 wrbuf_iconv_write(subfield_name, mt->iconv_cd,s->code_data, using_code_len);
/* any other code is appended as a dash followed by its bytes in hex */
817 char buffer[2*using_code_len + 1];
819 for (index = 0; index < using_code_len; index++) {
820 sprintf(buffer + 2*index, "%02X", (unsigned char) s->code_data[index] & 0xFF);
/* NOTE(review): after the loop index equals using_code_len, so this
   store targets offset 2*using_code_len + 2, which lies beyond the
   2*using_code_len + 1 bytes of buffer; sprintf has already placed the
   terminator at offset 2*using_code_len - looks like an out of bounds
   write, confirm and fix */
822 buffer[2*(index+1)] = 0;
823 wrbuf_puts(subfield_name, "-");
824 wrbuf_puts(subfield_name, buffer);
825 yaz_log(YLOG_WARN, "Using numeric value in element name: %s", buffer);
827 ptr_subfield = xmlNewTextChild(ptr, ns_record,
828 BAD_CAST wrbuf_cstr(subfield_name),
829 BAD_CAST wrbuf_cstr(wr_cdata));
832 wrbuf_destroy(subfield_name);
/* Builds a libxml2 tree for the record, in turbo layout when the output
   format is YAZ_MARC_TMARCXML. The created record element is handed back
   through root_ptr. Data fields are delegated to
   add_marc_datafield_turbo_xml. */
835 int yaz_marc_write_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
840 struct yaz_marc_node *n;
841 int identifier_length;
842 const char *leader = 0;
846 int turbo = mt->output_format == YAZ_MARC_TMARCXML;
/* find the leader; its offset 11 gives the identifier length */
847 for (n = mt->nodes; n; n = n->next)
848 if (n->which == YAZ_MARC_LEADER)
850 leader = n->u.leader;
856 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for converted text; destroyed at the end */
859 wr_cdata = wrbuf_alloc();
861 record_ptr = xmlNewNode(0, BAD_CAST "record");
862 *root_ptr = record_ptr;
/* declare ns as default namespace (no prefix) on the record element */
864 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
865 xmlSetNs(record_ptr, ns_record);
868 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
870 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
871 for (n = mt->nodes; n; n = n->next)
873 struct yaz_marc_subfield *s;
878 case YAZ_MARC_DATAFIELD:
879 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
881 case YAZ_MARC_CONTROLFIELD:
/* convert the field data into wr_cdata first */
882 wrbuf_rewind(wr_cdata);
883 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
884 marc_iconv_reset(mt, wr_cdata);
887 ptr = xmlNewTextChild(record_ptr, ns_record,
888 BAD_CAST "controlfield",
889 BAD_CAST wrbuf_cstr(wr_cdata));
890 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
893 // TODO required iconv?
/* NOTE(review): same unterminated strncpy concern as for the data field
   tag - confirm that field is sized and terminated accordingly */
896 strncpy(field + 1, n->u.controlfield.tag, 3);
898 ptr = xmlNewTextChild(record_ptr, ns_record,
900 BAD_CAST wrbuf_cstr(wr_cdata));
904 case YAZ_MARC_COMMENT:
905 ptr = xmlNewComment(BAD_CAST n->u.comment);
906 xmlAddChild(record_ptr, ptr);
908 case YAZ_MARC_LEADER:
910 char *field = "leader";
/* the leader text is passed on without character set conversion */
913 xmlNewTextChild(record_ptr, ns_record, BAD_CAST field,
914 BAD_CAST n->u.leader);
919 wrbuf_destroy(wr_cdata);
/* Builds a libxml2 tree for the record in plain MARCXML layout: a record
   element with leader, controlfield and datafield children. The created
   record element is handed back through root_ptr. */
924 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
929 struct yaz_marc_node *n;
930 int identifier_length;
931 const char *leader = 0;
/* find the leader; its offset 11 gives the identifier length */
936 for (n = mt->nodes; n; n = n->next)
937 if (n->which == YAZ_MARC_LEADER)
939 leader = n->u.leader;
945 if (!atoi_n_check(leader+11, 1, &identifier_length))
/* scratch buffer for converted text; destroyed at the end */
948 wr_cdata = wrbuf_alloc();
950 record_ptr = xmlNewNode(0, BAD_CAST "record");
951 *root_ptr = record_ptr;
/* declare ns as default namespace (no prefix) on the record element */
953 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
954 xmlSetNs(record_ptr, ns_record);
957 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
959 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
960 for (n = mt->nodes; n; n = n->next)
962 struct yaz_marc_subfield *s;
967 case YAZ_MARC_DATAFIELD:
968 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
969 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
970 if (n->u.datafield.indicator)
/* one attribute ind1, ind2, .. per indicator character */
973 for (i = 0; n->u.datafield.indicator[i]; i++)
978 sprintf(ind_str, "ind%d", i+1);
979 ind_val[0] = n->u.datafield.indicator[i];
981 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
984 for (s = n->u.datafield.subfields; s; s = s->next)
986 xmlNode *ptr_subfield;
987 size_t using_code_len = get_subfield_len(mt, s->code_data,
/* the data part becomes the element text */
989 wrbuf_rewind(wr_cdata);
990 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
991 s->code_data + using_code_len);
992 marc_iconv_reset(mt, wr_cdata);
993 ptr_subfield = xmlNewTextChild(
995 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
/* the code part becomes the code attribute */
997 wrbuf_rewind(wr_cdata);
998 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
999 s->code_data, using_code_len);
1000 xmlNewProp(ptr_subfield, BAD_CAST "code",
1001 BAD_CAST wrbuf_cstr(wr_cdata));
1004 case YAZ_MARC_CONTROLFIELD:
1005 wrbuf_rewind(wr_cdata);
1006 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1007 marc_iconv_reset(mt, wr_cdata);
1009 ptr = xmlNewTextChild(record_ptr, ns_record,
1010 BAD_CAST "controlfield",
1011 BAD_CAST wrbuf_cstr(wr_cdata));
1013 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1015 case YAZ_MARC_COMMENT:
1016 ptr = xmlNewComment(BAD_CAST n->u.comment);
1017 xmlAddChild(record_ptr, ptr);
1019 case YAZ_MARC_LEADER:
/* the leader text is passed on without character set conversion */
1020 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1021 BAD_CAST n->u.leader);
1025 wrbuf_destroy(wr_cdata);
/* Writes the record in ISO2709 exchange format.
   Works in two passes over the node list. The first pass converts every
   field into a scratch buffer only to learn its length and builds the
   directory (tag, length, offset per field). Then the 24 byte header is
   assembled from the computed record length, the computed base address
   and the untouched parts of the original leader. The second pass writes
   the field data with the real separators, followed by the record
   separator.
   \param mt handle
   \param wr output buffer */
1034 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1036 struct yaz_marc_node *n;
1037 int indicator_length;
1038 int identifier_length;
1039 int length_data_entry;
1040 int length_starting;
1041 int length_implementation;
1042 int data_offset = 0;
1043 const char *leader = 0;
1044 WRBUF wr_dir, wr_head, wr_data_tmp;
1047 for (n = mt->nodes; n; n = n->next)
1048 if (n->which == YAZ_MARC_LEADER)
1049 leader = n->u.leader;
/* the layout parameters are single digits taken from the leader */
1053 if (!atoi_n_check(leader+10, 1, &indicator_length))
1055 if (!atoi_n_check(leader+11, 1, &identifier_length))
1057 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1059 if (!atoi_n_check(leader+21, 1, &length_starting))
1061 if (!atoi_n_check(leader+22, 1, &length_implementation))
/* pass 1: measure every field and build the directory */
1064 wr_data_tmp = wrbuf_alloc();
1065 wr_dir = wrbuf_alloc();
1066 for (n = mt->nodes; n; n = n->next)
1068 int data_length = 0;
1069 struct yaz_marc_subfield *s;
1073 case YAZ_MARC_DATAFIELD:
/* directory tags are cut at 3 characters */
1074 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1075 data_length += indicator_length;
1076 wrbuf_rewind(wr_data_tmp);
1077 for (s = n->u.datafield.subfields; s; s = s->next)
1079 /* write dummy IDFS + content */
1080 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1081 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1082 marc_iconv_reset(mt, wr_data_tmp);
1084 /* write dummy FS (makes MARC-8 to become ASCII) */
1085 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1086 marc_iconv_reset(mt, wr_data_tmp);
1087 data_length += wrbuf_len(wr_data_tmp);
1089 case YAZ_MARC_CONTROLFIELD:
1090 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1092 wrbuf_rewind(wr_data_tmp);
1093 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1094 n->u.controlfield.data);
1095 marc_iconv_reset(mt, wr_data_tmp);
1096 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1097 marc_iconv_reset(mt, wr_data_tmp);
1098 data_length += wrbuf_len(wr_data_tmp);
1100 case YAZ_MARC_COMMENT:
1102 case YAZ_MARC_LEADER:
/* directory entry: zero padded field length and start offset, with the
   widths announced in the leader */
1107 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1108 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1109 data_offset += data_length;
1112 /* mark end of directory */
1113 wrbuf_putc(wr_dir, ISO2709_FS);
1115 /* base address of data (comes after leader+directory) */
1116 base_address = 24 + wrbuf_len(wr_dir);
1118 wr_head = wrbuf_alloc();
1120 /* write record length */
/* presumably the extra byte accounts for the record separator written at
   the very end - confirm */
1121 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1122 /* from "original" leader */
1123 wrbuf_write(wr_head, leader+5, 7);
1124 /* base address of data */
1125 wrbuf_printf(wr_head, "%05d", base_address);
1126 /* from "original" leader */
1127 wrbuf_write(wr_head, leader+17, 7);
/* emit header and directory, then drop the scratch buffers */
1129 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1130 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1131 wrbuf_destroy(wr_head);
1132 wrbuf_destroy(wr_dir);
1133 wrbuf_destroy(wr_data_tmp);
/* pass 2: write the field data with the real separators */
1135 for (n = mt->nodes; n; n = n->next)
1137 struct yaz_marc_subfield *s;
1141 case YAZ_MARC_DATAFIELD:
/* at most indicator_length indicator characters are written */
1142 wrbuf_printf(wr, "%.*s", indicator_length,
1143 n->u.datafield.indicator);
1144 for (s = n->u.datafield.subfields; s; s = s->next)
1146 wrbuf_putc(wr, ISO2709_IDFS);
1147 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1148 marc_iconv_reset(mt, wr);
1150 wrbuf_putc(wr, ISO2709_FS);
1152 case YAZ_MARC_CONTROLFIELD:
1153 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1154 marc_iconv_reset(mt, wr);
1155 wrbuf_putc(wr, ISO2709_FS);
1157 case YAZ_MARC_COMMENT:
1159 case YAZ_MARC_LEADER:
/* record separator terminates the record */
1163 wrbuf_printf(wr, "%c", ISO2709_RS);
/* Reads one ISO2709 record from buf and writes it to wr in the output
   format of the handle.
   \param mt handle
   \param buf input bytes
   \param bsize number of input bytes
   \param wr output buffer
   \return -1 on error, otherwise the value returned by
   yaz_marc_read_iso2709 */
1168 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1170 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1173 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1175 return -1; /* error */
1176 return r; /* OK, return length > 0 */
/* Like yaz_marc_decode_wrbuf, but the output goes into the buffer owned
   by the handle (m_wr). result and rsize receive a pointer to that buffer
   and its length; the memory stays owned by the handle and is overwritten
   by the next call, since the buffer is rewound first.
   NOTE(review): result and rsize are presumably optional - confirm. */
1179 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1180 const char **result, size_t *rsize)
1184 wrbuf_rewind(mt->m_wr);
1185 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1187 *result = wrbuf_cstr(mt->m_wr);
1189 *rsize = wrbuf_len(mt->m_wr);
/* Stores format as the input format of the handle. */
1193 void yaz_marc_set_read_format(yaz_marc_t mt, int format)
1196 mt->input_format = format;
/* Returns the input format stored in the handle. */
1199 int yaz_marc_get_read_format(yaz_marc_t mt)
1202 return mt->input_format;
/* Stores format as the output format of the handle. Selecting turbo
   MARCXML also switches the libxml2 based writer on; choosing another
   format afterwards does not switch it off again here. */
1207 void yaz_marc_set_write_format(yaz_marc_t mt, int format)
1210 mt->output_format = format;
1211 // Force using libxml2
1212 if (mt->output_format == YAZ_MARC_TMARCXML)
1213 mt->write_using_libxml2 = 1;
/* Returns the output format stored in the handle. */
1217 int yaz_marc_get_write_format(yaz_marc_t mt)
1220 return mt->output_format;
1226 * Deprecated, use yaz_marc_set_write_format
/* Older name for selecting the output format: passes xmlmode straight to
   yaz_marc_set_write_format. */
1228 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1230 yaz_marc_set_write_format(mt, xmlmode);
1235 void yaz_marc_debug(yaz_marc_t mt, int level)
1241 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
/* Returns the character set converter currently attached to the handle. */
1246 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1248 return mt->iconv_cd;
/* Overwrites part of the leader of the current record in place.
   \param mt handle
   \param off byte offset within the leader where writing starts
   \param str replacement text; strlen(str) bytes are copied, without the
   terminating zero */
1251 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1253 struct yaz_marc_node *n;
1255 for (n = mt->nodes; n; n = n->next)
1256 if (n->which == YAZ_MARC_LEADER)
1258 leader = n->u.leader;
/* NOTE(review): no check that off + strlen(str) stays within the leader
   is visible - confirm that callers only pass ranges inside it */
1259 memcpy(leader+off, str, strlen(str));
/* Replaces the leader specification of the handle. The previous
   specification is freed first. The new one is tried on a scratch 24
   byte leader with marc_exec_leader and a private copy of it is stored
   in the handle.
   NOTE(review): presumably the copy is only stored when the trial run
   succeeds and a null leader_spec just clears the setting - confirm. */
1264 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1266 xfree(mt->leader_spec);
1267 mt->leader_spec = 0;
1270 char dummy_leader[24];
1271 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1273 mt->leader_spec = xstrdup(leader_spec);
/* Applies a leader specification to a leader buffer.
   Each entry is scanned as a decimal position, an equals sign and a value
   of at most 20 characters that ends at a comma. A value starting with a
   single quote is copied literally (the text between the quotes) into the
   leader at the given position; a value starting with a digit is handled
   by a separate branch.
   \param leader_spec specification string
   \param leader buffer that is modified
   \param size number of bytes available in leader */
1278 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1280 const char *cp = leader_spec;
1285 int no_read = 0, no = 0;
/* no_read receives the number of characters consumed by this entry */
1287 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
/* both conversions must succeed and at least 3 characters must match */
1288 if (no < 2 || no_read < 3)
1290 if (pos < 0 || (size_t) pos >= size)
/* look for the closing quote of a quoted value */
1295 const char *vp = strchr(val+1, '\'');
/* the quoted text has to fit between pos and the end of the leader */
1301 if (len + pos > size)
1303 memcpy(leader + pos, val+1, len);
1305 else if (*val >= '0' && *val <= '9')
/* Maps a format name to the corresponding YAZ_MARC_ constant. The names
   recognized below are marc, marcxml, tmarcxml, marcxchange and line; the
   comparison is case sensitive.
   NOTE(review): the value used for an unknown name is not set in the
   lines below - confirm what callers get in that case. */
1321 int yaz_marc_decode_formatstr(const char *arg)
1324 if (!strcmp(arg, "marc"))
1325 mode = YAZ_MARC_ISO2709;
1326 if (!strcmp(arg, "marcxml"))
1327 mode = YAZ_MARC_MARCXML;
1328 if (!strcmp(arg, "tmarcxml"))
1329 mode = YAZ_MARC_TMARCXML;
1330 if (!strcmp(arg, "marcxchange"))
1331 mode = YAZ_MARC_XCHANGE;
1332 if (!strcmp(arg, "line"))
1333 mode = YAZ_MARC_LINE;
/* Switches the libxml2 based XML writer on (non-zero) or off (zero). */
1337 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1339 mt->write_using_libxml2 = enable;
/* Returns non-zero when the output format of the handle is turbo
   MARCXML (YAZ_MARC_TMARCXML), zero otherwise. */
1342 int yaz_marc_is_turbo_format(yaz_marc_t mt)
1344 return mt->output_format == YAZ_MARC_TMARCXML;
1351 * c-file-style: "Stroustrup"
1352 * indent-tabs-mode: nil
1354 * vim: shiftwidth=4 tabstop=8 expandtab