1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2012 Index Data
3 * See the file LICENSE for details.
8 * \brief Implements MARC conversion utilities
23 #include <yaz/marcdisp.h>
24 #include <yaz/wrbuf.h>
25 #include <yaz/yaz-util.h>
26 #include <yaz/nmem_xml.h>
27 #include <yaz/snprintf.h>
30 #include <libxml/parser.h>
31 #include <libxml/tree.h>
34 enum yaz_collection_state {
40 /** \brief node types for yaz_marc_node */
41 enum YAZ_MARC_NODE_TYPE
44 YAZ_MARC_CONTROLFIELD,
49 /** \brief represents a data field */
50 struct yaz_marc_datafield {
53 struct yaz_marc_subfield *subfields;
56 /** \brief represents a control field */
57 struct yaz_marc_controlfield {
62 /** \brief a comment node */
63 struct yaz_marc_comment {
67 /** \brief MARC node */
68 struct yaz_marc_node {
69 enum YAZ_MARC_NODE_TYPE which;
71 struct yaz_marc_datafield datafield;
72 struct yaz_marc_controlfield controlfield;
76 struct yaz_marc_node *next;
79 /** \brief represents a subfield */
80 struct yaz_marc_subfield {
82 struct yaz_marc_subfield *next;
85 /** \brief the internals of a yaz_marc_t handle */
91 int write_using_libxml2;
92 enum yaz_collection_state enable_collection;
97 struct yaz_marc_node *nodes;
98 struct yaz_marc_node **nodes_pp;
99 struct yaz_marc_subfield **subfield_pp;
102 yaz_marc_t yaz_marc_create(void)
104 yaz_marc_t mt = (yaz_marc_t) xmalloc(sizeof(*mt));
105 mt->output_format = YAZ_MARC_LINE;
107 mt->write_using_libxml2 = 0;
108 mt->enable_collection = no_collection;
109 mt->m_wr = wrbuf_alloc();
112 strcpy(mt->subfield_str, " $");
113 strcpy(mt->endline_str, "\n");
115 mt->nmem = nmem_create();
120 void yaz_marc_destroy(yaz_marc_t mt)
124 nmem_destroy(mt->nmem);
125 wrbuf_destroy(mt->m_wr);
126 xfree(mt->leader_spec);
130 NMEM yaz_marc_get_nmem(yaz_marc_t mt)
135 static void marc_iconv_reset(yaz_marc_t mt, WRBUF wr)
137 wrbuf_iconv_reset(wr, mt->iconv_cd);
140 static int marc_exec_leader(const char *leader_spec, char *leader,
143 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
149 static struct yaz_marc_node *yaz_marc_add_node(yaz_marc_t mt)
151 struct yaz_marc_node *n = (struct yaz_marc_node *)
152 nmem_malloc(mt->nmem, sizeof(*n));
155 mt->nodes_pp = &n->next;
160 void yaz_marc_add_controlfield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
161 const xmlNode *ptr_data)
163 struct yaz_marc_node *n = yaz_marc_add_node(mt);
164 n->which = YAZ_MARC_CONTROLFIELD;
165 n->u.controlfield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
166 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
169 void yaz_marc_add_controlfield_xml2(yaz_marc_t mt, char *tag,
170 const xmlNode *ptr_data)
172 struct yaz_marc_node *n = yaz_marc_add_node(mt);
173 n->which = YAZ_MARC_CONTROLFIELD;
174 n->u.controlfield.tag = tag;
175 n->u.controlfield.data = nmem_text_node_cdata(ptr_data, mt->nmem);
181 void yaz_marc_add_comment(yaz_marc_t mt, char *comment)
183 struct yaz_marc_node *n = yaz_marc_add_node(mt);
184 n->which = YAZ_MARC_COMMENT;
185 n->u.comment = nmem_strdup(mt->nmem, comment);
188 void yaz_marc_cprintf(yaz_marc_t mt, const char *fmt, ...)
194 yaz_vsnprintf(buf, sizeof(buf)-1, fmt, ap);
195 yaz_marc_add_comment(mt, buf);
199 int yaz_marc_get_debug(yaz_marc_t mt)
204 void yaz_marc_add_leader(yaz_marc_t mt, const char *leader, size_t leader_len)
206 struct yaz_marc_node *n = yaz_marc_add_node(mt);
207 n->which = YAZ_MARC_LEADER;
208 n->u.leader = nmem_strdupn(mt->nmem, leader, leader_len);
209 marc_exec_leader(mt->leader_spec, n->u.leader, leader_len);
212 void yaz_marc_add_controlfield(yaz_marc_t mt, const char *tag,
213 const char *data, size_t data_len)
215 struct yaz_marc_node *n = yaz_marc_add_node(mt);
216 n->which = YAZ_MARC_CONTROLFIELD;
217 n->u.controlfield.tag = nmem_strdup(mt->nmem, tag);
218 n->u.controlfield.data = nmem_strdupn(mt->nmem, data, data_len);
224 sprintf(msg, "controlfield:");
225 for (i = 0; i < 16 && i < data_len; i++)
226 sprintf(msg + strlen(msg), " %02X", data[i] & 0xff);
228 sprintf(msg + strlen(msg), " ..");
229 yaz_marc_add_comment(mt, msg);
233 void yaz_marc_add_datafield(yaz_marc_t mt, const char *tag,
234 const char *indicator, size_t indicator_len)
236 struct yaz_marc_node *n = yaz_marc_add_node(mt);
237 n->which = YAZ_MARC_DATAFIELD;
238 n->u.datafield.tag = nmem_strdup(mt->nmem, tag);
239 n->u.datafield.indicator =
240 nmem_strdupn(mt->nmem, indicator, indicator_len);
241 n->u.datafield.subfields = 0;
243 /* make subfield_pp the current (last one) */
244 mt->subfield_pp = &n->u.datafield.subfields;
247 /** \brief adds an attribute value to the element name if it is plain chars
249 If not, and if the attribute name is not null, it will append an
250 attribute element with the value; if the attribute name is null it will
251 return a non-zero value, meaning it couldn't handle the value.
253 static int element_name_append_attribute_value(
254 yaz_marc_t mt, WRBUF buffer,
255 const char *attribute_name, char *code_data, size_t code_len)
257 /* TODO Map special codes to something possible for XML ELEMENT names */
262 for (index = 0; index < code_len; index++)
264 if (!((code_data[index] >= '0' && code_data[index] <= '9') ||
265 (code_data[index] >= 'a' && code_data[index] <= 'z') ||
266 (code_data[index] >= 'A' && code_data[index] <= 'Z')))
269 /* Add as attribute */
270 if (encode && attribute_name)
271 wrbuf_printf(buffer, " %s=\"", attribute_name);
273 if (!encode || attribute_name)
274 wrbuf_iconv_write_cdata(buffer, mt->iconv_cd, code_data, code_len);
278 if (encode && attribute_name)
279 wrbuf_printf(buffer, "\""); /* return error if we couldn't handle it.*/
284 void yaz_marc_add_datafield_xml(yaz_marc_t mt, const xmlNode *ptr_tag,
285 const char *indicator, size_t indicator_len)
287 struct yaz_marc_node *n = yaz_marc_add_node(mt);
288 n->which = YAZ_MARC_DATAFIELD;
289 n->u.datafield.tag = nmem_text_node_cdata(ptr_tag, mt->nmem);
290 n->u.datafield.indicator =
291 nmem_strdupn(mt->nmem, indicator, indicator_len);
292 n->u.datafield.subfields = 0;
294 /* make subfield_pp the current (last one) */
295 mt->subfield_pp = &n->u.datafield.subfields;
298 void yaz_marc_add_datafield_xml2(yaz_marc_t mt, char *tag_value, char *indicators)
300 struct yaz_marc_node *n = yaz_marc_add_node(mt);
301 n->which = YAZ_MARC_DATAFIELD;
302 n->u.datafield.tag = tag_value;
303 n->u.datafield.indicator = indicators;
304 n->u.datafield.subfields = 0;
306 /* make subfield_pp the current (last one) */
307 mt->subfield_pp = &n->u.datafield.subfields;
310 void yaz_marc_datafield_set_indicators(struct yaz_marc_node *n, char *indicator)
312 n->u.datafield.indicator = indicator;
317 void yaz_marc_add_subfield(yaz_marc_t mt,
318 const char *code_data, size_t code_data_len)
325 sprintf(msg, "subfield:");
326 for (i = 0; i < 16 && i < code_data_len; i++)
327 sprintf(msg + strlen(msg), " %02X", code_data[i] & 0xff);
328 if (i < code_data_len)
329 sprintf(msg + strlen(msg), " ..");
330 yaz_marc_add_comment(mt, msg);
335 struct yaz_marc_subfield *n = (struct yaz_marc_subfield *)
336 nmem_malloc(mt->nmem, sizeof(*n));
337 n->code_data = nmem_strdupn(mt->nmem, code_data, code_data_len);
339 /* mark subfield_pp to point to this one, so we append here next */
340 *mt->subfield_pp = n;
341 mt->subfield_pp = &n->next;
345 void yaz_marc_set_leader(yaz_marc_t mt, const char *leader_c,
346 int *indicator_length,
347 int *identifier_length,
349 int *length_data_entry,
350 int *length_starting,
351 int *length_implementation)
355 memcpy(leader, leader_c, 24);
357 if (!atoi_n_check(leader+10, 1, indicator_length))
360 "Indicator length at offset 10 should hold a digit."
363 *indicator_length = 2;
365 if (!atoi_n_check(leader+11, 1, identifier_length))
368 "Identifier length at offset 11 should hold a digit."
371 *identifier_length = 2;
373 if (!atoi_n_check(leader+12, 5, base_address))
376 "Base address at offsets 12..16 should hold a number."
380 if (!atoi_n_check(leader+20, 1, length_data_entry))
383 "Length data entry at offset 20 should hold a digit."
385 *length_data_entry = 4;
388 if (!atoi_n_check(leader+21, 1, length_starting))
391 "Length starting at offset 21 should hold a digit."
393 *length_starting = 5;
396 if (!atoi_n_check(leader+22, 1, length_implementation))
399 "Length implementation at offset 22 should hold a digit."
401 *length_implementation = 0;
407 yaz_marc_cprintf(mt, "Indicator length %5d", *indicator_length);
408 yaz_marc_cprintf(mt, "Identifier length %5d", *identifier_length);
409 yaz_marc_cprintf(mt, "Base address %5d", *base_address);
410 yaz_marc_cprintf(mt, "Length data entry %5d", *length_data_entry);
411 yaz_marc_cprintf(mt, "Length starting %5d", *length_starting);
412 yaz_marc_cprintf(mt, "Length implementation %5d", *length_implementation);
414 yaz_marc_add_leader(mt, leader, 24);
417 void yaz_marc_subfield_str(yaz_marc_t mt, const char *s)
419 strncpy(mt->subfield_str, s, sizeof(mt->subfield_str)-1);
420 mt->subfield_str[sizeof(mt->subfield_str)-1] = '\0';
423 void yaz_marc_endline_str(yaz_marc_t mt, const char *s)
425 strncpy(mt->endline_str, s, sizeof(mt->endline_str)-1);
426 mt->endline_str[sizeof(mt->endline_str)-1] = '\0';
429 /* try to guess how many bytes the identifier really is! */
430 static size_t cdata_one_character(yaz_marc_t mt, const char *buf)
435 for (i = 1; i<5; i++)
438 size_t outbytesleft = sizeof(outbuf);
440 const char *inp = buf;
442 size_t inbytesleft = i;
443 size_t r = yaz_iconv(mt->iconv_cd, (char**) &inp, &inbytesleft,
444 &outp, &outbytesleft);
445 if (r != (size_t) (-1))
446 return i; /* got a complete sequence */
448 return 1; /* giving up */
450 return 1; /* we don't know */
453 void yaz_marc_reset(yaz_marc_t mt)
455 nmem_reset(mt->nmem);
457 mt->nodes_pp = &mt->nodes;
461 int yaz_marc_write_check(yaz_marc_t mt, WRBUF wr)
463 struct yaz_marc_node *n;
464 int identifier_length;
465 const char *leader = 0;
467 for (n = mt->nodes; n; n = n->next)
468 if (n->which == YAZ_MARC_LEADER)
470 leader = n->u.leader;
476 if (!atoi_n_check(leader+11, 1, &identifier_length))
479 for (n = mt->nodes; n; n = n->next)
483 case YAZ_MARC_COMMENT:
484 wrbuf_iconv_write(wr, mt->iconv_cd,
485 n->u.comment, strlen(n->u.comment));
486 wrbuf_puts(wr, "\n");
495 static size_t get_subfield_len(yaz_marc_t mt, const char *data,
496 int identifier_length)
498 /* if identifier length is 2 (most MARCs) or less (probably an error),
499 the code is a single character .. However we've
500 seen multibyte codes, so see how big it really is */
501 if (identifier_length > 2)
502 return identifier_length - 1;
504 return cdata_one_character(mt, data);
507 int yaz_marc_write_line(yaz_marc_t mt, WRBUF wr)
509 struct yaz_marc_node *n;
510 int identifier_length;
511 const char *leader = 0;
513 for (n = mt->nodes; n; n = n->next)
514 if (n->which == YAZ_MARC_LEADER)
516 leader = n->u.leader;
522 if (!atoi_n_check(leader+11, 1, &identifier_length))
525 for (n = mt->nodes; n; n = n->next)
527 struct yaz_marc_subfield *s;
530 case YAZ_MARC_DATAFIELD:
531 wrbuf_printf(wr, "%s %s", n->u.datafield.tag,
532 n->u.datafield.indicator);
533 for (s = n->u.datafield.subfields; s; s = s->next)
535 size_t using_code_len = get_subfield_len(mt, s->code_data,
538 wrbuf_puts (wr, mt->subfield_str);
539 wrbuf_iconv_write(wr, mt->iconv_cd, s->code_data,
541 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
542 wrbuf_iconv_puts(wr, mt->iconv_cd,
543 s->code_data + using_code_len);
544 marc_iconv_reset(mt, wr);
546 wrbuf_puts (wr, mt->endline_str);
548 case YAZ_MARC_CONTROLFIELD:
549 wrbuf_printf(wr, "%s", n->u.controlfield.tag);
550 wrbuf_iconv_puts(wr, mt->iconv_cd, " ");
551 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
552 marc_iconv_reset(mt, wr);
553 wrbuf_puts (wr, mt->endline_str);
555 case YAZ_MARC_COMMENT:
557 wrbuf_iconv_write(wr, mt->iconv_cd,
558 n->u.comment, strlen(n->u.comment));
559 marc_iconv_reset(mt, wr);
560 wrbuf_puts(wr, ")\n");
562 case YAZ_MARC_LEADER:
563 wrbuf_printf(wr, "%s\n", n->u.leader);
566 wrbuf_puts(wr, "\n");
570 int yaz_marc_write_trailer(yaz_marc_t mt, WRBUF wr)
572 if (mt->enable_collection == collection_second)
574 switch(mt->output_format)
576 case YAZ_MARC_MARCXML:
577 case YAZ_MARC_TURBOMARC:
578 wrbuf_printf(wr, "</collection>\n");
580 case YAZ_MARC_XCHANGE:
581 wrbuf_printf(wr, "</collection>\n");
588 void yaz_marc_enable_collection(yaz_marc_t mt)
590 mt->enable_collection = collection_first;
593 int yaz_marc_write_mode(yaz_marc_t mt, WRBUF wr)
595 switch(mt->output_format)
598 return yaz_marc_write_line(mt, wr);
599 case YAZ_MARC_MARCXML:
600 return yaz_marc_write_marcxml(mt, wr);
601 case YAZ_MARC_TURBOMARC:
602 return yaz_marc_write_turbomarc(mt, wr);
603 case YAZ_MARC_XCHANGE:
604 return yaz_marc_write_marcxchange(mt, wr, 0, 0); /* no format, type */
605 case YAZ_MARC_ISO2709:
606 return yaz_marc_write_iso2709(mt, wr);
608 return yaz_marc_write_check(mt, wr);
613 static const char *record_name[2] = { "record", "r"};
614 static const char *leader_name[2] = { "leader", "l"};
615 static const char *controlfield_name[2] = { "controlfield", "c"};
616 static const char *datafield_name[2] = { "datafield", "d"};
617 static const char *indicator_name[2] = { "ind", "i"};
618 static const char *subfield_name[2] = { "subfield", "s"};
620 /** \brief common MARC XML/Xchange/turbomarc writer
622 \param wr WRBUF output
623 \param ns XMLNS for the elements
624 \param format record format (e.g. "MARC21")
625 \param type record type (e.g. "Bibliographic")
626 \param turbo =1 for turbomarc
630 static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
636 struct yaz_marc_node *n;
637 int identifier_length;
638 const char *leader = 0;
640 for (n = mt->nodes; n; n = n->next)
641 if (n->which == YAZ_MARC_LEADER)
643 leader = n->u.leader;
649 if (!atoi_n_check(leader+11, 1, &identifier_length))
652 if (mt->enable_collection != no_collection)
654 if (mt->enable_collection == collection_first)
656 wrbuf_printf(wr, "<collection xmlns=\"%s\">\n", ns);
657 mt->enable_collection = collection_second;
659 wrbuf_printf(wr, "<%s", record_name[turbo]);
663 wrbuf_printf(wr, "<%s xmlns=\"%s\"", record_name[turbo], ns);
666 wrbuf_printf(wr, " format=\"%.80s\"", format);
668 wrbuf_printf(wr, " type=\"%.80s\"", type);
669 wrbuf_printf(wr, ">\n");
670 for (n = mt->nodes; n; n = n->next)
672 struct yaz_marc_subfield *s;
676 case YAZ_MARC_DATAFIELD:
678 wrbuf_printf(wr, " <%s", datafield_name[turbo]);
680 wrbuf_printf(wr, " tag=\"");
681 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
682 strlen(n->u.datafield.tag));
684 wrbuf_printf(wr, "\"");
685 if (n->u.datafield.indicator)
688 for (i = 0; n->u.datafield.indicator[i]; i++)
690 wrbuf_printf(wr, " %s%d=\"", indicator_name[turbo], i+1);
691 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
692 n->u.datafield.indicator+i, 1);
693 wrbuf_iconv_puts(wr, mt->iconv_cd, "\"");
696 wrbuf_printf(wr, ">\n");
697 for (s = n->u.datafield.subfields; s; s = s->next)
699 size_t using_code_len = get_subfield_len(mt, s->code_data,
701 wrbuf_printf(wr, " <%s", subfield_name[turbo]);
704 wrbuf_printf(wr, " code=\"");
705 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
706 s->code_data, using_code_len);
707 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
711 element_name_append_attribute_value(mt, wr, "code", s->code_data, using_code_len);
714 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
715 s->code_data + using_code_len,
716 strlen(s->code_data + using_code_len));
717 marc_iconv_reset(mt, wr);
718 wrbuf_printf(wr, "</%s", subfield_name[turbo]);
720 element_name_append_attribute_value(mt, wr, 0, s->code_data, using_code_len);
721 wrbuf_puts(wr, ">\n");
723 wrbuf_printf(wr, " </%s", datafield_name[turbo]);
726 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.datafield.tag,
727 strlen(n->u.datafield.tag));
728 wrbuf_printf(wr, ">\n");
730 case YAZ_MARC_CONTROLFIELD:
731 wrbuf_printf(wr, " <%s", controlfield_name[turbo]);
734 wrbuf_printf(wr, " tag=\"");
735 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
736 strlen(n->u.controlfield.tag));
737 wrbuf_iconv_puts(wr, mt->iconv_cd, "\">");
741 /* TODO convert special */
742 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
743 strlen(n->u.controlfield.tag));
744 wrbuf_iconv_puts(wr, mt->iconv_cd, ">");
746 wrbuf_iconv_write_cdata(wr, mt->iconv_cd,
747 n->u.controlfield.data,
748 strlen(n->u.controlfield.data));
749 marc_iconv_reset(mt, wr);
750 wrbuf_printf(wr, "</%s", controlfield_name[turbo]);
751 /* TODO convert special */
753 wrbuf_iconv_write_cdata(wr, mt->iconv_cd, n->u.controlfield.tag,
754 strlen(n->u.controlfield.tag));
755 wrbuf_puts(wr, ">\n");
757 case YAZ_MARC_COMMENT:
758 wrbuf_printf(wr, "<!-- ");
759 wrbuf_puts(wr, n->u.comment);
760 wrbuf_printf(wr, " -->\n");
762 case YAZ_MARC_LEADER:
763 wrbuf_printf(wr, " <%s>", leader_name[turbo]);
764 wrbuf_iconv_write_cdata(wr,
765 0 , /* no charset conversion for leader */
766 n->u.leader, strlen(n->u.leader));
767 wrbuf_printf(wr, "</%s>\n", leader_name[turbo]);
770 wrbuf_printf(wr, "</%s>\n", record_name[turbo]);
774 static void sanitise_leader_for_utf8(yaz_marc_t mt)
776 /* the leader MUST be ASCII for UTF-8 output (XML) */
777 struct yaz_marc_node *n;
778 for (n = mt->nodes; n; n = n->next)
779 if (n->which == YAZ_MARC_LEADER)
782 for (i = 0; n->u.leader[i]; i++)
783 if (n->u.leader[i] < ' ' || n->u.leader[i] > 126)
785 n->u.leader[i] = ' ';
786 yaz_marc_cprintf(mt, "Fixing leader char at offset %d",
792 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
798 sanitise_leader_for_utf8(mt);
799 if (mt->write_using_libxml2)
806 ret = yaz_marc_write_xml(mt, &root_ptr, ns, format, type);
808 ret = yaz_marc_write_xml_turbo_xml(mt, &root_ptr, ns, format, type);
812 xmlDocPtr doc = xmlNewDoc(BAD_CAST "1.0");
815 xmlDocSetRootElement(doc, root_ptr);
816 xmlDocDumpMemory(doc, &buf_out, &len_out);
818 wrbuf_write(wr, (const char *) buf_out, len_out);
829 return yaz_marc_write_marcxml_wrbuf(mt, wr, ns, format, type, turbo);
832 int yaz_marc_write_marcxml(yaz_marc_t mt, WRBUF wr)
834 /* set leader 09 to 'a' for UNICODE */
835 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
836 if (!mt->leader_spec)
837 yaz_marc_modify_leader(mt, 9, "a");
838 return yaz_marc_write_marcxml_ns(mt, wr,
839 "http://www.loc.gov/MARC21/slim",
843 int yaz_marc_write_turbomarc(yaz_marc_t mt, WRBUF wr)
845 /* set leader 09 to 'a' for UNICODE */
846 /* http://www.loc.gov/marc/bibliographic/ecbdldrd.html#mrcblea */
847 if (!mt->leader_spec)
848 yaz_marc_modify_leader(mt, 9, "a");
849 return yaz_marc_write_marcxml_ns(mt, wr,
850 "http://www.indexdata.com/turbomarc", 0, 0, 1);
853 int yaz_marc_write_marcxchange(yaz_marc_t mt, WRBUF wr,
857 return yaz_marc_write_marcxml_ns(mt, wr,
858 "info:lc/xmlns/marcxchange-v1",
864 void add_marc_datafield_turbo_xml(yaz_marc_t mt, struct yaz_marc_node *n,
866 xmlNsPtr ns_record, WRBUF wr_cdata,
867 int identifier_length)
870 struct yaz_marc_subfield *s;
871 WRBUF subfield_name = wrbuf_alloc();
873 /* TODO consider if safe */
876 strncpy(field + 1, n->u.datafield.tag, 3);
878 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST field, 0);
880 if (n->u.datafield.indicator)
883 for (i = 0; n->u.datafield.indicator[i]; i++)
888 ind_val[0] = n->u.datafield.indicator[i];
890 sprintf(ind_str, "%s%d", indicator_name[1], i+1);
891 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
894 for (s = n->u.datafield.subfields; s; s = s->next)
897 xmlNode *ptr_subfield;
898 size_t using_code_len = get_subfield_len(mt, s->code_data,
900 wrbuf_rewind(wr_cdata);
901 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, s->code_data + using_code_len);
902 marc_iconv_reset(mt, wr_cdata);
904 wrbuf_rewind(subfield_name);
905 wrbuf_puts(subfield_name, "s");
906 not_written = element_name_append_attribute_value(mt, subfield_name, 0, s->code_data, using_code_len) != 0;
907 ptr_subfield = xmlNewTextChild(ptr, ns_record,
908 BAD_CAST wrbuf_cstr(subfield_name),
909 BAD_CAST wrbuf_cstr(wr_cdata));
912 /* Generate code attribute value and add */
913 wrbuf_rewind(wr_cdata);
914 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,s->code_data, using_code_len);
915 xmlNewProp(ptr_subfield, BAD_CAST "code", BAD_CAST wrbuf_cstr(wr_cdata));
918 wrbuf_destroy(subfield_name);
921 static int yaz_marc_write_xml_turbo_xml(yaz_marc_t mt, xmlNode **root_ptr,
926 struct yaz_marc_node *n;
927 int identifier_length;
928 const char *leader = 0;
933 for (n = mt->nodes; n; n = n->next)
934 if (n->which == YAZ_MARC_LEADER)
936 leader = n->u.leader;
942 if (!atoi_n_check(leader+11, 1, &identifier_length))
945 wr_cdata = wrbuf_alloc();
947 record_ptr = xmlNewNode(0, BAD_CAST "r");
948 *root_ptr = record_ptr;
950 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
951 xmlSetNs(record_ptr, ns_record);
954 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
956 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
957 for (n = mt->nodes; n; n = n->next)
967 case YAZ_MARC_DATAFIELD:
968 add_marc_datafield_turbo_xml(mt, n, record_ptr, ns_record, wr_cdata, identifier_length);
970 case YAZ_MARC_CONTROLFIELD:
971 wrbuf_rewind(wr_cdata);
972 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
973 marc_iconv_reset(mt, wr_cdata);
975 strncpy(field + 1, n->u.controlfield.tag, 3);
976 ptr = xmlNewTextChild(record_ptr, ns_record,
978 BAD_CAST wrbuf_cstr(wr_cdata));
980 case YAZ_MARC_COMMENT:
981 ptr = xmlNewComment(BAD_CAST n->u.comment);
982 xmlAddChild(record_ptr, ptr);
984 case YAZ_MARC_LEADER:
985 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "l",
986 BAD_CAST n->u.leader);
990 wrbuf_destroy(wr_cdata);
995 int yaz_marc_write_xml(yaz_marc_t mt, xmlNode **root_ptr,
1000 struct yaz_marc_node *n;
1001 int identifier_length;
1002 const char *leader = 0;
1003 xmlNode *record_ptr;
1007 for (n = mt->nodes; n; n = n->next)
1008 if (n->which == YAZ_MARC_LEADER)
1010 leader = n->u.leader;
1016 if (!atoi_n_check(leader+11, 1, &identifier_length))
1019 wr_cdata = wrbuf_alloc();
1021 record_ptr = xmlNewNode(0, BAD_CAST "record");
1022 *root_ptr = record_ptr;
1024 ns_record = xmlNewNs(record_ptr, BAD_CAST ns, 0);
1025 xmlSetNs(record_ptr, ns_record);
1028 xmlNewProp(record_ptr, BAD_CAST "format", BAD_CAST format);
1030 xmlNewProp(record_ptr, BAD_CAST "type", BAD_CAST type);
1031 for (n = mt->nodes; n; n = n->next)
1033 struct yaz_marc_subfield *s;
1038 case YAZ_MARC_DATAFIELD:
1039 ptr = xmlNewChild(record_ptr, ns_record, BAD_CAST "datafield", 0);
1040 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.datafield.tag);
1041 if (n->u.datafield.indicator)
1044 for (i = 0; n->u.datafield.indicator[i]; i++)
1049 sprintf(ind_str, "ind%d", i+1);
1050 ind_val[0] = n->u.datafield.indicator[i];
1052 xmlNewProp(ptr, BAD_CAST ind_str, BAD_CAST ind_val);
1055 for (s = n->u.datafield.subfields; s; s = s->next)
1057 xmlNode *ptr_subfield;
1058 size_t using_code_len = get_subfield_len(mt, s->code_data,
1060 wrbuf_rewind(wr_cdata);
1061 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd,
1062 s->code_data + using_code_len);
1063 marc_iconv_reset(mt, wr_cdata);
1064 ptr_subfield = xmlNewTextChild(
1066 BAD_CAST "subfield", BAD_CAST wrbuf_cstr(wr_cdata));
1068 wrbuf_rewind(wr_cdata);
1069 wrbuf_iconv_write(wr_cdata, mt->iconv_cd,
1070 s->code_data, using_code_len);
1071 xmlNewProp(ptr_subfield, BAD_CAST "code",
1072 BAD_CAST wrbuf_cstr(wr_cdata));
1075 case YAZ_MARC_CONTROLFIELD:
1076 wrbuf_rewind(wr_cdata);
1077 wrbuf_iconv_puts(wr_cdata, mt->iconv_cd, n->u.controlfield.data);
1078 marc_iconv_reset(mt, wr_cdata);
1080 ptr = xmlNewTextChild(record_ptr, ns_record,
1081 BAD_CAST "controlfield",
1082 BAD_CAST wrbuf_cstr(wr_cdata));
1084 xmlNewProp(ptr, BAD_CAST "tag", BAD_CAST n->u.controlfield.tag);
1086 case YAZ_MARC_COMMENT:
1087 ptr = xmlNewComment(BAD_CAST n->u.comment);
1088 xmlAddChild(record_ptr, ptr);
1090 case YAZ_MARC_LEADER:
1091 xmlNewTextChild(record_ptr, ns_record, BAD_CAST "leader",
1092 BAD_CAST n->u.leader);
1096 wrbuf_destroy(wr_cdata);
1102 int yaz_marc_write_iso2709(yaz_marc_t mt, WRBUF wr)
1104 struct yaz_marc_node *n;
1105 int indicator_length;
1106 int identifier_length;
1107 int length_data_entry;
1108 int length_starting;
1109 int length_implementation;
1110 int data_offset = 0;
1111 const char *leader = 0;
1112 WRBUF wr_dir, wr_head, wr_data_tmp;
1115 for (n = mt->nodes; n; n = n->next)
1116 if (n->which == YAZ_MARC_LEADER)
1117 leader = n->u.leader;
1121 if (!atoi_n_check(leader+10, 1, &indicator_length))
1123 if (!atoi_n_check(leader+11, 1, &identifier_length))
1125 if (!atoi_n_check(leader+20, 1, &length_data_entry))
1127 if (!atoi_n_check(leader+21, 1, &length_starting))
1129 if (!atoi_n_check(leader+22, 1, &length_implementation))
1132 wr_data_tmp = wrbuf_alloc();
1133 wr_dir = wrbuf_alloc();
1134 for (n = mt->nodes; n; n = n->next)
1136 int data_length = 0;
1137 struct yaz_marc_subfield *s;
1141 case YAZ_MARC_DATAFIELD:
1142 wrbuf_printf(wr_dir, "%.3s", n->u.datafield.tag);
1143 data_length += indicator_length;
1144 wrbuf_rewind(wr_data_tmp);
1145 for (s = n->u.datafield.subfields; s; s = s->next)
1147 /* write dummy IDFS + content */
1148 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1149 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd, s->code_data);
1150 marc_iconv_reset(mt, wr_data_tmp);
1152 /* write dummy FS (makes MARC-8 to become ASCII) */
1153 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');
1154 marc_iconv_reset(mt, wr_data_tmp);
1155 data_length += wrbuf_len(wr_data_tmp);
1157 case YAZ_MARC_CONTROLFIELD:
1158 wrbuf_printf(wr_dir, "%.3s", n->u.controlfield.tag);
1160 wrbuf_rewind(wr_data_tmp);
1161 wrbuf_iconv_puts(wr_data_tmp, mt->iconv_cd,
1162 n->u.controlfield.data);
1163 marc_iconv_reset(mt, wr_data_tmp);
1164 wrbuf_iconv_putchar(wr_data_tmp, mt->iconv_cd, ' ');/* field sep */
1165 marc_iconv_reset(mt, wr_data_tmp);
1166 data_length += wrbuf_len(wr_data_tmp);
1168 case YAZ_MARC_COMMENT:
1170 case YAZ_MARC_LEADER:
1175 wrbuf_printf(wr_dir, "%0*d", length_data_entry, data_length);
1176 wrbuf_printf(wr_dir, "%0*d", length_starting, data_offset);
1177 data_offset += data_length;
1180 /* mark end of directory */
1181 wrbuf_putc(wr_dir, ISO2709_FS);
1183 /* base address of data (comes after leader+directory) */
1184 base_address = 24 + wrbuf_len(wr_dir);
1186 wr_head = wrbuf_alloc();
1188 /* write record length */
1189 wrbuf_printf(wr_head, "%05d", base_address + data_offset + 1);
1190 /* from "original" leader */
1191 wrbuf_write(wr_head, leader+5, 7);
1192 /* base address of data */
1193 wrbuf_printf(wr_head, "%05d", base_address);
1194 /* from "original" leader */
1195 wrbuf_write(wr_head, leader+17, 7);
1197 wrbuf_write(wr, wrbuf_buf(wr_head), 24);
1198 wrbuf_write(wr, wrbuf_buf(wr_dir), wrbuf_len(wr_dir));
1199 wrbuf_destroy(wr_head);
1200 wrbuf_destroy(wr_dir);
1201 wrbuf_destroy(wr_data_tmp);
1203 for (n = mt->nodes; n; n = n->next)
1205 struct yaz_marc_subfield *s;
1209 case YAZ_MARC_DATAFIELD:
1210 wrbuf_write(wr, n->u.datafield.indicator, indicator_length);
1211 for (s = n->u.datafield.subfields; s; s = s->next)
1213 wrbuf_putc(wr, ISO2709_IDFS);
1214 wrbuf_iconv_puts(wr, mt->iconv_cd, s->code_data);
1215 marc_iconv_reset(mt, wr);
1217 wrbuf_putc(wr, ISO2709_FS);
1219 case YAZ_MARC_CONTROLFIELD:
1220 wrbuf_iconv_puts(wr, mt->iconv_cd, n->u.controlfield.data);
1221 marc_iconv_reset(mt, wr);
1222 wrbuf_putc(wr, ISO2709_FS);
1224 case YAZ_MARC_COMMENT:
1226 case YAZ_MARC_LEADER:
1230 wrbuf_printf(wr, "%c", ISO2709_RS);
1235 int yaz_marc_decode_wrbuf(yaz_marc_t mt, const char *buf, int bsize, WRBUF wr)
1237 int s, r = yaz_marc_read_iso2709(mt, buf, bsize);
1240 s = yaz_marc_write_mode(mt, wr); /* returns 0 for OK, -1 otherwise */
1242 return -1; /* error */
1243 return r; /* OK, return length > 0 */
1246 int yaz_marc_decode_buf (yaz_marc_t mt, const char *buf, int bsize,
1247 const char **result, size_t *rsize)
1251 wrbuf_rewind(mt->m_wr);
1252 r = yaz_marc_decode_wrbuf(mt, buf, bsize, mt->m_wr);
1254 *result = wrbuf_cstr(mt->m_wr);
1256 *rsize = wrbuf_len(mt->m_wr);
1260 void yaz_marc_xml(yaz_marc_t mt, int xmlmode)
1262 mt->output_format = xmlmode;
1265 void yaz_marc_debug(yaz_marc_t mt, int level)
1271 void yaz_marc_iconv(yaz_marc_t mt, yaz_iconv_t cd)
1276 yaz_iconv_t yaz_marc_get_iconv(yaz_marc_t mt)
1278 return mt->iconv_cd;
1281 void yaz_marc_modify_leader(yaz_marc_t mt, size_t off, const char *str)
1283 struct yaz_marc_node *n;
1285 for (n = mt->nodes; n; n = n->next)
1286 if (n->which == YAZ_MARC_LEADER)
1288 leader = n->u.leader;
1289 memcpy(leader+off, str, strlen(str));
1294 int yaz_marc_leader_spec(yaz_marc_t mt, const char *leader_spec)
1296 xfree(mt->leader_spec);
1297 mt->leader_spec = 0;
1300 char dummy_leader[24];
1301 if (marc_exec_leader(leader_spec, dummy_leader, 24))
1303 mt->leader_spec = xstrdup(leader_spec);
1308 static int marc_exec_leader(const char *leader_spec, char *leader, size_t size)
1310 const char *cp = leader_spec;
1315 int no_read = 0, no = 0;
1317 no = sscanf(cp, "%d=%20[^,]%n", &pos, val, &no_read);
1318 if (no < 2 || no_read < 3)
1320 if (pos < 0 || (size_t) pos >= size)
1325 const char *vp = strchr(val+1, '\'');
1331 if (len + pos > size)
1333 memcpy(leader + pos, val+1, len);
1335 else if (*val >= '0' && *val <= '9')
1351 int yaz_marc_decode_formatstr(const char *arg)
1354 if (!strcmp(arg, "marc"))
1355 mode = YAZ_MARC_ISO2709;
1356 if (!strcmp(arg, "marcxml"))
1357 mode = YAZ_MARC_MARCXML;
1358 if (!strcmp(arg, "turbomarc"))
1359 mode = YAZ_MARC_TURBOMARC;
1360 if (!strcmp(arg, "marcxchange"))
1361 mode = YAZ_MARC_XCHANGE;
1362 if (!strcmp(arg, "line"))
1363 mode = YAZ_MARC_LINE;
1367 void yaz_marc_write_using_libxml2(yaz_marc_t mt, int enable)
1369 mt->write_using_libxml2 = enable;
1375 * c-file-style: "Stroustrup"
1376 * indent-tabs-mode: nil
1378 * vim: shiftwidth=4 tabstop=8 expandtab