1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2012 Index Data
3 * See the file LICENSE for details.
7 * \file marc_read_xml.c
8 * \brief Implements reading of MARC as XML
21 #include <yaz/marcdisp.h>
22 #include <yaz/wrbuf.h>
23 #include <yaz/yaz-util.h>
24 #include <yaz/nmem_xml.h>
27 #include <libxml/tree.h>
/*
 * Reads MARCXML 'subfield' elements from the sibling list starting at ptr
 * and hands each one to yaz_marc_add_subfield() as a single string made of
 * the 'code' attribute value followed by the element's text content.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip), so
 * braces, else-branches and return statements are not visible. The comments
 * below describe only the statements that are shown; the error-path
 * behaviour should be confirmed against the complete file.
 */
31 int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
/* Memory for the combined code+data string comes from mt's NMEM. */
33 NMEM nmem = yaz_marc_get_nmem(mt);
/* Walk ptr and its following siblings; only element nodes are examined. */
34 for (; ptr; ptr = ptr->next)
36 if (ptr->type == XML_ELEMENT_NODE)
38 if (!strcmp((const char *) ptr->name, "subfield"))
40 size_t ctrl_data_len = 0;
41 char *ctrl_data_buf = 0;
42 const xmlNode *p = 0, *ptr_code = 0;
43 struct _xmlAttr *attr;
/* Scan the attributes; remember the value node(s) of 'code'. */
44 for (attr = ptr->properties; attr; attr = attr->next)
45 if (!strcmp((const char *)attr->name, "code"))
46 ptr_code = attr->children;
/* Diagnostic for an attribute other than 'code' (enclosing branch is not
 * visible in this listing). */
50 mt, "Bad attribute '%.80s' for 'subfield'",
/* Diagnostic for a subfield without 'code' (presumably guarded by a test
 * of ptr_code that is not visible here). */
57 mt, "Missing attribute 'code' for 'subfield'" );
/* The code value must be a text node. The strlen() below is the tail of a
 * statement whose start is not visible; presumably it initialises
 * ctrl_data_len with the length of the code - TODO confirm. */
60 if (ptr_code->type == XML_TEXT_NODE)
63 strlen((const char *)ptr_code->content);
68 mt, "Missing value for 'code' in 'subfield'" );
/* First pass over the children: add up the lengths of all text nodes. */
71 for (p = ptr->children; p ; p = p->next)
72 if (p->type == XML_TEXT_NODE)
73 ctrl_data_len += strlen((const char *)p->content);
/* One extra byte for the terminating NUL. */
74 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
/* Build "<code><data>": the code first, then every text child in order. */
75 strcpy(ctrl_data_buf, (const char *)ptr_code->content);
76 for (p = ptr->children; p ; p = p->next)
77 if (p->type == XML_TEXT_NODE)
78 strcat(ctrl_data_buf, (const char *)p->content);
/* The length passed on covers the whole buffer, code included. */
79 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
/* Diagnostic for an element that is not named 'subfield'. */
84 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/*
 * Copies name into the caller-supplied tag_buffer.
 *
 * NOTE(review): tag_buffer is declared as char[5] (which, for a parameter,
 * is only documentation) and strcpy() is unbounded. The test on `length`
 * that presumably guards the copy, and the return statements, are on lines
 * not shown in this listing - confirm that the guard restricts name to at
 * most 4 characters.
 */
92 const char *tag_value_extract(const char *name, char tag_buffer[5])
94 size_t length = strlen(name);
97 strcpy(tag_buffer, name);
103 // Given an xmlNode ptr, extract a value from either an element name or from a given attribute
// Two sources are visible below: the element name with its first character
// dropped (copied into nmem), or the value of the attribute called
// attribute_name.
// NOTE(review): the condition selecting the first source, the declaration of
// `attr` and the final return are on lines not shown in this listing.
104 char *element_attribute_value_extract(const xmlNode *ptr,
105 const char *attribute_name,
108 const char *name = (const char *) ptr->name;
109 size_t length = strlen(name);
// name+1 skips the leading character of the element name.
112 return nmem_strdup(nmem, name+1);
113 // TODO Extract from attribute where matches attribute_name
// Return the text of the first attribute whose name equals attribute_name.
114 for (attr = ptr->properties; attr; attr = attr->next)
115 if (!strcmp((const char *)attr->name, attribute_name))
116 return nmem_text_node_cdata(attr->children, nmem);
/*
 * TurboMARC counterpart of the MARCXML subfield reader: for each element in
 * the sibling list starting at ptr whose name begins with "s", obtains the
 * subfield code through element_attribute_value_extract() and passes
 * "<code><data>" to yaz_marc_add_subfield().
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip); the
 * declaration of `p`, the braces, and the return statements are not visible.
 */
121 int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
123 for (; ptr; ptr = ptr->next)
125 if (ptr->type == XML_ELEMENT_NODE)
/* Prefix test: any element name starting with 's' is accepted. */
127 if (!strncmp((const char *) ptr->name, "s", 1))
129 NMEM nmem = yaz_marc_get_nmem(mt);
131 size_t ctrl_data_len = 0;
132 char *ctrl_data_buf = 0;
/* The code comes from the element name or from the 'code' attribute,
 * as decided by element_attribute_value_extract(). */
133 const char *tag_value = element_attribute_value_extract(ptr, "code", nmem);
/* Diagnostic for a missing code (presumably guarded by a test of tag_value
 * that is not visible here). */
137 mt, "Missing 'code' value for 'subfield'" );
/* Start the total with the length of the code itself. */
141 ctrl_data_len = strlen((const char *) tag_value);
142 // Extract (length) from CDATA
143 for (p = ptr->children; p ; p = p->next)
144 if (p->type == XML_TEXT_NODE)
145 ctrl_data_len += strlen((const char *)p->content);
146 // Allocate memory for the code value (1 character, possibly multi-byte), the data, and a terminating NUL
147 ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
148 // Build a string with "<Code><data>"
149 strcpy(ctrl_data_buf, (const char *) tag_value);
150 for (p = ptr->children; p ; p = p->next)
151 if (p->type == XML_TEXT_NODE)
152 strcat(ctrl_data_buf, (const char *)p->content);
/* The length passed on covers the whole buffer, code included. */
153 yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
/* Diagnostic for an element whose name does not start with 's'. */
158 mt, "Expected element 'subfield', got '%.80s'", ptr->name);
/*
 * Locates the leader element in the sibling list starting at *ptr_p, checks
 * that its text is exactly 24 characters long and passes it to
 * yaz_marc_set_leader().
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip). Some
 * local declarations, the braces, the return statements, most arguments of
 * the yaz_marc_set_leader() call and any update of *ptr_p are not visible.
 */
167 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p,
168 int *indicator_length)
170 int identifier_length;
172 int length_data_entry;
174 int length_implementation;
175 const char *leader = 0;
176 const xmlNode *ptr = *ptr_p;
178 for(; ptr; ptr = ptr->next)
179 if (ptr->type == XML_ELEMENT_NODE)
/* Accepts "leader" (MARCXML) or any name starting with 'l'. Because
 * "leader" itself starts with 'l', the prefix test alone decides. */
181 if ( !strcmp( (const char *) ptr->name, "leader") ||
182 (!strncmp((const char *) ptr->name, "l", 1) ))
/* Each text child overwrites `leader`, so the last text node wins. */
184 xmlNode *p = ptr->children;
185 for(; p; p = p->next)
186 if (p->type == XML_TEXT_NODE)
187 leader = (const char *) p->content;
/* Diagnostic for an element that is not a leader. */
193 mt, "Expected element 'leader', got '%.80s'", ptr->name);
198 yaz_marc_cprintf(mt, "Missing element 'leader'");
201 if (strlen(leader) != 24)
/* NOTE(review): strlen() returns size_t but the format uses %d. Assuming
 * yaz_marc_cprintf() is printf-style, the specifier and argument types do
 * not match; consider casting the argument to int - TODO confirm. */
203 yaz_marc_cprintf(mt, "Bad length %d of leader data."
204 " Must have length of 24 characters", strlen(leader));
/* Only the first and last lines of this call are visible in the listing. */
207 yaz_marc_set_leader(mt, leader,
213 &length_implementation);
/*
 * Reads MARCXML 'controlfield' and 'datafield' elements from the sibling
 * list starting at ptr and adds them to mt. For a datafield the indicator
 * attributes are collected into indstr and the child subfields are read
 * with yaz_marc_read_xml_subfields().
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip); the
 * declaration of `i`, the body of the indstr initialisation loop, the
 * braces, else-branches and return statements are not visible.
 */
218 static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
219 int indicator_length)
221 for(; ptr; ptr = ptr->next)
222 if (ptr->type == XML_ELEMENT_NODE)
224 if (!strcmp( (const char *) ptr->name, "controlfield"))
226 const xmlNode *ptr_tag = 0;
227 struct _xmlAttr *attr;
/* Remember the value node(s) of the 'tag' attribute. */
228 for (attr = ptr->properties; attr; attr = attr->next)
229 if (!strcmp((const char *)attr->name, "tag"))
230 ptr_tag = attr->children;
/* Diagnostic for an attribute other than 'tag' (branch not visible). */
234 mt, "Bad attribute '%.80s' for 'controlfield'",
241 mt, "Missing attribute 'tag' for 'controlfield'" );
/* The element's children carry the control field data. */
244 yaz_marc_add_controlfield_xml(mt, ptr_tag, ptr->children);
246 else if (!strcmp((const char *) ptr->name, "datafield"))
/* NOTE(review): indicator N is stored at indstr[N - 1] further down, so
 * slot 0 is in use, contrary to the "0(unused)" remark on the next line. */
248 char indstr[11]; /* 0(unused), 1,....9, + zero term */
249 const xmlNode *ptr_tag = 0;
250 struct _xmlAttr *attr;
/* Loop body is not visible; presumably it pre-fills indstr - TODO confirm. */
252 for (i = 0; i < indicator_length; i++)
255 for (attr = ptr->properties; attr; attr = attr->next)
256 if (!strcmp((const char *)attr->name, "tag"))
257 ptr_tag = attr->children;
/* Attribute names of exactly four characters starting with "ind". */
258 else if (strlen((const char *)attr->name) == 4 &&
259 !memcmp(attr->name, "ind", 3))
/* The fourth character is the indicator number; atoi() yields 0 when it is
 * not a digit, which the `no > 0` test below rejects. */
261 int no = atoi((const char *)attr->name + 3);
/* Accept only a non-empty text value for indicator 1..indicator_length. */
262 if (attr->children &&
263 attr->children->type == XML_TEXT_NODE &&
264 no <= indicator_length && no > 0 &&
265 attr->children->content[0])
/* Only the first character of the attribute value is kept. */
267 indstr[no - 1] = attr->children->content[0];
/* Two diagnostics with identical text follow; the branches that emit them
 * are not visible in this listing. */
272 mt, "Bad attribute '%.80s' for 'datafield'",
279 mt, "Bad attribute '%.80s' for 'datafield'",
285 mt, "Missing attribute 'tag' for 'datafield'" );
288 yaz_marc_add_datafield_xml(mt, ptr_tag,
289 indstr, indicator_length);
/* A non-zero result from the subfield reader is tested here; the handling
 * itself is not visible. */
290 if (yaz_marc_read_xml_subfields(mt, ptr->children))
/* Diagnostic for an element that is neither controlfield nor datafield. */
296 "Expected element controlfield or datafield,"
297 " got %.80s", ptr->name);
/*
 * TurboMARC counterpart of the MARCXML field reader: elements whose name
 * starts with "c" are added as control fields, elements whose name starts
 * with "d" as data fields; the tag is obtained through
 * element_attribute_value_extract(). Data field children are read with
 * yaz_marc_read_turbo_xml_subfields().
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip); the
 * declarations of `i` and `tag_value` in the data field branch, the body of
 * the indstr initialisation loop, the braces and the return statements are
 * not visible.
 */
305 static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr,
306 int indicator_length)
308 for(; ptr; ptr = ptr->next)
309 if (ptr->type == XML_ELEMENT_NODE)
/* Prefix test: any element name starting with 'c' is a control field. */
311 if (!strncmp( (const char *) ptr->name, "c", 1))
313 NMEM nmem = yaz_marc_get_nmem(mt);
314 char *tag_value = element_attribute_value_extract(ptr, "tag", nmem);
/* Diagnostic for a missing tag (presumably guarded by a test of tag_value
 * that is not visible here). */
318 mt, "Missing attribute 'tag' for 'controlfield'" );
321 yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children);
/* Prefix test: any element name starting with 'd' is a data field. */
323 else if (!strncmp((const char *) ptr->name, "d",1))
325 struct _xmlAttr *attr;
326 NMEM nmem = yaz_marc_get_nmem(mt);
/* One byte per indicator plus one extra byte. */
328 char *indstr = nmem_malloc(nmem, indicator_length + 1);
/* Loop body is not visible; presumably it pre-fills indstr - TODO confirm. */
330 for (i = 0; i < indicator_length; i++)
333 tag_value = element_attribute_value_extract(ptr, "tag", nmem);
337 mt, "Missing attribute 'tag' for 'datafield'" );
/* Attribute names of exactly two characters starting with 'i'. */
340 for (attr = ptr->properties; attr; attr = attr->next)
341 if (strlen((const char *)attr->name) == 2 &&
342 attr->name[0] == 'i')
344 //extract indicator attribute from i#="Y" pattern
/* atoi() yields 0 for a non-digit, which the `no > 0` test below rejects. */
345 int no = atoi((const char *)attr->name + 1);
/* Accept only a non-empty text value for indicator 1..indicator_length,
 * which keeps the write below inside the allocated buffer. */
346 if (attr->children &&
347 attr->children->type == XML_TEXT_NODE &&
348 no <= indicator_length && no > 0 &&
349 attr->children->content[0])
/* Only the first character of the attribute value is kept. */
351 indstr[no - 1] = attr->children->content[0];
/* Two diagnostics with identical text follow; the branches that emit them
 * are not visible in this listing. */
356 mt, "Bad attribute '%.80s' for 'd'",attr->name);
362 mt, "Bad attribute '%.80s' for 'd'", attr->name);
364 yaz_marc_add_datafield_xml2(mt, tag_value, indstr);
/* A non-zero result from the subfield reader is tested here; the handling
 * itself is not visible. */
365 if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */))
/* Diagnostic for an element whose name starts with neither 'c' nor 'd'. */
371 "Expected element controlfield or datafield,"
372 " got %.80s", ptr->name);
/*
 * Entry point for reading a MARC record from an XML tree. Looks for a
 * 'record' (MARCXML) or 'r' (TurboMARC) element in the sibling list starting
 * at ptr, reads the leader, then dispatches to the field reader that matches
 * the detected format.
 *
 * NOTE(review): this listing has gaps (the embedded line numbers skip); the
 * declaration of `format`, the braces, the switch header, the error returns
 * and any statement that moves ptr into the record's children are not
 * visible.
 */
383 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
/* Receives the indicator length from yaz_marc_read_xml_leader(). */
385 int indicator_length = 0;
389 for(; ptr; ptr = ptr->next)
390 if (ptr->type == XML_ELEMENT_NODE)
/* The root element name selects the format. */
392 if (!strcmp((const char *) ptr->name, "record"))
394 format = YAZ_MARC_MARCXML;
397 else if (!strcmp((const char *) ptr->name, "r"))
399 format = YAZ_MARC_TURBOMARC;
/* Diagnostic for any other element name. */
405 mt, "Unknown element '%.80s' in MARC XML reader",
412 yaz_marc_cprintf(mt, "Missing element 'record' in MARC XML record");
415 /* ptr points to record node now */
/* ptr is passed by address so the leader reader can reposition it; a
 * non-zero result is tested here, the handling itself is not visible. */
417 if (yaz_marc_read_xml_leader(mt, &ptr, &indicator_length))
/* Field parsing starts at the sibling following ptr. */
422 case YAZ_MARC_MARCXML:
423 return yaz_marc_read_xml_fields(mt, ptr->next, indicator_length);
424 case YAZ_MARC_TURBOMARC:
425 return yaz_marc_read_turbo_xml_fields(mt, ptr->next, indicator_length);
435 * c-file-style: "Stroustrup"
436 * indent-tabs-mode: nil
438 * vim: shiftwidth=4 tabstop=8 expandtab