-/*
- * Copyright (C) 1995-2007, Index Data ApS
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2010 Index Data
* See the file LICENSE for details.
- *
- * $Id: marc_read_xml.c,v 1.4 2007-12-17 20:59:30 adam Exp $
*/
/**
return 0;
}
+/* Copy a tag name of exactly three characters into tag_buffer.
+ *
+ * Returns tag_buffer when name is three characters long. For any other
+ * length, returns 0 and leaves tag_buffer untouched. */
+const char *tag_value_extract(const char *name, char tag_buffer[5])
+{
+    if (strlen(name) != 3)
+        return 0;
+    /* three characters plus the terminating NUL */
+    memcpy(tag_buffer, name, 4);
+    return tag_buffer;
+}
+
+/* Extract a value for an element, either from the element name itself
+ * or from one of its attributes.
+ *
+ * When the element name is longer than one character, the result is a
+ * copy (made with nmem_strdup) of the name with its first character
+ * dropped. Otherwise the element's attributes are searched for the
+ * first one named attribute_name, and the result is whatever
+ * nmem_text_node_cdata() returns for that attribute's children.
+ * Returns 0 when neither source yields a value. */
+char *element_attribute_value_extract(const xmlNode *ptr,
+                                      const char *attribute_name,
+                                      NMEM nmem)
+{
+    const char *element_name = (const char *) ptr->name;
+    xmlAttr *cur;
+    if (strlen(element_name) > 1)
+        return nmem_strdup(nmem, element_name + 1);
+    cur = ptr->properties;
+    while (cur)
+    {
+        if (strcmp((const char *) cur->name, attribute_name) == 0)
+            return nmem_text_node_cdata(cur->children, nmem);
+        cur = cur->next;
+    }
+    return 0;
+}
+
+
+/* Read the subfield elements in the sibling list starting at ptr and
+ * add each of them to mt.
+ *
+ * Nodes that are not elements are skipped. Every element must have a
+ * name starting with "s". Its code comes from
+ * element_attribute_value_extract() (the element name, or failing that
+ * the "code" attribute) and its data from the element's text-node
+ * children. The two are joined as "<code><data>" and handed to
+ * yaz_marc_add_subfield().
+ *
+ * Returns 0 on success. Returns -1, after recording a message with
+ * yaz_marc_cprintf(), on an unexpected element or a missing code. */
+int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
+{
+    for (; ptr; ptr = ptr->next)
+    {
+        NMEM nmem;
+        const xmlNode *child;
+        const char *code;
+        char *code_data;
+        size_t total;
+        size_t used;
+        if (ptr->type != XML_ELEMENT_NODE)
+            continue;
+        if (strncmp((const char *) ptr->name, "s", 1) != 0)
+        {
+            yaz_marc_cprintf(
+                mt, "Expected element 'subfield', got '%.80s'", ptr->name);
+            return -1;
+        }
+        nmem = yaz_marc_get_nmem(mt);
+        code = element_attribute_value_extract(ptr, "code", nmem);
+        if (!code)
+        {
+            yaz_marc_cprintf(
+                mt, "Missing 'code' value for 'subfield'" );
+            return -1;
+        }
+        /* first pass: total length of code plus all text nodes */
+        used = strlen(code);
+        total = used;
+        for (child = ptr->children; child; child = child->next)
+            if (child->type == XML_TEXT_NODE)
+                total += strlen((const char *) child->content);
+        /* one extra byte for the terminating NUL */
+        code_data = (char *) nmem_malloc(nmem, total + 1);
+        /* second pass: code first, then each text node appended at
+         * the running offset */
+        memcpy(code_data, code, used);
+        for (child = ptr->children; child; child = child->next)
+            if (child->type == XML_TEXT_NODE)
+            {
+                size_t piece = strlen((const char *) child->content);
+                memcpy(code_data + used, child->content, piece);
+                used += piece;
+            }
+        code_data[used] = '\0';
+        yaz_marc_add_subfield(mt, code_data, total);
+    }
+    return 0;
+}
+
+
static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
{
int indicator_length;
for(; ptr; ptr = ptr->next)
if (ptr->type == XML_ELEMENT_NODE)
{
- if (!strcmp((const char *) ptr->name, "leader"))
+ if ( !strcmp( (const char *) ptr->name, "leader") ||
+ (!strncmp((const char *) ptr->name, "l", 1) ))
{
xmlNode *p = ptr->children;
for(; p; p = p->next)
for(; ptr; ptr = ptr->next)
if (ptr->type == XML_ELEMENT_NODE)
{
- if (!strcmp((const char *) ptr->name, "controlfield"))
+ if (!strcmp( (const char *) ptr->name, "controlfield"))
{
const xmlNode *ptr_tag = 0;
struct _xmlAttr *attr;
}
return 0;
}
+
+
+/* Read control fields and data fields from the sibling list starting
+ * at ptr and add them to mt.
+ *
+ * Nodes that are not elements are skipped. An element whose name
+ * starts with "c" is added as a control field, one whose name starts
+ * with "d" as a data field followed by its subfields. In both cases
+ * the tag comes from element_attribute_value_extract() (the element
+ * name, or failing that the "tag" attribute).
+ *
+ * For data fields, attributes named i1..i9 supply the indicators. Any
+ * other attribute is reported with yaz_marc_cprintf() but does not
+ * stop processing. The one exception is a "tag" attribute that was
+ * just used as the tag source, which is accepted silently.
+ *
+ * Returns 0 on success. Returns -1, after recording a message with
+ * yaz_marc_cprintf(), on a missing tag or an unexpected element, or
+ * when reading the subfields fails. */
+static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
+{
+    for (; ptr; ptr = ptr->next)
+    {
+        const char *name;
+        if (ptr->type != XML_ELEMENT_NODE)
+            continue;
+        name = (const char *) ptr->name;
+        if (!strncmp(name, "c", 1))
+        {
+            NMEM nmem = yaz_marc_get_nmem(mt);
+            char *tag_value =
+                element_attribute_value_extract(ptr, "tag", nmem);
+            if (!tag_value)
+            {
+                yaz_marc_cprintf(
+                    mt, "Missing attribute 'tag' for 'controlfield'" );
+                return -1;
+            }
+            yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children);
+        }
+        else if (!strncmp(name, "d", 1))
+        {
+            struct _xmlAttr *attr;
+            NMEM nmem = yaz_marc_get_nmem(mt);
+            char *tag_value;
+            /* slot 0 is unused, slots 1..9 hold the indicators and
+             * slot 10 keeps the string NUL-terminated */
+            char *indstr = nmem_malloc(nmem, 11);
+            /* A one-character element name carries no tag, so
+             * element_attribute_value_extract() falls back to the
+             * "tag" attribute; that attribute is then legitimate. */
+            int tag_from_attr = (name[1] == '\0');
+            memset(indstr, 0, 11);
+            tag_value = element_attribute_value_extract(ptr, "tag", nmem);
+            if (!tag_value)
+            {
+                yaz_marc_cprintf(
+                    mt, "Missing attribute 'tag' for 'datafield'" );
+                return -1;
+            }
+            for (attr = ptr->properties; attr; attr = attr->next)
+            {
+                const char *attr_name = (const char *) attr->name;
+                /* Only i1..i9 are indicators. Checking the digit here
+                 * stops names such as "i0" or "ix" from being written
+                 * silently into the unused slot 0. */
+                if (attr_name[0] == 'i'
+                    && attr_name[1] >= '1' && attr_name[1] <= '9'
+                    && attr_name[2] == '\0')
+                {
+                    if (attr->children
+                        && attr->children->type == XML_TEXT_NODE)
+                        indstr[attr_name[1] - '0'] =
+                            attr->children->content[0];
+                }
+                else if (tag_from_attr && !strcmp(attr_name, "tag"))
+                {
+                    /* already consumed as the field tag above */
+                }
+                else
+                {
+                    yaz_marc_cprintf(
+                        mt, "Bad attribute '%.80s' for 'datafield'",
+                        attr->name);
+                }
+            }
+            /* note that indstr[0] is unused so we use indstr[1..] */
+            yaz_marc_add_datafield_xml2(mt, tag_value, indstr+1);
+            if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children))
+                return -1;
+        }
+        else
+        {
+            yaz_marc_cprintf(mt,
+                             "Expected element controlfield or datafield,"
+                             " got %.80s", ptr->name);
+            return -1;
+        }
+    }
+    return 0;
+}
+
+
#endif
+#if YAZ_HAVE_XML2
int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
{
-#if YAZ_HAVE_XML2
+ int format = 0;
yaz_marc_reset(mt);
-
+
for(; ptr; ptr = ptr->next)
if (ptr->type == XML_ELEMENT_NODE)
{
if (!strcmp((const char *) ptr->name, "record"))
+ {
+ format = YAZ_MARC_MARCXML;
break;
+ }
+ else if (!strcmp((const char *) ptr->name, "r"))
+ {
+ format = YAZ_MARC_TURBOMARC;
+ break;
+ }
else
{
yaz_marc_cprintf(
ptr = ptr->children;
if (yaz_marc_read_xml_leader(mt, &ptr))
return -1;
- return yaz_marc_read_xml_fields(mt, ptr->next);
-#else
+
+ switch (format)
+ {
+ case YAZ_MARC_MARCXML:
+ return yaz_marc_read_xml_fields(mt, ptr->next);
+ case YAZ_MARC_TURBOMARC:
+ return yaz_marc_read_turbo_xml_fields(mt, ptr->next);
+ }
return -1;
-#endif
}
+#endif
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab