X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarc_read_xml.c;h=7356ca2f67973d04fb36ce6148b4231e20c9f106;hp=3f3e05bf24dcf0293e5ccd3a8ce729fdcefc7c90;hb=b06636208651777c08e2456bf3b5953fc3c0f2c2;hpb=f15418ee295542935d616a2163377b71e40ce04f diff --git a/src/marc_read_xml.c b/src/marc_read_xml.c index 3f3e05b..7356ca2 100644 --- a/src/marc_read_xml.c +++ b/src/marc_read_xml.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2010 Index Data + * Copyright (C) 1995-2013 Index Data * See the file LICENSE for details. */ @@ -18,7 +18,6 @@ #include #include -#include #include #include #include @@ -60,7 +59,7 @@ int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) } if (ptr_code->type == XML_TEXT_NODE) { - ctrl_data_len = + ctrl_data_len = strlen((const char *)ptr_code->content); } else @@ -90,46 +89,48 @@ int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) return 0; } -const char *tag_value_extract(const char *name, char tag_buffer[5]) { - size_t length = strlen(name); - if (length == 3) { - strcpy(tag_buffer, name); - return tag_buffer; - } - return 0; +const char *tag_value_extract(const char *name, char tag_buffer[5]) +{ + size_t length = strlen(name); + if (length == 3) + { + strcpy(tag_buffer, name); + return tag_buffer; + } + return 0; } // Given a xmlNode ptr, extract a value from either a element name or from a given attribute -const char *element_attribute_value_extract(const xmlNode *ptr, const char *attribute_name, NMEM nmem) { - - const char *name = ptr->name; - size_t length = strlen(name); - if (length > 1 ) { - return nmem_strdup(nmem, name+1); - } - // TODO Extract from attribute where matches attribute_name - xmlAttr *attr; +char *element_attribute_value_extract(const xmlNode *ptr, + const char *attribute_name, + NMEM nmem) +{ + const char *name = (const char *) ptr->name; + size_t length = strlen(name); + xmlAttr *attr; + if (length > 1 ) + return nmem_strdup(nmem, name+1); + // TODO Extract from attribute where matches attribute_name for (attr = ptr->properties; attr; attr = attr->next) - if (!strcmp((const char *)attr->name, attribute_name)) { - return nmem_text_node_cdata(attr->children, nmem); - } - return 0; + if (!strcmp((const char *)attr->name, attribute_name)) + return nmem_text_node_cdata(attr->children, nmem); + return 0; } int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) { - NMEM nmem = yaz_marc_get_nmem(mt); for (; ptr; ptr = ptr->next) { if (ptr->type == XML_ELEMENT_NODE) { - xmlNode *p; if (!strncmp((const char *) ptr->name, "s", 1)) { - NMEM nmem = yaz_marc_get_nmem(mt); - char *buffer = (char *) nmem_malloc(nmem, 5); - const char *tag_value = element_attribute_value_extract(ptr, "code", nmem); + NMEM nmem = yaz_marc_get_nmem(mt); + xmlNode *p; + size_t ctrl_data_len = 0; + char *ctrl_data_buf = 0; + const char *tag_value = element_attribute_value_extract(ptr, "code", nmem); if (!tag_value) { yaz_marc_cprintf( @@ -137,15 +138,12 @@ int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) return -1; } - size_t ctrl_data_len = 0; - char *ctrl_data_buf = 0; - ctrl_data_len = strlen((const char *) tag_value); - // Extract (length) from CDATA - xmlNode *p; - for (p = ptr->children; p ; p = p->next) + ctrl_data_len = strlen((const char *) tag_value); + // Extract (length) from CDATA + for (p = ptr->children; p ; p = p->next) if (p->type == XML_TEXT_NODE) ctrl_data_len += strlen((const char *)p->content); - // Allocate memory for code value (1 character (can be multi-byte) and data + // Allocate memory for code value (1 character (can be multi-byte) and data ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1); // Build a string with "" strcpy(ctrl_data_buf, (const char *) tag_value); @@ -166,9 +164,9 @@ int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) } -static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) +static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p, + int *indicator_length) { - int indicator_length; int identifier_length; int base_address; int length_data_entry; @@ -180,25 +178,21 @@ static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) for(; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE) { - if ( !strcmp( (const char *) ptr->name, "leader") || - (!strncmp((const char *) ptr->name, "l", 1) )) + if ( !strcmp( (const char *) ptr->name, "leader") || + (!strncmp((const char *) ptr->name, "l", 1) )) { xmlNode *p = ptr->children; for(; p; p = p->next) if (p->type == XML_TEXT_NODE) leader = (const char *) p->content; - break; - } - else - { - yaz_marc_cprintf( - mt, "Expected element 'leader', got '%.80s'", ptr->name); + ptr = ptr->next; } + break; } if (!leader) { - yaz_marc_cprintf(mt, "Missing element 'leader'"); - return -1; + yaz_marc_cprintf(mt, "Missing leader. Inserting fake leader"); + leader = "00000nam a22000000a 4500"; } if (strlen(leader) != 24) { @@ -207,7 +201,7 @@ static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) return -1; } yaz_marc_set_leader(mt, leader, - &indicator_length, + indicator_length, &identifier_length, &base_address, &length_data_entry, @@ -217,12 +211,13 @@ static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) return 0; } -static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) +static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr, + int indicator_length) { for(; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE) { - if (!strcmp( (const char *) ptr->name, "controlfield")) + if (!strcmp( (const char *) ptr->name, "controlfield")) { const xmlNode *ptr_tag = 0; struct _xmlAttr *attr; @@ -250,18 +245,29 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) const xmlNode *ptr_tag = 0; struct _xmlAttr *attr; int i; - for (i = 0; i<11; i++) - indstr[i] = '\0'; + for (i = 0; i < indicator_length; i++) + indstr[i] = ' '; + indstr[i] = '\0'; for (attr = ptr->properties; attr; attr = attr->next) if (!strcmp((const char *)attr->name, "tag")) ptr_tag = attr->children; else if (strlen((const char *)attr->name) == 4 && !memcmp(attr->name, "ind", 3)) { - int no = atoi((const char *)attr->name+3); - if (attr->children - && attr->children->type == XML_TEXT_NODE) - indstr[no] = attr->children->content[0]; + int no = atoi((const char *)attr->name + 3); + if (attr->children && + attr->children->type == XML_TEXT_NODE && + no <= indicator_length && no > 0 && + attr->children->content[0]) + { + indstr[no - 1] = attr->children->content[0]; + } + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'datafield'", + attr->name); + } } else { @@ -275,10 +281,8 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) mt, "Missing attribute 'tag' for 'datafield'" ); return -1; } - /* note that indstr[0] is unused so we use indstr[1..] */ yaz_marc_add_datafield_xml(mt, ptr_tag, - indstr+1, strlen(indstr+1)); - + indstr, indicator_length); if (yaz_marc_read_xml_subfields(mt, ptr->children)) return -1; } @@ -293,60 +297,68 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) return 0; } -void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators); -static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr) +static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr, + int indicator_length) { for(; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE) { - if (!strncmp( (const char *) ptr->name, "c", 1)) + if (!strncmp( (const char *) ptr->name, "c", 1)) { - NMEM nmem = yaz_marc_get_nmem(mt); - const char *tag_value = element_attribute_value_extract(ptr, "tag", nmem); + NMEM nmem = yaz_marc_get_nmem(mt); + char *tag_value = element_attribute_value_extract(ptr, "tag", nmem); if (!tag_value) { yaz_marc_cprintf( mt, "Missing attribute 'tag' for 'controlfield'" ); return -1; } - yaz_marc_add_controlfield_turbo_xml(mt, tag_value, ptr->children); + yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children); } else if (!strncmp((const char *) ptr->name, "d",1)) { struct _xmlAttr *attr; - NMEM nmem = yaz_marc_get_nmem(mt); - char *indstr = nmem_malloc(nmem, 11); /* 0(unused), 1,....9, + zero term */ - int index = 0; - for (index = 0; index < 11; index++) - indstr[index] = '\0'; - const char *tag_value = element_attribute_value_extract(ptr, "tag", nmem); + NMEM nmem = yaz_marc_get_nmem(mt); + char *tag_value; + char *indstr = nmem_malloc(nmem, indicator_length + 1); + int i = 0; + for (i = 0; i < indicator_length; i++) + indstr[i] = ' '; + indstr[i] = '\0'; + tag_value = element_attribute_value_extract(ptr, "tag", nmem); if (!tag_value) - { + { yaz_marc_cprintf( mt, "Missing attribute 'tag' for 'datafield'" ); return -1; } for (attr = ptr->properties; attr; attr = attr->next) if (strlen((const char *)attr->name) == 2 && - attr->name[0] == 'i') + attr->name[0] == 'i') { //extract indicator attribute from i#="Y" pattern - int no = atoi((const char *)attr->name+1); - if (attr->children - && attr->children->type == XML_TEXT_NODE) - indstr[no] = attr->children->content[0]; + int no = atoi((const char *)attr->name + 1); + if (attr->children && + attr->children->type == XML_TEXT_NODE && + no <= indicator_length && no > 0 && + attr->children->content[0]) + { + indstr[no - 1] = attr->children->content[0]; + } + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'd'",attr->name); + } } else { yaz_marc_cprintf( - mt, "Bad attribute '%.80s' for 'datafield'", - attr->name); + mt, "Bad attribute '%.80s' for 'd'", attr->name); } - /* note that indstr[0] is unused so we use indstr[1..] */ - yaz_marc_add_datafield_turbo_xml(mt, tag_value, indstr+1); - int rc = yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */); - if (rc) + yaz_marc_add_datafield_xml2(mt, tag_value, indstr); + if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */)) return -1; } else @@ -366,20 +378,24 @@ static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr) #if YAZ_HAVE_XML2 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr) { + int indicator_length = 0; + int format = 0; yaz_marc_reset(mt); for(; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE) { - //TODO Should actually look at the namespace but... - if (!strcmp((const char *) ptr->name, "record")) { - yaz_marc_set_read_format(mt, YAZ_MARC_MARCXML); + if (!strcmp((const char *) ptr->name, "record")) + { + format = YAZ_MARC_MARCXML; break; } - else if (!strcmp((const char *) ptr->name, "r")) { - yaz_marc_set_read_format(mt, YAZ_MARC_TMARCXML); + else if (!strcmp((const char *) ptr->name, "r")) + { + format = YAZ_MARC_TURBOMARC; break; } + else { yaz_marc_cprintf( mt, "Unknown element '%.80s' in MARC XML reader", @@ -394,16 +410,17 @@ int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr) } /* ptr points to record node now */ ptr = ptr->children; - if (yaz_marc_read_xml_leader(mt, &ptr)) + if (yaz_marc_read_xml_leader(mt, &ptr, &indicator_length)) return -1; - switch (yaz_marc_get_read_format(mt)) { - case YAZ_MARC_MARCXML: - return yaz_marc_read_xml_fields(mt, ptr->next); - case YAZ_MARC_TMARCXML: - return yaz_marc_read_turbo_xml_fields(mt, ptr->next); + switch (format) + { + case YAZ_MARC_MARCXML: + return yaz_marc_read_xml_fields(mt, ptr, indicator_length); + case YAZ_MARC_TURBOMARC: + return yaz_marc_read_turbo_xml_fields(mt, ptr, indicator_length); } - return -1; + return -1; } #endif