X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarc_read_xml.c;h=1db432cbf86cf22f74dce503e49f35abf72ba316;hp=3f5d29bdf17aab759093aa1a2d1d46d5d376e3a1;hb=73546e0de845d238d169531dfabc4590b080713f;hpb=379504a233e3e2cc85bca1e7b6d864f1395aec7c

Review notes on this revision of the patch:
  * Line breaks and indentation were lost when the page was captured and have
    been re-created, so whitespace in context lines may not match the target
    file; apply with whitespace-insensitive context matching.
  * The functions added in the hunks at old lines 90 and 215 were revised
    (see the comments there) and the "+" ranges of all hunk headers were
    recomputed to match.
  * The blob ids on the "index" line are those of the original patch.

diff --git a/src/marc_read_xml.c b/src/marc_read_xml.c
index 3f5d29b..1db432c 100644
--- a/src/marc_read_xml.c
+++ b/src/marc_read_xml.c
@@ -1,5 +1,5 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2009 Index Data
+ * Copyright (C) 1995-2010 Index Data
  * See the file LICENSE for details.
  */
 
@@ -90,6 +90,115 @@ int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
     return 0;
 }
 
+/*
+ * Extract the tag from the tail of a Turbo MARC element name.
+ * Returns tag_buffer holding a copy of name when name is exactly three
+ * characters long; returns 0 for any other length.
+ */
+const char *tag_value_extract(const char *name, char tag_buffer[5])
+{
+    if (strlen(name) == 3)
+    {
+        strcpy(tag_buffer, name);
+        return tag_buffer;
+    }
+    return 0;
+}
+
+/*
+ * Extract the subfield code from the tail of a Turbo MARC element name.
+ * pattern <one character or -AB[CD]>: after a "-" prefix, each pair of
+ * characters is converted with sscanf("%02X") into one byte of tag_buffer.
+ * Returns 0 when name fits neither form or a pair cannot be converted.
+ * NOTE(review): the text between the "pattern" comment and the range test
+ * on length was lost from the scraped patch; the signature and the
+ * single-character case are reconstructed from the call site and the
+ * surviving body. Confirm against the upstream commit.
+ */
+const char *code_value_extract(const char *name, char tag_buffer[5])
+{
+    size_t length = strlen(name);
+    if (length == 1)
+        return name;
+    if (length > 2 && length < 6)
+    {
+        const char *ptr = name + 1;
+        size_t index;
+        if (name[0] != '-')
+            return 0;
+        length--;
+        for (index = 0; index < length / 2; index++)
+        {
+            unsigned int value;
+            char temp[3];
+            temp[0] = ptr[2 * index];
+            temp[1] = ptr[2 * index + 1];
+            temp[2] = '\0'; /* sscanf needs a terminated string */
+            if (sscanf(temp, "%02X", &value) != 1)
+                return 0; /* not hex: value would be uninitialized */
+            tag_buffer[index] = (unsigned char) value;
+        }
+        tag_buffer[index] = '\0';
+        if (index > 0)
+            return tag_buffer;
+    }
+    return 0;
+}
+
+
+/*
+ * Read Turbo MARC subfield elements from the sibling list starting at ptr.
+ * For each element whose name starts with "s", the code extracted from the
+ * rest of the name is joined with the element's text children and handed to
+ * yaz_marc_add_subfield. Returns 0 on success; reports through
+ * yaz_marc_cprintf and returns -1 on any other element or an unusable code.
+ */
+int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr)
+{
+    NMEM nmem = yaz_marc_get_nmem(mt);
+    for (; ptr; ptr = ptr->next)
+    {
+        if (ptr->type == XML_ELEMENT_NODE)
+        {
+            if (!strncmp((const char *) ptr->name, "s", 1))
+            {
+                xmlNode *p;
+                size_t ctrl_data_len;
+                char *ctrl_data_buf;
+                char *buffer = (char *) nmem_malloc(nmem, 5);
+                const char *tag_value =
+                    code_value_extract((const char *) ptr->name + 1, buffer);
+                if (!tag_value)
+                {
+                    yaz_marc_cprintf(
+                        mt, "Missing 'code' value for 'subfield'" );
+                    return -1;
+                }
+                /* Length of the code plus every text child */
+                ctrl_data_len = strlen(tag_value);
+                for (p = ptr->children; p ; p = p->next)
+                    if (p->type == XML_TEXT_NODE)
+                        ctrl_data_len += strlen((const char *)p->content);
+                ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1);
+                /* Build the string: code followed by the text data */
+                strcpy(ctrl_data_buf, tag_value);
+                for (p = ptr->children; p ; p = p->next)
+                    if (p->type == XML_TEXT_NODE)
+                        strcat(ctrl_data_buf, (const char *)p->content);
+                yaz_marc_add_subfield(mt, ctrl_data_buf, ctrl_data_len);
+            }
+            else
+            {
+                yaz_marc_cprintf(
+                    mt, "Expected element 'subfield', got '%.80s'", ptr->name);
+                return -1;
+            }
+        }
+    }
+    return 0;
+}
+
+
 static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
 {
     int indicator_length;
@@ -104,7 +213,8 @@ static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p)
     for(; ptr; ptr = ptr->next)
         if (ptr->type == XML_ELEMENT_NODE)
         {
-            if (!strcmp((const char *) ptr->name, "leader"))
+            if ( !strcmp( (const char *) ptr->name, "leader") ||
+                 (!strncmp((const char *) ptr->name, "l", 1) ))
             {
                 xmlNode *p = ptr->children;
                 for(; p; p = p->next)
@@ -145,7 +255,7 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
     for(; ptr; ptr = ptr->next)
         if (ptr->type == XML_ELEMENT_NODE)
         {
-            if (!strcmp((const char *) ptr->name, "controlfield"))
+            if (!strcmp( (const char *) ptr->name, "controlfield"))
             {
                 const xmlNode *ptr_tag = 0;
                 struct _xmlAttr *attr;
@@ -215,6 +325,86 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
         }
     return 0;
 }
+
+void yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, char *tag_value, char *indicators);
+
+/*
+ * Read Turbo MARC field elements from the sibling list starting at ptr.
+ * Names starting with "c" go to yaz_marc_add_controlfield_turbo_xml; names
+ * starting with "d" go to yaz_marc_add_datafield_turbo_xml together with
+ * their two-character "i<digit>" indicator attributes, and their children
+ * are read as subfields. In both cases the rest of the element name must be
+ * a three-character tag. Returns 0 on success and -1 on an unexpected
+ * element, a bad tag or a subfield error; a bad attribute is reported but
+ * does not stop the read.
+ */
+static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr)
+{
+    for(; ptr; ptr = ptr->next)
+        if (ptr->type == XML_ELEMENT_NODE)
+        {
+            if (!strncmp( (const char *) ptr->name, "c", 1))
+            {
+                NMEM nmem = yaz_marc_get_nmem(mt);
+                char *buffer = (char *) nmem_malloc(nmem, 5);
+                const char *tag_value =
+                    tag_value_extract((const char *) ptr->name + 1, buffer);
+                if (!tag_value)
+                {
+                    yaz_marc_cprintf(
+                        mt, "Missing attribute 'tag' for 'controlfield'" );
+                    return -1;
+                }
+                yaz_marc_add_controlfield_turbo_xml(mt, tag_value, ptr->children);
+            }
+            else if (!strncmp((const char *) ptr->name, "d",1))
+            {
+                struct _xmlAttr *attr;
+                NMEM nmem = yaz_marc_get_nmem(mt);
+                /* 0(unused), 1,....9, + zero term */
+                char *indstr = nmem_malloc(nmem, 11);
+                char *buffer = (char *) nmem_malloc(nmem, 5);
+                memset(indstr, 0, 11);
+                if (!tag_value_extract((const char *) ptr->name + 1, buffer))
+                {
+                    yaz_marc_cprintf(
+                        mt, "Missing attribute 'tag' for 'datafield'" );
+                    return -1;
+                }
+                for (attr = ptr->properties; attr; attr = attr->next)
+                    if (strlen((const char *)attr->name) == 2 &&
+                        attr->name[0] == 'i' &&
+                        attr->name[1] >= '0' && attr->name[1] <= '9')
+                    {
+                        /* extract indicator attribute from i#="Y" pattern */
+                        int no = attr->name[1] - '0';
+                        if (attr->children
+                            && attr->children->type == XML_TEXT_NODE)
+                            indstr[no] = attr->children->content[0];
+                    }
+                    else
+                    {
+                        yaz_marc_cprintf(
+                            mt, "Bad attribute '%.80s' for 'datafield'",
+                            attr->name);
+                    }
+                /* note that indstr[0] is unused so we use indstr[1..];
+                   buffer holds the tag because tag_value_extract succeeded */
+                yaz_marc_add_datafield_turbo_xml(mt, buffer, indstr+1);
+                if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children))
+                    return -1;
+            }
+            else
+            {
+                yaz_marc_cprintf(mt,
+                                 "Expected element controlfield or datafield,"
+                                 " got %.80s", ptr->name);
+                return -1;
+            }
+        }
+    return 0;
+}
+
+
 #endif
 
 #if YAZ_HAVE_XML2
@@ -225,9 +415,15 @@ int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
     for(; ptr; ptr = ptr->next)
         if (ptr->type == XML_ELEMENT_NODE)
         {
-            if (!strcmp((const char *) ptr->name, "record"))
+            //TODO Should actually look at the namespace but...
+            if (!strcmp((const char *) ptr->name, "record")) {
+                yaz_marc_set_read_format(mt, YAZ_MARC_MARCXML);
                 break;
-            else
+            }
+            else if (!strcmp((const char *) ptr->name, "r")) {
+                yaz_marc_set_read_format(mt, YAZ_MARC_TMARCXML);
+                break;
+            }
             {
                 yaz_marc_cprintf(
                     mt, "Unknown element '%.80s' in MARC XML reader",
@@ -244,7 +440,14 @@ int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr)
     ptr = ptr->children;
     if (yaz_marc_read_xml_leader(mt, &ptr))
         return -1;
-    return yaz_marc_read_xml_fields(mt, ptr->next);
+
+    switch (yaz_marc_get_read_format(mt)) {
+        case YAZ_MARC_MARCXML:
+            return yaz_marc_read_xml_fields(mt, ptr->next);
+        case YAZ_MARC_TMARCXML:
+            return yaz_marc_read_turbo_xml_fields(mt, ptr->next);
+    }
+    return -1;
 }
 #endif
 