X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Fmarc_read_xml.c;h=6b3e3dd2416a3a352e4ce1c24650c7de16249e57;hp=650106ce37c41118fe0a015b738964160d91dd01;hb=5a3e6fa63181ab4afa8bce5f01c6de016a333334;hpb=77f27a99f17bdd5e6fc4d8a202ecc6da4ef95660 diff --git a/src/marc_read_xml.c b/src/marc_read_xml.c index 650106c..6b3e3dd 100644 --- a/src/marc_read_xml.c +++ b/src/marc_read_xml.c @@ -90,67 +90,48 @@ int yaz_marc_read_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) return 0; } -const char *tag_value_extract(const char *name, char tag_buffer[5]) { - size_t length = strlen(name); - if (length == 3) { - strcpy(tag_buffer, name); - return tag_buffer; - } - return 0; +const char *tag_value_extract(const char *name, char tag_buffer[5]) +{ + size_t length = strlen(name); + if (length == 3) + { + strcpy(tag_buffer, name); + return tag_buffer; + } + return 0; } -// pattern 2 && length < 5) { - if (name[0] != '-') { - return 0; - } - length--; - const char *ptr = name+1; - int index = 0; - for (index = 0; index < length/2; index++) { - unsigned int value; - char temp[3]; - strncpy(temp, ptr + 2*index, 2); - sscanf(temp, "%02X", &value); - tag_buffer[index] = (unsigned char) value; - } - tag_buffer[index] = '\0'; - if (index > 0) - return tag_buffer; - } - return 0; +// Given a xmlNode ptr, extract a value from either a element name or from a given attribute +char *element_attribute_value_extract(const xmlNode *ptr, + const char *attribute_name, + NMEM nmem) +{ + const char *name = (const char *) ptr->name; + size_t length = strlen(name); + xmlAttr *attr; + if (length > 1 ) + return nmem_strdup(nmem, name+1); + // TODO Extract from attribute where matches attribute_name + for (attr = ptr->properties; attr; attr = attr->next) + if (!strcmp((const char *)attr->name, attribute_name)) + return nmem_text_node_cdata(attr->children, nmem); + return 0; } -int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr, char indicators[11]) +int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr) { - NMEM nmem = yaz_marc_get_nmem(mt); for (; ptr; ptr = ptr->next) { if (ptr->type == XML_ELEMENT_NODE) { - xmlNode *p; - if (!strncmp((const char *) ptr->name, "i", 1)) { - int length = strlen(ptr->name+1); - if (length > 0) { - int index = (int)strtol(ptr->name+1, (char **)NULL, 10); - for (p = ptr->children; p ; p = p->next) - if (p->type == XML_TEXT_NODE) { - indicators[index] = ((const char *)p->content)[0]; - break; - } - } - } - else if (!strncmp((const char *) ptr->name, "s", 1)) + if (!strncmp((const char *) ptr->name, "s", 1)) { - NMEM nmem = yaz_marc_get_nmem(mt); - char *buffer = (char *) nmem_malloc(nmem, 5); - const char *tag_value = code_value_extract((ptr->name+1), buffer); + NMEM nmem = yaz_marc_get_nmem(mt); + xmlNode *p; + size_t ctrl_data_len = 0; + char *ctrl_data_buf = 0; + const char *tag_value = element_attribute_value_extract(ptr, "code", nmem); if (!tag_value) { yaz_marc_cprintf( @@ -158,15 +139,12 @@ int yaz_marc_read_turbo_xml_subfields(yaz_marc_t mt, const xmlNode *ptr, char in return -1; } - size_t ctrl_data_len = 0; - char *ctrl_data_buf = 0; - ctrl_data_len = strlen((const char *) tag_value); - // Extract (length) from CDATA - xmlNode *p; - for (p = ptr->children; p ; p = p->next) + ctrl_data_len = strlen((const char *) tag_value); + // Extract (length) from CDATA + for (p = ptr->children; p ; p = p->next) if (p->type == XML_TEXT_NODE) ctrl_data_len += strlen((const char *)p->content); - // Allocate memory for code value (1 character (can be multi-byte) and data + // Allocate memory for code value (1 character (can be multi-byte) and data ctrl_data_buf = (char *) nmem_malloc(nmem, ctrl_data_len+1); // Build a string with "" strcpy(ctrl_data_buf, (const char *) tag_value); @@ -201,8 +179,8 @@ static int yaz_marc_read_xml_leader(yaz_marc_t mt, const xmlNode **ptr_p) for(; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE) { - if ( !strcmp( (const char *) ptr->name, "leader") || - (!strncmp((const char *) ptr->name, "l", 1) )) + if ( !strcmp( (const char *) ptr->name, "leader") || + (!strncmp((const char *) ptr->name, "l", 1) )) { xmlNode *p = ptr->children; for(; p; p = p->next) @@ -243,7 +221,7 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) for(; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE) { - if (!strcmp( (const char *) ptr->name, "controlfield")) + if (!strcmp( (const char *) ptr->name, "controlfield")) { const xmlNode *ptr_tag = 0; struct _xmlAttr *attr; @@ -314,46 +292,59 @@ static int yaz_marc_read_xml_fields(yaz_marc_t mt, const xmlNode *ptr) return 0; } -struct yaz_marc_node* yaz_marc_add_datafield_turbo_xml(yaz_marc_t mt, const char *tag_value); static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr) { for(; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE) { - if (!strncmp( (const char *) ptr->name, "c", 1)) + if (!strncmp( (const char *) ptr->name, "c", 1)) { - NMEM nmem = yaz_marc_get_nmem(mt); - char *buffer = (char *) nmem_malloc(nmem, 5); - //Extract the tag value out of the rest of the element name - const char *tag_value = tag_value_extract((const char *)(ptr->name+1), buffer); + NMEM nmem = yaz_marc_get_nmem(mt); + char *tag_value = element_attribute_value_extract(ptr, "tag", nmem); if (!tag_value) { yaz_marc_cprintf( mt, "Missing attribute 'tag' for 'controlfield'" ); return -1; } - yaz_marc_add_controlfield_turbo_xml(mt, tag_value, ptr->children); - //wrbuf_destroy(tag_value); + yaz_marc_add_controlfield_xml2(mt, tag_value, ptr->children); } else if (!strncmp((const char *) ptr->name, "d",1)) { - NMEM nmem = yaz_marc_get_nmem(mt); + struct _xmlAttr *attr; + NMEM nmem = yaz_marc_get_nmem(mt); + char *tag_value; char *indstr = nmem_malloc(nmem, 11); /* 0(unused), 1,....9, + zero term */ - char *buffer = (char *) nmem_malloc(nmem, 5); - const char *tag_value = tag_value_extract(ptr->name+1, buffer); + int index = 0; + for (index = 0; index < 11; index++) + indstr[index] = '\0'; + tag_value = element_attribute_value_extract(ptr, "tag", nmem); if (!tag_value) - { + { yaz_marc_cprintf( mt, "Missing attribute 'tag' for 'datafield'" ); return -1; } + for (attr = ptr->properties; attr; attr = attr->next) + if (strlen((const char *)attr->name) == 2 && + attr->name[0] == 'i') + { + //extract indicator attribute from i#="Y" pattern + int no = atoi((const char *)attr->name+1); + if (attr->children + && attr->children->type == XML_TEXT_NODE) + indstr[no] = attr->children->content[0]; + } + else + { + yaz_marc_cprintf( + mt, "Bad attribute '%.80s' for 'datafield'", + attr->name); + } /* note that indstr[0] is unused so we use indstr[1..] */ - struct yaz_marc_node *n = yaz_marc_add_datafield_turbo_xml(mt, tag_value); - - int rc = yaz_marc_read_turbo_xml_subfields(mt, ptr->children, indstr); - yaz_marc_datafield_set_indicators(n, indstr+1, strlen(indstr+1)); - if (rc) + yaz_marc_add_datafield_xml2(mt, tag_value, indstr+1); + if (yaz_marc_read_turbo_xml_subfields(mt, ptr->children /*, indstr */)) return -1; } else @@ -373,13 +364,22 @@ static int yaz_marc_read_turbo_xml_fields(yaz_marc_t mt, const xmlNode *ptr) #if YAZ_HAVE_XML2 int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr) { + int format = 0; yaz_marc_reset(mt); - + for(; ptr; ptr = ptr->next) if (ptr->type == XML_ELEMENT_NODE) { if (!strcmp((const char *) ptr->name, "record")) + { + format = YAZ_MARC_MARCXML; break; + } + else if (!strcmp((const char *) ptr->name, "r")) + { + format = YAZ_MARC_TURBOMARC; + break; + } else { yaz_marc_cprintf( @@ -397,14 +397,15 @@ int yaz_marc_read_xml(yaz_marc_t mt, const xmlNode *ptr) ptr = ptr->children; if (yaz_marc_read_xml_leader(mt, &ptr)) return -1; - - switch (yaz_marc_get_read_format(mt)) { - case YAZ_MARC_MARCXML: - return yaz_marc_read_xml_fields(mt, ptr->next); - case YAZ_MARC_TMARCXML: - return yaz_marc_read_turbo_xml_fields(mt, ptr->next); + + switch (format) + { + case YAZ_MARC_MARCXML: + return yaz_marc_read_xml_fields(mt, ptr->next); + case YAZ_MARC_TURBOMARC: + return yaz_marc_read_turbo_xml_fields(mt, ptr->next); } - return -1; + return -1; } #endif