X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Frecord_conv.c;h=851ec9f5112932578f43a81b003ef409da455b72;hp=4ceb023ee7b77c045947f6a53bd13165da4375d9;hb=4efb9de61a4284830d3dde10a992a42067879c84;hpb=a0e27aac0589d493172c73f6660b844fc6460d7c diff --git a/src/record_conv.c b/src/record_conv.c index 4ceb023..851ec9f 100644 --- a/src/record_conv.c +++ b/src/record_conv.c @@ -1,8 +1,6 @@ -/* - * Copyright (C) 2005-2006, Index Data ApS +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2009 Index Data * See the file LICENSE for details. - * - * $Id: record_conv.c,v 1.1 2006-05-02 20:47:45 adam Exp $ */ /** * \file record_conv.c @@ -13,63 +11,110 @@ #include #endif -#if HAVE_XML2 -#include -#include -#endif - #include - +#include +#include #include #include #include #include +#include +#include + +#if YAZ_HAVE_XML2 +#include +#include +#include +#if YAZ_HAVE_XSLT +#include +#include +#endif +#if YAZ_HAVE_EXSLT +#include +#endif /** \brief The internal structure for yaz_record_conv_t */ struct yaz_record_conv_struct { - /** memory for configuration */ + /** \brief memory for configuration */ NMEM nmem; - /** conversion rules (allocated using NMEM) */ + /** \brief conversion rules (allocated using NMEM) */ struct yaz_record_conv_rule *rules; - /** pointer to last conversion rule pointer in chain */ + /** \brief pointer to last conversion rule pointer in chain */ struct yaz_record_conv_rule **rules_p; - /** string buffer for error messages */ + /** \brief string buffer for error messages */ WRBUF wr_error; + + /** \brief path for opening files */ + char *path; }; /** \brief tranformation types (rule types) */ enum YAZ_RECORD_CONV_RULE { YAZ_RECORD_CONV_RULE_XSLT, - YAZ_RECORD_CONV_RULE_MARC_TO_XML, - YAZ_RECORD_CONV_RULE_XML_TO_MARC + YAZ_RECORD_CONV_RULE_MARC }; /** \brief tranformation info (rule info) */ struct yaz_record_conv_rule { enum YAZ_RECORD_CONV_RULE which; union { +#if YAZ_HAVE_XSLT struct { - const char *stylesheet; + xmlDocPtr xsp_doc; } xslt; +#endif struct { - const char *charset; - } marc_to_xml; - struct { - const char *charset; - } xml_to_marc; + const char *input_charset; + const char *output_charset; + int input_format; + int output_format; + } marc; } u; struct yaz_record_conv_rule *next; }; +/** \brief reset rules+configuration */ +static void yaz_record_conv_reset(yaz_record_conv_t p) +{ + + struct yaz_record_conv_rule *r; + for (r = p->rules; r; r = r->next) + { + if (r->which == YAZ_RECORD_CONV_RULE_MARC) + { + ; + } +#if YAZ_HAVE_XSLT + else if (r->which == YAZ_RECORD_CONV_RULE_XSLT) + { + xmlFreeDoc(r->u.xslt.xsp_doc); + } +#endif + } + wrbuf_rewind(p->wr_error); + nmem_reset(p->nmem); + + p->rules = 0; + + p->rules_p = &p->rules; +} + yaz_record_conv_t yaz_record_conv_create() { - yaz_record_conv_t p = xmalloc(sizeof(*p)); + yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p)); p->nmem = nmem_create(); p->wr_error = wrbuf_alloc(); + p->rules = 0; + p->path = 0; + +#if YAZ_HAVE_EXSLT + exsltRegisterAll(); +#endif + yaz_record_conv_reset(p); return p; } @@ -77,17 +122,20 @@ void yaz_record_conv_destroy(yaz_record_conv_t p) { if (p) { + yaz_record_conv_reset(p); nmem_destroy(p->nmem); - wrbuf_free(p->wr_error, 1); + wrbuf_destroy(p->wr_error); + xfree(p->path); xfree(p); } } -#if HAVE_XML2 +/** \brief adds a rule */ static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p, enum YAZ_RECORD_CONV_RULE type) { - struct yaz_record_conv_rule *r = nmem_malloc(p->nmem, sizeof(*r)); + struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *) + nmem_malloc(p->nmem, sizeof(*r)); r->which = type; r->next = 0; *p->rules_p = r; @@ -95,16 +143,10 @@ static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p, return r; } -static void yaz_record_conv_reset(yaz_record_conv_t p) -{ - wrbuf_rewind(p->wr_error); - nmem_reset(p->nmem); - p->rules = 0; - p->rules_p = &p->rules; -} - +/** \brief parse 'xslt' conversion node */ static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr) { +#if YAZ_HAVE_XSLT struct _xmlAttr *attr; const char *stylesheet = 0; @@ -115,138 +157,426 @@ static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr) stylesheet = (const char *) attr->children->content; else { - wrbuf_printf(p->wr_error, "Bad attribute '%s'." + wrbuf_printf(p->wr_error, "Bad attribute '%s'" "Expected stylesheet.", attr->name); return -1; } } - if (stylesheet) + if (!stylesheet) { - struct yaz_record_conv_rule *r = - add_rule(p, YAZ_RECORD_CONV_RULE_XSLT); - r->u.xslt.stylesheet = nmem_strdup(p->nmem, stylesheet); - return 0; + wrbuf_printf(p->wr_error, "Element : " + "attribute 'stylesheet' expected"); + return -1; } - wrbuf_printf(p->wr_error, "Missing attribute 'stylesheet'"); + else + { + char fullpath[1024]; + xsltStylesheetPtr xsp; + xmlDocPtr xsp_doc; + if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath)) + { + wrbuf_printf(p->wr_error, "Element :" + " could not locate stylesheet '%s'", + stylesheet, fullpath); + if (p->path) + wrbuf_printf(p->wr_error, " with path '%s'", p->path); + + return -1; + } + xsp_doc = xmlParseFile(fullpath); + if (!xsp_doc) + { + wrbuf_printf(p->wr_error, "Element: :" + " xml parse failed: %s", stylesheet, fullpath); + if (p->path) + wrbuf_printf(p->wr_error, " with path '%s'", p->path); + return -1; + } + xsp = xsltParseStylesheetDoc(xsp_doc); + if (!xsp) + { + wrbuf_printf(p->wr_error, "Element: :" + " xslt parse failed: %s", stylesheet, fullpath); + if (p->path) + wrbuf_printf(p->wr_error, " with path '%s'", p->path); + wrbuf_printf(p->wr_error, " (" +#if YAZ_HAVE_EXSLT + + "EXSLT enabled" +#else + "EXSLT not supported" +#endif + ")"); + return -1; + } + else + { + struct yaz_record_conv_rule *r = + add_rule(p, YAZ_RECORD_CONV_RULE_XSLT); + r->u.xslt.xsp_doc = xmlCopyDoc(xsp_doc, 1); + xsltFreeStylesheet(xsp); /* will free xsp_doc */ + } + } + return 0; +#else + wrbuf_printf(p->wr_error, "xslt unsupported." + " YAZ compiled without XSLT support"); return -1; +#endif } -static int conv_marc_to_xml(yaz_record_conv_t p, const xmlNode *ptr) +/** \brief parse 'marc' conversion node */ +static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr) { struct _xmlAttr *attr; - const char *charset = 0; + const char *input_charset = 0; + const char *output_charset = 0; + const char *input_format = 0; + const char *output_format = 0; + int input_format_mode = 0; + int output_format_mode = 0; struct yaz_record_conv_rule *r; for (attr = ptr->properties; attr; attr = attr->next) { - if (!xmlStrcmp(attr->name, BAD_CAST "charset") && + if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") && + attr->children && attr->children->type == XML_TEXT_NODE) + input_charset = (const char *) attr->children->content; + else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") && attr->children && attr->children->type == XML_TEXT_NODE) - charset = (const char *) attr->children->content; + output_charset = (const char *) attr->children->content; + else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") && + attr->children && attr->children->type == XML_TEXT_NODE) + input_format = (const char *) attr->children->content; + else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") && + attr->children && attr->children->type == XML_TEXT_NODE) + output_format = (const char *) attr->children->content; else { - wrbuf_printf(p->wr_error, "Bad attribute '%s'." - "Expected charset.", attr->name); + wrbuf_printf(p->wr_error, "Element : expected attributes" + "'inputformat', 'inputcharset', 'outputformat' or" + " 'outputcharset', got attribute '%s'", + attr->name); return -1; } } - r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC_TO_XML); - if (charset) - r->u.marc_to_xml.charset = nmem_strdup(p->nmem, charset); + if (!input_format) + { + wrbuf_printf(p->wr_error, "Element : " + "attribute 'inputformat' required"); + return -1; + } + else if (!strcmp(input_format, "marc")) + { + input_format_mode = YAZ_MARC_ISO2709; + } + else if (!strcmp(input_format, "xml")) + { + input_format_mode = YAZ_MARC_MARCXML; + /** Libxml2 generates UTF-8 encoding by default . + So we convert from UTF-8 to outputcharset (if defined) + */ + if (!input_charset && output_charset) + input_charset = "utf-8"; + } + else + { + wrbuf_printf(p->wr_error, "Element : " + " Unsupported input format" + " defined by attribute value", + input_format); + return -1; + } + + if (!output_format) + { + wrbuf_printf(p->wr_error, + "Element : attribute 'outputformat' required"); + return -1; + } + else if (!strcmp(output_format, "line")) + { + output_format_mode = YAZ_MARC_LINE; + } + else if (!strcmp(output_format, "marcxml")) + { + output_format_mode = YAZ_MARC_MARCXML; + if (input_charset && !output_charset) + output_charset = "utf-8"; + } + else if (!strcmp(output_format, "marc")) + { + output_format_mode = YAZ_MARC_ISO2709; + } + else if (!strcmp(output_format, "marcxchange")) + { + output_format_mode = YAZ_MARC_XCHANGE; + if (input_charset && !output_charset) + output_charset = "utf-8"; + } else - r->u.marc_to_xml.charset = 0; + { + wrbuf_printf(p->wr_error, "Element : " + " Unsupported output format" + " defined by attribute value", + output_format); + return -1; + } + if (input_charset && output_charset) + { + yaz_iconv_t cd = yaz_iconv_open(output_charset, input_charset); + if (!cd) + { + wrbuf_printf(p->wr_error, + "Element :" + " Unsupported character set mapping" + " defined by attribute values", + input_charset, output_charset); + return -1; + } + yaz_iconv_close(cd); + } + else if (input_charset) + { + wrbuf_printf(p->wr_error, "Element : " + "attribute 'outputcharset' missing"); + return -1; + } + else if (output_charset) + { + wrbuf_printf(p->wr_error, "Element : " + "attribute 'inputcharset' missing"); + return -1; + } + r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC); + + r->u.marc.input_charset = nmem_strdup(p->nmem, input_charset); + r->u.marc.output_charset = nmem_strdup(p->nmem, output_charset); + r->u.marc.input_format = input_format_mode; + r->u.marc.output_format = output_format_mode; return 0; } -static int conv_xml_to_marc(yaz_record_conv_t p, const xmlNode *ptr) +int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr) { - struct _xmlAttr *attr; - const char *charset = 0; - struct yaz_record_conv_rule *r; + yaz_record_conv_reset(p); - for (attr = ptr->properties; attr; attr = attr->next) - { - if (!xmlStrcmp(attr->name, BAD_CAST "charset") && - attr->children && attr->children->type == XML_TEXT_NODE) - charset = (const char *) attr->children->content; - else + /* parsing element children */ + for (ptr = ptr->children; ptr; ptr = ptr->next) { - wrbuf_printf(p->wr_error, "Bad attribute '%s'." - "Expected charset.", attr->name); - return -1; + if (ptr->type != XML_ELEMENT_NODE) + continue; + if (!strcmp((const char *) ptr->name, "xslt")) + { + if (conv_xslt(p, ptr)) + return -1; + } + else if (!strcmp((const char *) ptr->name, "marc")) + { + if (conv_marc(p, ptr)) + return -1; + } + else + { + wrbuf_printf(p->wr_error, "Element : expected " + " or element, got <%s>" + , ptr->name); + return -1; + } } - } - r = add_rule(p, YAZ_RECORD_CONV_RULE_XML_TO_MARC); - if (charset) - r->u.xml_to_marc.charset = nmem_strdup(p->nmem, charset); - else - r->u.xml_to_marc.charset = 0; return 0; } +static int yaz_record_conv_record_rule(yaz_record_conv_t p, + struct yaz_record_conv_rule *r, + const char *input_record_buf, + size_t input_record_len, + WRBUF output_record); -int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v) +int yaz_record_conv_opac_record(yaz_record_conv_t p, + Z_OPACRecord *input_record, + WRBUF output_record) { - const xmlNode *ptr = ptr_v; + int ret = 0; + struct yaz_record_conv_rule *r = p->rules; + if (!r || r->which != YAZ_RECORD_CONV_RULE_MARC) + ret = -1; /* no marc rule so we can't do OPAC */ + else + { + WRBUF res = wrbuf_alloc(); + yaz_marc_t mt = yaz_marc_create(); + yaz_iconv_t cd = yaz_iconv_open(r->u.marc.output_charset, + r->u.marc.input_charset); + + wrbuf_rewind(p->wr_error); + yaz_marc_xml(mt, r->u.marc.output_format); + + yaz_marc_iconv(mt, cd); + + yaz_opac_decode_wrbuf(mt, input_record, res); + if (ret != -1) + { + ret = yaz_record_conv_record_rule(p, + r->next, + wrbuf_buf(res), wrbuf_len(res), + output_record); + } + yaz_marc_destroy(mt); + if (cd) + yaz_iconv_close(cd); + wrbuf_destroy(res); + } + return ret; +} - yaz_record_conv_reset(p); +int yaz_record_conv_record(yaz_record_conv_t p, + const char *input_record_buf, + size_t input_record_len, + WRBUF output_record) +{ + return yaz_record_conv_record_rule(p, p->rules, + input_record_buf, + input_record_len, output_record); +} - if (ptr && ptr->type == XML_ELEMENT_NODE && - !strcmp((const char *) ptr->name, "convert")) +static int yaz_record_conv_record_rule(yaz_record_conv_t p, + struct yaz_record_conv_rule *r, + const char *input_record_buf, + size_t input_record_len, + WRBUF output_record) +{ + int ret = 0; + WRBUF record = output_record; /* pointer transfer */ + wrbuf_rewind(p->wr_error); + + wrbuf_write(record, input_record_buf, input_record_len); + for (; ret == 0 && r; r = r->next) { - for (ptr = ptr->children; ptr; ptr = ptr->next) + if (r->which == YAZ_RECORD_CONV_RULE_MARC) { - if (ptr->type != XML_ELEMENT_NODE) - continue; - if (!strcmp((const char *) ptr->name, "xslt")) + yaz_iconv_t cd = + yaz_iconv_open(r->u.marc.output_charset, + r->u.marc.input_charset); + yaz_marc_t mt = yaz_marc_create(); + + yaz_marc_xml(mt, r->u.marc.output_format); + + if (cd) + yaz_marc_iconv(mt, cd); + if (r->u.marc.input_format == YAZ_MARC_ISO2709) { - if (conv_xslt(p, ptr)) - return -1; + int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record), + wrbuf_len(record)); + if (sz > 0) + ret = 0; + else + ret = -1; } - else if (!strcmp((const char *) ptr->name, "marc_to_xml")) + else if (r->u.marc.input_format == YAZ_MARC_MARCXML) { - if (conv_marc_to_xml(p, ptr)) - return -1; + xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), + wrbuf_len(record)); + if (!doc) + { + wrbuf_printf(p->wr_error, "xmlParseMemory failed"); + ret = -1; + } + else + { + ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc)); + if (ret) + wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed"); + } + xmlFreeDoc(doc); } - else if (!strcmp((const char *) ptr->name, "xml_to_marc")) + else { - if (conv_xml_to_marc(p, ptr)) - return -1; + wrbuf_printf(p->wr_error, "unsupported input format"); + ret = -1; + } + if (ret == 0) + { + wrbuf_rewind(record); + ret = yaz_marc_write_mode(mt, record); + if (ret) + wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed"); + } + if (cd) + yaz_iconv_close(cd); + yaz_marc_destroy(mt); + } +#if YAZ_HAVE_XSLT + else if (r->which == YAZ_RECORD_CONV_RULE_XSLT) + { + xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), + wrbuf_len(record)); + if (!doc) + { + wrbuf_printf(p->wr_error, "xmlParseMemory failed"); + ret = -1; } else { - wrbuf_printf(p->wr_error, "Bad element '%s'." - "Expected xslt, marc_to_xml,...", ptr->name); - return -1; + xmlDocPtr xsp_doc = xmlCopyDoc(r->u.xslt.xsp_doc, 1); + xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc); + xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0); + if (res) + { + xmlChar *out_buf = 0; + int out_len; + +#if YAZ_HAVE_XSLTSAVERESULTTOSTRING + xsltSaveResultToString(&out_buf, &out_len, res, xsp); +#else + xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1); +#endif + if (!out_buf) + { + wrbuf_printf(p->wr_error, + "xsltSaveResultToString failed"); + ret = -1; + } + else + { + wrbuf_rewind(record); + wrbuf_write(record, (const char *) out_buf, out_len); + + xmlFree(out_buf); + } + xmlFreeDoc(res); + } + else + { + wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed"); + ret = -1; + } + xmlFreeDoc(doc); + xsltFreeStylesheet(xsp); /* frees xsp_doc too */ } } +#endif } - else - { - wrbuf_printf(p->wr_error, "Missing 'convert' element"); - return -1; - } - return 0; + return ret; } -#else -/* HAVE_XML2 */ -int yaz_record_conv_configure(yaz_record_conv_t p, const void *ptr_v) +const char *yaz_record_conv_get_error(yaz_record_conv_t p) { - wrbuf_rewind(p->wr_error); - wrbuf_printf(p->wr_error, "No XML support for yaz_record_conv"); - return -1; + return wrbuf_cstr(p->wr_error); } -#endif - -const char *yaz_record_conv_get_error(yaz_record_conv_t p) +void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path) { - return wrbuf_buf(p->wr_error); + xfree(p->path); + p->path = 0; + if (path) + p->path = xstrdup(path); } +#endif /* * Local variables: * c-basic-offset: 4 + * c-file-style: "Stroustrup" * indent-tabs-mode: nil * End: * vim: shiftwidth=4 tabstop=8 expandtab