X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Frecord_conv.c;h=2fbe98698d2f933d01ff0aac4a09c4ff93f1b767;hp=497203eba8651bc8abc0ad133bd5b742aede4b07;hb=8cb8947e3a7bff4dbf8f124871cb4905df1adce7;hpb=56fae7ba6f0af88da829afa064908009c106fea6 diff --git a/src/record_conv.c b/src/record_conv.c index 497203e..2fbe986 100644 --- a/src/record_conv.c +++ b/src/record_conv.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2010 Index Data + * Copyright (C) Index Data * See the file LICENSE for details. */ /** @@ -25,6 +25,8 @@ #include #include #include +#include +#include #if YAZ_HAVE_XSLT #include #include @@ -51,29 +53,19 @@ struct yaz_record_conv_struct { char *path; }; -/** \brief tranformation types (rule types) */ -enum YAZ_RECORD_CONV_RULE -{ - YAZ_RECORD_CONV_RULE_XSLT, - YAZ_RECORD_CONV_RULE_MARC +struct marc_info { + NMEM nmem; + const char *input_charset; + const char *output_charset; + int input_format_mode; + int output_format_mode; + const char *leader_spec; }; /** \brief tranformation info (rule info) */ struct yaz_record_conv_rule { - enum YAZ_RECORD_CONV_RULE which; - union { -#if YAZ_HAVE_XSLT - struct { - xmlDocPtr xsp_doc; - } xslt; -#endif - struct { - const char *input_charset; - const char *output_charset; - int input_format; - int output_format; - } marc; - } u; + struct yaz_record_conv_type *type; + void *info; struct yaz_record_conv_rule *next; }; @@ -84,16 +76,7 @@ static void yaz_record_conv_reset(yaz_record_conv_t p) struct yaz_record_conv_rule *r; for (r = p->rules; r; r = r->next) { - if (r->which == YAZ_RECORD_CONV_RULE_MARC) - { - ; - } -#if YAZ_HAVE_XSLT - else if (r->which == YAZ_RECORD_CONV_RULE_XSLT) - { - xmlFreeDoc(r->u.xslt.xsp_doc); - } -#endif + r->type->destroy(r->info); } wrbuf_rewind(p->wr_error); nmem_reset(p->nmem); @@ -103,21 +86,6 @@ static void yaz_record_conv_reset(yaz_record_conv_t p) p->rules_p = &p->rules; } -yaz_record_conv_t yaz_record_conv_create() -{ - yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p)); - p->nmem = nmem_create(); - p->wr_error = wrbuf_alloc(); - p->rules = 0; - p->path = 0; - -#if YAZ_HAVE_EXSLT - exsltRegisterAll(); -#endif - yaz_record_conv_reset(p); - return p; -} - void yaz_record_conv_destroy(yaz_record_conv_t p) { if (p) @@ -125,30 +93,31 @@ void yaz_record_conv_destroy(yaz_record_conv_t p) yaz_record_conv_reset(p); nmem_destroy(p->nmem); wrbuf_destroy(p->wr_error); + xfree(p->path); xfree(p); } } -/** \brief adds a rule */ -static struct yaz_record_conv_rule *add_rule(yaz_record_conv_t p, - enum YAZ_RECORD_CONV_RULE type) -{ - struct yaz_record_conv_rule *r = (struct yaz_record_conv_rule *) - nmem_malloc(p->nmem, sizeof(*r)); - r->which = type; - r->next = 0; - *p->rules_p = r; - p->rules_p = &r->next; - return r; -} +#if YAZ_HAVE_XSLT +struct xslt_info { + NMEM nmem; + xmlDocPtr xsp_doc; + const char **xsl_parms; +}; -/** \brief parse 'xslt' conversion node */ -static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr) +static void *construct_xslt(const xmlNode *ptr, + const char *path, WRBUF wr_error) { -#if YAZ_HAVE_XSLT struct _xmlAttr *attr; const char *stylesheet = 0; + struct xslt_info *info = 0; + NMEM nmem = 0; + int max_parms = 10; + int no_parms = 0; + + if (strcmp((const char *) ptr->name, "xslt")) + return 0; for (attr = ptr->properties; attr; attr = attr->next) { @@ -157,247 +126,650 @@ static int conv_xslt(yaz_record_conv_t p, const xmlNode *ptr) stylesheet = (const char *) attr->children->content; else { - wrbuf_printf(p->wr_error, "Bad attribute '%s'" + wrbuf_printf(wr_error, "Bad attribute '%s'" "Expected stylesheet.", attr->name); - return -1; + return 0; } } + nmem = nmem_create(); + info = nmem_malloc(nmem, sizeof(*info)); + info->nmem = nmem; + info->xsl_parms = nmem_malloc( + nmem, (2 * max_parms + 1) * sizeof(*info->xsl_parms)); + + for (ptr = ptr->children; ptr; ptr = ptr->next) + { + const char *name = 0; + const char *value = 0; + char *qvalue = 0; + if (ptr->type != XML_ELEMENT_NODE) + continue; + if (strcmp((const char *) ptr->name, "param")) + { + wrbuf_printf(wr_error, "Bad element '%s'" + "Expected param.", ptr->name); + nmem_destroy(nmem); + return 0; + } + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!xmlStrcmp(attr->name, BAD_CAST "name") && + attr->children && attr->children->type == XML_TEXT_NODE) + name = (const char *) attr->children->content; + else if (!xmlStrcmp(attr->name, BAD_CAST "value") && + attr->children && attr->children->type == XML_TEXT_NODE) + value = (const char *) attr->children->content; + else + { + wrbuf_printf(wr_error, "Bad attribute '%s'" + "Expected name or value.", attr->name); + nmem_destroy(nmem); + return 0; + } + } + if (!name || !value) + { + wrbuf_printf(wr_error, "Missing attributes name or value"); + nmem_destroy(nmem); + return 0; + } + if (no_parms >= max_parms) + { + wrbuf_printf(wr_error, "Too many parameters given"); + nmem_destroy(nmem); + return 0; + } + + qvalue = nmem_malloc(nmem, strlen(value) + 3); + strcpy(qvalue, "\'"); + strcat(qvalue, value); + strcat(qvalue, "\'"); + + info->xsl_parms[2 * no_parms] = nmem_strdup(nmem, name); + info->xsl_parms[2 * no_parms + 1] = qvalue; + no_parms++; + } + + info->xsl_parms[2 * no_parms] = '\0'; + if (!stylesheet) { - wrbuf_printf(p->wr_error, "Element : " + wrbuf_printf(wr_error, "Element : " "attribute 'stylesheet' expected"); - return -1; + nmem_destroy(nmem); } else { char fullpath[1024]; xsltStylesheetPtr xsp; - xmlDocPtr xsp_doc; - if (!yaz_filepath_resolve(stylesheet, p->path, 0, fullpath)) + if (!yaz_filepath_resolve(stylesheet, path, 0, fullpath)) { - wrbuf_printf(p->wr_error, "Element :" + wrbuf_printf(wr_error, "Element :" " could not locate stylesheet '%s'", - stylesheet, fullpath); - if (p->path) - wrbuf_printf(p->wr_error, " with path '%s'", p->path); - - return -1; + stylesheet, stylesheet); + if (path) + wrbuf_printf(wr_error, " with path '%s'", path); + + nmem_destroy(nmem); + return 0; } - xsp_doc = xmlParseFile(fullpath); - if (!xsp_doc) + info->xsp_doc = xmlParseFile(fullpath); + if (!info->xsp_doc) { - wrbuf_printf(p->wr_error, "Element: :" + wrbuf_printf(wr_error, "Element: :" " xml parse failed: %s", stylesheet, fullpath); - if (p->path) - wrbuf_printf(p->wr_error, " with path '%s'", p->path); - return -1; + if (path) + wrbuf_printf(wr_error, " with path '%s'", path); + nmem_destroy(nmem); + return 0; } - xsp = xsltParseStylesheetDoc(xsp_doc); + /* need to copy this before passing it to the processor. It will + be encapsulated in the xsp and destroyed by xsltFreeStylesheet */ + xsp = xsltParseStylesheetDoc(xmlCopyDoc(info->xsp_doc, 1)); if (!xsp) { - wrbuf_printf(p->wr_error, "Element: :" + wrbuf_printf(wr_error, "Element: :" " xslt parse failed: %s", stylesheet, fullpath); - if (p->path) - wrbuf_printf(p->wr_error, " with path '%s'", p->path); - wrbuf_printf(p->wr_error, " (" + if (path) + wrbuf_printf(wr_error, " with path '%s'", path); + wrbuf_printf(wr_error, " (" #if YAZ_HAVE_EXSLT - + "EXSLT enabled" #else "EXSLT not supported" #endif ")"); - return -1; + xmlFreeDoc(info->xsp_doc); + nmem_destroy(info->nmem); } else { - struct yaz_record_conv_rule *r = - add_rule(p, YAZ_RECORD_CONV_RULE_XSLT); - r->u.xslt.xsp_doc = xmlCopyDoc(xsp_doc, 1); - xsltFreeStylesheet(xsp); /* will free xsp_doc */ + xsltFreeStylesheet(xsp); + return info; } } return 0; +} + +static int convert_xslt(void *vinfo, WRBUF record, WRBUF wr_error) +{ + int ret = 0; + struct xslt_info *info = vinfo; + + xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), + wrbuf_len(record)); + if (!doc) + { + wrbuf_printf(wr_error, "xmlParseMemory failed"); + ret = -1; + } + else + { + xmlDocPtr xsp_doc = xmlCopyDoc(info->xsp_doc, 1); + xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc); + xmlDocPtr res = xsltApplyStylesheet(xsp, doc, info->xsl_parms); + if (res) + { + xmlChar *out_buf = 0; + int out_len; + +#if HAVE_XSLTSAVERESULTTOSTRING + xsltSaveResultToString(&out_buf, &out_len, res, xsp); #else - wrbuf_printf(p->wr_error, "xslt unsupported." - " YAZ compiled without XSLT support"); - return -1; + xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1); #endif + if (!out_buf) + { + wrbuf_printf(wr_error, + "xsltSaveResultToString failed"); + ret = -1; + } + else + { + wrbuf_rewind(record); + wrbuf_write(record, (const char *) out_buf, out_len); + + xmlFree(out_buf); + } + xmlFreeDoc(res); + } + else + { + wrbuf_printf(wr_error, "xsltApplyStylesheet failed"); + ret = -1; + } + xmlFreeDoc(doc); + xsltFreeStylesheet(xsp); /* frees xsp_doc too */ + } + return ret; +} + +static void destroy_xslt(void *vinfo) +{ + struct xslt_info *info = vinfo; + + if (info) + { + xmlFreeDoc(info->xsp_doc); + nmem_destroy(info->nmem); + } +} + +/* YAZ_HAVE_XSLT */ +#endif + +struct select_info { + NMEM nmem; + char *xpath_expr; +}; + +static void *construct_select(const xmlNode *ptr, + const char *path, WRBUF wr_error) +{ + if (strcmp((const char *) ptr->name, "select")) + return 0; + else + { + struct _xmlAttr *attr; + NMEM nmem = nmem_create(); + struct select_info *info = nmem_malloc(nmem, sizeof(*info)); + + info->nmem = nmem; + info->xpath_expr = 0; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!xmlStrcmp(attr->name, BAD_CAST "path") && + attr->children && attr->children->type == XML_TEXT_NODE) + info->xpath_expr = + nmem_strdup(nmem, (const char *) attr->children->content); + else + { + wrbuf_printf(wr_error, "Bad attribute '%s'" + "Expected xpath.", attr->name); + nmem_destroy(nmem); + return 0; + } + } + return info; + } +} + +static int convert_select(void *vinfo, WRBUF record, WRBUF wr_error) +{ + int ret = 0; + struct select_info *info = vinfo; + + xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), + wrbuf_len(record)); + if (!doc) + { + wrbuf_printf(wr_error, "xmlParseMemory failed"); + ret = -1; + } + else + { + xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); + if (xpathCtx && info->xpath_expr) + { + xmlXPathObjectPtr xpathObj = + xmlXPathEvalExpression((const xmlChar *) info->xpath_expr, + xpathCtx); + if (xpathObj) + { + xmlNodeSetPtr nodes = xpathObj->nodesetval; + wrbuf_rewind(record); + if (nodes) + { + int i; + for (i = 0; i < nodes->nodeNr; i++) + { + xmlNode *ptr = nodes->nodeTab[i]; + if (ptr->type == XML_ELEMENT_NODE) + ptr = ptr->children; + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_TEXT_NODE) + wrbuf_puts(record, (const char *) ptr->content); + } + } + xmlXPathFreeObject(xpathObj); + } + xmlXPathFreeContext(xpathCtx); + } + xmlFreeDoc(doc); + } + return ret; } -/** \brief parse 'marc' conversion node */ -static int conv_marc(yaz_record_conv_t p, const xmlNode *ptr) +static void destroy_select(void *vinfo) { + struct select_info *info = vinfo; + + if (info) + nmem_destroy(info->nmem); +} + + +static void *construct_solrmarc(const xmlNode *ptr, + const char *path, WRBUF wr_error) +{ + if (strcmp((const char *) ptr->name, "solrmarc")) + return 0; + return wr_error; /* any non-null ptr will do; we don't use it later*/ +} + +static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error) +{ + WRBUF w = wrbuf_alloc(); + const char *buf = wrbuf_buf(record); + size_t i, sz = wrbuf_len(record); + for (i = 0; i < sz; i++) + { + int ch; + if (buf[i] == '#' && i < sz - 3 && buf[i+3] == ';' + && atoi_n_check(buf+i+1, 2, &ch)) + i += 3; + else + ch = buf[i]; + wrbuf_putc(w, ch); + } + wrbuf_rewind(record); + wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w)); + wrbuf_destroy(w); + return 0; +} + +static void destroy_solrmarc(void *info) +{ +} + +static void *construct_marc(const xmlNode *ptr, + const char *path, WRBUF wr_error) +{ + NMEM nmem = nmem_create(); + struct marc_info *info = nmem_malloc(nmem, sizeof(*info)); struct _xmlAttr *attr; - const char *input_charset = 0; - const char *output_charset = 0; const char *input_format = 0; const char *output_format = 0; - int input_format_mode = 0; - int output_format_mode = 0; - struct yaz_record_conv_rule *r; + + if (strcmp((const char *) ptr->name, "marc")) + { + nmem_destroy(nmem); + return 0; + } + info->nmem = nmem; + info->input_charset = 0; + info->output_charset = 0; + info->input_format_mode = 0; + info->output_format_mode = 0; + info->leader_spec = 0; for (attr = ptr->properties; attr; attr = attr->next) { if (!xmlStrcmp(attr->name, BAD_CAST "inputcharset") && attr->children && attr->children->type == XML_TEXT_NODE) - input_charset = (const char *) attr->children->content; + info->input_charset = (const char *) attr->children->content; else if (!xmlStrcmp(attr->name, BAD_CAST "outputcharset") && attr->children && attr->children->type == XML_TEXT_NODE) - output_charset = (const char *) attr->children->content; + info->output_charset = (const char *) attr->children->content; else if (!xmlStrcmp(attr->name, BAD_CAST "inputformat") && attr->children && attr->children->type == XML_TEXT_NODE) input_format = (const char *) attr->children->content; else if (!xmlStrcmp(attr->name, BAD_CAST "outputformat") && attr->children && attr->children->type == XML_TEXT_NODE) output_format = (const char *) attr->children->content; + else if (!xmlStrcmp(attr->name, BAD_CAST "leaderspec") && + attr->children && attr->children->type == XML_TEXT_NODE) + info->leader_spec = + nmem_strdup(info->nmem,(const char *) attr->children->content); else { - wrbuf_printf(p->wr_error, "Element : expected attributes" + wrbuf_printf(wr_error, "Element : expected attributes" "'inputformat', 'inputcharset', 'outputformat' or" - " 'outputcharset', got attribute '%s'", + " 'outputcharset', got attribute '%s'", attr->name); - return -1; + nmem_destroy(info->nmem); + return 0; } } if (!input_format) { - wrbuf_printf(p->wr_error, "Element : " + wrbuf_printf(wr_error, "Element : " "attribute 'inputformat' required"); - return -1; + nmem_destroy(info->nmem); + return 0; } else if (!strcmp(input_format, "marc")) { - input_format_mode = YAZ_MARC_ISO2709; + info->input_format_mode = YAZ_MARC_ISO2709; } else if (!strcmp(input_format, "xml")) { - input_format_mode = YAZ_MARC_MARCXML; + info->input_format_mode = YAZ_MARC_MARCXML; /** Libxml2 generates UTF-8 encoding by default . - So we convert from UTF-8 to outputcharset (if defined) + So we convert from UTF-8 to outputcharset (if defined) */ - if (!input_charset && output_charset) - input_charset = "utf-8"; + if (!info->input_charset && info->output_charset) + info->input_charset = "utf-8"; + } + else if (!strcmp(input_format, "json")) + { + info->input_format_mode = YAZ_MARC_JSON; } else { - wrbuf_printf(p->wr_error, "Element : " + wrbuf_printf(wr_error, "Element : " " Unsupported input format" - " defined by attribute value", + " defined by attribute value", input_format); - return -1; + nmem_destroy(info->nmem); + return 0; } - + if (!output_format) { - wrbuf_printf(p->wr_error, + wrbuf_printf(wr_error, "Element : attribute 'outputformat' required"); - return -1; + nmem_destroy(info->nmem); + return 0; } else if (!strcmp(output_format, "line")) { - output_format_mode = YAZ_MARC_LINE; + info->output_format_mode = YAZ_MARC_LINE; } else if (!strcmp(output_format, "marcxml")) { - output_format_mode = YAZ_MARC_MARCXML; - if (input_charset && !output_charset) - output_charset = "utf-8"; + info->output_format_mode = YAZ_MARC_MARCXML; + if (info->input_charset && !info->output_charset) + info->output_charset = "utf-8"; } - else if (!strcmp(output_format, "tmarcxml")) + else if (!strcmp(output_format, "turbomarc")) { - output_format_mode = YAZ_MARC_TMARCXML; - if (input_charset && !output_charset) - output_charset = "utf-8"; + info->output_format_mode = YAZ_MARC_TURBOMARC; + if (info->input_charset && !info->output_charset) + info->output_charset = "utf-8"; } else if (!strcmp(output_format, "marc")) { - output_format_mode = YAZ_MARC_ISO2709; + info->output_format_mode = YAZ_MARC_ISO2709; } else if (!strcmp(output_format, "marcxchange")) { - output_format_mode = YAZ_MARC_XCHANGE; - if (input_charset && !output_charset) - output_charset = "utf-8"; + info->output_format_mode = YAZ_MARC_XCHANGE; + if (info->input_charset && !info->output_charset) + info->output_charset = "utf-8"; + } + else if (!strcmp(output_format, "json")) + { + info->output_format_mode = YAZ_MARC_JSON; + if (info->input_charset && !info->output_charset) + info->output_charset = "utf-8"; } else { - wrbuf_printf(p->wr_error, "Element : " + wrbuf_printf(wr_error, "Element : " " Unsupported output format" - " defined by attribute value", + " defined by attribute value", output_format); - return -1; + nmem_destroy(info->nmem); + return 0; } - if (input_charset && output_charset) + if (info->input_charset && info->output_charset) { - yaz_iconv_t cd = yaz_iconv_open(output_charset, input_charset); + yaz_iconv_t cd = yaz_iconv_open(info->output_charset, + info->input_charset); if (!cd) { - wrbuf_printf(p->wr_error, + wrbuf_printf(wr_error, "Element :" " Unsupported character set mapping" " defined by attribute values", - input_charset, output_charset); - return -1; + info->input_charset, info->output_charset); + nmem_destroy(info->nmem); + return 0; } yaz_iconv_close(cd); } - else if (input_charset) + else if (!info->output_charset) { - wrbuf_printf(p->wr_error, "Element : " + wrbuf_printf(wr_error, "Element : " "attribute 'outputcharset' missing"); - return -1; + nmem_destroy(info->nmem); + return 0; } - else if (output_charset) + else if (!info->input_charset) { - wrbuf_printf(p->wr_error, "Element : " + wrbuf_printf(wr_error, "Element : " "attribute 'inputcharset' missing"); - return -1; + nmem_destroy(info->nmem); + return 0; } - r = add_rule(p, YAZ_RECORD_CONV_RULE_MARC); + info->input_charset = nmem_strdup(info->nmem, info->input_charset); + info->output_charset = nmem_strdup(info->nmem, info->output_charset); + return info; +} - r->u.marc.input_charset = nmem_strdup(p->nmem, input_charset); - r->u.marc.output_charset = nmem_strdup(p->nmem, output_charset); - r->u.marc.input_format = input_format_mode; - r->u.marc.output_format = output_format_mode; - return 0; +static int convert_marc(void *info, WRBUF record, WRBUF wr_error) +{ + struct marc_info *mi = info; + const char *input_charset = mi->input_charset; + int ret = 0; + yaz_marc_t mt = yaz_marc_create(); + + yaz_marc_xml(mt, mi->output_format_mode); + if (mi->leader_spec) + yaz_marc_leader_spec(mt, mi->leader_spec); + + if (mi->input_format_mode == YAZ_MARC_ISO2709) + { + int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record), + wrbuf_len(record)); + if (sz > 0) + { + if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record), + wrbuf_len(record))) + input_charset = "utf-8"; + ret = 0; + } + else + ret = -1; + } + else if (mi->input_format_mode == YAZ_MARC_MARCXML || + mi->input_format_mode == YAZ_MARC_TURBOMARC) + { + xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), + wrbuf_len(record)); + if (!doc) + { + wrbuf_printf(wr_error, "xmlParseMemory failed"); + ret = -1; + } + else + { + ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc)); + if (ret) + wrbuf_printf(wr_error, "yaz_marc_read_xml failed"); + } + xmlFreeDoc(doc); + } + else + { + wrbuf_printf(wr_error, "unsupported input format"); + ret = -1; + } + if (ret == 0) + { + yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset); + + if (cd) + yaz_marc_iconv(mt, cd); + + wrbuf_rewind(record); + ret = yaz_marc_write_mode(mt, record); + if (ret) + wrbuf_printf(wr_error, "yaz_marc_write_mode failed"); + if (cd) + yaz_iconv_close(cd); + } + yaz_marc_destroy(mt); + return ret; } -int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr) +static void destroy_marc(void *info) { + struct marc_info *mi = info; + + nmem_destroy(mi->nmem); +} + +int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr, + struct yaz_record_conv_type *types) +{ + struct yaz_record_conv_type bt[4]; + size_t i = 0; + + /* register marc */ + bt[i].construct = construct_marc; + bt[i].convert = convert_marc; + bt[i++].destroy = destroy_marc; + + bt[i-1].next = &bt[i]; + bt[i].construct = construct_solrmarc; + bt[i].convert = convert_solrmarc; + bt[i++].destroy = destroy_solrmarc; + + bt[i-1].next = &bt[i]; + bt[i].construct = construct_select; + bt[i].convert = convert_select; + bt[i++].destroy = destroy_select; + +#if YAZ_HAVE_XSLT + /* register xslt */ + bt[i-1].next = &bt[i]; + bt[i].construct = construct_xslt; + bt[i].convert = convert_xslt; + bt[i++].destroy = destroy_xslt; +#endif + + bt[i-1].next = types; yaz_record_conv_reset(p); /* parsing element children */ for (ptr = ptr->children; ptr; ptr = ptr->next) + { + struct yaz_record_conv_type *t; + struct yaz_record_conv_rule *r; + void *info = 0; + if (ptr->type != XML_ELEMENT_NODE) + continue; + for (t = &bt[0]; t; t = t->next) { - if (ptr->type != XML_ELEMENT_NODE) - continue; - if (!strcmp((const char *) ptr->name, "xslt")) - { - if (conv_xslt(p, ptr)) - return -1; - } - else if (!strcmp((const char *) ptr->name, "marc")) - { - if (conv_marc(p, ptr)) - return -1; - } - else - { - wrbuf_printf(p->wr_error, "Element : expected " - " or element, got <%s>" - , ptr->name); - return -1; - } + wrbuf_rewind(p->wr_error); + info = t->construct(ptr, p->path, p->wr_error); + + if (info || wrbuf_len(p->wr_error)) + break; + /* info== 0 and no error reported , ie not handled by it */ } + if (!info) + { + if (wrbuf_len(p->wr_error) == 0) + wrbuf_printf(p->wr_error, "Element : expected " + " or element, got <%s>" + , ptr->name); + return -1; + } + r = (struct yaz_record_conv_rule *) nmem_malloc(p->nmem, sizeof(*r)); + r->next = 0; + r->info = info; + r->type = nmem_malloc(p->nmem, sizeof(*t)); + memcpy(r->type, t, sizeof(*t)); + *p->rules_p = r; + p->rules_p = &r->next; + } return 0; } +int yaz_record_conv_configure(yaz_record_conv_t p, const xmlNode *ptr) +{ + return yaz_record_conv_configure_t(p, ptr, 0); +} + static int yaz_record_conv_record_rule(yaz_record_conv_t p, struct yaz_record_conv_rule *r, const char *input_record_buf, size_t input_record_len, - WRBUF output_record); + WRBUF output_record) +{ + int ret = 0; + WRBUF record = output_record; /* pointer transfer */ + wrbuf_rewind(p->wr_error); + + wrbuf_write(record, input_record_buf, input_record_len); + for (; ret == 0 && r; r = r->next) + ret = r->type->convert(r->info, record, p->wr_error); + return ret; +} int yaz_record_conv_opac_record(yaz_record_conv_t p, Z_OPACRecord *input_record, @@ -405,24 +777,33 @@ int yaz_record_conv_opac_record(yaz_record_conv_t p, { int ret = 0; struct yaz_record_conv_rule *r = p->rules; - if (!r || r->which != YAZ_RECORD_CONV_RULE_MARC) + if (!r || r->type->construct != construct_marc) + { + wrbuf_puts(p->wr_error, "Expecting MARC rule as first rule for OPAC"); ret = -1; /* no marc rule so we can't do OPAC */ + } else { + struct marc_info *mi = r->info; + const char *input_charset = mi->input_charset; + yaz_iconv_t cd; + WRBUF res = wrbuf_alloc(); yaz_marc_t mt = yaz_marc_create(); - yaz_iconv_t cd = yaz_iconv_open(r->u.marc.output_charset, - r->u.marc.input_charset); - + + if (yaz_opac_check_marc21_coding(input_charset, input_record)) + input_charset = "utf-8"; + cd = yaz_iconv_open(mi->output_charset, input_charset); + wrbuf_rewind(p->wr_error); - yaz_marc_xml(mt, r->u.marc.output_format); - + yaz_marc_xml(mt, mi->output_format_mode); + yaz_marc_iconv(mt, cd); - + yaz_opac_decode_wrbuf(mt, input_record, res); if (ret != -1) { - ret = yaz_record_conv_record_rule(p, + ret = yaz_record_conv_record_rule(p, r->next, wrbuf_buf(res), wrbuf_len(res), output_record); @@ -445,127 +826,6 @@ int yaz_record_conv_record(yaz_record_conv_t p, input_record_len, output_record); } -static int yaz_record_conv_record_rule(yaz_record_conv_t p, - struct yaz_record_conv_rule *r, - const char *input_record_buf, - size_t input_record_len, - WRBUF output_record) -{ - int ret = 0; - WRBUF record = output_record; /* pointer transfer */ - wrbuf_rewind(p->wr_error); - - wrbuf_write(record, input_record_buf, input_record_len); - for (; ret == 0 && r; r = r->next) - { - if (r->which == YAZ_RECORD_CONV_RULE_MARC) - { - yaz_iconv_t cd = - yaz_iconv_open(r->u.marc.output_charset, - r->u.marc.input_charset); - yaz_marc_t mt = yaz_marc_create(); - - yaz_marc_xml(mt, r->u.marc.output_format); - - if (cd) - yaz_marc_iconv(mt, cd); - if (r->u.marc.input_format == YAZ_MARC_ISO2709) - { - int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record), - wrbuf_len(record)); - if (sz > 0) - ret = 0; - else - ret = -1; - } - else if (r->u.marc.input_format == YAZ_MARC_MARCXML || - r->u.marc.input_format == YAZ_MARC_TMARCXML) - { - xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), - wrbuf_len(record)); - if (!doc) - { - wrbuf_printf(p->wr_error, "xmlParseMemory failed"); - ret = -1; - } - else - { - ret = yaz_marc_read_xml(mt, xmlDocGetRootElement(doc)); - if (ret) - wrbuf_printf(p->wr_error, "yaz_marc_read_xml failed"); - } - xmlFreeDoc(doc); - } - else - { - wrbuf_printf(p->wr_error, "unsupported input format"); - ret = -1; - } - if (ret == 0) - { - wrbuf_rewind(record); - ret = yaz_marc_write_mode(mt, record); - if (ret) - wrbuf_printf(p->wr_error, "yaz_marc_write_mode failed"); - } - if (cd) - yaz_iconv_close(cd); - yaz_marc_destroy(mt); - } -#if YAZ_HAVE_XSLT - else if (r->which == YAZ_RECORD_CONV_RULE_XSLT) - { - xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), - wrbuf_len(record)); - if (!doc) - { - wrbuf_printf(p->wr_error, "xmlParseMemory failed"); - ret = -1; - } - else - { - xmlDocPtr xsp_doc = xmlCopyDoc(r->u.xslt.xsp_doc, 1); - xsltStylesheetPtr xsp = xsltParseStylesheetDoc(xsp_doc); - xmlDocPtr res = xsltApplyStylesheet(xsp, doc, 0); - if (res) - { - xmlChar *out_buf = 0; - int out_len; - -#if YAZ_HAVE_XSLTSAVERESULTTOSTRING - xsltSaveResultToString(&out_buf, &out_len, res, xsp); -#else - xmlDocDumpFormatMemory (res, &out_buf, &out_len, 1); -#endif - if (!out_buf) - { - wrbuf_printf(p->wr_error, - "xsltSaveResultToString failed"); - ret = -1; - } - else - { - wrbuf_rewind(record); - wrbuf_write(record, (const char *) out_buf, out_len); - - xmlFree(out_buf); - } - xmlFreeDoc(res); - } - else - { - wrbuf_printf(p->wr_error, "xsltApplyStylesheet failed"); - ret = -1; - } - xmlFreeDoc(doc); - xsltFreeStylesheet(xsp); /* frees xsp_doc too */ - } - } -#endif - } - return ret; -} - const char *yaz_record_conv_get_error(yaz_record_conv_t p) { return wrbuf_cstr(p->wr_error); @@ -578,6 +838,18 @@ void yaz_record_conv_set_path(yaz_record_conv_t p, const char *path) if (path) p->path = xstrdup(path); } + +yaz_record_conv_t yaz_record_conv_create() +{ + yaz_record_conv_t p = (yaz_record_conv_t) xmalloc(sizeof(*p)); + p->nmem = nmem_create(); + p->wr_error = wrbuf_alloc(); + p->rules = 0; + p->path = 0; + return p; +} + +/* YAZ_HAVE_XML2 */ #endif /*