X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=src%2Frecord_conv.c;h=2fbe98698d2f933d01ff0aac4a09c4ff93f1b767;hp=4e69f43db81bb9e1d5500ef92c769acec670825d;hb=8cb8947e3a7bff4dbf8f124871cb4905df1adce7;hpb=0c46d2e66bdeea1600e700124a81a5d0a65d349e diff --git a/src/record_conv.c b/src/record_conv.c index 4e69f43..2fbe986 100644 --- a/src/record_conv.c +++ b/src/record_conv.c @@ -1,5 +1,5 @@ /* This file is part of the YAZ toolkit. - * Copyright (C) 1995-2013 Index Data + * Copyright (C) Index Data * See the file LICENSE for details. */ /** @@ -25,6 +25,8 @@ #include #include #include +#include +#include #if YAZ_HAVE_XSLT #include #include @@ -194,7 +196,7 @@ static void *construct_xslt(const xmlNode *ptr, { wrbuf_printf(wr_error, "Element : " "attribute 'stylesheet' expected"); - return 0; + nmem_destroy(nmem); } else { @@ -208,6 +210,7 @@ static void *construct_xslt(const xmlNode *ptr, if (path) wrbuf_printf(wr_error, " with path '%s'", path); + nmem_destroy(nmem); return 0; } info->xsp_doc = xmlParseFile(fullpath); @@ -217,6 +220,7 @@ static void *construct_xslt(const xmlNode *ptr, " xml parse failed: %s", stylesheet, fullpath); if (path) wrbuf_printf(wr_error, " with path '%s'", path); + nmem_destroy(nmem); return 0; } /* need to copy this before passing it to the processor. It will @@ -238,7 +242,6 @@ static void *construct_xslt(const xmlNode *ptr, ")"); xmlFreeDoc(info->xsp_doc); nmem_destroy(info->nmem); - return 0; } else { @@ -316,6 +319,129 @@ static void destroy_xslt(void *vinfo) /* YAZ_HAVE_XSLT */ #endif +struct select_info { + NMEM nmem; + char *xpath_expr; +}; + +static void *construct_select(const xmlNode *ptr, + const char *path, WRBUF wr_error) +{ + if (strcmp((const char *) ptr->name, "select")) + return 0; + else + { + struct _xmlAttr *attr; + NMEM nmem = nmem_create(); + struct select_info *info = nmem_malloc(nmem, sizeof(*info)); + + info->nmem = nmem; + info->xpath_expr = 0; + for (attr = ptr->properties; attr; attr = attr->next) + { + if (!xmlStrcmp(attr->name, BAD_CAST "path") && + attr->children && attr->children->type == XML_TEXT_NODE) + info->xpath_expr = + nmem_strdup(nmem, (const char *) attr->children->content); + else + { + wrbuf_printf(wr_error, "Bad attribute '%s'" + "Expected xpath.", attr->name); + nmem_destroy(nmem); + return 0; + } + } + return info; + } +} + +static int convert_select(void *vinfo, WRBUF record, WRBUF wr_error) +{ + int ret = 0; + struct select_info *info = vinfo; + + xmlDocPtr doc = xmlParseMemory(wrbuf_buf(record), + wrbuf_len(record)); + if (!doc) + { + wrbuf_printf(wr_error, "xmlParseMemory failed"); + ret = -1; + } + else + { + xmlXPathContextPtr xpathCtx = xmlXPathNewContext(doc); + if (xpathCtx && info->xpath_expr) + { + xmlXPathObjectPtr xpathObj = + xmlXPathEvalExpression((const xmlChar *) info->xpath_expr, + xpathCtx); + if (xpathObj) + { + xmlNodeSetPtr nodes = xpathObj->nodesetval; + wrbuf_rewind(record); + if (nodes) + { + int i; + for (i = 0; i < nodes->nodeNr; i++) + { + xmlNode *ptr = nodes->nodeTab[i]; + if (ptr->type == XML_ELEMENT_NODE) + ptr = ptr->children; + for (; ptr; ptr = ptr->next) + if (ptr->type == XML_TEXT_NODE) + wrbuf_puts(record, (const char *) ptr->content); + } + } + xmlXPathFreeObject(xpathObj); + } + xmlXPathFreeContext(xpathCtx); + } + xmlFreeDoc(doc); + } + return ret; +} + +static void destroy_select(void *vinfo) +{ + struct select_info *info = vinfo; + + if (info) + nmem_destroy(info->nmem); +} + + +static void *construct_solrmarc(const xmlNode *ptr, + const char *path, WRBUF wr_error) +{ + if (strcmp((const char *) ptr->name, "solrmarc")) + return 0; + return wr_error; /* any non-null ptr will do; we don't use it later*/ +} + +static int convert_solrmarc(void *info, WRBUF record, WRBUF wr_error) +{ + WRBUF w = wrbuf_alloc(); + const char *buf = wrbuf_buf(record); + size_t i, sz = wrbuf_len(record); + for (i = 0; i < sz; i++) + { + int ch; + if (buf[i] == '#' && i < sz - 3 && buf[i+3] == ';' + && atoi_n_check(buf+i+1, 2, &ch)) + i += 3; + else + ch = buf[i]; + wrbuf_putc(w, ch); + } + wrbuf_rewind(record); + wrbuf_write(record, wrbuf_buf(w), wrbuf_len(w)); + wrbuf_destroy(w); + return 0; +} + +static void destroy_solrmarc(void *info) +{ +} static void *construct_marc(const xmlNode *ptr, const char *path, WRBUF wr_error) @@ -331,7 +457,6 @@ static void *construct_marc(const xmlNode *ptr, nmem_destroy(nmem); return 0; } - info->nmem = nmem; info->input_charset = 0; info->output_charset = 0; @@ -387,6 +512,10 @@ static void *construct_marc(const xmlNode *ptr, if (!info->input_charset && info->output_charset) info->input_charset = "utf-8"; } + else if (!strcmp(input_format, "json")) + { + info->input_format_mode = YAZ_MARC_JSON; + } else { wrbuf_printf(wr_error, "Element : " @@ -430,6 +559,12 @@ static void *construct_marc(const xmlNode *ptr, if (info->input_charset && !info->output_charset) info->output_charset = "utf-8"; } + else if (!strcmp(output_format, "json")) + { + info->output_format_mode = YAZ_MARC_JSON; + if (info->input_charset && !info->output_charset) + info->output_charset = "utf-8"; + } else { wrbuf_printf(wr_error, "Element : " @@ -477,23 +612,25 @@ static void *construct_marc(const xmlNode *ptr, static int convert_marc(void *info, WRBUF record, WRBUF wr_error) { struct marc_info *mi = info; + const char *input_charset = mi->input_charset; int ret = 0; - - yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, mi->input_charset); yaz_marc_t mt = yaz_marc_create(); yaz_marc_xml(mt, mi->output_format_mode); if (mi->leader_spec) yaz_marc_leader_spec(mt, mi->leader_spec); - if (cd) - yaz_marc_iconv(mt, cd); if (mi->input_format_mode == YAZ_MARC_ISO2709) { int sz = yaz_marc_read_iso2709(mt, wrbuf_buf(record), wrbuf_len(record)); if (sz > 0) + { + if (yaz_marc_check_marc21_coding(input_charset, wrbuf_buf(record), + wrbuf_len(record))) + input_charset = "utf-8"; ret = 0; + } else ret = -1; } @@ -522,13 +659,18 @@ static int convert_marc(void *info, WRBUF record, WRBUF wr_error) } if (ret == 0) { + yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, input_charset); + + if (cd) + yaz_marc_iconv(mt, cd); + wrbuf_rewind(record); ret = yaz_marc_write_mode(mt, record); if (ret) wrbuf_printf(wr_error, "yaz_marc_write_mode failed"); + if (cd) + yaz_iconv_close(cd); } - if (cd) - yaz_iconv_close(cd); yaz_marc_destroy(mt); return ret; } @@ -543,24 +685,33 @@ static void destroy_marc(void *info) int yaz_record_conv_configure_t(yaz_record_conv_t p, const xmlNode *ptr, struct yaz_record_conv_type *types) { - struct yaz_record_conv_type bt[2]; + struct yaz_record_conv_type bt[4]; + size_t i = 0; /* register marc */ - bt[0].construct = construct_marc; - bt[0].convert = convert_marc; - bt[0].destroy = destroy_marc; + bt[i].construct = construct_marc; + bt[i].convert = convert_marc; + bt[i++].destroy = destroy_marc; + + bt[i-1].next = &bt[i]; + bt[i].construct = construct_solrmarc; + bt[i].convert = convert_solrmarc; + bt[i++].destroy = destroy_solrmarc; + + bt[i-1].next = &bt[i]; + bt[i].construct = construct_select; + bt[i].convert = convert_select; + bt[i++].destroy = destroy_select; #if YAZ_HAVE_XSLT /* register xslt */ - bt[0].next = &bt[1]; - bt[1].next = types; - bt[1].construct = construct_xslt; - bt[1].convert = convert_xslt; - bt[1].destroy = destroy_xslt; -#else - bt[0].next = types; + bt[i-1].next = &bt[i]; + bt[i].construct = construct_xslt; + bt[i].convert = convert_xslt; + bt[i++].destroy = destroy_xslt; #endif + bt[i-1].next = types; yaz_record_conv_reset(p); /* parsing element children */ @@ -627,15 +778,22 @@ int yaz_record_conv_opac_record(yaz_record_conv_t p, int ret = 0; struct yaz_record_conv_rule *r = p->rules; if (!r || r->type->construct != construct_marc) + { + wrbuf_puts(p->wr_error, "Expecting MARC rule as first rule for OPAC"); ret = -1; /* no marc rule so we can't do OPAC */ + } else { struct marc_info *mi = r->info; + const char *input_charset = mi->input_charset; + yaz_iconv_t cd; WRBUF res = wrbuf_alloc(); yaz_marc_t mt = yaz_marc_create(); - yaz_iconv_t cd = yaz_iconv_open(mi->output_charset, - mi->input_charset); + + if (yaz_opac_check_marc21_coding(input_charset, input_record)) + input_charset = "utf-8"; + cd = yaz_iconv_open(mi->output_charset, input_charset); wrbuf_rewind(p->wr_error); yaz_marc_xml(mt, mi->output_format_mode); @@ -688,9 +846,6 @@ yaz_record_conv_t yaz_record_conv_create() p->wr_error = wrbuf_alloc(); p->rules = 0; p->path = 0; -#if YAZ_HAVE_EXSLT - exsltRegisterAll(); -#endif return p; }