From: Adam Dickmeiss
Date: Wed, 12 Sep 2012 12:44:47 +0000 (+0200)
Subject: Skip bad UTF-8 chars in MARC leaders
X-Git-Tag: v4.2.38~3
X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=commitdiff_plain;h=6725546edd736d234e34c8878bdd1fad85be8ee1

Skip bad UTF-8 chars in MARC leaders

But only when outputting XML.
---

diff --git a/src/marcdisp.c b/src/marcdisp.c
index 50447be..50339ad 100644
--- a/src/marcdisp.c
+++ b/src/marcdisp.c
@@ -771,12 +771,31 @@ static int yaz_marc_write_marcxml_wrbuf(yaz_marc_t mt, WRBUF wr,
     return 0;
 }
 
+static void sanitise_leader_for_utf8(yaz_marc_t mt)
+{
+    /* the leader MUST be ASCII for UTF-8 output (XML) */
+    struct yaz_marc_node *n;
+    for (n = mt->nodes; n; n = n->next)
+        if (n->which == YAZ_MARC_LEADER)
+        {
+            size_t i;
+            for (i = 0; n->u.leader[i]; i++)
+                if (n->u.leader[i] < ' ' || n->u.leader[i] > 126)
+                {
+                    n->u.leader[i] = ' ';
+                    yaz_marc_cprintf(mt, "Fixing leader char at offset %d",
+                                     (int) (i+1));
+                }
+        }
+}
+
 static int yaz_marc_write_marcxml_ns(yaz_marc_t mt, WRBUF wr,
                                      const char *ns,
                                      const char *format,
                                      const char *type,
                                      int turbo)
 {
+    sanitise_leader_for_utf8(mt);
     if (mt->write_using_libxml2)
     {
 #if YAZ_HAVE_XML2