From 85e7aad7167c848ae654bacd378403894f2f889b Mon Sep 17 00:00:00 2001 From: Wolfram Schneider Date: Mon, 17 Nov 2008 15:15:17 +0100 Subject: [PATCH 1/1] add copy of iconv_encode_marc8.c --- src/iconv_encode_iso5426.c | 443 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 443 insertions(+) create mode 100644 src/iconv_encode_iso5426.c diff --git a/src/iconv_encode_iso5426.c b/src/iconv_encode_iso5426.c new file mode 100644 index 0000000..0fa3b56 --- /dev/null +++ b/src/iconv_encode_iso5426.c @@ -0,0 +1,443 @@ +/* This file is part of the YAZ toolkit. + * Copyright (C) 1995-2008 Index Data + * See the file LICENSE for details. + */ +/** + * \file + * \brief MARC-8 encoding + * + * MARC-8 reference: + * http://www.loc.gov/marc/specifications/speccharmarc8.html + */ + +#if HAVE_CONFIG_H +#include +#endif + +#include +#include +#include +#include + +#include +#include +#include "iconv-p.h" + +yaz_conv_func_t yaz_marc8r_42_conv; +yaz_conv_func_t yaz_marc8r_45_conv; +yaz_conv_func_t yaz_marc8r_67_conv; +yaz_conv_func_t yaz_marc8r_62_conv; +yaz_conv_func_t yaz_marc8r_70_conv; +yaz_conv_func_t yaz_marc8r_32_conv; +yaz_conv_func_t yaz_marc8r_4E_conv; +yaz_conv_func_t yaz_marc8r_51_conv; +yaz_conv_func_t yaz_marc8r_33_conv; +yaz_conv_func_t yaz_marc8r_34_conv; +yaz_conv_func_t yaz_marc8r_53_conv; +yaz_conv_func_t yaz_marc8r_31_conv; + +#define ESC "\033" + +struct encoder_data +{ + unsigned write_marc8_second_half_char; + unsigned long write_marc8_last; + int write_marc8_ncr; + const char *write_marc8_lpage; + const char *write_marc8_g0; + const char *write_marc8_g1; +}; + +static void init_marc8(yaz_iconv_encoder_t w) +{ + struct encoder_data *data = (struct encoder_data *) w->data; + data->write_marc8_second_half_char = 0; + data->write_marc8_last = 0; + data->write_marc8_ncr = 0; + data->write_marc8_lpage = 0; + data->write_marc8_g0 = ESC "(B"; + data->write_marc8_g1 = 0; +} + +static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, + struct encoder_data *w, + char **outbuf, size_t *outbytesleft, + const char *page_chr); + +static unsigned long lookup_marc8(yaz_iconv_t cd, + unsigned long x, int *comb, + const char **page_chr) +{ + char utf8_buf[7]; + char *utf8_outbuf = utf8_buf; + size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r; + int error_code; + + r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code); + if (r == (size_t)(-1)) + { + yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ); + return 0; + } + else + { + unsigned char *inp; + size_t inbytesleft, no_read_sub = 0; + unsigned long x; + + *utf8_outbuf = '\0'; + inp = (unsigned char *) utf8_buf; + inbytesleft = strlen(utf8_buf); + + x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "(B"; + return x; + } + x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "(B"; + return x; + } + x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "b"; + return x; + } + x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "p"; + return x; + } + x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "(2"; + return x; + } + x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "(N"; + return x; + } + x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "(Q"; + return x; + } + x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "(3"; + return x; + } + x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "(4"; + return x; + } + x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "(S"; + return x; + } + x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0); + if (x) + { + *page_chr = ESC "$1"; + return x; + } + yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ); + return x; + } +} + +static size_t flush_combos(yaz_iconv_t cd, + struct encoder_data *w, + char **outbuf, size_t *outbytesleft) +{ + unsigned long y = w->write_marc8_last; + + if (!y) + return 0; + + assert(w->write_marc8_lpage); + if (w->write_marc8_lpage) + { + size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, + w->write_marc8_lpage); + if (r) + return r; + } + + if (9 >= *outbytesleft) + { + yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG); + return (size_t) (-1); + } + if (w->write_marc8_ncr) + { + yaz_snprintf(*outbuf, 9, "&#x%04x;", y); + (*outbytesleft) -= 8; + (*outbuf) += 8; + } + else + { + size_t out_no = 0; + unsigned char byte; + + byte = (unsigned char )((y>>16) & 0xff); + if (byte) + (*outbuf)[out_no++] = byte; + byte = (unsigned char)((y>>8) & 0xff); + if (byte) + (*outbuf)[out_no++] = byte; + byte = (unsigned char )(y & 0xff); + if (byte) + (*outbuf)[out_no++] = byte; + *outbuf += out_no; + (*outbytesleft) -= out_no; + } + + if (w->write_marc8_second_half_char) + { + *(*outbuf)++ = w->write_marc8_second_half_char; + (*outbytesleft)--; + } + + w->write_marc8_last = 0; + w->write_marc8_ncr = 0; + w->write_marc8_lpage = 0; + w->write_marc8_second_half_char = 0; + return 0; +} + +static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, + struct encoder_data *w, + char **outbuf, size_t *outbytesleft, + const char *page_chr) +{ + const char **old_page_chr = &w->write_marc8_g0; + + /* are we going to a G1-set (such as such as ESC ")!E") */ + if (page_chr && page_chr[1] == ')') + old_page_chr = &w->write_marc8_g1; + + if (!*old_page_chr || strcmp(page_chr, *old_page_chr)) + { + size_t plen = 0; + const char *page_out = page_chr; + + if (*outbytesleft < 8) + { + yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG); + + return (size_t) (-1); + } + + if (*old_page_chr) + { + if (!strcmp(*old_page_chr, ESC "p") + || !strcmp(*old_page_chr, ESC "g") + || !strcmp(*old_page_chr, ESC "b")) + { + page_out = ESC "s"; + /* Technique 1 leave */ + if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */ + { + /* Must leave script + enter new page */ + plen = strlen(page_out); + memcpy(*outbuf, page_out, plen); + (*outbuf) += plen; + (*outbytesleft) -= plen; + page_out = ESC "(B"; + } + } + } + *old_page_chr = page_chr; + plen = strlen(page_out); + memcpy(*outbuf, page_out, plen); + (*outbuf) += plen; + (*outbytesleft) -= plen; + } + return 0; +} + + +static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w, + unsigned long x, + char **outbuf, size_t *outbytesleft, + int loss_mode) +{ + int comb = 0; + int enable_ncr = 0; + const char *page_chr = 0; + unsigned long y = lookup_marc8(cd, x, &comb, &page_chr); + + if (!y) + { + if (loss_mode == 0) + return (size_t) (-1); + page_chr = ESC "(B"; + if (loss_mode == 1) + y = '|'; + else + { + y = x; + enable_ncr = 1; + } + } + + if (comb) + { + if (page_chr) + { + size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, + page_chr); + if (r) + return r; + } + if (x == 0x0361) + w->write_marc8_second_half_char = 0xEC; + else if (x == 0x0360) + w->write_marc8_second_half_char = 0xFB; + + if (*outbytesleft <= 1) + { + yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG); + return (size_t) (-1); + } + *(*outbuf)++ = y; + (*outbytesleft)--; + } + else + { + size_t r = flush_combos(cd, w, outbuf, outbytesleft); + if (r) + return r; + + w->write_marc8_last = y; + w->write_marc8_lpage = page_chr; + w->write_marc8_ncr = enable_ncr; + } + return 0; +} + +static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en, + char **outbuf, size_t *outbytesleft) +{ + struct encoder_data *w = (struct encoder_data *) en->data; + size_t r = flush_combos(cd, w, outbuf, outbytesleft); + if (r) + return r; + w->write_marc8_g1 = 0; + return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B"); +} + +static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w, + unsigned long x, + char **outbuf, size_t *outbytesleft, + int loss_mode) +{ + unsigned long x1, x2; + if (yaz_iso_8859_1_lookup_y(x, &x1, &x2)) + { + /* save the output pointers .. */ + char *outbuf0 = *outbuf; + size_t outbytesleft0 = *outbytesleft; + int last_ch = w->write_marc8_last; + int ncr = w->write_marc8_ncr; + const char *lpage = w->write_marc8_lpage; + size_t r; + + r = yaz_write_marc8_2(cd, w, x1, + outbuf, outbytesleft, loss_mode); + if (r) + return r; + r = yaz_write_marc8_2(cd, w, x2, + outbuf, outbytesleft, loss_mode); + if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG) + { + /* not enough room. reset output to original values */ + *outbuf = outbuf0; + *outbytesleft = outbytesleft0; + w->write_marc8_last = last_ch; + w->write_marc8_ncr = ncr; + w->write_marc8_lpage = lpage; + } + return r; + } + return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode); +} + +static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e, + unsigned long x, + char **outbuf, size_t *outbytesleft) +{ + return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data, + x, outbuf, outbytesleft, 0); +} + +static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e, + unsigned long x, + char **outbuf, size_t *outbytesleft) +{ + return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data, + x, outbuf, outbytesleft, 1); +} + +static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e, + unsigned long x, + char **outbuf, size_t *outbytesleft) +{ + return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data, + x, outbuf, outbytesleft, 2); +} + +static void destroy_marc8(yaz_iconv_encoder_t e) +{ + xfree(e->data); +} + +yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode, + yaz_iconv_encoder_t e) + +{ + if (!yaz_matchstr(tocode, "MARC8")) + e->write_handle = write_marc8_normal; + else if (!yaz_matchstr(tocode, "MARC8s")) + e->write_handle = write_marc8_normal; + else if (!yaz_matchstr(tocode, "MARC8lossy")) + e->write_handle = write_marc8_lossy; + else if (!yaz_matchstr(tocode, "MARC8lossless")) + e->write_handle = write_marc8_lossless; + else + return 0; + + { + struct encoder_data *data = (struct encoder_data *) + xmalloc(sizeof(*data)); + e->data = data; + e->destroy_handle = destroy_marc8; + e->flush_handle = flush_marc8; + e->init_handle = init_marc8; + } + return e; +} + + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ -- 1.7.10.4