From 919e3dd6d63e3371053b5be495ee4c0b74ef2e13 Mon Sep 17 00:00:00 2001 From: Wolfram Schneider Date: Mon, 17 Nov 2008 16:12:52 +0100 Subject: [PATCH] add ISO 5426 reference documentation --- src/iconv_decode_iso5426.c | 91 +++++++++++++++++++++++--------------------- 1 file changed, 47 insertions(+), 44 deletions(-) diff --git a/src/iconv_decode_iso5426.c b/src/iconv_decode_iso5426.c index eecee04..0bef7a7 100644 --- a/src/iconv_decode_iso5426.c +++ b/src/iconv_decode_iso5426.c @@ -4,10 +4,14 @@ */ /** * \file - * \brief MARC-8 decoding + * \brief ISO 5426 decoding * * MARC-8 reference: - * http://www.loc.gov/marc/specifications/speccharmarc8.html + * http://www.loc.gov/marc/specifications/speccharmarc8.html + * + * ISO 5426 reference (in German) + * Zeichenkonkordanz MAB2-Zeichensatz - ISO/IEC 10646 / Unicode + * http://www.d-nb.de/standardisierung/pdf/mab_unic.pdf */ #if HAVE_CONFIG_H @@ -32,27 +36,27 @@ struct decoder_data { size_t comb_no_read[8]; }; -yaz_conv_func_t yaz_marc8_42_conv; -yaz_conv_func_t yaz_marc8_45_conv; -yaz_conv_func_t yaz_marc8_67_conv; -yaz_conv_func_t yaz_marc8_62_conv; -yaz_conv_func_t yaz_marc8_70_conv; -yaz_conv_func_t yaz_marc8_32_conv; -yaz_conv_func_t yaz_marc8_4E_conv; -yaz_conv_func_t yaz_marc8_51_conv; -yaz_conv_func_t yaz_marc8_33_conv; -yaz_conv_func_t yaz_marc8_34_conv; -yaz_conv_func_t yaz_marc8_53_conv; -yaz_conv_func_t yaz_marc8_31_conv; +yaz_conv_func_t yaz_iso5426_42_conv; +yaz_conv_func_t yaz_iso5426_45_conv; +yaz_conv_func_t yaz_iso5426_67_conv; +yaz_conv_func_t yaz_iso5426_62_conv; +yaz_conv_func_t yaz_iso5426_70_conv; +yaz_conv_func_t yaz_iso5426_32_conv; +yaz_conv_func_t yaz_iso5426_4E_conv; +yaz_conv_func_t yaz_iso5426_51_conv; +yaz_conv_func_t yaz_iso5426_33_conv; +yaz_conv_func_t yaz_iso5426_34_conv; +yaz_conv_func_t yaz_iso5426_53_conv; +yaz_conv_func_t yaz_iso5426_31_conv; -static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, +static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd, struct decoder_data 
*data, unsigned char *inp, size_t inbytesleft, size_t *no_read, int *comb); -static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d, +static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read) { @@ -69,7 +73,7 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d, the processing of the closing ligature character */ /* this code is no longer necessary.. our handlers code in - yaz_marc8_?_conv (generated by charconv.tcl) now returns + yaz_iso5426_?_conv (generated by charconv.tcl) now returns 0 and no_read=1 when a sequence does not match the input. The SECOND HALFs in codetables.xml produces a non-existant entry in the conversion trie.. Hence when met, the input byte is @@ -95,7 +99,7 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d, *no_read = 0; break; } - x = yaz_read_marc8_comb(cd, data, inp, inbytesleft, no_read, &comb); + x = yaz_read_iso5426_comb(cd, data, inp, inbytesleft, no_read, &comb); if (!comb || !x) break; data->comb_x[data->comb_size] = x; @@ -106,12 +110,12 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d, return x; } -static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d, +static unsigned long read_iso5426s(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read) { struct decoder_data *data = (struct decoder_data *) d->data; - unsigned long x = read_marc8(cd, d, inp, inbytesleft, no_read); + unsigned long x = read_iso5426(cd, d, inp, inbytesleft, no_read); if (x && data->comb_size == 1) { if (yaz_iso_8859_1_lookup_x12(x, data->comb_x[0], &x)) @@ -123,7 +127,7 @@ static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d, return x; } -static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, +static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd, struct decoder_data *data, unsigned char *inp, size_t inbytesleft, size_t *no_read, 
@@ -189,41 +193,44 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, { case 'B': /* Basic ASCII */ case 's': /* ASCII */ - x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case 'E': /* ANSEL */ - x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128); + x = yaz_iso5426_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128); break; + +#if 0 case 'g': /* Greek */ - x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case 'b': /* Subscripts */ - x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case 'p': /* Superscripts */ - x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case '2': /* Basic Hebrew */ - x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case 'N': /* Basic Cyrillic */ - x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case 'Q': /* Extended Cyrillic */ - x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case '3': /* Basic Arabic */ - x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case '4': /* Extended Arabic */ - x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case 'S': /* Greek */ - x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 
0); + x = yaz_iso5426_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; case '1': /* Chinese, Japanese, Korean (EACC) */ - x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); + x = yaz_iso5426_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0); break; +#endif default: *no_read = 0; yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ); @@ -239,7 +246,7 @@ incomplete: } -static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d, +static size_t init_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d, unsigned char *inp, size_t inbytesleft, size_t *no_read) { @@ -250,29 +257,25 @@ static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d, return 0; } -void destroy_marc8(yaz_iconv_decoder_t d) +void destroy_iso5426(yaz_iconv_decoder_t d) { struct decoder_data *data = (struct decoder_data *) d->data; xfree(data); } -yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode, +yaz_iconv_decoder_t yaz_iso5426_decoder(const char *fromcode, yaz_iconv_decoder_t d) { - if (!yaz_matchstr(fromcode, "MARC8") || !yaz_matchstr(fromcode, "ANSEL")) - d->read_handle = read_marc8; - else if (!yaz_matchstr(fromcode, "ISO5426")) - d->read_handle = read_marc8; - else if (!yaz_matchstr(fromcode, "MARC8s")) - d->read_handle = read_marc8s; + if (!yaz_matchstr(fromcode, "ISO5426")) + d->read_handle = read_iso5426; else return 0; { struct decoder_data *data = (struct decoder_data *) xmalloc(sizeof(*data)); d->data = data; - d->init_handle = init_marc8; - d->destroy_handle = destroy_marc8; + d->init_handle = init_iso5426; + d->destroy_handle = destroy_iso5426; } return d; } -- 1.7.10.4