-#define ESC "\033" /* escape byte, octal 033 == decimal 27; prefixes the page strings chosen in lookup_marc8() */
-
-static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
- char **outbuf, size_t *outbytesleft,
- const char *page_chr); /* forward declaration; called by flush_combos() */
-
-/* Decode one ISO-8859-1 character: the result is simply the value of the
-   first input byte.  One byte is always reported as consumed.
-   cd and inbytesleft are not inspected here, so inp[0] must be readable. */
-static unsigned long yaz_read_ISO8859_1(yaz_iconv_t cd, unsigned char *inp,
-                                        size_t inbytesleft, size_t *no_read)
-{
-    unsigned long code_point = *inp;
-
-    *no_read = 1;
-    return code_point;
-}
-
-
-
-#if HAVE_WCHAR_H
-/* Decode one character stored in the input as a raw wchar_t.
-   On success returns its value and sets *no_read to sizeof(wchar_t).
-   If fewer than sizeof(wchar_t) bytes remain, sets cd->my_errno to
-   YAZ_ICONV_EINVAL, sets *no_read to 0 and returns 0. */
-static unsigned long yaz_read_wchar_t(yaz_iconv_t cd, unsigned char *inp,
-                                      size_t inbytesleft, size_t *no_read)
-{
-    wchar_t wide;
-
-    if (inbytesleft < sizeof(wide))
-    {
-        /* incomplete input */
-        cd->my_errno = YAZ_ICONV_EINVAL;
-        *no_read = 0;
-        return 0;
-    }
-
-    /* copy the bytes out instead of casting the input pointer */
-    memcpy(&wide, inp, sizeof(wide));
-    *no_read = sizeof(wide);
-    return (unsigned long) wide;
-}
-#endif
-
-
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read,
- int *comb); /* forward declaration; yaz_read_marc8() calls it before its definition */
-
-/* Read the next character from MARC-8 input.
-
-   Characters that yaz_read_marc8_comb() flags as combining are not
-   returned right away.  They are collected (at most 8) in cd->comb_x,
-   with their byte counts in cd->comb_no_read, until a non-combining
-   character or a zero result is seen; that value is what this call
-   returns.  Subsequent calls then hand out the collected characters one
-   at a time, each with its own byte count, without looking at the input.
-
-   If the input runs out after at least one combining character has been
-   collected, cd->my_errno is set to YAZ_ICONV_EINVAL, *no_read is set to
-   0 and 0 is returned. */
-static unsigned long yaz_read_marc8(yaz_iconv_t cd, unsigned char *inp,
-                                    size_t inbytesleft, size_t *no_read)
-{
-    unsigned long ch;
-
-    /* Collected combining characters pending from an earlier call?
-       No special treatment of double diacritics is needed here: per the
-       original note, the generated yaz_marc8_?_conv handlers return 0
-       with no_read=1 for a sequence that does not match, so a "second
-       half" byte is skipped by the caller. */
-    if (cd->comb_offset < cd->comb_size)
-    {
-        ch = cd->comb_x[cd->comb_offset];
-        *no_read = cd->comb_no_read[cd->comb_offset];
-        cd->comb_offset++;
-        return ch;
-    }
-
-    /* start a fresh collection */
-    cd->comb_offset = 0;
-    cd->comb_size = 0;
-    do
-    {
-        int is_combining = 0;
-
-        if (inbytesleft == 0 && cd->comb_size)
-        {
-            /* combining character(s) seen, but nothing follows them */
-            cd->my_errno = YAZ_ICONV_EINVAL;
-            *no_read = 0;
-            ch = 0;
-            break;
-        }
-
-        ch = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read,
-                                 &is_combining);
-        if (!ch || !is_combining)
-            break;
-
-        /* remember the combining character and step over its bytes */
-        cd->comb_x[cd->comb_size] = ch;
-        cd->comb_no_read[cd->comb_size] = *no_read;
-        inp += *no_read;
-        inbytesleft -= *no_read;
-    } while (++cd->comb_size < 8);
-
-    return ch;
-}
-
-/* MARC8s reader: same as yaz_read_marc8(), except that when exactly one
-   combining character was collected, the pair (returned character,
-   collected character) is looked up in latin1_comb.  On a hit the single
-   precomposed value latin1_comb[].y is returned instead, the collected
-   character's byte count is added to *no_read, and the collection is
-   emptied so that the combining character is not returned separately. */
-static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
-                                     size_t inbytesleft, size_t *no_read)
-{
-    unsigned long base = yaz_read_marc8(cd, inp, inbytesleft, no_read);
-    int k;
-
-    if (!base || cd->comb_size != 1)
-        return base;
-
-    /* try to get a Latin-1 page code out of the pair */
-    for (k = 0; latin1_comb[k].x1 != 0; k++)
-    {
-        if (latin1_comb[k].x1 != base || latin1_comb[k].x2 != cd->comb_x[0])
-            continue;
-        *no_read += cd->comb_no_read[0];
-        cd->comb_size = 0;
-        return latin1_comb[k].y;
-    }
-    return base;
-}
-
-/* Parse one escape sequence.  inp[0] must be the escape byte (27) and
-   inbytesleft must be at least 1.
-
-   Layout accepted, in order:
-     ESC, optional '$' (set with multiple bytes),
-     optional '(' or ',' (selects G0) or ')' or '-' (selects G1),
-     optional '!' (ANSEL special case), final character.
-   Without a G0/G1 selector the final character goes to G0.
-
-   On success the final character is stored in cd->g0_mode or
-   cd->g1_mode and the number of bytes in the sequence is returned.
-   Returns 0, leaving both modes untouched, if the input ends before the
-   final character. */
-static size_t marc8_read_escape(yaz_iconv_t cd, const unsigned char *inp,
-                                size_t inbytesleft)
-{
-    int *modep = &cd->g0_mode;
-    size_t i = 1; /* step over ESC */
-
-    if (i == inbytesleft)
-        return 0;
-    if (inp[i] == '$') /* set with multiple bytes */
-        i++;
-    if (i == inbytesleft)
-        return 0;
-    if (inp[i] == '(' || inp[i] == ',') /* G0 */
-        i++;
-    else if (inp[i] == ')' || inp[i] == '-') /* G1 */
-    {
-        i++;
-        modep = &cd->g1_mode;
-    }
-    if (i == inbytesleft)
-        return 0;
-    if (inp[i] == '!') /* ANSEL is a special case */
-        i++;
-    if (i == inbytesleft)
-        return 0;
-    *modep = inp[i++]; /* Final character */
-    return i;
-}
-
-/* Read a single MARC-8 character, handling any escape sequences in
-   front of it.
-
-   Leading escape sequences update cd->g0_mode / cd->g1_mode and their
-   bytes are counted in *no_read.  The following byte is then converted
-   with the table selected by the active mode: G0 for bytes below 128,
-   G1 otherwise.  A space is returned as-is without a table lookup.
-
-   *comb is cleared before the table lookup and may be set by the
-   conversion function; it is not written on the paths that return
-   before the lookup.
-
-   Returns the converted character with *no_read set to the bytes
-   consumed (escape sequences included), or 0 when:
-     - only escape sequences (or nothing) were available; *no_read then
-       holds the escape bytes consumed;
-     - an escape sequence is cut short: cd->my_errno = YAZ_ICONV_EINVAL,
-       *no_read = 0;
-     - the active mode is not a known set: cd->my_errno =
-       YAZ_ICONV_EILSEQ, *no_read = 0.
-   The conversion function itself may also return 0. */
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
-                                         size_t inbytesleft, size_t *no_read,
-                                         int *comb)
-{
-    unsigned long ch;
-    size_t sub_read = 0;
-
-    *no_read = 0;
-    while (inbytesleft > 0 && *inp == 27)
-    {
-        size_t used = marc8_read_escape(cd, inp, inbytesleft);
-
-        if (used == 0)
-        {
-            /* incomplete escape sequence */
-            *no_read = 0;
-            cd->my_errno = YAZ_ICONV_EINVAL;
-            return 0;
-        }
-        inp += used;
-        inbytesleft -= used;
-        *no_read += used;
-    }
-
-    if (inbytesleft == 0)
-        return 0;
-    if (*inp == ' ')
-    {
-        *no_read += 1;
-        return ' ';
-    }
-
-    *comb = 0;
-    switch (*inp < 128 ? cd->g0_mode : cd->g1_mode)
-    {
-    case 'B': /* Basic ASCII */
-    case 's': /* ASCII */
-        ch = yaz_marc8_42_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case 'E': /* ANSEL */
-        ch = yaz_marc8_45_conv(inp, inbytesleft, &sub_read, comb, 127, 128);
-        break;
-    case 'g': /* Greek */
-        ch = yaz_marc8_67_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case 'b': /* Subscripts */
-        ch = yaz_marc8_62_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case 'p': /* Superscripts */
-        ch = yaz_marc8_70_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case '2': /* Basic Hebrew */
-        ch = yaz_marc8_32_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case 'N': /* Basic Cyrillic */
-        ch = yaz_marc8_4E_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case 'Q': /* Extended Cyrillic */
-        ch = yaz_marc8_51_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case '3': /* Basic Arabic */
-        ch = yaz_marc8_33_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case '4': /* Extended Arabic */
-        ch = yaz_marc8_34_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case 'S': /* Greek */
-        ch = yaz_marc8_53_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    case '1': /* Chinese, Japanese, Korean (EACC) */
-        ch = yaz_marc8_31_conv(inp, inbytesleft, &sub_read, comb, 127, 0);
-        break;
-    default:
-        /* unknown character set selected */
-        *no_read = 0;
-        cd->my_errno = YAZ_ICONV_EILSEQ;
-        return 0;
-    }
-    *no_read += sub_read;
-    return ch;
-}
-
-/* Write character x as ISO-8859-1 (one byte).
-
-   latin1_comb lists two-character Unicode sequences that, when combined,
-   are equivalent to a single character representable in
-   ISO-8859-1/Latin-1.  Regular iconv (on Linux at least) does not seem
-   to convert these, but since MARC-8 to UTF-8 generates such sequences,
-   merging them here gives a better chance of a successful
-   MARC-8 -> ISO-8859-1 conversion.
-
-   To make that possible a character in the range 33..126 is not written
-   immediately; it is kept in cd->compose_char.  On the next call it is
-   either merged with x (table hit) or written out unchanged (no hit).
-   yaz_flush_ISO8859_1() writes a character that is still held.
-
-   Returns 0 on success.  Returns (size_t)(-1) with cd->my_errno set to
-   YAZ_ICONV_E2BIG when the output is full, or YAZ_ICONV_EILSEQ when x is
-   0 or above 255. */
-static size_t yaz_write_ISO8859_1(yaz_iconv_t cd, unsigned long x,
-                                  char **outbuf, size_t *outbytesleft)
-{
-    unsigned char *dst = (unsigned char *) *outbuf;
-
-    if (cd->compose_char)
-    {
-        int k = 0;
-
-        /* stop at the matching entry, or at the table terminator */
-        while (latin1_comb[k].x1
-               && !(cd->compose_char == latin1_comb[k].x1
-                    && x == latin1_comb[k].x2))
-            k++;
-
-        if (*outbytesleft < 1)
-        {   /* no room. Retain compose_char and bail out */
-            cd->my_errno = YAZ_ICONV_E2BIG;
-            return (size_t)(-1);
-        }
-        if (latin1_comb[k].x1)
-        {   /* found: x becomes the precomposed character */
-            x = latin1_comb[k].y;
-        }
-        else
-        {   /* not found. Just write compose_char */
-            *dst++ = (unsigned char) cd->compose_char;
-            (*outbytesleft)--;
-            *outbuf = (char *) dst;
-        }
-        /* compose_char used so reset it. x now holds current char */
-        cd->compose_char = 0;
-    }
-
-    /* cd->compose_char is always 0 at this point */
-    if (x > 32 && x < 127)
-    {
-        /* hold it back; it may combine with the next character */
-        cd->compose_char = x;
-        return 0;
-    }
-    if (x == 0 || x > 255)
-    {
-        cd->my_errno = YAZ_ICONV_EILSEQ;
-        return (size_t)(-1);
-    }
-    if (*outbytesleft < 1)
-    {
-        cd->my_errno = YAZ_ICONV_E2BIG;
-        return (size_t)(-1);
-    }
-    *dst++ = (unsigned char) x;
-    (*outbytesleft)--;
-    *outbuf = (char *) dst;
-    return 0;
-}
-
-/* Write out the character held in cd->compose_char, if any, and clear
-   it.  Returns 0 on success (also when nothing was held).  Returns
-   (size_t)(-1) with cd->my_errno = YAZ_ICONV_E2BIG when there is no room;
-   the held character is kept in that case. */
-static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
-                                  char **outbuf, size_t *outbytesleft)
-{
-    unsigned char *dst;
-
-    if (!cd->compose_char)
-        return 0;
-
-    if (*outbytesleft < 1)
-    {
-        cd->my_errno = YAZ_ICONV_E2BIG;
-        return (size_t)(-1);
-    }
-
-    dst = (unsigned char *) *outbuf;
-    *dst++ = (unsigned char) cd->compose_char;
-    (*outbytesleft)--;
-    *outbuf = (char *) dst;
-    cd->compose_char = 0;
-    return 0;
-}
-
-/* Find the MARC-8 code for character x.
-
-   x is first encoded with yaz_write_UTF8() into a small local buffer;
-   that byte sequence is then offered to each reverse table
-   (yaz_marc8r_*_conv) in a fixed order.  The first table that returns a
-   non-zero value wins: the value is returned and *page_chr is set to the
-   page string associated with that table.
-
-   *comb is passed through to the table functions.
-
-   Returns 0 with cd->my_errno = YAZ_ICONV_EILSEQ when the UTF-8 encoding
-   fails or when no table has the character; *page_chr is not written in
-   that case. */
-static unsigned long lookup_marc8(yaz_iconv_t cd,
-                                  unsigned long x, int *comb,
-                                  const char **page_chr)
-{
-    char utf8_buf[7];
-    char *utf8_end = utf8_buf;
-    size_t utf8_room = sizeof(utf8_buf) - 1; /* keep one byte for '\0' */
-    size_t rc;
-    unsigned char *seq;
-    size_t seq_len;
-    size_t consumed = 0;
-    unsigned long y;
-
-    rc = yaz_write_UTF8(cd, x, &utf8_end, &utf8_room);
-    if (rc == (size_t)(-1))
-    {
-        cd->my_errno = YAZ_ICONV_EILSEQ;
-        return 0;
-    }
-
-    *utf8_end = '\0';
-    seq = (unsigned char *) utf8_buf;
-    seq_len = strlen(utf8_buf);
-
-    if ((y = yaz_marc8r_42_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "(B";
-    else if ((y = yaz_marc8r_45_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "(B";
-    else if ((y = yaz_marc8r_62_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "b";
-    else if ((y = yaz_marc8r_70_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "p";
-    else if ((y = yaz_marc8r_32_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "(2";
-    else if ((y = yaz_marc8r_4E_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "(N";
-    else if ((y = yaz_marc8r_51_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "(Q";
-    else if ((y = yaz_marc8r_33_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "(3";
-    else if ((y = yaz_marc8r_34_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "(4";
-    else if ((y = yaz_marc8r_53_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "(S";
-    else if ((y = yaz_marc8r_31_conv(seq, seq_len, &consumed, comb, 255, 0)) != 0)
-        *page_chr = ESC "$1";
-    else
-        cd->my_errno = YAZ_ICONV_EILSEQ; /* not in any table; y is 0 */
-
-    return y;
-}
-
-/* Write out the pending character cd->write_marc8_last, if any.
-
-   Steps:
-     1. If cd->write_marc8_lpage is set, yaz_write_marc8_page_chr() is
-        called with it first; a non-zero result is returned unchanged.
-     2. The character is written either as a numeric character reference
-        "&#xHHHH;" (when cd->write_marc8_ncr is set) or as its non-zero
-        bytes, taken from bits 23..16, 15..8 and 7..0 in that order.
-     3. cd->write_marc8_second_half_char, if set, is appended.
-     4. The four write_marc8_* state fields are cleared.
-
-   Returns 0 on success or when nothing is pending.  Returns (size_t)(-1)
-   with cd->my_errno = YAZ_ICONV_E2BIG when the output is too small; the
-   pending state is kept so the call can be repeated.  As before, at
-   least 10 free bytes are required in every case.
-
-   Changes relative to the previous version of the NCR path:
-     - "%04x" was given an unsigned long argument; the argument is now
-       converted to unsigned int to match the conversion specifier.
-     - the reference was formatted into a fixed 9-byte window and the
-       output pointer advanced by a fixed 8, so a value above 0xFFFF lost
-       its terminating ';'.  It is now formatted into a local buffer and
-       copied using its real length, with a space check for that length. */
-static size_t flush_combos(yaz_iconv_t cd,
-                           char **outbuf, size_t *outbytesleft)
-{
-    unsigned long y = cd->write_marc8_last;
-
-    if (!y)
-        return 0;
-
-    assert(cd->write_marc8_lpage);
-    if (cd->write_marc8_lpage)
-    {
-        size_t r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft,
-                                            cd->write_marc8_lpage);
-        if (r)
-            return r;
-    }
-
-    if (9 >= *outbytesleft)
-    {
-        cd->my_errno = YAZ_ICONV_E2BIG;
-        return (size_t) (-1);
-    }
-    if (cd->write_marc8_ncr)
-    {
-        /* "&#x" + up to 8 hex digits + ";" + '\0' is at most 13 bytes.
-           The value is a character code, so unsigned int is assumed to
-           be wide enough for it. */
-        char ncr[16];
-        size_t ncr_len;
-
-        yaz_snprintf(ncr, sizeof(ncr), "&#x%04x;", (unsigned int) y);
-        ncr_len = strlen(ncr);
-
-        /* + 1 leaves room for the second half character below */
-        if (ncr_len + 1 > *outbytesleft)
-        {
-            cd->my_errno = YAZ_ICONV_E2BIG;
-            return (size_t) (-1);
-        }
-        memcpy(*outbuf, ncr, ncr_len);
-        (*outbytesleft) -= ncr_len;
-        (*outbuf) += ncr_len;
-    }
-    else
-    {
-        size_t out_no = 0;
-        unsigned char byte;
-
-        /* emit up to three bytes, most significant first, skipping zeros */
-        byte = (unsigned char)((y >> 16) & 0xff);
-        if (byte)
-            (*outbuf)[out_no++] = byte;
-        byte = (unsigned char)((y >> 8) & 0xff);
-        if (byte)
-            (*outbuf)[out_no++] = byte;
-        byte = (unsigned char)(y & 0xff);
-        if (byte)
-            (*outbuf)[out_no++] = byte;
-        *outbuf += out_no;
-        (*outbytesleft) -= out_no;
-    }
-
-    if (cd->write_marc8_second_half_char)
-    {
-        *(*outbuf)++ = cd->write_marc8_second_half_char;
-        (*outbytesleft)--;
-    }
-
-    /* nothing pending any more */
-    cd->write_marc8_last = 0;
-    cd->write_marc8_ncr = 0;
-    cd->write_marc8_lpage = 0;
-    cd->write_marc8_second_half_char = 0;
-    return 0;
-}