-/* Decode a single ISO-8859-1 character.
-   The returned code is simply the numeric value of the first input byte
-   and *no_read is always set to 1.  Neither cd nor inbytesleft is looked
-   at, so the caller has to guarantee that inp[0] is readable. */
-static unsigned long yaz_read_ISO8859_1(yaz_iconv_t cd, unsigned char *inp,
-                                        size_t inbytesleft, size_t *no_read)
-{
-    const unsigned long code = *inp;
-
-    *no_read = 1;
-    return code;
-}
-
-#if HAVE_WCHAR_H
-/* Decode one character stored in the input as a raw wchar_t object.
-   On success the value is returned and *no_read is sizeof(wchar_t).
-   If fewer than sizeof(wchar_t) bytes remain, cd->my_errno is set to
-   YAZ_ICONV_EINVAL, *no_read is set to 0 and 0 is returned. */
-static unsigned long yaz_read_wchar_t(yaz_iconv_t cd, unsigned char *inp,
-                                      size_t inbytesleft, size_t *no_read)
-{
-    wchar_t wide;
-
-    if (inbytesleft < sizeof(wide))
-    {
-        /* incomplete input: not enough bytes for a whole wchar_t */
-        cd->my_errno = YAZ_ICONV_EINVAL;
-        *no_read = 0;
-        return 0;
-    }
-    /* copy byte-wise; inp need not be aligned for wchar_t */
-    memcpy(&wide, inp, sizeof(wide));
-    *no_read = sizeof(wide);
-    return wide;
-}
-#endif
-
-/* Forward declaration: yaz_read_marc8 below calls this helper, whose
-   definition appears after yaz_read_marc8s. */
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read,
- int *comb);
-
-/* Read the next character from MARC-8 input, queueing combining characters.
-
-   If entries are still pending in cd->comb_x (comb_offset < comb_size) the
-   next pending entry is returned together with its recorded byte count,
-   and no input is examined.
-
-   Otherwise the queue is reset and yaz_read_marc8_comb is called
-   repeatedly.  Every non-zero result flagged as combining is stored in
-   cd->comb_x / cd->comb_no_read (at most 8 entries) and the input position
-   is advanced past it.  The first result that is zero or not flagged as
-   combining is returned at once, with *no_read as set by that call; the
-   stored entries are handed out by the following calls.
-
-   If the input runs out after at least one combining character has been
-   stored, cd->my_errno is set to YAZ_ICONV_EINVAL, *no_read is set to 0
-   and 0 is returned. */
-static unsigned long yaz_read_marc8(yaz_iconv_t cd, unsigned char *inp,
-                                    size_t inbytesleft, size_t *no_read)
-{
-    unsigned long x;
-
-    if (cd->comb_offset < cd->comb_size)
-    {
-        /* Hand out one queued entry.  No adjustment of *no_read is made
-           for the double-diacritic characters (0x0360, 0x0361); an older,
-           disabled special case for them has been dropped here. */
-        *no_read = cd->comb_no_read[cd->comb_offset];
-        return cd->comb_x[cd->comb_offset++];
-    }
-
-    cd->comb_offset = 0;
-    cd->comb_size = 0;
-    do
-    {
-        int is_combining = 0;
-
-        if (cd->comb_size != 0 && inbytesleft == 0)
-        {
-            /* combining characters seen, but nothing left to follow them */
-            cd->my_errno = YAZ_ICONV_EINVAL;
-            *no_read = 0;
-            return 0;
-        }
-        x = yaz_read_marc8_comb(cd, inp, inbytesleft, no_read, &is_combining);
-        if (x == 0 || !is_combining)
-            return x;
-
-        /* queue the combining character and step past its bytes */
-        cd->comb_x[cd->comb_size] = x;
-        cd->comb_no_read[cd->comb_size] = *no_read;
-        inbytesleft -= *no_read;
-        inp += *no_read;
-        cd->comb_size++;
-    } while (cd->comb_size < 8);
-    return x;
-}
-
-/* Variant of yaz_read_marc8 that tries to merge a character with a single
-   queued combining character.
-
-   The character is first read with yaz_read_marc8.  When it is non-zero
-   and exactly one entry is queued in cd->comb_x, the pair is passed to
-   yaz_iso_8859_1_lookup_x12 (presumably a lookup of a precomposed
-   ISO-8859-1 character; confirm against that function).  If the lookup
-   reports success, x holds whatever the lookup stored, the queued entry's
-   byte count is added to *no_read and the queue is emptied.  In all other
-   cases the result of yaz_read_marc8 is returned unchanged. */
-static unsigned long yaz_read_marc8s(yaz_iconv_t cd, unsigned char *inp,
-                                     size_t inbytesleft, size_t *no_read)
-{
-    unsigned long x = yaz_read_marc8(cd, inp, inbytesleft, no_read);
-
-    if (x == 0 || cd->comb_size != 1)
-        return x;
-    if (!yaz_iso_8859_1_lookup_x12(x, cd->comb_x[0], &x))
-        return x;
-
-    /* merged: account for the queued character's bytes and drop it */
-    *no_read += cd->comb_no_read[0];
-    cd->comb_size = 0;
-    return x;
-}
-/* Decode one character from MARC-8 input, handling leading escape sequences.
-
-   Leading escape sequences (first byte 27) are consumed first.  Each one is
-   parsed as: optional '$', optional '(' or ',' (selects cd->g0_mode) or
-   ')' or '-' (selects cd->g1_mode), optional '!', then one final byte that
-   is stored as the new mode.  The bytes of every complete sequence are
-   added to *no_read.
-
-   After that:
-   - no input left: returns 0, *no_read covers the escape bytes only and
-     no error is set;
-   - a space byte: returns ' ' and counts one more byte;
-   - anything else: bytes below 128 use cd->g0_mode, others cd->g1_mode;
-     *comb is cleared and the matching yaz_marc8_XX_conv routine is called,
-     its byte count being added to *no_read.
-
-   Errors (return value 0 and *no_read set to 0):
-   - escape sequence cut off by end of input: YAZ_ICONV_EINVAL;
-   - mode byte not handled by the switch: YAZ_ICONV_EILSEQ.
-
-   *comb is written only on the conversion path; the caller must have
-   initialised it for the other paths. */
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
- size_t inbytesleft, size_t *no_read,
- int *comb)
-{
- *no_read = 0;
- while (inbytesleft > 0 && *inp == 27) /* 27 is ESC: start of an escape sequence */
- {
- int *modep = &cd->g0_mode; /* G0 is the target unless a G1 intermediate is seen */
- size_t inbytesleft0 = inbytesleft; /* remembered to compute the sequence length */
-
- inbytesleft--;
- inp++;
- if (inbytesleft == 0)
- goto incomplete;
- if (*inp == '$') /* set with multiple bytes */
- {
- inbytesleft--;
- inp++;
- }
- if (inbytesleft == 0)
- goto incomplete;
- if (*inp == '(' || *inp == ',') /* G0 */
- {
- inbytesleft--;
- inp++;
- }
- else if (*inp == ')' || *inp == '-') /* G1 */
- {
- inbytesleft--;
- inp++;
- modep = &cd->g1_mode;
- }
- if (inbytesleft == 0)
- goto incomplete;
- if (*inp == '!') /* ANSEL is a special case */
- {
- inbytesleft--;
- inp++;
- }
- if (inbytesleft == 0)
- goto incomplete;
- *modep = *inp++; /* Final character */
- inbytesleft--;
-
- (*no_read) += inbytesleft0 - inbytesleft; /* count the bytes of this complete sequence */
- }
- if (inbytesleft == 0)
- return 0; /* only escape sequences (or nothing) were available; no error is set */
- else if (*inp == ' ')
- {
- *no_read += 1; /* space is returned as is, without consulting the mode */
- return ' ';
- }
- else
- {
- unsigned long x;
- size_t no_read_sub = 0;
- int mode = *inp < 128 ? cd->g0_mode : cd->g1_mode; /* high bit picks G1 */
- *comb = 0;
-
- switch(mode)
- {
- case 'B': /* Basic ASCII */
- case 's': /* ASCII */
- x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'E': /* ANSEL */
- x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
- break;
- case 'g': /* Greek; NOTE(review): presumably the MARC-8 Greek Symbols set, confirm */
- x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'b': /* Subscripts */
- x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'p': /* Superscripts */
- x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case '2': /* Basic Hebrew */
- x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'N': /* Basic Cyrillic */
- x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'Q': /* Extended Cyrillic */
- x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case '3': /* Basic Arabic */
- x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case '4': /* Extended Arabic */
- x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case 'S': /* Greek; NOTE(review): presumably the MARC-8 Basic Greek set, confirm */
- x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- case '1': /* Chinese, Japanese, Korean (EACC) */
- x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
- break;
- default: /* mode byte with no conversion routine */
- *no_read = 0;
- cd->my_errno = YAZ_ICONV_EILSEQ;
- return 0;
- }
- *no_read += no_read_sub; /* escape bytes plus the bytes of the character itself */
- return x;
- }
-incomplete: /* escape sequence cut off by end of input: report nothing consumed */
- *no_read = 0;
- cd->my_errno = YAZ_ICONV_EINVAL;
- return 0;
-}
-
-
-