/*
- * Copyright (c) 1997-2003, Index Data
+ * Copyright (c) 1997-2004, Index Data
* See the file LICENSE for details.
*
- * $Id: siconv.c,v 1.2 2004-03-11 10:09:11 oleg Exp $
+ * $Id: siconv.c,v 1.7 2004-10-15 00:19:00 adam Exp $
+ */
+/**
+ * \file siconv.c
+ * \brief Implements simple ICONV
+ *
+ * This implements an interface similar to that of iconv and
+ * is used by YAZ to interface with iconv (if present).
+ * For systems where iconv is not present, this layer
+ * provides a few important conversions: UTF-8, MARC-8, Latin-1.
*/
-
-/* mini iconv and wrapper for system iconv library (if present) */
#if HAVE_CONFIG_H
#include <config.h>
#include <yaz/yaz-util.h>
-unsigned long yaz_marc8_conv (unsigned char *inp, size_t inbytesleft,
- size_t *no_read);
+unsigned long yaz_marc8_1_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape modes 'B' (Basic ASCII), 'E' (ANSEL), 's' (ASCII) */
+unsigned long yaz_marc8_2_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape mode 'g' (Greek) */
+unsigned long yaz_marc8_3_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape mode 'b' (Subscripts) */
+unsigned long yaz_marc8_4_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape mode 'p' (Superscripts) */
+unsigned long yaz_marc8_5_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape mode '2' (Basic Hebrew) */
+unsigned long yaz_marc8_6_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape modes 'N' (Basic Cyrillic), 'Q' (Extended Cyrillic) */
+unsigned long yaz_marc8_7_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape modes '3' (Basic Arabic), '4' (Extended Arabic) */
+unsigned long yaz_marc8_8_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape mode 'S' (Greek) */
+unsigned long yaz_marc8_9_conv (unsigned char *inp, size_t inbytesleft,
+ size_t *no_read, int *combining); /* escape mode '1' (Chinese, Japanese, Korean - EACC) */
struct yaz_iconv_struct {
int my_errno;
size_t inbytesleft, size_t *no_read);
size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
char **outbuf, size_t *outbytesleft);
+ int marc8_esc_mode;
+ int marc8_comb_x;
+ int marc8_comb_no_read;
#if HAVE_ICONV_H
iconv_t iconv_cd;
#endif
+/**
+ * Read handler for MARC-8 input: decodes the next character from inp.
+ *
+ * Leading escape sequences (ESC, optional intermediate bytes from the
+ * set "(,$!", then one final byte) are consumed first. The final byte is
+ * remembered in cd->marc8_esc_mode and selects which yaz_marc8_N_conv
+ * table converter decodes the character that follows.
+ *
+ * When the converter flags the decoded character as combining, the
+ * character after it is decoded as well and returned first; the combining
+ * character is parked in cd->marc8_comb_x and handed out by the next
+ * call, so the two characters reach the caller in swapped order.
+ * NOTE(review): only one combining character can be parked at a time; a
+ * run of several combining characters looks like it would overwrite the
+ * parked value - confirm against the intended MARC-8 handling.
+ *
+ * \param cd           conversion handle (escape mode, parked character)
+ * \param inp          input buffer
+ * \param inbytesleft  number of bytes available at inp
+ * \param no_read      receives the number of input bytes consumed by
+ *                     this call; set to 0 on error
+ * \return decoded character value, or 0 when nothing was decoded (input
+ *         exhausted, or an error - in which case cd->my_errno is set)
+ */
static unsigned long yaz_read_marc8 (yaz_iconv_t cd, unsigned char *inp,
                                     size_t inbytesleft, size_t *no_read)
{
-    return yaz_marc8_conv(inp, inbytesleft, no_read);
+    if (cd->marc8_comb_x)
+    {
+        /* hand out the combining character parked by the previous call;
+           the bytes reported now are those of the character that was
+           returned ahead of it */
+        unsigned long x = cd->marc8_comb_x;
+        *no_read = cd->marc8_comb_no_read;
+        cd->marc8_comb_x = 0;
+        return x;
+    }
+    *no_read = 0;
+    while (inbytesleft >= 1 && inp[0] == 27) /* 27 = ESC */
+    {
+        size_t inbytesleft0 = inbytesleft;
+        inp++;
+        inbytesleft--;
+        /* skip intermediate bytes; the explicit NUL test is required
+           because strchr also matches the terminator of the set string */
+        while (inbytesleft > 0 && *inp != 0 && strchr("(,$!", *inp))
+        {
+            inbytesleft--;
+            inp++;
+        }
+        if (inbytesleft == 0)
+        {
+            /* escape sequence is cut off before its final byte */
+            *no_read = 0;
+            cd->my_errno = YAZ_ICONV_EINVAL;
+            return 0;
+        }
+        cd->marc8_esc_mode = *inp++;
+        inbytesleft--;
+        (*no_read) += inbytesleft0 - inbytesleft;
+    }
+    if (inbytesleft == 0)
+        return 0;
+    else
+    {
+        unsigned long x;
+        int comb = 0;
+        size_t no_read_sub = 0;
+
+        switch(cd->marc8_esc_mode)
+        {
+        case 'B': /* Basic ASCII */
+        case 'E': /* ANSEL */
+        case 's': /* ASCII */
+            x = yaz_marc8_1_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        case 'g': /* Greek */
+            x = yaz_marc8_2_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        case 'b': /* Subscripts */
+            x = yaz_marc8_3_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        case 'p': /* Superscripts */
+            x = yaz_marc8_4_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        case '2': /* Basic Hebrew */
+            x = yaz_marc8_5_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        case 'N': /* Basic Cyrillic */
+        case 'Q': /* Extended Cyrillic */
+            x = yaz_marc8_6_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        case '3': /* Basic Arabic */
+        case '4': /* Extended Arabic */
+            x = yaz_marc8_7_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        case 'S': /* Greek */
+            x = yaz_marc8_8_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        case '1': /* Chinese, Japanese, Korean (EACC) */
+            x = yaz_marc8_9_conv(inp, inbytesleft, &no_read_sub, &comb);
+            break;
+        default:
+            /* unknown escape mode: nothing can be decoded */
+            *no_read = 0;
+            cd->my_errno = YAZ_ICONV_EILSEQ;
+            return 0;
+        }
+        *no_read += no_read_sub;
+
+        if (comb && cd->marc8_comb_x == 0)
+        {
+            size_t tmp_read = 0;
+            unsigned long next_x;
+
+            /* read next char .. inp and inbytesleft were already moved
+               past any escape sequences above, so only the bytes used by
+               the converter (no_read_sub) may be skipped here; *no_read
+               additionally counts the escape bytes and would overshoot */
+            next_x = yaz_read_marc8(cd, inp + no_read_sub,
+                                    inbytesleft - no_read_sub, &tmp_read);
+            /* save this x for later .. */
+            cd->marc8_comb_x = x;
+            /* save next read for later .. */
+            cd->marc8_comb_no_read = tmp_read;
+            /* return next x - thereby swap */
+            x = next_x;
+        }
+        return x;
+    }
}
static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
cd->read_handle = 0;
cd->init_handle = 0;
cd->my_errno = YAZ_ICONV_UNKNOWN;
+ cd->marc8_esc_mode = 'B';
+ cd->marc8_comb_x = 0;
/* a useful hack: if fromcode has leading @,
the library does not use YAZ's own conversions .. */
r = (size_t)(-1);
break;
}
- r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
- if (r)
- break;
+ if (x)
+ {
+ r = (cd->write_handle)(cd, x, outbuf, outbytesleft);
+ if (r)
+ break;
+ }
*inbytesleft -= no_read;
(*inbuf) += no_read;
}