* Copyright (C) 1995-2007, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: siconv.c,v 1.35 2007-03-12 10:59:59 adam Exp $
+ * $Id: siconv.c,v 1.37 2007-03-20 21:37:32 adam Exp $
*/
/**
* \file siconv.c
unsigned long (*read_handle)(yaz_iconv_t cd, unsigned char *inbuf,
size_t inbytesleft, size_t *no_read);
size_t (*write_handle)(yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last);
+ char **outbuf, size_t *outbytesleft);
size_t (*flush_handle)(yaz_iconv_t cd,
char **outbuf, size_t *outbytesleft);
int marc8_esc_mode;
}
static size_t yaz_write_advancegreek(yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last)
+ char **outbuf, size_t *outbytesleft)
{
size_t k = 0;
unsigned char *out = (unsigned char*) *outbuf;
}
static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last)
+ char **outbuf, size_t *outbytesleft)
{
return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
}
return 0;
}
-
static size_t yaz_write_ISO8859_1 (yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last)
+ char **outbuf, size_t *outbytesleft)
{
/* list of two char unicode sequence that, when combined, are
equivalent to single unicode chars that can be represented in
cd->compose_char = 0;
}
- if (!last && x > 32 && x < 127 && cd->compose_char == 0)
+ if (x > 32 && x < 127 && cd->compose_char == 0)
{
cd->compose_char = x;
return 0;
return 0;
}
+static size_t yaz_flush_ISO8859_1(yaz_iconv_t cd,
+ char **outbuf, size_t *outbytesleft)
+{
+ if (cd->compose_char)
+ {
+ unsigned char *outp = (unsigned char *) *outbuf;
+ if (*outbytesleft < 1)
+ {
+ cd->my_errno = YAZ_ICONV_E2BIG;
+ return (size_t)(-1);
+ }
+ *outp++ = (unsigned char) cd->compose_char;
+ (*outbytesleft)--;
+ *outbuf = (char *) outp;
+ cd->compose_char = 0;
+ }
+ return 0;
+}
static size_t yaz_write_UCS4 (yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last)
+ char **outbuf, size_t *outbytesleft)
{
unsigned char *outp = (unsigned char *) *outbuf;
if (*outbytesleft >= 4)
}
static size_t yaz_write_UCS4LE (yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last)
+ char **outbuf, size_t *outbytesleft)
{
unsigned char *outp = (unsigned char *) *outbuf;
if (*outbytesleft >= 4)
char *utf8_outbuf = utf8_buf;
size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
- r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft, 0);
+ r = yaz_write_UTF8(cd, x, &utf8_outbuf, &utf8_outbytesleft);
if (r == (size_t)(-1))
{
cd->my_errno = YAZ_ICONV_EILSEQ;
return 0;
}
+static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
+ char **outbuf, size_t *outbytesleft,
+ const char *page_chr)
+{
+ const char *old_page_chr = cd->write_marc8_page_chr;
+ if (strcmp(page_chr, old_page_chr))
+ {
+ size_t plen = 0;
+ const char *page_out = page_chr;
+
+ if (*outbytesleft < 8)
+ {
+ cd->my_errno = YAZ_ICONV_E2BIG;
+
+ return (size_t) (-1);
+ }
+ cd->write_marc8_page_chr = page_chr;
+
+ if (!strcmp(old_page_chr, "\033p")
+ || !strcmp(old_page_chr, "\033g")
+ || !strcmp(old_page_chr, "\033b"))
+ {
+ /* Technique 1 leave */
+ page_out = "\033s";
+ if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
+ {
+ /* Must leave script + enter new page */
+ plen = strlen(page_out);
+ memcpy(*outbuf, page_out, plen);
+ (*outbuf) += plen;
+ (*outbytesleft) -= plen;
+ page_out = page_chr;
+ }
+ }
+ plen = strlen(page_out);
+ memcpy(*outbuf, page_out, plen);
+ (*outbuf) += plen;
+ (*outbytesleft) -= plen;
+ }
+ return 0;
+}
+
+
static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last)
+ char **outbuf, size_t *outbytesleft)
{
int comb = 0;
const char *page_chr = 0;
else
{
size_t r = flush_combos(cd, outbuf, outbytesleft);
- const char *old_page_chr = cd->write_marc8_page_chr;
if (r)
return r;
- if (strcmp(page_chr, old_page_chr))
- {
- size_t plen = 0;
- const char *page_out = page_chr;
- if (*outbytesleft < 8)
- {
- cd->my_errno = YAZ_ICONV_E2BIG;
-
- return (size_t) (-1);
- }
- cd->write_marc8_page_chr = page_chr;
-
- if (!strcmp(old_page_chr, "\033p")
- || !strcmp(old_page_chr, "\033g")
- || !strcmp(old_page_chr, "\033b"))
- {
- /* Technique 1 leave */
- page_out = "\033s";
- if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
- {
- /* Must leave script + enter new page */
- plen = strlen(page_out);
- memcpy(*outbuf, page_out, plen);
- (*outbuf) += plen;
- (*outbytesleft) -= plen;
- page_out = page_chr;
- }
- }
- plen = strlen(page_out);
- memcpy(*outbuf, page_out, plen);
- (*outbuf) += plen;
- (*outbytesleft) -= plen;
- }
- cd->write_marc8_last = y;
- }
- if (last)
- {
- size_t r = flush_combos(cd, outbuf, outbytesleft);
+ r = yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, page_chr);
if (r)
- {
- if (comb)
- cd->write_marc8_comb_no--;
- else
- cd->write_marc8_last = 0;
return r;
- }
+ cd->write_marc8_last = y;
}
return 0;
}
static size_t yaz_flush_marc8(yaz_iconv_t cd,
char **outbuf, size_t *outbytesleft)
{
- if (strcmp(cd->write_marc8_page_chr, "\033(B"))
- {
- if (*outbytesleft < 3)
- {
- cd->my_errno = YAZ_ICONV_E2BIG;
- return (size_t) (-1);
- }
- memcpy(*outbuf, "\033(B", 3);
- (*outbuf) += 3;
- *outbytesleft -= 3;
- }
- return 0;
+ size_t r = flush_combos(cd, outbuf, outbytesleft);
+ if (r)
+ return r;
+ return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, "\033(B");
}
static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last)
+ char **outbuf, size_t *outbytesleft)
{
int i;
for (i = 0; latin1_comb[i].x1; i++)
int last_ch = cd->write_marc8_last;
r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
- outbuf, outbytesleft, 0);
+ outbuf, outbytesleft);
if (r)
return r;
r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
- outbuf, outbytesleft, last);
+ outbuf, outbytesleft);
if (r && cd->my_errno == YAZ_ICONV_E2BIG)
{
/* not enough room. reset output to original values */
return r;
}
}
- return yaz_write_marc8_2(cd, x, outbuf, outbytesleft, last);
+ return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
}
#if HAVE_WCHAR_H
-static size_t yaz_write_wchar_t (yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft,
- int last)
+static size_t yaz_write_wchar_t(yaz_iconv_t cd, unsigned long x,
+ char **outbuf, size_t *outbytesleft)
{
unsigned char *outp = (unsigned char *) *outbuf;
if (!yaz_matchstr(tocode, "UTF8"))
cd->write_handle = yaz_write_UTF8;
else if (!yaz_matchstr(tocode, "ISO88591"))
+ {
cd->write_handle = yaz_write_ISO8859_1;
+ cd->flush_handle = yaz_flush_ISO8859_1;
+ }
else if (!yaz_matchstr (tocode, "UCS4"))
cd->write_handle = yaz_write_UCS4;
else if (!yaz_matchstr(tocode, "UCS4LE"))
}
cd->init_flag = 0;
+ if (!inbuf || !*inbuf)
+ {
+ if (outbuf && *outbuf)
+ {
+ if (cd->unget_x)
+ r = (*cd->write_handle)(cd, cd->unget_x, outbuf, outbytesleft);
+ if (cd->flush_handle)
+ r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
+ }
+ if (r == 0)
+ cd->init_flag = 1;
+ cd->unget_x = 0;
+ return r;
+ }
while (1)
{
unsigned long x;
x = cd->unget_x;
no_read = cd->no_read_x;
}
- else if (inbuf && *inbuf)
+ else
{
if (*inbytesleft == 0)
{
r = *inbuf - inbuf0;
break;
}
- x = (cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
- &no_read);
+ x = (*cd->read_handle)(cd, (unsigned char *) *inbuf, *inbytesleft,
+ &no_read);
if (no_read == 0)
{
r = (size_t)(-1);
break;
}
}
- else
- {
- r = 0;
- if (cd->flush_handle && outbuf && *outbuf)
- r = (*cd->flush_handle)(cd, outbuf, outbytesleft);
- if (r == 0)
- cd->init_flag = 1;
- break;
- }
if (x)
{
- r = (cd->write_handle)(cd, x, outbuf, outbytesleft,
- (*inbytesleft - no_read) == 0 ? 1 : 0);
+ r = (*cd->write_handle)(cd, x, outbuf, outbytesleft);
if (r)
{
/* unable to write it. save it because read_handle cannot