#include <yaz/xmalloc.h>
#include <yaz/nmem.h>
+#include <yaz/snprintf.h>
#include "iconv-p.h"
-
-unsigned long yaz_marc8_42_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_45_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_67_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_62_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_70_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_32_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_4E_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_51_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_33_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_34_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_53_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8_31_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-
-
-unsigned long yaz_marc8r_42_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_45_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_67_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_62_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_70_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_32_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_4E_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_51_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_33_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_34_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_53_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
-unsigned long yaz_marc8r_31_conv(unsigned char *inp, size_t inbytesleft,
- size_t *no_read, int *combining);
+typedef unsigned long yaz_conv_func_t(unsigned char *inp, size_t inbytesleft, /* common signature of every yaz_marc8_*_conv / yaz_marc8r_*_conv table function declared below */
+ size_t *no_read, int *combining, /* callers pass the addresses of their own counters here; presumably written by the callee - confirm in the table code */
+ unsigned mask, int boffset); /* call sites in this file pass 127/0, 127/128 (ANSEL) or 255/0 (reverse tables); the exact meaning is defined by the table code, not shown here */
+
+
+yaz_conv_func_t yaz_marc8_42_conv; /* selected by 'B' (0x42) and 's': Basic ASCII */
+yaz_conv_func_t yaz_marc8_45_conv; /* selected by 'E' (0x45): ANSEL */
+yaz_conv_func_t yaz_marc8_67_conv; /* selected by 'g' (0x67): Greek */
+yaz_conv_func_t yaz_marc8_62_conv; /* selected by 'b' (0x62): Subscripts */
+yaz_conv_func_t yaz_marc8_70_conv; /* selected by 'p' (0x70): Superscripts */
+yaz_conv_func_t yaz_marc8_32_conv; /* selected by '2' (0x32): Basic Hebrew */
+yaz_conv_func_t yaz_marc8_4E_conv; /* selected by 'N' (0x4E): Basic Cyrillic */
+yaz_conv_func_t yaz_marc8_51_conv; /* selected by 'Q' (0x51): Extended Cyrillic */
+yaz_conv_func_t yaz_marc8_33_conv; /* selected by '3' (0x33): Basic Arabic */
+yaz_conv_func_t yaz_marc8_34_conv; /* selected by '4' (0x34): Extended Arabic */
+yaz_conv_func_t yaz_marc8_53_conv; /* selected by 'S' (0x53): Greek */
+yaz_conv_func_t yaz_marc8_31_conv; /* selected by '1' (0x31): Chinese, Japanese, Korean (EACC) */
+
+yaz_conv_func_t yaz_marc8r_42_conv; /* reverse table, tried first on the UTF-8 buffer; a hit selects page ESC "(B" */
+yaz_conv_func_t yaz_marc8r_45_conv; /* reverse table; a hit selects page ESC "(B" */
+yaz_conv_func_t yaz_marc8r_67_conv; /* reverse table; not called in the lookup chain visible in this excerpt */
+yaz_conv_func_t yaz_marc8r_62_conv; /* reverse table; a hit selects page ESC "b" */
+yaz_conv_func_t yaz_marc8r_70_conv; /* reverse table; a hit selects page ESC "p" */
+yaz_conv_func_t yaz_marc8r_32_conv; /* reverse table; a hit selects page ESC "(2" */
+yaz_conv_func_t yaz_marc8r_4E_conv; /* reverse table; a hit selects page ESC "(N" */
+yaz_conv_func_t yaz_marc8r_51_conv; /* reverse table; a hit selects page ESC "(Q" */
+yaz_conv_func_t yaz_marc8r_33_conv; /* reverse table; a hit selects page ESC "(3" */
+yaz_conv_func_t yaz_marc8r_34_conv; /* reverse table; a hit selects page ESC "(4" */
+yaz_conv_func_t yaz_marc8r_53_conv; /* reverse table; a hit selects page ESC "(S" */
+yaz_conv_func_t yaz_marc8r_31_conv; /* reverse table, tried last; a hit selects page ESC "$1" */
struct yaz_iconv_struct {
int my_errno;
unsigned write_marc8_second_half_char;
unsigned long write_marc8_last;
+ int write_marc8_ncr;
const char *write_marc8_lpage;
const char *write_marc8_g0;
const char *write_marc8_g1;
{
case 'B': /* Basic ASCII */
case 's': /* ASCII */
+ x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+ break;
case 'E': /* ANSEL */
- x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb);
- if (!x)
- {
- no_read_sub = 0;
- x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb);
- }
+ x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
break;
case 'g': /* Greek */
- x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case 'b': /* Subscripts */
- x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case 'p': /* Superscripts */
- x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case '2': /* Basic Hebrew */
- x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case 'N': /* Basic Cyrillic */
- x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case 'Q': /* Extended Cyrillic */
- x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case '3': /* Basic Arabic */
- x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case '4': /* Extended Arabic */
- x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case 'S': /* Greek */
- x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
case '1': /* Chinese, Japanese, Korean (EACC) */
- x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
break;
default:
*no_read = 0;
inp = (unsigned char *) utf8_buf;
inbytesleft = strlen(utf8_buf);
- x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "(B";
return x;
}
- x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "(B";
return x;
}
- x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "b";
return x;
}
- x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "p";
return x;
}
- x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "(2";
return x;
}
- x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "(N";
return x;
}
- x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "(Q";
return x;
}
- x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "(3";
return x;
}
- x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "(4";
return x;
}
- x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "(S";
return x;
}
- x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb);
+ x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
if (x)
{
*page_chr = ESC "$1";
char **outbuf, size_t *outbytesleft)
{
unsigned long y = cd->write_marc8_last;
- unsigned char byte;
- char out_buf[4];
- size_t out_no = 0;
if (!y)
return 0;
return r;
}
- byte = (unsigned char )((y>>16) & 0xff);
- if (byte)
- out_buf[out_no++] = byte;
- byte = (unsigned char)((y>>8) & 0xff);
- if (byte)
- out_buf[out_no++] = byte;
- byte = (unsigned char )(y & 0xff);
- if (byte)
- out_buf[out_no++] = byte;
-
- if (out_no + 2 >= *outbytesleft)
+ if (9 >= *outbytesleft)
{
cd->my_errno = YAZ_ICONV_E2BIG;
return (size_t) (-1);
}
+ if (cd->write_marc8_ncr)
+ {
+ yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
+ (*outbytesleft) -= 8;
+ (*outbuf) += 8;
+ }
+ else
+ {
+ char out_buf[4];
+ size_t out_no = 0;
+ unsigned char byte;
+
+
+ byte = (unsigned char )((y>>16) & 0xff);
+ if (byte)
+ out_buf[out_no++] = byte;
+ byte = (unsigned char)((y>>8) & 0xff);
+ if (byte)
+ out_buf[out_no++] = byte;
+ byte = (unsigned char )(y & 0xff);
+ if (byte)
+ out_buf[out_no++] = byte;
+ memcpy(*outbuf, out_buf, out_no);
+ *outbuf += out_no;
+ (*outbytesleft) -= out_no;
+ }
- memcpy(*outbuf, out_buf, out_no);
- *outbuf += out_no;
- (*outbytesleft) -= out_no;
if (cd->write_marc8_second_half_char)
{
*(*outbuf)++ = cd->write_marc8_second_half_char;
}
cd->write_marc8_last = 0;
+ cd->write_marc8_ncr = 0;
cd->write_marc8_lpage = 0;
cd->write_marc8_second_half_char = 0;
return 0;
static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft)
+ char **outbuf, size_t *outbytesleft,
+ int loss_mode) /* 0, 1 or 2: what to do when lookup_marc8() finds no mapping for x (see the !y branch) */
{
int comb = 0;
+ int enable_ncr = 0; /* becomes 1 when x itself is stored so the flush code can emit it as "&#x....;" */
const char *page_chr = 0;
unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
if (!y)
- return (size_t) (-1);
+ {
+ if (loss_mode == 0 || cd->my_errno != YAZ_ICONV_EILSEQ) /* strict mode, or a failure other than EILSEQ: report the error unchanged */
+ return (size_t) (-1);
+ page_chr = ESC "(B"; /* the replacement is written using the ESC "(B" page */
+ if (loss_mode == 1)
+ y = '|'; /* mode 1: substitute a vertical bar for the unmappable character */
+ else
+ {
+ y = x; /* any other non-zero mode: keep the original code value ... */
+ enable_ncr = 1; /* ... and flag it for "&#x%04x;" output when flushed */
+ }
+ }
if (comb)
{
cd->write_marc8_last = y;
cd->write_marc8_lpage = page_chr;
+ cd->write_marc8_ncr = enable_ncr;
}
return 0;
}
return yaz_write_marc8_page_chr(cd, outbuf, outbytesleft, ESC "(B");
}
-static size_t yaz_write_marc8(yaz_iconv_t cd, unsigned long x,
- char **outbuf, size_t *outbytesleft)
+static size_t yaz_write_marc8_generic(yaz_iconv_t cd, unsigned long x, /* forward declaration: the three mode wrappers below call it before its definition */
+ char **outbuf, size_t *outbytesleft,
+ int loss_mode); /* passed through unchanged to every yaz_write_marc8_2() call */
+
+static size_t yaz_write_marc8_normal(yaz_iconv_t cd, unsigned long x, /* write handler installed for the "MARC8" and "MARC8s" target encodings */
+ char **outbuf, size_t *outbytesleft)
+{
+ return yaz_write_marc8_generic(cd, x, outbuf, outbytesleft, 0); /* loss_mode 0: an unmappable character makes yaz_write_marc8_2 return (size_t) -1 */
+}
+
+static size_t yaz_write_marc8_lossy(yaz_iconv_t cd, unsigned long x, /* write handler installed for the "MARC8lossy" target encoding */
+ char **outbuf, size_t *outbytesleft)
+{
+ return yaz_write_marc8_generic(cd, x, outbuf, outbytesleft, 1); /* loss_mode 1: on EILSEQ the unmappable character is replaced by '|' */
+}
+
+static size_t yaz_write_marc8_lossless(yaz_iconv_t cd, unsigned long x, /* write handler installed for the "MARC8lossless" target encoding */
+ char **outbuf, size_t *outbytesleft)
+{
+ return yaz_write_marc8_generic(cd, x, outbuf, outbytesleft, 2); /* loss_mode 2: on EILSEQ the code value is kept and later written as "&#x%04x;" */
+}
+
+static size_t yaz_write_marc8_generic(yaz_iconv_t cd, unsigned long x,
+ char **outbuf, size_t *outbytesleft,
+ int loss_mode)
{
int i;
for (i = 0; latin1_comb[i].x1; i++)
const char *lpage = cd->write_marc8_lpage;
r = yaz_write_marc8_2(cd, latin1_comb[i].x1,
- outbuf, outbytesleft);
+ outbuf, outbytesleft, loss_mode);
if (r)
return r;
r = yaz_write_marc8_2(cd, latin1_comb[i].x2,
- outbuf, outbytesleft);
+ outbuf, outbytesleft, loss_mode);
if (r && cd->my_errno == YAZ_ICONV_E2BIG)
{
/* not enough room. reset output to original values */
return r;
}
}
- return yaz_write_marc8_2(cd, x, outbuf, outbytesleft);
+ return yaz_write_marc8_2(cd, x, outbuf, outbytesleft, loss_mode);
}
cd->write_handle = yaz_write_UCS4LE;
else if (!yaz_matchstr(tocode, "MARC8"))
{
- cd->write_handle = yaz_write_marc8;
+ cd->write_handle = yaz_write_marc8_normal;
cd->flush_handle = yaz_flush_marc8;
}
else if (!yaz_matchstr(tocode, "MARC8s"))
{
- cd->write_handle = yaz_write_marc8;
+ cd->write_handle = yaz_write_marc8_normal;
+ cd->flush_handle = yaz_flush_marc8;
+ }
+ else if (!yaz_matchstr(tocode, "MARC8lossy"))
+ {
+ cd->write_handle = yaz_write_marc8_lossy;
+ cd->flush_handle = yaz_flush_marc8;
+ }
+ else if (!yaz_matchstr(tocode, "MARC8lossless"))
+ {
+ cd->write_handle = yaz_write_marc8_lossless;
cd->flush_handle = yaz_flush_marc8;
}
else if (!yaz_matchstr(tocode, "advancegreek"))
{
cd->my_errno = YAZ_ICONV_UNKNOWN;
cd->g0_mode = 'B';
- cd->g1_mode = 'B';
+ cd->g1_mode = 'E';
cd->comb_offset = cd->comb_size = 0;
cd->compose_char = 0;
cd->write_marc8_second_half_char = 0;
cd->write_marc8_last = 0;
+ cd->write_marc8_ncr = 0;
cd->write_marc8_lpage = 0;
cd->write_marc8_g0 = ESC "(B";
cd->write_marc8_g1 = 0;