2 * Copyright (C) 1995-2008, Index Data ApS
3 * See the file LICENSE for details.
8 * \brief ISO-8859-1 encoding / decoding
25 #include <yaz/xmalloc.h>
31 unsigned long compose_char;
40 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
41 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
42 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
43 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
44 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
45 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
46 /* omitted: 0xc6 LATIN CAPITAL LETTER AE */
47 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
48 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
49 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
50 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
51 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
52 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
53 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
54 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
55 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
56 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
57 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
58 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
59 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
60 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
61 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
62 /* omitted: 0xd7 MULTIPLICATION SIGN */
63 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
64 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
65 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
66 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
67 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
68 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
69 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
70 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
71 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
72 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
73 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
74 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
75 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
76 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
77 /* omitted: 0xe6 LATIN SMALL LETTER AE */
78 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
79 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
80 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
81 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
82 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
83 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
84 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
85 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
86 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
87 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
88 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
89 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
90 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
91 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
92 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
93 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
94 /* omitted: 0xf7 DIVISION SIGN */
95 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
96 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
97 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
98 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
99 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
100 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
101 /* omitted: 0xfe LATIN SMALL LETTER THORN */
102 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
/**
 * Decompose a precomposed Latin-1 code point into the pair stored for it
 * in the latin1_comb table.
 *
 * v  - code point to look up; the table is only searched when v lies in
 *      0xc0..0xff, the range covered by the table's y values.
 * x1 - receives the x1 field (base character) of the matching entry.
 * x2 - receives the x2 field (combining character) of the matching entry.
 *
 * NOTE(review): the return statements are not visible in this excerpt;
 * confirm the found / not-found return values against the full file.
 */
107 int yaz_iso_8859_1_lookup_y(unsigned long v,
108 unsigned long *x1, unsigned long *x2)
110 if (v >= 0xc0 && v <= 0xff) /* optimization. min and max .y values */
/* scan stops at the first entry whose x1 field is zero */
113 for (i = 0; latin1_comb[i].x1; i++)
115 if (v == latin1_comb[i].y)
117 *x1 = latin1_comb[i].x1;
118 *x2 = latin1_comb[i].x2;
/**
 * Compose the pair (x1, x2) into a single Latin-1 code point when
 * latin1_comb holds an entry with exactly these x1 and x2 fields.
 *
 * x1 - first character of the pair, compared against the entry's x1.
 * x2 - second character of the pair, compared against the entry's x2.
 * On a match the entry's y value is stored through the output pointer y.
 *
 * NOTE(review): the remainder of the parameter list and the return
 * statements are not visible in this excerpt; confirm them against the
 * full file.
 */
126 int yaz_iso_8859_1_lookup_x12(unsigned long x1, unsigned long x2,
129 /* For MARC8s we try to get a Latin-1 page code out of it */
/* scan stops at the first entry whose x1 field is zero */
131 for (i = 0; latin1_comb[i].x1; i++)
132 if (x2 == latin1_comb[i].x2 && x1 == latin1_comb[i].x1)
134 *y = latin1_comb[i].y;
/**
 * Encoder write handler: emit the code point x as ISO-8859-1 output.
 *
 * The encoder state (e->data) carries a pending compose_char.  When the
 * pending character together with x matches an x1/x2 pair in latin1_comb,
 * x is replaced by that entry's y value; when no entry matches, the
 * pending compose_char is written to the output as a single byte.
 *
 * Error reporting visible here:
 *  - YAZ_ICONV_E2BIG is set when *outbytesleft is below 1.
 *  - YAZ_ICONV_EILSEQ is set when x is greater than 255 or less than 1.
 *
 * NOTE(review): several lines of this function (the x parameter
 * declaration, the guard around the compose lookup, the updates of
 * compose_char and *outbytesleft, and all return statements) are not
 * visible in this excerpt; confirm the details against the full file.
 */
140 static size_t write_iso_8859_1(yaz_iconv_t cd, yaz_iconv_encoder_t e,
142 char **outbuf, size_t *outbytesleft)
144 struct encoder_data *w = e->data;
145 /* list of two char unicode sequence that, when combined, are
146 equivalent to single unicode chars that can be represented in
148 Regular iconv on Linux at least does not seem to convert these,
149 but since MARC-8 to UTF-8 generates these composed sequence
150 we get a better chance of a successful MARC-8 -> ISO-8859-1
152 unsigned char *outp = (unsigned char *) *outbuf;
157 for (i = 0; latin1_comb[i].x1; i++)
158 if (w->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
160 x = latin1_comb[i].y;
163 if (*outbytesleft < 1)
164 { /* no room. Retain compose_char and bail out */
165 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
168 if (!latin1_comb[i].x1)
169 { /* not found. Just write compose_char */
170 *outp++ = (unsigned char) w->compose_char;
172 *outbuf = (char *) outp;
174 /* compose_char used so reset it. x now holds current char */
/* x in 33..126 (printable ASCII other than space) with nothing pending;
   presumably x is held back as the new compose_char - TODO confirm */
178 if (x > 32 && x < 127 && w->compose_char == 0)
183 else if (x > 255 || x < 1)
185 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
188 else if (*outbytesleft < 1)
190 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
193 *outp++ = (unsigned char) x;
195 *outbuf = (char *) outp;
/**
 * Encoder flush handler: write the pending compose_char held in the
 * encoder state (e->data) to the output as a single byte and advance
 * *outbuf past it.
 *
 * Sets YAZ_ICONV_E2BIG when *outbytesleft is below 1.
 *
 * NOTE(review): the condition guarding the flush, the update of
 * *outbytesleft and the return statements are not visible in this
 * excerpt; confirm them against the full file.
 */
199 static size_t flush_iso_8859_1(yaz_iconv_t cd, yaz_iconv_encoder_t e,
200 char **outbuf, size_t *outbytesleft)
202 struct encoder_data *w = e->data;
205 unsigned char *outp = (unsigned char *) *outbuf;
206 if (*outbytesleft < 1)
208 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
211 *outp++ = (unsigned char) w->compose_char;
213 *outbuf = (char *) outp;
/**
 * Encoder init handler for ISO-8859-1; obtains the encoder state from
 * e->data.
 *
 * NOTE(review): the rest of the body is not visible in this excerpt;
 * presumably it clears the pending compose_char - TODO confirm.
 */
220 void init_iso_8859_1(yaz_iconv_encoder_t e)
222 struct encoder_data *w = e->data;
/**
 * Encoder destroy handler for ISO-8859-1 (installed as e->destroy_handle
 * by yaz_iso_8859_1_encoder).
 *
 * NOTE(review): the body is not visible in this excerpt; presumably it
 * releases the encoder state allocated with xmalloc - TODO confirm.
 */
226 void destroy_iso_8859_1(yaz_iconv_encoder_t e)
/**
 * Set up e as an ISO-8859-1 encoder when tocode selects that character set.
 *
 * When yaz_matchstr(tocode, "iso88591") returns zero, an encoder_data
 * state object is allocated with xmalloc and the write, flush, init and
 * destroy handlers of e are pointed at the ISO-8859-1 implementations.
 *
 * NOTE(review): the line attaching the allocated state to e and the
 * return statements are not visible in this excerpt; confirm the return
 * value for the match / no-match cases against the full file.
 */
231 yaz_iconv_encoder_t yaz_iso_8859_1_encoder(const char *tocode,
232 yaz_iconv_encoder_t e)
235 if (!yaz_matchstr(tocode, "iso88591"))
237 struct encoder_data *data = xmalloc(sizeof(*data));
239 e->write_handle = write_iso_8859_1;
240 e->flush_handle = flush_iso_8859_1;
241 e->init_handle = init_iso_8859_1;
242 e->destroy_handle = destroy_iso_8859_1;
252 * indent-tabs-mode: nil
254 * vim: shiftwidth=4 tabstop=8 expandtab