1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
7 * \brief ISO-8859-1 encoding / decoding
24 #include <yaz/xmalloc.h>
/* Character kept in the encoder's private data (reached as e->data).
   write_iso_8859_1 matches it, together with the incoming character,
   against latin1_comb[].x1 / .x2; flush_iso_8859_1 writes it out as a
   single byte. */
30 unsigned long compose_char;
/* Composition table latin1_comb. Each entry is { x1, x2, y }:
 *   x1 - base letter
 *   x2 - combining mark following it (0x0300 grave, 0x0301 acute,
 *        0x0302 circumflex, 0x0303 tilde, 0x0308 diaeresis,
 *        0x030a ring above, 0x0327 cedilla)
 *   y  - single ISO-8859-1 code of the resulting accented letter
 * Entries are listed in ascending y order. The lookup loops in this file
 * run while latin1_comb[i].x1 is non-zero, so the table must end with an
 * entry whose x1 is 0. */
39 { 'A', 0x0300, 0xc0}, /* LATIN CAPITAL LETTER A WITH GRAVE */
40 { 'A', 0x0301, 0xc1}, /* LATIN CAPITAL LETTER A WITH ACUTE */
41 { 'A', 0x0302, 0xc2}, /* LATIN CAPITAL LETTER A WITH CIRCUMFLEX */
42 { 'A', 0x0303, 0xc3}, /* LATIN CAPITAL LETTER A WITH TILDE */
43 { 'A', 0x0308, 0xc4}, /* LATIN CAPITAL LETTER A WITH DIAERESIS */
44 { 'A', 0x030a, 0xc5}, /* LATIN CAPITAL LETTER A WITH RING ABOVE */
45 /* omitted: 0xc6 LATIN CAPITAL LETTER AE */
46 { 'C', 0x0327, 0xc7}, /* LATIN CAPITAL LETTER C WITH CEDILLA */
47 { 'E', 0x0300, 0xc8}, /* LATIN CAPITAL LETTER E WITH GRAVE */
48 { 'E', 0x0301, 0xc9}, /* LATIN CAPITAL LETTER E WITH ACUTE */
49 { 'E', 0x0302, 0xca}, /* LATIN CAPITAL LETTER E WITH CIRCUMFLEX */
50 { 'E', 0x0308, 0xcb}, /* LATIN CAPITAL LETTER E WITH DIAERESIS */
51 { 'I', 0x0300, 0xcc}, /* LATIN CAPITAL LETTER I WITH GRAVE */
52 { 'I', 0x0301, 0xcd}, /* LATIN CAPITAL LETTER I WITH ACUTE */
53 { 'I', 0x0302, 0xce}, /* LATIN CAPITAL LETTER I WITH CIRCUMFLEX */
54 { 'I', 0x0308, 0xcf}, /* LATIN CAPITAL LETTER I WITH DIAERESIS */
/* omitted: 0xd0 LATIN CAPITAL LETTER ETH */
55 { 'N', 0x0303, 0xd1}, /* LATIN CAPITAL LETTER N WITH TILDE */
56 { 'O', 0x0300, 0xd2}, /* LATIN CAPITAL LETTER O WITH GRAVE */
57 { 'O', 0x0301, 0xd3}, /* LATIN CAPITAL LETTER O WITH ACUTE */
58 { 'O', 0x0302, 0xd4}, /* LATIN CAPITAL LETTER O WITH CIRCUMFLEX */
59 { 'O', 0x0303, 0xd5}, /* LATIN CAPITAL LETTER O WITH TILDE */
60 { 'O', 0x0308, 0xd6}, /* LATIN CAPITAL LETTER O WITH DIAERESIS */
61 /* omitted: 0xd7 MULTIPLICATION SIGN */
62 /* omitted: 0xd8 LATIN CAPITAL LETTER O WITH STROKE */
63 { 'U', 0x0300, 0xd9}, /* LATIN CAPITAL LETTER U WITH GRAVE */
64 { 'U', 0x0301, 0xda}, /* LATIN CAPITAL LETTER U WITH ACUTE */
65 { 'U', 0x0302, 0xdb}, /* LATIN CAPITAL LETTER U WITH CIRCUMFLEX */
66 { 'U', 0x0308, 0xdc}, /* LATIN CAPITAL LETTER U WITH DIAERESIS */
67 { 'Y', 0x0301, 0xdd}, /* LATIN CAPITAL LETTER Y WITH ACUTE */
68 /* omitted: 0xde LATIN CAPITAL LETTER THORN */
69 /* omitted: 0xdf LATIN SMALL LETTER SHARP S */
70 { 'a', 0x0300, 0xe0}, /* LATIN SMALL LETTER A WITH GRAVE */
71 { 'a', 0x0301, 0xe1}, /* LATIN SMALL LETTER A WITH ACUTE */
72 { 'a', 0x0302, 0xe2}, /* LATIN SMALL LETTER A WITH CIRCUMFLEX */
73 { 'a', 0x0303, 0xe3}, /* LATIN SMALL LETTER A WITH TILDE */
74 { 'a', 0x0308, 0xe4}, /* LATIN SMALL LETTER A WITH DIAERESIS */
75 { 'a', 0x030a, 0xe5}, /* LATIN SMALL LETTER A WITH RING ABOVE */
76 /* omitted: 0xe6 LATIN SMALL LETTER AE */
77 { 'c', 0x0327, 0xe7}, /* LATIN SMALL LETTER C WITH CEDILLA */
78 { 'e', 0x0300, 0xe8}, /* LATIN SMALL LETTER E WITH GRAVE */
79 { 'e', 0x0301, 0xe9}, /* LATIN SMALL LETTER E WITH ACUTE */
80 { 'e', 0x0302, 0xea}, /* LATIN SMALL LETTER E WITH CIRCUMFLEX */
81 { 'e', 0x0308, 0xeb}, /* LATIN SMALL LETTER E WITH DIAERESIS */
82 { 'i', 0x0300, 0xec}, /* LATIN SMALL LETTER I WITH GRAVE */
83 { 'i', 0x0301, 0xed}, /* LATIN SMALL LETTER I WITH ACUTE */
84 { 'i', 0x0302, 0xee}, /* LATIN SMALL LETTER I WITH CIRCUMFLEX */
85 { 'i', 0x0308, 0xef}, /* LATIN SMALL LETTER I WITH DIAERESIS */
86 /* omitted: 0xf0 LATIN SMALL LETTER ETH */
87 { 'n', 0x0303, 0xf1}, /* LATIN SMALL LETTER N WITH TILDE */
88 { 'o', 0x0300, 0xf2}, /* LATIN SMALL LETTER O WITH GRAVE */
89 { 'o', 0x0301, 0xf3}, /* LATIN SMALL LETTER O WITH ACUTE */
90 { 'o', 0x0302, 0xf4}, /* LATIN SMALL LETTER O WITH CIRCUMFLEX */
91 { 'o', 0x0303, 0xf5}, /* LATIN SMALL LETTER O WITH TILDE */
92 { 'o', 0x0308, 0xf6}, /* LATIN SMALL LETTER O WITH DIAERESIS */
93 /* omitted: 0xf7 DIVISION SIGN */
94 /* omitted: 0xf8 LATIN SMALL LETTER O WITH STROKE */
95 { 'u', 0x0300, 0xf9}, /* LATIN SMALL LETTER U WITH GRAVE */
96 { 'u', 0x0301, 0xfa}, /* LATIN SMALL LETTER U WITH ACUTE */
97 { 'u', 0x0302, 0xfb}, /* LATIN SMALL LETTER U WITH CIRCUMFLEX */
98 { 'u', 0x0308, 0xfc}, /* LATIN SMALL LETTER U WITH DIAERESIS */
99 { 'y', 0x0301, 0xfd}, /* LATIN SMALL LETTER Y WITH ACUTE */
100 /* omitted: 0xfe LATIN SMALL LETTER THORN */
101 { 'y', 0x0308, 0xff}, /* LATIN SMALL LETTER Y WITH DIAERESIS */
/**
 * \brief Decomposes an ISO-8859-1 code into base letter + combining mark.
 *
 * Scans latin1_comb for an entry whose y equals v. On a match the entry's
 * x1 and x2 are stored through the output pointers. Values outside
 * 0xc0..0xff are rejected up front, since no table entry has a y outside
 * that range.
 *
 * \param v   code to look up
 * \param x1  receives latin1_comb[i].x1 of the matching entry
 * \param x2  receives latin1_comb[i].x2 of the matching entry
 * \return int status. NOTE(review): the return statements are not visible
 *         here -- confirm which value signals a match.
 */
106 int yaz_iso_8859_1_lookup_y(unsigned long v,
107 unsigned long *x1, unsigned long *x2)
109 if (v >= 0xc0 && v <= 0xff) /* optimization. min and max .y values */
112 for (i = 0; latin1_comb[i].x1; i++)
114 if (v == latin1_comb[i].y)
116 *x1 = latin1_comb[i].x1;
117 *x2 = latin1_comb[i].x2;
/**
 * \brief Composes a base letter + combining mark into one ISO-8859-1 code.
 *
 * Scans latin1_comb for the entry whose x1 and x2 both match the
 * arguments and stores that entry's y through the output pointer y.
 *
 * \param x1  base letter, compared with latin1_comb[i].x1
 * \param x2  combining mark, compared with latin1_comb[i].x2
 * \return int status. NOTE(review): the return statements are not visible
 *         here -- confirm which value signals a match.
 */
125 int yaz_iso_8859_1_lookup_x12(unsigned long x1, unsigned long x2,
128 /* For MARC8s we try to get a Latin-1 page code out of it */
130 for (i = 0; latin1_comb[i].x1; i++)
131 if (x2 == latin1_comb[i].x2 && x1 == latin1_comb[i].x1)
133 *y = latin1_comb[i].y;
/**
 * \brief Write handler: emits the current character x as ISO-8859-1.
 *
 * Installed as e->write_handle by yaz_iso_8859_1_encoder. The table
 * latin1_comb is searched for an entry with x1 == w->compose_char and
 * x2 == x; on a hit x is replaced by the entry's single code y. When no
 * entry matches, compose_char itself is written as one byte.
 *
 * Errors are reported through yaz_iconv_set_errno on cd:
 *   YAZ_ICONV_E2BIG  - *outbytesleft is less than 1
 *   YAZ_ICONV_EILSEQ - x is 0 or greater than 255
 *
 * \param cd            conversion handle that receives the error code
 * \param e             encoder; e->data holds the struct encoder_data
 * \param outbuf        in/out: output position, advanced past written bytes
 * \param outbytesleft  in: space remaining in the output buffer
 * \return size_t status. NOTE(review): the return statements are not
 *         visible here -- confirm the success / failure values.
 */
139 static size_t write_iso_8859_1(yaz_iconv_t cd, yaz_iconv_encoder_t e,
141 char **outbuf, size_t *outbytesleft)
143 struct encoder_data *w = e->data;
144 /* list of two char unicode sequence that, when combined, are
145 equivalent to single unicode chars that can be represented in
147 Regular iconv on Linux at least does not seem to convert these,
148 but since MARC-8 to UTF-8 generates these composed sequence
149 we get a better chance of a successful MARC-8 -> ISO-8859-1
151 unsigned char *outp = (unsigned char *) *outbuf;
156 for (i = 0; latin1_comb[i].x1; i++)
157 if (w->compose_char == latin1_comb[i].x1 && x == latin1_comb[i].x2)
159 x = latin1_comb[i].y;
162 if (*outbytesleft < 1)
163 { /* no room. Retain compose_char and bail out */
164 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
/* i sits on the terminating entry (x1 == 0) when no pair matched */
167 if (!latin1_comb[i].x1)
168 { /* not found. Just write compose_char */
169 *outp++ = (unsigned char) w->compose_char;
171 *outbuf = (char *) outp;
173 /* compose_char used so reset it. x now holds current char */
/* Printable ASCII above space while no compose_char is held.
   NOTE(review): the branch body is not visible; presumably x is kept
   as the new compose_char instead of being written -- confirm. */
177 if (x > 32 && x < 127 && w->compose_char == 0)
/* Outside 1..255: cannot be stored in a single ISO-8859-1 byte */
182 else if (x > 255 || x < 1)
184 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
187 else if (*outbytesleft < 1)
189 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
/* Emit x as one byte and publish the advanced output position */
192 *outp++ = (unsigned char) x;
194 *outbuf = (char *) outp;
/**
 * \brief Flush handler: writes out the character held in compose_char.
 *
 * Installed as e->flush_handle by yaz_iso_8859_1_encoder. Stores
 * w->compose_char as a single byte at *outbuf and advances *outbuf.
 * If *outbytesleft is less than 1, YAZ_ICONV_E2BIG is set on cd instead.
 * NOTE(review): any guard that skips the write when nothing is held is
 * not visible here -- confirm.
 *
 * \param cd            conversion handle that receives the error code
 * \param e             encoder; e->data holds the struct encoder_data
 * \param outbuf        in/out: output position, advanced past written bytes
 * \param outbytesleft  in: space remaining in the output buffer
 * \return size_t status. NOTE(review): the return statements are not
 *         visible here -- confirm the success / failure values.
 */
198 static size_t flush_iso_8859_1(yaz_iconv_t cd, yaz_iconv_encoder_t e,
199 char **outbuf, size_t *outbytesleft)
201 struct encoder_data *w = e->data;
204 unsigned char *outp = (unsigned char *) *outbuf;
205 if (*outbytesleft < 1)
207 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
210 *outp++ = (unsigned char) w->compose_char;
212 *outbuf = (char *) outp;
/**
 * \brief Init handler, installed as e->init_handle by
 *        yaz_iso_8859_1_encoder.
 *
 * Fetches the struct encoder_data from e->data.
 * NOTE(review): the rest of the body is not visible; presumably it
 * resets compose_char -- confirm.
 *
 * \param e  encoder whose private data is accessed
 */
219 void init_iso_8859_1(yaz_iconv_encoder_t e)
221 struct encoder_data *w = e->data;
/**
 * \brief Destroy handler, installed as e->destroy_handle by
 *        yaz_iso_8859_1_encoder.
 *
 * NOTE(review): the body is not visible; presumably it releases the
 * struct encoder_data that yaz_iso_8859_1_encoder allocates with
 * xmalloc -- confirm.
 *
 * \param e  encoder being torn down
 */
225 void destroy_iso_8859_1(yaz_iconv_encoder_t e)
/**
 * \brief Sets up encoder e for ISO-8859-1 output when tocode selects it.
 *
 * The setup branch runs when yaz_matchstr(tocode, "iso88591") returns 0.
 * It allocates a struct encoder_data with xmalloc and installs the
 * write, flush, init and destroy handlers defined in this file on e.
 *
 * \param tocode  encoding name compared against "iso88591"
 * \param e       encoder whose handler slots are filled in
 * \return a yaz_iconv_encoder_t. NOTE(review): the return statements are
 *         not visible here -- confirm what is returned on match and on
 *         mismatch.
 */
230 yaz_iconv_encoder_t yaz_iso_8859_1_encoder(const char *tocode,
231 yaz_iconv_encoder_t e)
234 if (!yaz_matchstr(tocode, "iso88591"))
/* NOTE(review): presumably stored in e->data, which every handler
   below reads -- the assignment is not visible, confirm. */
236 struct encoder_data *data = xmalloc(sizeof(*data));
238 e->write_handle = write_iso_8859_1;
239 e->flush_handle = flush_iso_8859_1;
240 e->init_handle = init_iso_8859_1;
241 e->destroy_handle = destroy_iso_8859_1;
251 * indent-tabs-mode: nil
253 * vim: shiftwidth=4 tabstop=8 expandtab