2 * Copyright (C) 1995-2008, Index Data ApS
3 * See the file LICENSE for details.
8 * \brief MARC-8 encoding
11 * http://www.loc.gov/marc/specifications/speccharmarc8.html
23 #include <yaz/xmalloc.h>
25 #include <yaz/snprintf.h>
/* Per-character-set conversion routines used by lookup_marc8() below.
 * NOTE(review): the two hex digits in each name look like the final byte
 * of the set's escape sequence (e.g. 4E is paired with ESC "(N" and 53
 * with ESC "(S" in lookup_marc8) - confirm against the table generator. */
28 yaz_conv_func_t yaz_marc8r_42_conv;
29 yaz_conv_func_t yaz_marc8r_45_conv;
30 yaz_conv_func_t yaz_marc8r_67_conv;
31 yaz_conv_func_t yaz_marc8r_62_conv;
32 yaz_conv_func_t yaz_marc8r_70_conv;
33 yaz_conv_func_t yaz_marc8r_32_conv;
34 yaz_conv_func_t yaz_marc8r_4E_conv;
35 yaz_conv_func_t yaz_marc8r_51_conv;
36 yaz_conv_func_t yaz_marc8r_33_conv;
37 yaz_conv_func_t yaz_marc8r_34_conv;
38 yaz_conv_func_t yaz_marc8r_53_conv;
39 yaz_conv_func_t yaz_marc8r_31_conv;
/* extra byte written by flush_combos() right after the pending character;
 * yaz_write_marc8_2() sets it to 0xEC or 0xFB, otherwise it is 0 */
45 unsigned write_marc8_second_half_char;
/* pending MARC-8 code that has been looked up but not yet written;
 * flush_combos() writes it and resets it to 0 */
46 unsigned long write_marc8_last;
/* escape sequence (page) that belongs to write_marc8_last */
48 const char *write_marc8_lpage;
/* escape sequence most recently recorded for G0; starts as ESC "(B" */
49 const char *write_marc8_g0;
/* escape sequence most recently recorded for G1; 0 when none is recorded */
50 const char *write_marc8_g1;
/* Reset the encoder state stored in w->data: no pending character, no
 * pending second-half byte, NCR flag off, no pending page, G0 recorded as
 * ESC "(B" and G1 recorded as unset.
 * Installed as the init handler by yaz_marc8_encoder(). */
53 static void init_marc8(yaz_iconv_encoder_t w)
55 struct encoder_data *data = w->data;
56 data->write_marc8_second_half_char = 0;
57 data->write_marc8_last = 0;
58 data->write_marc8_ncr = 0;
59 data->write_marc8_lpage = 0;
60 data->write_marc8_g0 = ESC "(B";
61 data->write_marc8_g1 = 0;
/* Forward declaration: flush_combos() calls this function before its
 * definition appears further down in the file. */
64 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
65 struct encoder_data *w,
66 char **outbuf, size_t *outbytesleft,
67 const char *page_chr);
/* Look up character x in the MARC-8 conversion tables.
 *
 * x is first encoded as UTF-8 into a local buffer; those bytes are then
 * handed to the per-set conversion routines.  *page_chr is assigned the
 * escape sequence associated with a conversion routine, and comb is passed
 * through to the conversion routines.
 *
 * YAZ_ICONV_EILSEQ is set on cd when the UTF-8 encoding step fails and on
 * the final fall-through path.
 * NOTE(review): presumably returns the MARC-8 code of x and 0 on failure;
 * confirm against the complete function body. */
69 static unsigned long lookup_marc8(yaz_iconv_t cd,
70 unsigned long x, int *comb,
71 const char **page_chr)
74 char *utf8_outbuf = utf8_buf;
/* one byte of utf8_buf is held back; strlen() is applied to the buffer
 * below, so presumably it is reserved for a terminating NUL - confirm */
75 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
78 r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
79 if (r == (size_t)(-1))
81 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
87 size_t inbytesleft, no_read_sub = 0;
/* the UTF-8 bytes just produced become the input of the table lookups */
91 inp = (unsigned char *) utf8_buf;
92 inbytesleft = strlen(utf8_buf);
/* the conversion routines are called one after another with the same
 * input; each call is followed by the page assignment for that set */
94 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
100 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
103 *page_chr = ESC "(B";
106 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
112 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
118 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
121 *page_chr = ESC "(2";
124 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
127 *page_chr = ESC "(N";
130 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
133 *page_chr = ESC "(Q";
136 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
139 *page_chr = ESC "(3";
142 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
145 *page_chr = ESC "(4";
148 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
151 *page_chr = ESC "(S";
154 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
157 *page_chr = ESC "$1";
/* none of the lookups above produced a result: illegal sequence */
160 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
/* Write the pending character (w->write_marc8_last) to the output.
 *
 * The page stored in w->write_marc8_lpage is selected first through
 * yaz_write_marc8_page_chr().  The character is then written either as a
 * numeric character reference of the form &#x....; (when
 * w->write_marc8_ncr is set) or as raw bytes taken from y, most
 * significant byte first.  A pending second-half byte, if any, follows.
 * Finally all pending state is cleared.
 *
 * Sets YAZ_ICONV_E2BIG on cd and returns (size_t)(-1) when the output
 * buffer does not have more than 9 bytes left. */
165 static size_t flush_combos(yaz_iconv_t cd,
166 struct encoder_data *w,
167 char **outbuf, size_t *outbytesleft)
169 unsigned long y = w->write_marc8_last;
/* a pending character is expected to always carry its page */
174 assert(w->write_marc8_lpage);
175 if (w->write_marc8_lpage)
177 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
178 w->write_marc8_lpage);
/* 9 is the buffer size handed to yaz_snprintf below (8 characters plus
 * terminator); the same limit is applied to the raw-byte form */
183 if (9 >= *outbytesleft)
185 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
186 return (size_t) (-1);
188 if (w->write_marc8_ncr)
/* NOTE(review): y is unsigned long but the conversion is %04x, and a
 * value above 0xffff needs more than the 8 characters accounted for
 * here - confirm how yaz_snprintf handles both cases */
190 yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
191 (*outbytesleft) -= 8;
/* raw form: bits 16-23, then bits 8-15, then bits 0-7 of y */
199 byte = (unsigned char )((y>>16) & 0xff);
201 (*outbuf)[out_no++] = byte;
202 byte = (unsigned char)((y>>8) & 0xff);
204 (*outbuf)[out_no++] = byte;
205 byte = (unsigned char )(y & 0xff);
207 (*outbuf)[out_no++] = byte;
209 (*outbytesleft) -= out_no;
/* second-half byte recorded by yaz_write_marc8_2() goes out right
 * after the character */
212 if (w->write_marc8_second_half_char)
214 *(*outbuf)++ = w->write_marc8_second_half_char;
/* nothing is pending any more */
218 w->write_marc8_last = 0;
219 w->write_marc8_ncr = 0;
220 w->write_marc8_lpage = 0;
221 w->write_marc8_second_half_char = 0;
/* Emit the escape sequence page_chr if it differs from the page currently
 * recorded for the set it designates, and record it as the current page.
 *
 * The recorded page is w->write_marc8_g0 unless the second character of
 * page_chr is ')', in which case w->write_marc8_g1 is used.
 *
 * Sets YAZ_ICONV_E2BIG on cd and returns (size_t)(-1) when a switch is
 * needed and fewer than 8 output bytes are left. */
225 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
226 struct encoder_data *w,
227 char **outbuf, size_t *outbytesleft,
228 const char *page_chr)
230 const char **old_page_chr = &w->write_marc8_g0;
232 /* are we going to a G1-set (such as ESC ")!E") */
233 if (page_chr && page_chr[1] == ')')
234 old_page_chr = &w->write_marc8_g1;
/* only act when no page is recorded yet or the page changes */
236 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
239 const char *page_out = page_chr;
241 if (*outbytesleft < 8)
243 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
245 return (size_t) (-1);
/* the pages ESC "p", ESC "g" and ESC "b" get special treatment when
 * they are left */
250 if (!strcmp(*old_page_chr, ESC "p")
251 || !strcmp(*old_page_chr, ESC "g")
252 || !strcmp(*old_page_chr, ESC "b"))
255 /* Technique 1 leave */
256 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
258 /* Must leave script + enter new page */
259 plen = strlen(page_out);
260 memcpy(*outbuf, page_out, plen);
262 (*outbytesleft) -= plen;
/* remember the new page and write the (remaining) escape sequence */
267 *old_page_chr = page_chr;
268 plen = strlen(page_out);
269 memcpy(*outbuf, page_out, plen);
271 (*outbytesleft) -= plen;
/* Handle one character x: look it up with lookup_marc8() and either deal
 * with it right away or make it the new pending character.
 *
 * Any previously pending character is flushed with flush_combos() before
 * the new one (code, page and NCR flag) is stored in w.
 * Returns (size_t)(-1) on a failed lookup path and, with YAZ_ICONV_E2BIG
 * set on cd, when no more than one output byte is left. */
277 static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
279 char **outbuf, size_t *outbytesleft,
284 const char *page_chr = 0;
285 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
290 return (size_t) (-1);
305 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
/* certain characters need a second byte after the following character;
 * it is kept in w until flush_combos() writes it (0xFB for x == 0x0360) */
311 w->write_marc8_second_half_char = 0xEC;
312 else if (x == 0x0360)
313 w->write_marc8_second_half_char = 0xFB;
315 if (*outbytesleft <= 1)
317 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
318 return (size_t) (-1);
/* write whatever was pending before replacing it */
325 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
/* y becomes the pending character */
329 w->write_marc8_last = y;
330 w->write_marc8_lpage = page_chr;
331 w->write_marc8_ncr = enable_ncr;
/* Flush handler installed by yaz_marc8_encoder(): write the pending
 * character, forget the recorded G1 page and switch the output back to
 * ESC "(B".  Returns the result of that final page switch. */
336 static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
337 char **outbuf, size_t *outbytesleft)
339 struct encoder_data *w = en->data;
340 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
343 w->write_marc8_g1 = 0;
344 return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
/* Write character x in MARC-8 using the given loss_mode.
 *
 * If yaz_iso_8859_1_lookup_y() yields a pair (x1, x2) for x, the two
 * characters are written one after the other.  The output position and the
 * pending-character state are saved beforehand and restored when writing
 * fails with YAZ_ICONV_E2BIG, so that the pair is not left half written.
 * Otherwise x is written directly through yaz_write_marc8_2(). */
347 static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w,
349 char **outbuf, size_t *outbytesleft,
352 unsigned long x1, x2;
353 if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
355 /* save the output pointers .. */
356 char *outbuf0 = *outbuf;
357 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): write_marc8_last is unsigned long but is kept in an
 * int here - confirm that no pending value can exceed the int range */
358 int last_ch = w->write_marc8_last;
359 int ncr = w->write_marc8_ncr;
360 const char *lpage = w->write_marc8_lpage;
363 r = yaz_write_marc8_2(cd, w, x1,
364 outbuf, outbytesleft, loss_mode);
367 r = yaz_write_marc8_2(cd, w, x2,
368 outbuf, outbytesleft, loss_mode);
369 if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
371 /* not enough room. reset output to original values */
373 *outbytesleft = outbytesleft0;
374 w->write_marc8_last = last_ch;
375 w->write_marc8_ncr = ncr;
376 w->write_marc8_lpage = lpage;
/* no pair available for x: write it as a single character */
380 return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
/* Write handler for "MARC8" and "MARC8s": forwards to
 * yaz_write_marc8_generic() with loss mode 0. */
383 static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e,
385 char **outbuf, size_t *outbytesleft)
387 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 0);
/* Write handler for "MARC8lossy": forwards to yaz_write_marc8_generic()
 * with loss mode 1. */
390 static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e,
392 char **outbuf, size_t *outbytesleft)
394 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 1);
/* Write handler for "MARC8lossless": forwards to yaz_write_marc8_generic()
 * with loss mode 2. */
397 static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e,
399 char **outbuf, size_t *outbytesleft)
401 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 2);
/* Destroy handler for the MARC-8 encoder; yaz_marc8_encoder() installs it
 * as e->destroy_handle. */
404 static void destroy_marc8(yaz_iconv_encoder_t e)
/* Configure encoder e for MARC-8 output when tocode names one of the
 * supported variants.
 *
 * tocode is compared with yaz_matchstr(): "MARC8" and "MARC8s" select the
 * normal write handler, "MARC8lossy" the lossy one and "MARC8lossless" the
 * lossless one.  A fresh struct encoder_data is allocated with xmalloc()
 * and the destroy, flush and init handlers of this file are installed. */
409 yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode,
410 yaz_iconv_encoder_t e)
413 if (!yaz_matchstr(tocode, "MARC8"))
414 e->write_handle = write_marc8_normal;
415 else if (!yaz_matchstr(tocode, "MARC8s"))
416 e->write_handle = write_marc8_normal;
417 else if (!yaz_matchstr(tocode, "MARC8lossy"))
418 e->write_handle = write_marc8_lossy;
419 else if (!yaz_matchstr(tocode, "MARC8lossless"))
420 e->write_handle = write_marc8_lossless;
/* per-encoder state; initialised later through init_marc8() */
425 struct encoder_data *data = xmalloc(sizeof(*data));
427 e->destroy_handle = destroy_marc8;
428 e->flush_handle = flush_marc8;
429 e->init_handle = init_marc8;
438 * indent-tabs-mode: nil
440 * vim: shiftwidth=4 tabstop=8 expandtab