1 /* This file is part of the YAZ toolkit.
2 * Copyright (C) 1995-2008 Index Data
3 * See the file LICENSE for details.
7 * \brief MARC-8 encoding
10 * http://www.loc.gov/marc/specifications/speccharmarc8.html
22 #include <yaz/xmalloc.h>
24 #include <yaz/snprintf.h>
/* Declarations of the per-character-set conversion routines, all of type
 * yaz_conv_func_t.  None of them is defined in this part of the file;
 * lookup_marc8() calls several of them one after another. */
27 yaz_conv_func_t yaz_marc8r_42_conv;
28 yaz_conv_func_t yaz_marc8r_45_conv;
29 yaz_conv_func_t yaz_marc8r_67_conv;
30 yaz_conv_func_t yaz_marc8r_62_conv;
31 yaz_conv_func_t yaz_marc8r_70_conv;
32 yaz_conv_func_t yaz_marc8r_32_conv;
33 yaz_conv_func_t yaz_marc8r_4E_conv;
34 yaz_conv_func_t yaz_marc8r_51_conv;
35 yaz_conv_func_t yaz_marc8r_33_conv;
36 yaz_conv_func_t yaz_marc8r_34_conv;
37 yaz_conv_func_t yaz_marc8r_53_conv;
38 yaz_conv_func_t yaz_marc8r_31_conv;
/* Fields of the encoder state.  NOTE(review): the enclosing struct header
 * is not visible in this excerpt. */
/* Extra byte that flush_combos() writes after the pending character when it
 * is non-zero; yaz_write_marc8_2() sets it to 0xEC or 0xFB. */
44 unsigned write_marc8_second_half_char;
/* Pending value stored by yaz_write_marc8_2() and written out later by
 * flush_combos(), which then resets it to 0. */
45 unsigned long write_marc8_last;
/* Page escape string stored together with write_marc8_last; flush_combos()
 * passes it to yaz_write_marc8_page_chr(). */
47 const char *write_marc8_lpage;
/* Slot that yaz_write_marc8_page_chr() compares against and updates by
 * default; init_marc8() sets it to ESC "(B". */
48 const char *write_marc8_g0;
/* Slot used instead of write_marc8_g0 when the second character of the
 * escape string is ')'; 0 when unset. */
49 const char *write_marc8_g1;
/* Reset the MARC-8 encoder state held in w->data: no second-half byte, no
 * pending value, NCR flag off, no pending page, the G0 slot set to
 * ESC "(B" and the G1 slot cleared. */
52 static void init_marc8(yaz_iconv_encoder_t w)
54 struct encoder_data *data = w->data;
55 data->write_marc8_second_half_char = 0;
56 data->write_marc8_last = 0;
57 data->write_marc8_ncr = 0;
58 data->write_marc8_lpage = 0;
59 data->write_marc8_g0 = ESC "(B";
60 data->write_marc8_g1 = 0;
/* Forward declaration: flush_combos() calls this function before its
 * definition appears further down in the file. */
63 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
64 struct encoder_data *w,
65 char **outbuf, size_t *outbytesleft,
66 const char *page_chr);
/* Look up the MARC-8 value for the character x.
 *
 * x is first written as UTF-8 into a local buffer with
 * yaz_write_UTF8_char(); those bytes are then handed to the
 * yaz_marc8r_*_conv routines one after another.  *page_chr is assigned the
 * escape string paired with a routine, and comb is passed through to the
 * conversion routine.  YAZ_ICONV_EILSEQ is set on cd when the UTF-8 write
 * fails, and again after the last routine in the chain.
 *
 * NOTE(review): the tests on each routine's result and the return
 * statements are not visible in this excerpt.  Presumably the first
 * non-zero result is returned and 0 signals that no table matched;
 * confirm against the full file. */
68 static unsigned long lookup_marc8(yaz_iconv_t cd,
69 unsigned long x, int *comb,
70 const char **page_chr)
73 char *utf8_outbuf = utf8_buf;
/* One byte of utf8_buf is held back, presumably for a terminating NUL:
 * strlen() is applied to the buffer below. */
74 size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
77 r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
78 if (r == (size_t)(-1))
80 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
86 size_t inbytesleft, no_read_sub = 0;
/* The UTF-8 bytes of x become the input to every conversion routine. */
90 inp = (unsigned char *) utf8_buf;
91 inbytesleft = strlen(utf8_buf);
93 x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
99 x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
102 *page_chr = ESC "(B";
105 x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
111 x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
117 x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
120 *page_chr = ESC "(2";
123 x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
126 *page_chr = ESC "(N";
129 x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
132 *page_chr = ESC "(Q";
135 x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
138 *page_chr = ESC "(3";
141 x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
144 *page_chr = ESC "(4";
147 x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
150 *page_chr = ESC "(S";
153 x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
156 *page_chr = ESC "$1";
/* Set after the last routine in the chain has been tried. */
159 yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
/* Write out the pending value w->write_marc8_last and clear the pending
 * state.
 *
 * The page stored in w->write_marc8_lpage is selected first through
 * yaz_write_marc8_page_chr().  The value is then written either as the
 * text "&#x....;" (when w->write_marc8_ncr is set) or as bytes taken from
 * bits 16-23, 8-15 and 0-7 of the value, in that order.  A non-zero
 * w->write_marc8_second_half_char is appended afterwards.
 *
 * Sets YAZ_ICONV_E2BIG and returns (size_t)(-1) when *outbytesleft is 9 or
 * less.
 *
 * NOTE(review): the conditions guarding each byte write and the handling
 * of the page-change result are not visible in this excerpt. */
164 static size_t flush_combos(yaz_iconv_t cd,
165 struct encoder_data *w,
166 char **outbuf, size_t *outbytesleft)
168 unsigned long y = w->write_marc8_last;
/* The assert documents the expectation; the test that follows still guards
 * the call when assertions are compiled out. */
173 assert(w->write_marc8_lpage);
174 if (w->write_marc8_lpage)
176 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
177 w->write_marc8_lpage);
182 if (9 >= *outbytesleft)
184 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
185 return (size_t) (-1);
187 if (w->write_marc8_ncr)
/* At most 8 characters plus the terminating NUL are written.
 * NOTE(review): y is unsigned long but the conversion is %04x, not %04lx —
 * confirm how yaz_snprintf treats this. */
189 yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
190 (*outbytesleft) -= 8;
/* Bytes are taken from the most significant of the three low bytes down to
 * the least significant. */
198 byte = (unsigned char )((y>>16) & 0xff);
200 (*outbuf)[out_no++] = byte;
201 byte = (unsigned char)((y>>8) & 0xff);
203 (*outbuf)[out_no++] = byte;
204 byte = (unsigned char )(y & 0xff);
206 (*outbuf)[out_no++] = byte;
208 (*outbytesleft) -= out_no;
211 if (w->write_marc8_second_half_char)
213 *(*outbuf)++ = w->write_marc8_second_half_char;
/* Nothing is pending any more. */
217 w->write_marc8_last = 0;
218 w->write_marc8_ncr = 0;
219 w->write_marc8_lpage = 0;
220 w->write_marc8_second_half_char = 0;
/* Emit the escape string page_chr if it differs from the one currently
 * recorded, and record it.
 *
 * The current page is tracked in w->write_marc8_g0, or in
 * w->write_marc8_g1 when the second character of page_chr is ')'.  Nothing
 * is written when the recorded page already equals page_chr.
 *
 * Sets YAZ_ICONV_E2BIG and returns (size_t)(-1) when fewer than 8 bytes
 * are left in the output buffer.
 *
 * NOTE(review): page_chr is tested for NULL before page_chr[1] is read,
 * but it is passed to strcmp() without that test on the next condition —
 * confirm callers never pass NULL. */
224 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd,
225 struct encoder_data *w,
226 char **outbuf, size_t *outbytesleft,
227 const char *page_chr)
229 const char **old_page_chr = &w->write_marc8_g0;
231 /* are we going to a G1-set (such as ESC ")!E") */
232 if (page_chr && page_chr[1] == ')')
233 old_page_chr = &w->write_marc8_g1;
235 if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
238 const char *page_out = page_chr;
240 if (*outbytesleft < 8)
242 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
244 return (size_t) (-1);
/* Pages ESC "p", ESC "g" and ESC "b" get special treatment when left. */
249 if (!strcmp(*old_page_chr, ESC "p")
250 || !strcmp(*old_page_chr, ESC "g")
251 || !strcmp(*old_page_chr, ESC "b"))
254 /* Technique 1 leave */
255 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
257 /* Must leave script + enter new page */
258 plen = strlen(page_out);
259 memcpy(*outbuf, page_out, plen);
261 (*outbytesleft) -= plen;
/* Record the new page in the selected slot, then write its escape. */
266 *old_page_chr = page_chr;
267 plen = strlen(page_out);
268 memcpy(*outbuf, page_out, plen);
270 (*outbytesleft) -= plen;
/* Encode one character x: look it up with lookup_marc8(), then either
 * handle it on the spot or keep it as the pending value.
 *
 * In the visible lines, one path selects the page through
 * yaz_write_marc8_page_chr(), records a second-half byte (0xFB for
 * x == 0x0360; the condition for 0xEC is not visible) and fails with
 * YAZ_ICONV_E2BIG when *outbytesleft is 1 or less.  The other path calls
 * flush_combos() for the previous pending value and then stores y,
 * page_chr and enable_ncr as the new pending state.
 *
 * NOTE(review): part of the parameter list and the branch conditions are
 * not visible in this excerpt. */
276 static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
278 char **outbuf, size_t *outbytesleft,
283 const char *page_chr = 0;
284 unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
289 return (size_t) (-1);
304 size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
310 w->write_marc8_second_half_char = 0xEC;
311 else if (x == 0x0360)
312 w->write_marc8_second_half_char = 0xFB;
314 if (*outbytesleft <= 1)
316 yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
317 return (size_t) (-1);
/* Write out the previous pending value before replacing it. */
324 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
328 w->write_marc8_last = y;
329 w->write_marc8_lpage = page_chr;
330 w->write_marc8_ncr = enable_ncr;
/* Flush handler: write any pending value through flush_combos(), clear the
 * G1 slot and switch the output to ESC "(B".  Returns the result of that
 * final page change.
 *
 * NOTE(review): how the result r of flush_combos() is used is not visible
 * in this excerpt. */
335 static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
336 char **outbuf, size_t *outbytesleft)
338 struct encoder_data *w = en->data;
339 size_t r = flush_combos(cd, w, outbuf, outbytesleft);
342 w->write_marc8_g1 = 0;
343 return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
/* Encode x, first giving yaz_iso_8859_1_lookup_y() the chance to split it
 * into two values x1 and x2.
 *
 * When the split applies, x1 and x2 are written one after the other with
 * yaz_write_marc8_2().  If the second write fails with YAZ_ICONV_E2BIG the
 * saved output position and pending state are put back.  Otherwise x
 * itself is passed to yaz_write_marc8_2().
 *
 * NOTE(review): part of the parameter list and the handling of the first
 * write's result are not visible in this excerpt. */
346 static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w,
348 char **outbuf, size_t *outbytesleft,
351 unsigned long x1, x2;
352 if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
354 /* save the output pointers .. */
355 char *outbuf0 = *outbuf;
356 size_t outbytesleft0 = *outbytesleft;
/* NOTE(review): write_marc8_last is unsigned long but is saved in an int —
 * confirm the value always fits. */
357 int last_ch = w->write_marc8_last;
358 int ncr = w->write_marc8_ncr;
359 const char *lpage = w->write_marc8_lpage;
362 r = yaz_write_marc8_2(cd, w, x1,
363 outbuf, outbytesleft, loss_mode);
366 r = yaz_write_marc8_2(cd, w, x2,
367 outbuf, outbytesleft, loss_mode);
368 if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
370 /* not enough room. reset output to original values */
/* NOTE(review): the visible lines restore the byte count, pending value,
 * NCR flag and pending page; write_marc8_second_half_char and the G0/G1
 * slots are not restored here — confirm that is intended. */
372 *outbytesleft = outbytesleft0;
373 w->write_marc8_last = last_ch;
374 w->write_marc8_ncr = ncr;
375 w->write_marc8_lpage = lpage;
379 return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
/* Write handler that forwards to yaz_write_marc8_generic() with 0 as the
 * last argument (presumably the loss mode — confirm). */
382 static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e,
384 char **outbuf, size_t *outbytesleft)
386 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 0);
/* Write handler that forwards to yaz_write_marc8_generic() with 1 as the
 * last argument (presumably the loss mode — confirm). */
389 static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e,
391 char **outbuf, size_t *outbytesleft)
393 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 1);
/* Write handler that forwards to yaz_write_marc8_generic() with 2 as the
 * last argument (presumably the loss mode — confirm). */
396 static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e,
398 char **outbuf, size_t *outbytesleft)
400 return yaz_write_marc8_generic(cd, e->data, x, outbuf, outbytesleft, 2);
/* Destroy handler that yaz_marc8_encoder() installs as e->destroy_handle.
 * NOTE(review): the body is not visible in this excerpt. */
403 static void destroy_marc8(yaz_iconv_encoder_t e)
/* Set up encoder e for the MARC-8 variant named by tocode.
 *
 * A write handler is chosen when yaz_matchstr() returns 0 for one of the
 * names "MARC8", "MARC8s", "MARC8lossy" or "MARC8lossless"; "MARC8" and
 * "MARC8s" both use write_marc8_normal.  The state block is allocated with
 * xmalloc() and the destroy, flush and init handlers are installed.
 *
 * NOTE(review): the handling of an unrecognised tocode and the return
 * value are not visible in this excerpt. */
408 yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode,
409 yaz_iconv_encoder_t e)
412 if (!yaz_matchstr(tocode, "MARC8"))
413 e->write_handle = write_marc8_normal;
414 else if (!yaz_matchstr(tocode, "MARC8s"))
415 e->write_handle = write_marc8_normal;
416 else if (!yaz_matchstr(tocode, "MARC8lossy"))
417 e->write_handle = write_marc8_lossy;
418 else if (!yaz_matchstr(tocode, "MARC8lossless"))
419 e->write_handle = write_marc8_lossless;
424 struct encoder_data *data = xmalloc(sizeof(*data));
426 e->destroy_handle = destroy_marc8;
427 e->flush_handle = flush_marc8;
428 e->init_handle = init_marc8;
437 * indent-tabs-mode: nil
439 * vim: shiftwidth=4 tabstop=8 expandtab