Fix leak for odr_print of ZOOM connection.
[yaz-moved-to-github.git] / src / iconv_encode_marc8.c
1 /* This file is part of the YAZ toolkit.
2  * Copyright (C) 1995-2009 Index Data
3  * See the file LICENSE for details.
4  */
5 /**
6  * \file
7  * \brief MARC-8 encoding
8  *
9  * MARC-8 reference:
10  *  http://www.loc.gov/marc/specifications/speccharmarc8.html
11  */
12
13 #if HAVE_CONFIG_H
14 #include <config.h>
15 #endif
16
17 #include <assert.h>
18 #include <errno.h>
19 #include <string.h>
20 #include <ctype.h>
21
22 #include <yaz/xmalloc.h>
23 #include <yaz/snprintf.h>
24 #include "iconv-p.h"
25
26 yaz_conv_func_t yaz_marc8r_42_conv;
27 yaz_conv_func_t yaz_marc8r_45_conv;
28 yaz_conv_func_t yaz_marc8r_67_conv;
29 yaz_conv_func_t yaz_marc8r_62_conv;
30 yaz_conv_func_t yaz_marc8r_70_conv;
31 yaz_conv_func_t yaz_marc8r_32_conv;
32 yaz_conv_func_t yaz_marc8r_4E_conv;
33 yaz_conv_func_t yaz_marc8r_51_conv;
34 yaz_conv_func_t yaz_marc8r_33_conv;
35 yaz_conv_func_t yaz_marc8r_34_conv;
36 yaz_conv_func_t yaz_marc8r_53_conv;
37 yaz_conv_func_t yaz_marc8r_31_conv;
38
39 #define ESC "\033"
40
/* State kept by the MARC-8 encoder between calls to its write handler. */
struct encoder_data
{
    /* byte appended after the pending character by flush_combos();
       set for U+0360 / U+0361, 0 when unused */
    unsigned int write_marc8_second_half_char;
    /* pending non-combining character, not yet written; 0 when none */
    unsigned long write_marc8_last;
    /* non-zero: write the pending character as a "&#x....;" reference */
    int write_marc8_ncr;
    /* escape sequence of the page the pending character belongs to */
    const char *write_marc8_lpage;
    /* escape sequence last recorded for the G0 set */
    const char *write_marc8_g0;
    /* escape sequence last recorded for the G1 set, 0 when none */
    const char *write_marc8_g1;
};
50
51 static void init_marc8(yaz_iconv_encoder_t w)
52 {
53     struct encoder_data *data = (struct encoder_data *) w->data;
54     data->write_marc8_second_half_char = 0;
55     data->write_marc8_last = 0;
56     data->write_marc8_ncr = 0;
57     data->write_marc8_lpage = 0;
58     data->write_marc8_g0 = ESC "(B";
59     data->write_marc8_g1 = 0;
60 }
61
62 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
63                                        struct encoder_data *w,
64                                        char **outbuf, size_t *outbytesleft,
65                                        const char *page_chr);
66
67 static unsigned long lookup_marc8(yaz_iconv_t cd,
68                                   unsigned long x, int *comb,
69                                   const char **page_chr)
70 {
71     char utf8_buf[7];
72     char *utf8_outbuf = utf8_buf;
73     size_t utf8_outbytesleft = sizeof(utf8_buf)-1, r;
74     int error_code;
75
76     r = yaz_write_UTF8_char(x, &utf8_outbuf, &utf8_outbytesleft, &error_code);
77     if (r == (size_t)(-1))
78     {
79         yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
80         return 0;
81     }
82     else
83     {
84         unsigned char *inp;
85         size_t inbytesleft, no_read_sub = 0;
86         unsigned long x;
87
88         *utf8_outbuf = '\0';        
89         inp = (unsigned char *) utf8_buf;
90         inbytesleft = strlen(utf8_buf);
91
92         x = yaz_marc8r_42_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
93         if (x)
94         {
95             *page_chr = ESC "(B";
96             return x;
97         }
98         x = yaz_marc8r_45_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
99         if (x)
100         {
101             *page_chr = ESC "(B";
102             return x;
103         }
104         x = yaz_marc8r_62_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
105         if (x)
106         {
107             *page_chr = ESC "b";
108             return x;
109         }
110         x = yaz_marc8r_70_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
111         if (x)
112         {
113             *page_chr = ESC "p";
114             return x;
115         }
116         x = yaz_marc8r_32_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
117         if (x)
118         {
119             *page_chr = ESC "(2";
120             return x;
121         }
122         x = yaz_marc8r_4E_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
123         if (x)
124         {
125             *page_chr = ESC "(N";
126             return x;
127         }
128         x = yaz_marc8r_51_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
129         if (x)
130         {
131             *page_chr = ESC "(Q";
132             return x;
133         }
134         x = yaz_marc8r_33_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
135         if (x)
136         {
137             *page_chr = ESC "(3";
138             return x;
139         }
140         x = yaz_marc8r_34_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
141         if (x)
142         {
143             *page_chr = ESC "(4";
144             return x;
145         }
146         x = yaz_marc8r_53_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
147         if (x)
148         {
149             *page_chr = ESC "(S";
150             return x;
151         }
152         x = yaz_marc8r_31_conv(inp, inbytesleft, &no_read_sub, comb, 255, 0);
153         if (x)
154         {
155             *page_chr = ESC "$1";
156             return x;
157         }
158         yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
159         return x;
160     }
161 }
162
163 static size_t flush_combos(yaz_iconv_t cd,
164                            struct encoder_data *w,
165                            char **outbuf, size_t *outbytesleft)
166 {
167     unsigned long y = w->write_marc8_last;
168
169     if (!y)
170         return 0;
171
172     assert(w->write_marc8_lpage);
173     if (w->write_marc8_lpage)
174     {
175         size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
176                                             w->write_marc8_lpage);
177         if (r)
178             return r;
179     }
180
181     if (9 >= *outbytesleft)
182     {
183         yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
184         return (size_t) (-1);
185     }
186     if (w->write_marc8_ncr)
187     {
188         yaz_snprintf(*outbuf, 9, "&#x%04x;", y);
189         (*outbytesleft) -= 8;
190         (*outbuf) += 8;
191     }
192     else
193     {
194         size_t out_no = 0;
195         unsigned char byte;
196
197         byte = (unsigned char )((y>>16) & 0xff);
198         if (byte)
199             (*outbuf)[out_no++] = byte;
200         byte = (unsigned char)((y>>8) & 0xff);
201         if (byte)
202             (*outbuf)[out_no++] = byte;
203         byte = (unsigned char )(y & 0xff);
204         if (byte)
205             (*outbuf)[out_no++] = byte;
206         *outbuf += out_no;
207         (*outbytesleft) -= out_no;
208     }
209
210     if (w->write_marc8_second_half_char)
211     {
212         *(*outbuf)++ = w->write_marc8_second_half_char;
213         (*outbytesleft)--;
214     }        
215
216     w->write_marc8_last = 0;
217     w->write_marc8_ncr = 0;
218     w->write_marc8_lpage = 0;
219     w->write_marc8_second_half_char = 0;
220     return 0;
221 }
222
223 static size_t yaz_write_marc8_page_chr(yaz_iconv_t cd, 
224                                        struct encoder_data *w,
225                                        char **outbuf, size_t *outbytesleft,
226                                        const char *page_chr)
227 {
228     const char **old_page_chr = &w->write_marc8_g0;
229
230     /* are we going to a G1-set (such as such as ESC ")!E") */
231     if (page_chr && page_chr[1] == ')')
232         old_page_chr = &w->write_marc8_g1;
233
234     if (!*old_page_chr || strcmp(page_chr, *old_page_chr))
235     {
236         size_t plen = 0;
237         const char *page_out = page_chr;
238         
239         if (*outbytesleft < 8)
240         {
241             yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
242             
243             return (size_t) (-1);
244         }
245
246         if (*old_page_chr)
247         {
248             if (!strcmp(*old_page_chr, ESC "p") 
249                 || !strcmp(*old_page_chr, ESC "g")
250                 || !strcmp(*old_page_chr, ESC "b"))
251             {
252                 page_out = ESC "s";
253                 /* Technique 1 leave */
254                 if (strcmp(page_chr, ESC "(B")) /* Not going ASCII page? */
255                 {
256                     /* Must leave script + enter new page */
257                     plen = strlen(page_out);
258                     memcpy(*outbuf, page_out, plen);
259                     (*outbuf) += plen;
260                     (*outbytesleft) -= plen;
261                     page_out = ESC "(B";
262                 }
263             }
264         }
265         *old_page_chr = page_chr;
266         plen = strlen(page_out);
267         memcpy(*outbuf, page_out, plen);
268         (*outbuf) += plen;
269         (*outbytesleft) -= plen;
270     }
271     return 0;
272 }
273
274
275 static size_t yaz_write_marc8_2(yaz_iconv_t cd, struct encoder_data *w,
276                                 unsigned long x,
277                                 char **outbuf, size_t *outbytesleft,
278                                 int loss_mode)
279 {
280     int comb = 0;
281     int enable_ncr = 0;
282     const char *page_chr = 0;
283     unsigned long y = lookup_marc8(cd, x, &comb, &page_chr);
284
285     if (!y)
286     {
287         if (loss_mode == 0)
288             return (size_t) (-1);
289         page_chr = ESC "(B";
290         if (loss_mode == 1)
291             y = '|';
292         else
293         {
294             y = x; 
295             enable_ncr = 1;
296         }
297     }
298
299     if (comb)
300     {
301         if (page_chr)
302         {
303             size_t r = yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft,
304                                                 page_chr);
305             if (r)
306                 return r;
307         }
308         if (x == 0x0361)
309             w->write_marc8_second_half_char = 0xEC;
310         else if (x == 0x0360)
311             w->write_marc8_second_half_char = 0xFB;
312
313         if (*outbytesleft <= 1)
314         {
315             yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
316             return (size_t) (-1);
317         }
318         *(*outbuf)++ = y;
319         (*outbytesleft)--;
320     }
321     else
322     {
323         size_t r = flush_combos(cd, w, outbuf, outbytesleft);
324         if (r)
325             return r;
326
327         w->write_marc8_last = y;
328         w->write_marc8_lpage = page_chr;
329         w->write_marc8_ncr = enable_ncr;
330     }
331     return 0;
332 }
333
334 static size_t flush_marc8(yaz_iconv_t cd, yaz_iconv_encoder_t en,
335                            char **outbuf, size_t *outbytesleft)
336 {
337     struct encoder_data *w = (struct encoder_data *) en->data;
338     size_t r = flush_combos(cd, w, outbuf, outbytesleft);
339     if (r)
340         return r;
341     w->write_marc8_g1 = 0;
342     return yaz_write_marc8_page_chr(cd, w, outbuf, outbytesleft, ESC "(B");
343 }
344
345 static size_t yaz_write_marc8_generic(yaz_iconv_t cd, struct encoder_data *w,
346                                       unsigned long x,
347                                       char **outbuf, size_t *outbytesleft,
348                                       int loss_mode)
349 {
350     unsigned long x1, x2;
351     if (yaz_iso_8859_1_lookup_y(x, &x1, &x2))
352     {
353         /* save the output pointers .. */
354         char *outbuf0 = *outbuf;
355         size_t outbytesleft0 = *outbytesleft;
356         int last_ch = w->write_marc8_last;
357         int ncr = w->write_marc8_ncr;
358         const char *lpage = w->write_marc8_lpage;
359         size_t r;
360         
361         r = yaz_write_marc8_2(cd, w, x1,
362                               outbuf, outbytesleft, loss_mode);
363         if (r)
364             return r;
365         r = yaz_write_marc8_2(cd, w, x2,
366                               outbuf, outbytesleft, loss_mode);
367         if (r && yaz_iconv_error(cd) == YAZ_ICONV_E2BIG)
368         {
369             /* not enough room. reset output to original values */
370             *outbuf = outbuf0;
371             *outbytesleft = outbytesleft0;
372             w->write_marc8_last = last_ch;
373             w->write_marc8_ncr = ncr;
374             w->write_marc8_lpage = lpage;
375         }
376         return r;
377     }
378     return yaz_write_marc8_2(cd, w, x, outbuf, outbytesleft, loss_mode);
379 }
380
381 static size_t write_marc8_normal(yaz_iconv_t cd, yaz_iconv_encoder_t e,
382                                  unsigned long x,
383                                  char **outbuf, size_t *outbytesleft)
384 {
385     return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
386                                    x, outbuf, outbytesleft, 0);
387 }
388
389 static size_t write_marc8_lossy(yaz_iconv_t cd, yaz_iconv_encoder_t e,
390                                 unsigned long x,
391                                 char **outbuf, size_t *outbytesleft)
392 {
393     return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
394                                    x, outbuf, outbytesleft, 1);
395 }
396
397 static size_t write_marc8_lossless(yaz_iconv_t cd, yaz_iconv_encoder_t e,
398                                    unsigned long x,
399                                    char **outbuf, size_t *outbytesleft)
400 {
401     return yaz_write_marc8_generic(cd, (struct encoder_data *) e->data,
402                                    x, outbuf, outbytesleft, 2);
403 }
404
405 static void destroy_marc8(yaz_iconv_encoder_t e)
406 {
407     xfree(e->data);
408 }
409
410 yaz_iconv_encoder_t yaz_marc8_encoder(const char *tocode,
411                                       yaz_iconv_encoder_t e)
412     
413 {
414     if (!yaz_matchstr(tocode, "MARC8"))
415         e->write_handle = write_marc8_normal;
416     else if (!yaz_matchstr(tocode, "MARC8s"))
417         e->write_handle = write_marc8_normal;
418     else if (!yaz_matchstr(tocode, "MARC8lossy"))
419         e->write_handle = write_marc8_lossy;
420     else if (!yaz_matchstr(tocode, "MARC8lossless"))
421         e->write_handle = write_marc8_lossless;
422     else
423         return 0;
424
425     {
426         struct encoder_data *data = (struct encoder_data *)
427             xmalloc(sizeof(*data));
428         e->data = data;
429         e->destroy_handle = destroy_marc8;
430         e->flush_handle = flush_marc8;
431         e->init_handle = init_marc8;
432     }
433     return e;
434 }
435
436
437 /*
438  * Local variables:
439  * c-basic-offset: 4
440  * c-file-style: "Stroustrup"
441  * indent-tabs-mode: nil
442  * End:
443  * vim: shiftwidth=4 tabstop=8 expandtab
444  */
445