cookie: introduce yaz_cookies_reset
[yaz-moved-to-github.git] / src / iconv_decode_iso5426.c
index eecee04..49a27ea 100644 (file)
@@ -1,13 +1,17 @@
 /* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2008 Index Data
+ * Copyright (C) Index Data
  * See the file LICENSE for details.
  */
 /**
  * \file
- * \brief MARC-8 decoding
+ * \brief ISO 5426 decoding
  *
  * MARC-8 reference:
- *  http://www.loc.gov/marc/specifications/speccharmarc8.html
+ *  http://www.loc.gov/marc/specifications/speccharmarc8.html
+ *
+ * ISO 5426 reference (in German):
+ * Zeichenkonkordanz MAB2-Zeichensatz - ISO/IEC 10646 / Unicode
+ * http://www.d-nb.de/standardisierung/pdf/mab_unic.pdf
  */
 
 #if HAVE_CONFIG_H
@@ -17,7 +21,6 @@
 #include <assert.h>
 #include <errno.h>
 #include <string.h>
-#include <ctype.h>
 
 #include <yaz/xmalloc.h>
 #include "iconv-p.h"
@@ -32,27 +35,27 @@ struct decoder_data {
     size_t comb_no_read[8];
 };
 
-yaz_conv_func_t yaz_marc8_42_conv;
-yaz_conv_func_t yaz_marc8_45_conv;
-yaz_conv_func_t yaz_marc8_67_conv;
-yaz_conv_func_t yaz_marc8_62_conv;
-yaz_conv_func_t yaz_marc8_70_conv;
-yaz_conv_func_t yaz_marc8_32_conv;
-yaz_conv_func_t yaz_marc8_4E_conv;
-yaz_conv_func_t yaz_marc8_51_conv;
-yaz_conv_func_t yaz_marc8_33_conv;
-yaz_conv_func_t yaz_marc8_34_conv;
-yaz_conv_func_t yaz_marc8_53_conv;
-yaz_conv_func_t yaz_marc8_31_conv;
-
-
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
+yaz_conv_func_t yaz_iso5426_42_conv;
+yaz_conv_func_t yaz_iso5426_45_conv;
+yaz_conv_func_t yaz_iso5426_67_conv;
+yaz_conv_func_t yaz_iso5426_62_conv;
+yaz_conv_func_t yaz_iso5426_70_conv;
+yaz_conv_func_t yaz_iso5426_32_conv;
+yaz_conv_func_t yaz_iso5426_4E_conv;
+yaz_conv_func_t yaz_iso5426_51_conv;
+yaz_conv_func_t yaz_iso5426_33_conv;
+yaz_conv_func_t yaz_iso5426_34_conv;
+yaz_conv_func_t yaz_iso5426_53_conv;
+yaz_conv_func_t yaz_iso5426_31_conv;
+
+
+static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
                                          struct decoder_data *data,
                                          unsigned char *inp,
                                          size_t inbytesleft, size_t *no_read,
                                          int *comb);
 
-static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                                unsigned char *inp,
                                size_t inbytesleft, size_t *no_read)
 {
@@ -63,13 +66,13 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
         *no_read = data->comb_no_read[data->comb_offset];
         x = data->comb_x[data->comb_offset];
 
-        /* special case for double-diacritic combining characters, 
+        /* special case for double-diacritic combining characters,
            INVERTED BREVE and DOUBLE TILDE.
            We'll increment the no_read counter by 1, since we want to skip over
            the processing of the closing ligature character
         */
         /* this code is no longer necessary.. our handlers code in
-           yaz_marc8_?_conv (generated by charconv.tcl) now returns
+           yaz_iso5426_?_conv (generated by charconv.tcl) now returns
            0 and no_read=1 when a sequence does not match the input.
            The SECOND HALFs in codetables.xml produces a non-existant
            entry in the conversion trie.. Hence when met, the input byte is
@@ -95,7 +98,7 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
             *no_read = 0;
             break;
         }
-        x = yaz_read_marc8_comb(cd, data, inp, inbytesleft, no_read, &comb);
+        x = yaz_read_iso5426_comb(cd, data, inp, inbytesleft, no_read, &comb);
         if (!comb || !x)
             break;
         data->comb_x[data->comb_size] = x;
@@ -106,24 +109,7 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
     return x;
 }
 
-static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
-                                 unsigned char *inp,
-                                 size_t inbytesleft, size_t *no_read)
-{
-    struct decoder_data *data = (struct decoder_data *) d->data;
-    unsigned long x = read_marc8(cd, d, inp, inbytesleft, no_read);
-    if (x && data->comb_size == 1)
-    {
-        if (yaz_iso_8859_1_lookup_x12(x, data->comb_x[0], &x))
-        {
-            *no_read += data->comb_no_read[0];
-            data->comb_size = 0;
-        }
-    }
-    return x;
-}
-
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
+static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
                                          struct decoder_data *data,
                                          unsigned char *inp,
                                          size_t inbytesleft, size_t *no_read,
@@ -189,40 +175,12 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
         {
         case 'B':  /* Basic ASCII */
         case 's':  /* ASCII */
-            x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_42_conv(inp, inbytesleft, &no_read_sub, comb,
+                                    127, 0);
             break;
         case 'E':  /* ANSEL */
-            x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
-            break;
-        case 'g':  /* Greek */
-            x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case 'b':  /* Subscripts */
-            x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case 'p':  /* Superscripts */
-            x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case '2':  /* Basic Hebrew */
-            x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case 'N':  /* Basic Cyrillic */
-            x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case 'Q':  /* Extended Cyrillic */
-            x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case '3':  /* Basic Arabic */
-            x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case '4':  /* Extended Arabic */
-            x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case 'S':  /* Greek */
-            x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
-            break;
-        case '1':  /* Chinese, Japanese, Korean (EACC) */
-            x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_45_conv(inp, inbytesleft, &no_read_sub, comb,
+                                    127, 128);
             break;
         default:
             *no_read = 0;
@@ -239,7 +197,7 @@ incomplete:
 }
 
 
-static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+static size_t init_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                          unsigned char *inp,
                          size_t inbytesleft, size_t *no_read)
 {
@@ -250,29 +208,25 @@ static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
     return 0;
 }
 
-void destroy_marc8(yaz_iconv_decoder_t d)
+void destroy_iso5426(yaz_iconv_decoder_t d)
 {
     struct decoder_data *data = (struct decoder_data *) d->data;
     xfree(data);
 }
 
-yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode,
+yaz_iconv_decoder_t yaz_iso5426_decoder(const char *fromcode,
                                       yaz_iconv_decoder_t d)
 {
-    if (!yaz_matchstr(fromcode, "MARC8") || !yaz_matchstr(fromcode, "ANSEL"))
-        d->read_handle = read_marc8;
-    else if (!yaz_matchstr(fromcode, "ISO5426"))
-        d->read_handle = read_marc8;
-    else if (!yaz_matchstr(fromcode, "MARC8s"))
-        d->read_handle = read_marc8s;
+    if (!yaz_matchstr(fromcode, "ISO5426"))
+        d->read_handle = read_iso5426;
     else
         return 0;
     {
         struct decoder_data *data = (struct decoder_data *)
             xmalloc(sizeof(*data));
         d->data = data;
-        d->init_handle = init_marc8;
-        d->destroy_handle = destroy_marc8;
+        d->init_handle = init_iso5426;
+        d->destroy_handle = destroy_iso5426;
     }
     return d;
 }
@@ -281,7 +235,9 @@ yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode,
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab
  */
+