add ISO 5426 reference documentation
author Wolfram Schneider <wosch@indexdata.dk>
Mon, 17 Nov 2008 15:12:52 +0000 (16:12 +0100)
committer Wolfram Schneider <wosch@indexdata.dk>
Mon, 17 Nov 2008 15:12:52 +0000 (16:12 +0100)
src/iconv_decode_iso5426.c

index eecee04..0bef7a7 100644 (file)
@@ -4,10 +4,14 @@
  */
 /**
  * \file
- * \brief MARC-8 decoding
+ * \brief ISO 5426 decoding
  *
  * MARC-8 reference:
- *  http://www.loc.gov/marc/specifications/speccharmarc8.html
+ *  http://www.loc.gov/marc/specifications/specchariso8.html
+ *
+ * ISO 5426 reference (in German)
+ * Zeichenkonkordanz MAB2-Zeichensatz - ISO/IEC 10646 / Unicode
+ * http://www.d-nb.de/standardisierung/pdf/mab_unic.pdf
  */
 
 #if HAVE_CONFIG_H
@@ -32,27 +36,27 @@ struct decoder_data {
     size_t comb_no_read[8];
 };
 
-yaz_conv_func_t yaz_marc8_42_conv;
-yaz_conv_func_t yaz_marc8_45_conv;
-yaz_conv_func_t yaz_marc8_67_conv;
-yaz_conv_func_t yaz_marc8_62_conv;
-yaz_conv_func_t yaz_marc8_70_conv;
-yaz_conv_func_t yaz_marc8_32_conv;
-yaz_conv_func_t yaz_marc8_4E_conv;
-yaz_conv_func_t yaz_marc8_51_conv;
-yaz_conv_func_t yaz_marc8_33_conv;
-yaz_conv_func_t yaz_marc8_34_conv;
-yaz_conv_func_t yaz_marc8_53_conv;
-yaz_conv_func_t yaz_marc8_31_conv;
+yaz_conv_func_t yaz_iso5426_42_conv;
+yaz_conv_func_t yaz_iso5426_45_conv;
+yaz_conv_func_t yaz_iso5426_67_conv;
+yaz_conv_func_t yaz_iso5426_62_conv;
+yaz_conv_func_t yaz_iso5426_70_conv;
+yaz_conv_func_t yaz_iso5426_32_conv;
+yaz_conv_func_t yaz_iso5426_4E_conv;
+yaz_conv_func_t yaz_iso5426_51_conv;
+yaz_conv_func_t yaz_iso5426_33_conv;
+yaz_conv_func_t yaz_iso5426_34_conv;
+yaz_conv_func_t yaz_iso5426_53_conv;
+yaz_conv_func_t yaz_iso5426_31_conv;
 
 
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
+static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
                                          struct decoder_data *data,
                                          unsigned char *inp,
                                          size_t inbytesleft, size_t *no_read,
                                          int *comb);
 
-static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+static unsigned long read_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                                unsigned char *inp,
                                size_t inbytesleft, size_t *no_read)
 {
@@ -69,7 +73,7 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
            the processing of the closing ligature character
         */
         /* this code is no longer necessary.. our handlers code in
-           yaz_marc8_?_conv (generated by charconv.tcl) now returns
+           yaz_iso5426_?_conv (generated by charconv.tcl) now returns
            0 and no_read=1 when a sequence does not match the input.
            The SECOND HALFs in codetables.xml produces a non-existant
            entry in the conversion trie.. Hence when met, the input byte is
@@ -95,7 +99,7 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
             *no_read = 0;
             break;
         }
-        x = yaz_read_marc8_comb(cd, data, inp, inbytesleft, no_read, &comb);
+        x = yaz_read_iso5426_comb(cd, data, inp, inbytesleft, no_read, &comb);
         if (!comb || !x)
             break;
         data->comb_x[data->comb_size] = x;
@@ -106,12 +110,12 @@ static unsigned long read_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
     return x;
 }
 
-static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+static unsigned long read_iso5426s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                                  unsigned char *inp,
                                  size_t inbytesleft, size_t *no_read)
 {
     struct decoder_data *data = (struct decoder_data *) d->data;
-    unsigned long x = read_marc8(cd, d, inp, inbytesleft, no_read);
+    unsigned long x = read_iso5426(cd, d, inp, inbytesleft, no_read);
     if (x && data->comb_size == 1)
     {
         if (yaz_iso_8859_1_lookup_x12(x, data->comb_x[0], &x))
@@ -123,7 +127,7 @@ static unsigned long read_marc8s(yaz_iconv_t cd, yaz_iconv_decoder_t d,
     return x;
 }
 
-static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
+static unsigned long yaz_read_iso5426_comb(yaz_iconv_t cd,
                                          struct decoder_data *data,
                                          unsigned char *inp,
                                          size_t inbytesleft, size_t *no_read,
@@ -189,41 +193,44 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd,
         {
         case 'B':  /* Basic ASCII */
         case 's':  /* ASCII */
-            x = yaz_marc8_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_42_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case 'E':  /* ANSEL */
-            x = yaz_marc8_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
+            x = yaz_iso5426_45_conv(inp, inbytesleft, &no_read_sub, comb, 127, 128);
             break;
+
+#if 0
         case 'g':  /* Greek */
-            x = yaz_marc8_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_67_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case 'b':  /* Subscripts */
-            x = yaz_marc8_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_62_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case 'p':  /* Superscripts */
-            x = yaz_marc8_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_70_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case '2':  /* Basic Hebrew */
-            x = yaz_marc8_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_32_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case 'N':  /* Basic Cyrillic */
-            x = yaz_marc8_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_4E_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case 'Q':  /* Extended Cyrillic */
-            x = yaz_marc8_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_51_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case '3':  /* Basic Arabic */
-            x = yaz_marc8_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_33_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case '4':  /* Extended Arabic */
-            x = yaz_marc8_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_34_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case 'S':  /* Greek */
-            x = yaz_marc8_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_53_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
         case '1':  /* Chinese, Japanese, Korean (EACC) */
-            x = yaz_marc8_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
+            x = yaz_iso5426_31_conv(inp, inbytesleft, &no_read_sub, comb, 127, 0);
             break;
+#endif
         default:
             *no_read = 0;
             yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
@@ -239,7 +246,7 @@ incomplete:
 }
 
 
-static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+static size_t init_iso5426(yaz_iconv_t cd, yaz_iconv_decoder_t d,
                          unsigned char *inp,
                          size_t inbytesleft, size_t *no_read)
 {
@@ -250,29 +257,25 @@ static size_t init_marc8(yaz_iconv_t cd, yaz_iconv_decoder_t d,
     return 0;
 }
 
-void destroy_marc8(yaz_iconv_decoder_t d)
+void destroy_iso5426(yaz_iconv_decoder_t d)
 {
     struct decoder_data *data = (struct decoder_data *) d->data;
     xfree(data);
 }
 
-yaz_iconv_decoder_t yaz_marc8_decoder(const char *fromcode,
+yaz_iconv_decoder_t yaz_iso5426_decoder(const char *fromcode,
                                       yaz_iconv_decoder_t d)
 {
-    if (!yaz_matchstr(fromcode, "MARC8") || !yaz_matchstr(fromcode, "ANSEL"))
-        d->read_handle = read_marc8;
-    else if (!yaz_matchstr(fromcode, "ISO5426"))
-        d->read_handle = read_marc8;
-    else if (!yaz_matchstr(fromcode, "MARC8s"))
-        d->read_handle = read_marc8s;
+    if (!yaz_matchstr(fromcode, "ISO5426"))
+        d->read_handle = read_iso5426;
     else
         return 0;
     {
         struct decoder_data *data = (struct decoder_data *)
             xmalloc(sizeof(*data));
         d->data = data;
-        d->init_handle = init_marc8;
-        d->destroy_handle = destroy_marc8;
+        d->init_handle = init_iso5426;
+        d->destroy_handle = destroy_iso5426;
     }
     return d;
 }