Fixed bug #642: Problem with subscript symbols in conversion to
author Adam Dickmeiss <adam@indexdata.dk>
Sun, 27 Aug 2006 19:04:03 +0000 (19:04 +0000)
committer Adam Dickmeiss <adam@indexdata.dk>
Sun, 27 Aug 2006 19:04:03 +0000 (19:04 +0000)
MARC-8.

src/siconv.c
test/tsticonv.c

index 0034006..cc90210 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2006, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: siconv.c,v 1.25 2006-08-24 10:01:03 adam Exp $
+ * $Id: siconv.c,v 1.26 2006-08-27 19:04:03 adam Exp $
  */
 /**
  * \file siconv.c
  * is used by YAZ to interface with iconv (if present).
  * For systems where iconv is not present, this layer
  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
+ *
+ * MARC-8 reference:
+ *  http://www.loc.gov/marc/specifications/speccharmarc8.html
  */
 
 #if HAVE_CONFIG_H
 #include <config.h>
 #endif
 
+#include <assert.h>
 #include <errno.h>
 #include <string.h>
 #include <ctype.h>
@@ -29,6 +33,7 @@
 #include <iconv.h>
 #endif
 
+
 #include <yaz/yaz-util.h>
 
 unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft,
@@ -818,21 +823,42 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
     else
     {
         size_t r = flush_combos(cd, outbuf, outbytesleft);
+        const char *old_page_chr = cd->write_marc8_page_chr;
         if (r)
             return r;
-        if (strcmp(page_chr, cd->write_marc8_page_chr))
+        if (strcmp(page_chr, old_page_chr))
         {
-            size_t plen = strlen(page_chr);
+            size_t plen = 0;
+            const char *page_out = page_chr;
 
-            if (*outbytesleft < plen)
+            if (*outbytesleft < 8)
             {
                 cd->my_errno = YAZ_ICONV_E2BIG;
+                
                 return (size_t) (-1);
             }
-            memcpy(*outbuf, page_chr, plen);
+            cd->write_marc8_page_chr = page_chr;
+
+            if (!strcmp(old_page_chr, "\033p") 
+                || !strcmp(old_page_chr, "\033g")
+                || !strcmp(old_page_chr, "\033b"))
+            {
+                /* Technique 1 leave */
+                page_out = "\033s";
+                if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
+                {
+                    /* Must leave script + enter new page */
+                    plen = strlen(page_out);
+                    memcpy(*outbuf, page_out, plen);
+                    (*outbuf) += plen;
+                    (*outbytesleft) -= plen;
+                    page_out = page_chr;
+                }
+            }
+            plen = strlen(page_out);
+            memcpy(*outbuf, page_out, plen);
             (*outbuf) += plen;
             (*outbytesleft) -= plen;
-            cd->write_marc8_page_chr = page_chr;            
         }
         cd->write_marc8_last = y;
     }
index b376aad..7941013 100644 (file)
@@ -2,7 +2,7 @@
  * Copyright (C) 1995-2006, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: tsticonv.c,v 1.20 2006-05-09 21:37:03 adam Exp $
+ * $Id: tsticonv.c,v 1.21 2006-08-27 19:04:03 adam Exp $
  */
 
 #if HAVE_CONFIG_H
@@ -457,6 +457,13 @@ static void tst_utf8_to_marc8()
                           "\xEF\xBC\x9F" "o",        /* UTF-8 */
                           "\033(1" "\x21\x2B\x3B" "\033(B" "o" ));
 
+
+    /** Superscript 0 . bug #642 */
+    YAZ_CHECK(tst_convert(cd,
+                          "(\xe2\x81\xb0)",        /* UTF-8 */
+                          "(\033p0\x1bs)"));
+    
     yaz_iconv_close(cd);
 }