changed output to be non-cascading when using -n switch
[yaz-moved-to-github.git] / src / siconv.c
index ba54b16..b040729 100644 (file)
@@ -1,8 +1,8 @@
 /*
- * Copyright (C) 1995-2006, Index Data ApS
+ * Copyright (C) 1995-2007, Index Data ApS
  * See the file LICENSE for details.
  *
- * $Id: siconv.c,v 1.24 2006-08-04 14:35:40 adam Exp $
+ * $Id: siconv.c,v 1.32 2007-01-03 08:42:15 adam Exp $
  */
 /**
  * \file siconv.c
  * is used by YAZ to interface with iconv (if present).
  * For systems where iconv is not present, this layer
  * provides a few important conversions: UTF-8, MARC-8, Latin-1.
+ *
+ * MARC-8 reference:
+ *  http://www.loc.gov/marc/specifications/speccharmarc8.html
  */
 
 #if HAVE_CONFIG_H
 #include <config.h>
 #endif
 
+#include <assert.h>
 #include <errno.h>
 #include <string.h>
 #include <ctype.h>
@@ -29,6 +33,7 @@
 #include <iconv.h>
 #endif
 
+
 #include <yaz/yaz-util.h>
 
 unsigned long yaz_marc8_1_conv(unsigned char *inp, size_t inbytesleft,
@@ -95,6 +100,7 @@ struct yaz_iconv_struct {
 
     unsigned long write_marc8_comb_ch[8];
     size_t write_marc8_comb_no;
+    unsigned write_marc8_second_half_char;
     unsigned long write_marc8_last;
     const char *write_marc8_page_chr;
 };
@@ -178,6 +184,7 @@ static unsigned long yaz_read_ISO8859_1 (yaz_iconv_t cd, unsigned char *inp,
     return x;
 }
 
+
 static size_t yaz_init_UTF8 (yaz_iconv_t cd, unsigned char *inp,
                              size_t inbytesleft, size_t *no_read)
 {
@@ -426,7 +433,7 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
         size_t inbytesleft0 = inbytesleft;
         inp++;
         inbytesleft--;
-        while(inbytesleft > 0 && strchr("(,$!", *inp))
+        while(inbytesleft > 0 && strchr("(,$!)-", *inp))
         {
             inbytesleft--;
             inp++;
@@ -492,9 +499,16 @@ static unsigned long yaz_read_marc8_comb(yaz_iconv_t cd, unsigned char *inp,
     }
 }
 
-static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
-                              char **outbuf, size_t *outbytesleft,
-                              int last)
+static size_t yaz_write_UTF8(yaz_iconv_t cd, unsigned long x,
+                             char **outbuf, size_t *outbytesleft,
+                             int last)
+{
+    return yaz_write_UTF8_char(x, outbuf, outbytesleft, &cd->my_errno);
+}
+
+size_t yaz_write_UTF8_char(unsigned long x,
+                           char **outbuf, size_t *outbytesleft,
+                           int *error)
 {
     unsigned char *outp = (unsigned char *) *outbuf;
 
@@ -545,7 +559,7 @@ static size_t yaz_write_UTF8 (yaz_iconv_t cd, unsigned long x,
     }
     else 
     {
-        cd->my_errno = YAZ_ICONV_E2BIG;  /* not room for output */
+        *error = YAZ_ICONV_E2BIG;  /* not room for output */
         return (size_t)(-1);
     }
     *outbuf = (char *) outp;
@@ -731,7 +745,7 @@ static unsigned long lookup_marc8(yaz_iconv_t cd,
         x = yaz_marc8r_9_conv(inp, inbytesleft, &no_read_sub, comb);
         if (x)
         {
-            *page_chr = "\033(1";
+            *page_chr = "\033$1";
             return x;
         }
         cd->my_errno = YAZ_ICONV_EILSEQ;
@@ -743,7 +757,7 @@ static size_t flush_combos(yaz_iconv_t cd,
                            char **outbuf, size_t *outbytesleft)
 {
     unsigned long y = cd->write_marc8_last;
-    unsigned char byte, second_half = 0;
+    unsigned char byte;
     char out_buf[10];
     size_t i, out_no = 0;
 
@@ -770,25 +784,21 @@ static size_t flush_combos(yaz_iconv_t cd,
     {
         /* all MARC-8 combined characters are simple bytes */
         byte = (unsigned char )(cd->write_marc8_comb_ch[i]);
-        if (byte == 0xEB)
-            second_half = 0xEC;
-        else if (byte == 0xFA)
-            second_half = 0xFB;
-
         *(*outbuf)++ = byte;
         (*outbytesleft)--;
     }
     memcpy(*outbuf, out_buf, out_no);
     *outbuf += out_no;
     (*outbytesleft) -= out_no;
-    if (second_half)
+    if (cd->write_marc8_second_half_char)
     {
-        *(*outbuf)++ = second_half;
+        *(*outbuf)++ = cd->write_marc8_second_half_char;
         (*outbytesleft)--;
     }        
 
     cd->write_marc8_last = 0;
     cd->write_marc8_comb_no = 0;
+    cd->write_marc8_second_half_char = 0;
     return 0;
 }
 
@@ -805,27 +815,53 @@ static size_t yaz_write_marc8_2(yaz_iconv_t cd, unsigned long x,
 
     if (comb)
     {
+        if (x == 0x0361)
+            cd->write_marc8_second_half_char = 0xEC;
+        else if (x == 0x0360)
+            cd->write_marc8_second_half_char = 0xFB;
+
         if (cd->write_marc8_comb_no < 6)
             cd->write_marc8_comb_ch[cd->write_marc8_comb_no++] = y;
     }
     else
     {
         size_t r = flush_combos(cd, outbuf, outbytesleft);
+        const char *old_page_chr = cd->write_marc8_page_chr;
         if (r)
             return r;
-        if (strcmp(page_chr, cd->write_marc8_page_chr))
+        if (strcmp(page_chr, old_page_chr))
         {
-            size_t plen = strlen(page_chr);
+            size_t plen = 0;
+            const char *page_out = page_chr;
 
-            if (*outbytesleft < plen)
+            if (*outbytesleft < 8)
             {
                 cd->my_errno = YAZ_ICONV_E2BIG;
+                
                 return (size_t) (-1);
             }
-            memcpy(*outbuf, page_chr, plen);
+            cd->write_marc8_page_chr = page_chr;
+
+            if (!strcmp(old_page_chr, "\033p") 
+                || !strcmp(old_page_chr, "\033g")
+                || !strcmp(old_page_chr, "\033b"))
+            {
+                /* Technique 1 leave */
+                page_out = "\033s";
+                if (strcmp(page_chr, "\033(B")) /* Not going ASCII page? */
+                {
+                    /* Must leave script + enter new page */
+                    plen = strlen(page_out);
+                    memcpy(*outbuf, page_out, plen);
+                    (*outbuf) += plen;
+                    (*outbytesleft) -= plen;
+                    page_out = page_chr;
+                }
+            }
+            plen = strlen(page_out);
+            memcpy(*outbuf, page_out, plen);
             (*outbuf) += plen;
             (*outbytesleft) -= plen;
-            cd->write_marc8_page_chr = page_chr;            
         }
         cd->write_marc8_last = y;
     }
@@ -916,13 +952,6 @@ yaz_iconv_t yaz_iconv_open (const char *tocode, const char *fromcode)
     cd->read_handle = 0;
     cd->init_handle = 0;
     cd->my_errno = YAZ_ICONV_UNKNOWN;
-    cd->marc8_esc_mode = 'B';
-    cd->comb_offset = cd->comb_size = 0;
-    cd->compose_char = 0;
-
-    cd->write_marc8_comb_no = 0;
-    cd->write_marc8_last = 0;
-    cd->write_marc8_page_chr = "\033(B";
 
     /* a useful hack: if fromcode has leading @,
        the library not use YAZ's own conversions .. */
@@ -1032,7 +1061,7 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
     {
         if (cd->init_handle)
         {
-            size_t no_read;
+            size_t no_read = 0;
             size_t r = (cd->init_handle)(cd, (unsigned char *) *inbuf,
                                          *inbytesleft, &no_read);
             if (r)
@@ -1045,6 +1074,16 @@ size_t yaz_iconv(yaz_iconv_t cd, char **inbuf, size_t *inbytesleft,
             *inbytesleft -= no_read;
             *inbuf += no_read;
         }
+        cd->marc8_esc_mode = 'B';
+        
+        cd->comb_offset = cd->comb_size = 0;
+        cd->compose_char = 0;
+        
+        cd->write_marc8_comb_no = 0;
+        cd->write_marc8_second_half_char = 0;
+        cd->write_marc8_last = 0;
+        cd->write_marc8_page_chr = "\033(B";
+        
         cd->init_flag = 0;
         cd->unget_x = 0;
         cd->no_read_x = 0;