Added Danmarc character set decoder (bug #2345).
author Adam Dickmeiss <adam@indexdata.dk>
Mon, 15 Dec 2008 14:41:34 +0000 (15:41 +0100)
committer Adam Dickmeiss <adam@indexdata.dk>
Mon, 15 Dec 2008 14:41:34 +0000 (15:41 +0100)
src/Makefile.am
src/iconv-p.h
src/iconv_decode_danmarc.c [new file with mode: 0644]
src/siconv.c
test/tsticonv.c
win/makefile

index cc8c903..e72c1ee 100644 (file)
@@ -105,7 +105,7 @@ libyaz_la_SOURCES=version.c options.c log.c \
   record_conv.c retrieval.c elementset.c snprintf.c query-charset.c \
   copy_types.c match_glob.c poll.c daemon.c \
   iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c \
-  iconv_decode_marc8.c iconv_decode_iso5426.c sc.c
+  iconv_decode_marc8.c iconv_decode_iso5426.c iconv_decode_danmarc.c sc.c
 
 libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
 
index f99943b..e5e92e8 100644 (file)
@@ -103,6 +103,8 @@ yaz_iconv_decoder_t yaz_advancegreek_decoder(const char *name,
                                              yaz_iconv_decoder_t d);
 yaz_iconv_decoder_t yaz_wchar_decoder(const char *fromcode,
                                      yaz_iconv_decoder_t d);
+yaz_iconv_decoder_t yaz_danmarc_decoder(const char *fromcode,
+                                        yaz_iconv_decoder_t d);
 
 #endif
 /*
diff --git a/src/iconv_decode_danmarc.c b/src/iconv_decode_danmarc.c
new file mode 100644 (file)
index 0000000..3dc0f6e
--- /dev/null
@@ -0,0 +1,132 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2008 Index Data
+ * See the file LICENSE for details.
+ */
+/**
+ * \file
+ * \brief Danmarc2 character set decoding
+ *
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <errno.h>
+#include <string.h>
+#include <ctype.h>
+#include <stdio.h>
+#include <yaz/log.h>
+#include <yaz/xmalloc.h>
+#include "iconv-p.h"
+
+/** \brief Danmarc decoder state, stored in the decoder's data member */
+struct decoder_data {
+    /** character queued for the next read_danmarc call; 0 means none */
+    unsigned long x_back;
+};
+
+/**
+ * \brief Converts one ASCII hexadecimal digit to its numeric value
+ * \param ch input byte
+ * \returns 0..15 for a hexadecimal digit; -1 for any other byte
+ */
+static int danmarc_hex_value(unsigned char ch)
+{
+    if (ch >= '0' && ch <= '9')
+        return ch - '0';
+    if (ch >= 'a' && ch <= 'f')
+        return ch - 'a' + 10;
+    if (ch >= 'A' && ch <= 'F')
+        return ch - 'A' + 10;
+    return -1;
+}
+
+/**
+ * \brief Reads one character from Danmarc encoded input
+ * \param cd iconv handle; receives the error code on failure
+ * \param d decoder; d->data points to a struct decoder_data
+ * \param inp input buffer (length-delimited, not NUL-terminated)
+ * \param inbytesleft number of bytes available at inp
+ * \param no_read receives the number of input bytes consumed
+ * \returns the decoded character; 0 with *no_read == 0 on error
+ *
+ * Bytes other than '@' are returned as is. '@' starts an escape:
+ *  - "@@", "@*" and "@" + 0xa4 yield the second byte;
+ *  - "@" + 0xe5 yields "aa" and "@" + 0xc5 yields "Aa". The first
+ *    call consumes '@' and queues the second letter in x_back; the
+ *    following call returns it and consumes the remaining byte;
+ *  - "@" followed by exactly four hexadecimal digits yields that value.
+ *
+ * Truncated input sets YAZ_ICONV_EINVAL. An "@XXXX" escape where XXXX
+ * is not four hexadecimal digits sets YAZ_ICONV_EILSEQ.
+ */
+static unsigned long read_danmarc(yaz_iconv_t cd,
+                                  yaz_iconv_decoder_t d,
+                                  unsigned char *inp,
+                                  size_t inbytesleft, size_t *no_read)
+{
+    struct decoder_data *data = (struct decoder_data *) d->data;
+    unsigned long x;
+
+    if (data->x_back)
+    {
+        /* deliver the queued second half of an "aa"/"Aa" expansion */
+        *no_read = 1;
+        x = data->x_back;
+        data->x_back = 0;
+        return x;
+    }
+
+    if (inbytesleft < 1)
+    {
+        /* nothing to read; do not touch inp[0] */
+        yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
+        *no_read = 0;
+        return 0;
+    }
+
+    x = inp[0];
+    if (x == '@')
+    {
+        if (inbytesleft < 2)
+        {
+            yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
+            *no_read = 0;
+            return 0;
+        }
+        switch(inp[1])
+        {
+        case '@':
+        case '*':
+        case 0xa4: /* CURRENCY SIGN */
+            x = inp[1];
+            *no_read = 2;
+            break;
+        case 0xe5: /* LATIN SMALL LETTER A WITH RING ABOVE */
+            x = 'a';
+            data->x_back = 'a';
+            *no_read = 1;
+            break;
+        case 0xc5: /* LATIN CAPITAL LETTER A WITH RING ABOVE */
+            x = 'A';
+            data->x_back = 'a';
+            *no_read = 1;
+            break;
+        default:
+            if (inbytesleft < 5)
+            {
+                yaz_iconv_set_errno(cd, YAZ_ICONV_EINVAL);
+                *no_read = 0;
+                return 0;
+            }
+            else
+            {
+                /* Parse by hand instead of sscanf: the buffer is not
+                   NUL-terminated, and sscanf would skip whitespace,
+                   accept a sign or "0x" prefix, and leave the result
+                   unset when no digit is present. */
+                unsigned long v = 0;
+                size_t i;
+
+                for (i = 1; i <= 4; i++)
+                {
+                    int digit = danmarc_hex_value(inp[i]);
+                    if (digit < 0)
+                    {
+                        yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
+                        *no_read = 0;
+                        return 0;
+                    }
+                    v = (v << 4) | (unsigned long) digit;
+                }
+                *no_read = 5;
+                x = v;
+            }
+        }
+    }
+    else
+        *no_read = 1;
+    return x;
+}
+
+
+/**
+ * \brief Resets the Danmarc decoder state
+ * \param cd iconv handle (unused)
+ * \param d decoder; d->data points to a struct decoder_data
+ * \param inp input buffer (unused)
+ * \param inbytesleft bytes available at inp (unused)
+ * \param no_read not written by this function
+ * \returns always 0
+ *
+ * Drops any character still queued in x_back.
+ */
+static size_t init_danmarc(yaz_iconv_t cd, yaz_iconv_decoder_t d,
+                           unsigned char *inp,
+                           size_t inbytesleft, size_t *no_read)
+{
+    ((struct decoder_data *) d->data)->x_back = 0;
+
+    return 0;
+}
+
+/**
+ * \brief Frees the Danmarc decoder state
+ * \param d decoder whose data member holds the struct decoder_data
+ *
+ * File-local: within this file it is only referenced through the
+ * destroy_handle pointer installed by yaz_danmarc_decoder.
+ */
+static void destroy_danmarc(yaz_iconv_decoder_t d)
+{
+    struct decoder_data *data = (struct decoder_data *) d->data;
+    xfree(data);
+}
+
+/**
+ * \brief Sets up decoder d for Danmarc when fromcode names that charset
+ * \param fromcode source character set name
+ * \param d decoder to fill in
+ * \returns d when yaz_matchstr(fromcode, "danmarc") returns 0, with
+ *          freshly allocated state and the read, init and destroy
+ *          handlers installed; otherwise 0 and d is left untouched
+ */
+yaz_iconv_decoder_t yaz_danmarc_decoder(const char *fromcode,
+                                        yaz_iconv_decoder_t d)
+{
+    struct decoder_data *state;
+
+    if (yaz_matchstr(fromcode, "danmarc") != 0)
+        return 0;
+
+    state = (struct decoder_data *) xmalloc(sizeof(*state));
+    state->x_back = 0;
+
+    d->data = state;
+    d->read_handle = read_danmarc;
+    d->init_handle = init_danmarc;
+    d->destroy_handle = destroy_danmarc;
+    return d;
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
index d7f066d..697afe8 100644 (file)
@@ -92,6 +92,8 @@ static int prepare_decoders(yaz_iconv_t cd, const char *tocode)
         return 1;
     if (yaz_wchar_decoder(tocode, &cd->decoder))
         return 1;
+    if (yaz_danmarc_decoder(tocode, &cd->decoder))
+        return 1;
     return 0;
 }
 
index 20424d6..5bc649a 100644 (file)
@@ -681,6 +681,27 @@ static void tst_utf8_codes(void)
     YAZ_CHECK(utf8_check(100000000));
 }
 
+/* Exercises conversion from "danmarc" to "iso-8859-1" via tst_convert,
+   which is given the input string and the expected output string. */
+static void tst_danmarc_to_latin1(void)
+{
+    yaz_iconv_t cd = yaz_iconv_open("iso-8859-1", "danmarc");
+
+    YAZ_CHECK(cd);
+    if (!cd)
+        return;
+
+    /* input without '@' is expected to come out unchanged */
+    YAZ_CHECK(tst_convert(cd, "ax", "ax"));
+
+    /* "@@" stands for one '@'; "@" + four hex digits is a character code
+       ("@000a" is expected to give a newline) */
+    YAZ_CHECK(tst_convert(cd, "a@@b", "a@b"));
+    YAZ_CHECK(tst_convert(cd, "a@@@@b", "a@@b"));
+    YAZ_CHECK(tst_convert(cd, "@000ab", "\nb"));
+
+    /* "@" + 0xe5 (a with ring) is expected as "aa"; "@" + 0xc5 as "Aa" */
+    YAZ_CHECK(tst_convert(cd, "@\xe5", "aa"));
+    YAZ_CHECK(tst_convert(cd, "@\xc5.", "Aa."));
+    
+    yaz_iconv_close(cd);
+}
+
+
 int main (int argc, char **argv)
 {
     YAZ_CHECK_INIT(argc, argv);
@@ -700,6 +721,8 @@ int main (int argc, char **argv)
     tst_utf8_to_marc8("marc8lossy");
     tst_utf8_to_marc8("marc8lossless");
 
+    tst_danmarc_to_latin1();
+
     tst_latin1_to_marc8();
 
     tst_marc8_to_ucs4b();
index c096690..ee78c78 100644 (file)
@@ -495,6 +495,7 @@ MISC_OBJS= \
    $(OBJDIR)\iconv_decode_marc8.obj \
    $(OBJDIR)\iconv_encode_wchar.obj \
    $(OBJDIR)\iconv_decode_iso5426.obj \
+   $(OBJDIR)\iconv_decode_danmarc.obj \
    $(OBJDIR)\mutex.obj \
    $(OBJDIR)\sc.obj