New encoding: danmarc YAZ-692
author Adam Dickmeiss <adam@indexdata.dk>
Thu, 7 Nov 2013 15:02:44 +0000 (16:02 +0100)
committer Adam Dickmeiss <adam@indexdata.dk>
Thu, 7 Nov 2013 15:02:44 +0000 (16:02 +0100)
src/Makefile.am
src/iconv-p.h
src/iconv_encode_danmarc.c [new file with mode: 0644]
src/siconv.c
test/test_iconv.c
win/makefile

index 65bf742..08389c8 100644 (file)
@@ -104,7 +104,7 @@ libyaz_la_SOURCES=base64.c version.c options.c log.c \
   xmlquery.c xmlerror.c http.c \
   mime.c mime.h oid_util.c tokenizer.c \
   record_conv.c retrieval.c elementset.c snprintf.c query-charset.c \
   xmlquery.c xmlerror.c http.c \
   mime.c mime.h oid_util.c tokenizer.c \
   record_conv.c retrieval.c elementset.c snprintf.c query-charset.c \
-  copy_types.c match_glob.c poll.c daemon.c \
+  copy_types.c match_glob.c poll.c daemon.c iconv_encode_danmarc.c \
   iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c \
   iconv_decode_marc8.c iconv_decode_iso5426.c iconv_decode_danmarc.c sc.c \
   json.c xml_include.c file_glob.c dirent.c mutex-p.h mutex.c condvar.c \
   iconv_encode_marc8.c iconv_encode_iso_8859_1.c iconv_encode_wchar.c \
   iconv_decode_marc8.c iconv_decode_iso5426.c iconv_decode_danmarc.c sc.c \
   json.c xml_include.c file_glob.c dirent.c mutex-p.h mutex.c condvar.c \
index a236403..df62e30 100644 (file)
@@ -63,6 +63,8 @@ yaz_iconv_encoder_t yaz_iso_5428_encoder(const char *name,
                                          yaz_iconv_encoder_t e);
 yaz_iconv_encoder_t yaz_advancegreek_encoder(const char *name,
                                              yaz_iconv_encoder_t e);
                                          yaz_iconv_encoder_t e);
 yaz_iconv_encoder_t yaz_advancegreek_encoder(const char *name,
                                              yaz_iconv_encoder_t e);
+yaz_iconv_encoder_t yaz_danmarc_encoder(const char *name,
+                                        yaz_iconv_encoder_t e);
 yaz_iconv_encoder_t yaz_wchar_encoder(const char *name,
                                       yaz_iconv_encoder_t e);
 typedef unsigned long yaz_conv_func_t(unsigned char *inp, size_t inbytesleft,
 yaz_iconv_encoder_t yaz_wchar_encoder(const char *name,
                                       yaz_iconv_encoder_t e);
 typedef unsigned long yaz_conv_func_t(unsigned char *inp, size_t inbytesleft,
diff --git a/src/iconv_encode_danmarc.c b/src/iconv_encode_danmarc.c
new file mode 100644 (file)
index 0000000..1afcf7c
--- /dev/null
@@ -0,0 +1,86 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2013 Index Data
+ * See the file LICENSE for details.
+ */
+/**
+ * \file
+ * \brief Danmarc2 character set encoding
+ */
+
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <assert.h>
+#include <stdio.h>
+#include <errno.h>
+#include <string.h>
+
+#include <yaz/xmalloc.h>
+#include "iconv-p.h"
+
+/**
+ * \brief Writes one Unicode code point in Danmarc2 encoding.
+ * \param cd iconv handle; receives the error code on failure
+ * \param en encoder handle (not used by this encoder)
+ * \param x code point to encode
+ * \param outbuf in/out: output position; advanced past the bytes written
+ * \param outbytesleft in/out: free bytes at *outbuf; reduced by bytes written
+ * \returns 0 on success; (size_t)(-1) on failure with the error set on cd
+ *
+ * '@' is the escape character and is written doubled ("@@"). Other code
+ * points up to 255 are written as a single byte. Code points up to 0xFFFF
+ * are written as '@' followed by exactly four uppercase hex digits.
+ * Larger code points do not fit the four-digit escape and are rejected
+ * with YAZ_ICONV_EILSEQ; nothing is written in that case.
+ * YAZ_ICONV_E2BIG is set when the output buffer is too small.
+ */
+static size_t write_danmarc(yaz_iconv_t cd, yaz_iconv_encoder_t en,
+                           unsigned long x,
+                           char **outbuf, size_t *outbytesleft)
+{
+    static const char hex_digits[] = "0123456789ABCDEF";
+    unsigned char *outp = (unsigned char *) *outbuf;
+    size_t need;
+
+    if (x > 0xFFFF)
+    {   /* would need more than four hex digits */
+        yaz_iconv_set_errno(cd, YAZ_ICONV_EILSEQ);
+        return (size_t)(-1);
+    }
+
+    if (x == '@')
+        need = 2;
+    else if (x <= 255)
+        need = 1;
+    else
+        need = 5;
+
+    if (*outbytesleft < need)
+    {
+        yaz_iconv_set_errno(cd, YAZ_ICONV_E2BIG);
+        return (size_t)(-1);
+    }
+
+    if (x == '@')
+    {   /* escape character itself: emit @@ */
+        *outp++ = '@';
+        *outp++ = '@';
+    }
+    else if (x <= 255)
+    {   /* latin-1 range */
+        *outp++ = (unsigned char) x;
+    }
+    else
+    {   /* full unicode, emit @XXXX; no terminating NUL is written */
+        *outp++ = '@';
+        *outp++ = (unsigned char) hex_digits[(x >> 12) & 15];
+        *outp++ = (unsigned char) hex_digits[(x >> 8) & 15];
+        *outp++ = (unsigned char) hex_digits[(x >> 4) & 15];
+        *outp++ = (unsigned char) hex_digits[x & 15];
+    }
+    *outbytesleft -= need;
+    *outbuf = (char *) outp;
+    return 0;
+}
+
+/**
+ * \brief Selects the Danmarc2 encoder when the target encoding asks for it.
+ * \param tocode name of the requested target encoding
+ * \param e encoder to set up
+ * \returns e, with its write handler set to write_danmarc, when
+ * yaz_matchstr reports that tocode matches "danmarc"; 0 otherwise
+ */
+yaz_iconv_encoder_t yaz_danmarc_encoder(const char *tocode,
+                                        yaz_iconv_encoder_t e)
+{
+    if (yaz_matchstr(tocode, "danmarc") != 0)
+        return 0;
+
+    e->write_handle = write_danmarc;
+    return e;
+}
+
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
index be6eae5..f1430fe 100644 (file)
@@ -64,6 +64,8 @@ static int prepare_encoders(yaz_iconv_t cd, const char *tocode)
         return 1;
     if (yaz_wchar_encoder(tocode, &cd->encoder))
         return 1;
         return 1;
     if (yaz_wchar_encoder(tocode, &cd->encoder))
         return 1;
+    if (yaz_danmarc_encoder(tocode, &cd->encoder))
+        return 1;
     return 0;
 }
 
     return 0;
 }
 
index 4651203..e341972 100644 (file)
@@ -701,6 +701,23 @@ static void tst_danmarc_to_latin1(void)
     yaz_iconv_close(cd);
 }
 
     yaz_iconv_close(cd);
 }
 
+static void tst_utf8_to_danmarc(void)
+{ /* Checks conversion of UTF-8 input to the "danmarc" encoding. */
+    yaz_iconv_t cd = yaz_iconv_open("danmarc", "utf-8");
+
+    YAZ_CHECK(cd);
+    if (!cd) /* nothing more can be checked without a converter */
+        return;
+
+    /* presumably tst_convert(cd, input, expected output) - confirm against its definition */
+    YAZ_CHECK(tst_convert(cd, "ax", "ax")); /* plain ASCII expected unchanged */
+    YAZ_CHECK(tst_convert(cd, "@", "@@")); /* '@' expected doubled */
+    YAZ_CHECK(tst_convert(cd, "a\xc3\xa5" "b", "a\xe5" "b")); /* aring, U+00E5: expected as the single byte 0xE5 */
+    YAZ_CHECK(tst_convert(cd, "a\xce\xbb" "b", "a@03BBb")); /* lambda, U+03BB: expected as @ plus four hex digits */
+
+    yaz_iconv_close(cd);
+}
+
+
 
 int main (int argc, char **argv)
 {
 
 int main (int argc, char **argv)
 {
@@ -722,6 +739,7 @@ int main (int argc, char **argv)
     tst_utf8_to_marc8("marc8lossless");
 
     tst_danmarc_to_latin1();
     tst_utf8_to_marc8("marc8lossless");
 
     tst_danmarc_to_latin1();
+    tst_utf8_to_danmarc();
 
     tst_latin1_to_marc8();
 
 
     tst_latin1_to_marc8();
 
index 3b9cf54..74039cc 100644 (file)
@@ -535,6 +535,7 @@ MISC_OBJS= \
    $(OBJDIR)\match_glob.obj \
    $(OBJDIR)\poll.obj \
    $(OBJDIR)\daemon.obj \
    $(OBJDIR)\match_glob.obj \
    $(OBJDIR)\poll.obj \
    $(OBJDIR)\daemon.obj \
+   $(OBJDIR)\iconv_encode_danmarc.obj \
    $(OBJDIR)\iconv_encode_iso_8859_1.obj \
    $(OBJDIR)\iconv_encode_marc8.obj \
    $(OBJDIR)\iconv_decode_marc8.obj \
    $(OBJDIR)\iconv_encode_iso_8859_1.obj \
    $(OBJDIR)\iconv_encode_marc8.obj \
    $(OBJDIR)\iconv_decode_marc8.obj \