From 583fe2d2582ab4078527c152b232ddf359336e91 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 28 Aug 2002 19:52:28 +0000 Subject: [PATCH] Zebra uses yaz_iconv --- CHANGELOG | 11 +++++++++++ index/index.h | 13 ++++--------- index/zebraapi.c | 24 +++++++++++------------ index/zrpn.c | 10 ++++------ index/zsets.c | 9 ++++----- recctrl/recgrs.c | 5 +++-- util/charmap.c | 56 ++++++++++++++++++------------------------------------ 7 files changed, 56 insertions(+), 72 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 5335cb3..b723522 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,15 @@ +Fix MARC transfer. MARC fields had wrong data for multiple fields. + +XML record reader moved from YAZ to Zebra, to make YAZ less +dependent on external libraries. + +Zebra uses yaz_iconv which is a mini iconv library supporting UTF-8, +UCS4, ISO-8859-1. This means that Zebra does UNICODE even +on systems that don't offer iconv. + +XML record reader supports external system entities. + --- 1.3.1 2002/08/20 New .abs-directive "xpath" that takes one argument: "enable" diff --git a/index/index.h b/index/index.h index a805d26..ccb2714 100644 --- a/index/index.h +++ b/index/index.h @@ -1,4 +1,4 @@ -/* $Id: index.h,v 1.85 2002-08-02 19:26:55 adam Exp $ +/* $Id: index.h,v 1.86 2002-08-28 19:52:29 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -35,10 +35,6 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #endif -#if HAVE_ICONV_H -#include -#endif - #include #include #include @@ -303,10 +299,9 @@ struct zebra_session { int records_deleted; int records_processed; char *record_encoding; -#if HAVE_ICONV_H - iconv_t iconv_to_utf8; - iconv_t iconv_from_utf8; -#endif + + yaz_iconv_t iconv_to_utf8; + yaz_iconv_t iconv_from_utf8; }; struct rank_control { diff --git a/index/zebraapi.c b/index/zebraapi.c index dc0913f..26ac712 100644 --- a/index/zebraapi.c +++ b/index/zebraapi.c @@ -1,4 +1,4 @@ -/* $Id: zebraapi.c,v 1.67 
2002-08-18 10:20:35 adam Exp $ +/* $Id: zebraapi.c,v 1.68 2002-08-28 19:52:29 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -102,18 +102,17 @@ ZebraHandle zebra_open (ZebraService zs) default_encoding = res_get_def(zs->global_res, "encoding", "ISO-8859-1"); zh->record_encoding = xstrdup (default_encoding); -#if HAVE_ICONV_H + zh->iconv_to_utf8 = - iconv_open ("UTF-8", default_encoding); - if (zh->iconv_to_utf8 == (iconv_t)(-1)) + yaz_iconv_open ("UTF-8", default_encoding); + if (zh->iconv_to_utf8 == 0) yaz_log (LOG_WARN, "iconv: %s to UTF-8 unsupported", default_encoding); zh->iconv_from_utf8 = - iconv_open (default_encoding, "UTF-8"); - if (zh->iconv_to_utf8 == (iconv_t)(-1)) + yaz_iconv_open (default_encoding, "UTF-8"); + if (zh->iconv_to_utf8 == 0) yaz_log (LOG_WARN, "iconv: UTF-8 to %s unsupported", default_encoding); -#endif zebra_mutex_cond_lock (&zs->session_lock); @@ -431,12 +430,11 @@ void zebra_close (ZebraHandle zh) zebra_close_res (zh); xfree (zh->record_encoding); -#if HAVE_ICONV_H - if (zh->iconv_to_utf8 != (iconv_t) (-1)) - iconv_close (zh->iconv_to_utf8); - if (zh->iconv_from_utf8 != (iconv_t) (-1)) - iconv_close (zh->iconv_from_utf8); -#endif + + if (zh->iconv_to_utf8 != 0) + yaz_iconv_close (zh->iconv_to_utf8); + if (zh->iconv_from_utf8 != 0) + yaz_iconv_close (zh->iconv_from_utf8); xfree (zh->admin_databaseName); zebra_mutex_cond_lock (&zs->session_lock); diff --git a/index/zrpn.c b/index/zrpn.c index badb91e..bd0ceab 100644 --- a/index/zrpn.c +++ b/index/zrpn.c @@ -1,4 +1,4 @@ -/* $Id: zrpn.c,v 1.121 2002-08-23 14:30:51 adam Exp $ +/* $Id: zrpn.c,v 1.122 2002-08-28 19:52:29 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -1105,8 +1105,7 @@ static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, switch (term->which) { case Z_Term_general: -#if HAVE_ICONV_H - if (zh->iconv_to_utf8 != (iconv_t)(-1)) + if (zh->iconv_to_utf8 != 0) { char *inbuf = term->u.general->buf; 
size_t inleft = term->u.general->len; @@ -1115,18 +1114,17 @@ static int trans_term (ZebraHandle zh, Z_AttributesPlusTerm *zapt, size_t ret; yaz_log (LOG_DEBUG, "converting general from ISO-8859-1"); - ret = iconv(zh->iconv_to_utf8, &inbuf, &inleft, + ret = yaz_iconv(zh->iconv_to_utf8, &inbuf, &inleft, &outbuf, &outleft); if (ret == (size_t)(-1)) { - ret = iconv(zh->iconv_to_utf8, 0, 0, 0, 0); + ret = yaz_iconv(zh->iconv_to_utf8, 0, 0, 0, 0); zh->errCode = 125; return -1; } *outbuf = 0; return 0; } -#endif sizez = term->u.general->len; if (sizez > IT_MAX_WORD-1) sizez = IT_MAX_WORD-1; diff --git a/index/zsets.c b/index/zsets.c index 6db021b..c23c572 100644 --- a/index/zsets.c +++ b/index/zsets.c @@ -1,4 +1,4 @@ -/* $Id: zsets.c,v 1.38 2002-08-02 19:26:56 adam Exp $ +/* $Id: zsets.c,v 1.39 2002-08-28 19:52:29 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -160,13 +160,13 @@ int zebra_resultSetTerms (ZebraHandle zh, const char *setname, size_t inleft = strlen(inbuf); size_t outleft = *len - 1; int converted = 0; -#if HAVE_ICONV_H - if (zh->iconv_from_utf8 != (iconv_t)(-1)) + + if (zh->iconv_from_utf8 != 0) { char *outbuf = out; size_t ret; - ret = iconv(zh->iconv_from_utf8, &inbuf, &inleft, + ret = yaz_iconv(zh->iconv_from_utf8, &inbuf, &inleft, &outbuf, &outleft); if (ret == (size_t)(-1)) *len = 0; @@ -174,7 +174,6 @@ int zebra_resultSetTerms (ZebraHandle zh, const char *setname, *len = outbuf - out; converted = 1; } -#endif if (!converted) { if (inleft > outleft) diff --git a/recctrl/recgrs.c b/recctrl/recgrs.c index 6399224..5514b2b 100644 --- a/recctrl/recgrs.c +++ b/recctrl/recgrs.c @@ -1,4 +1,4 @@ -/* $Id: recgrs.c,v 1.62 2002-08-28 12:47:10 adam Exp $ +/* $Id: recgrs.c,v 1.63 2002-08-28 19:52:29 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -225,9 +225,10 @@ static void index_xpath (data1_node *n, struct recExtractCtrl *p, for (xp = n->u.tag.attributes; xp; xp = xp->next) { char 
attr_tag_path_full[1024]; + int int_len = flen; sprintf (attr_tag_path_full, "@%s/%.*s", - xp->name, flen, tag_path_full); + xp->name, int_len, tag_path_full); wrd->reg_type = '0'; wrd->attrUse = 1; diff --git a/util/charmap.c b/util/charmap.c index 011cb39..4d331d4 100644 --- a/util/charmap.c +++ b/util/charmap.c @@ -1,4 +1,4 @@ -/* $Id: charmap.c,v 1.25 2002-08-02 19:26:57 adam Exp $ +/* $Id: charmap.c,v 1.26 2002-08-28 19:52:29 adam Exp $ Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002 Index Data Aps @@ -31,17 +31,6 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include -#if HAVE_ICONV_H -#include -#else -typedef int iconv_t; -static size_t iconv(iconv_t t, char **buf, size_t *inbytesleft, - char **outbuf, size_t *outbytesleft) -{ - return -1; -} -#endif - typedef unsigned ucs4_t; #include @@ -383,18 +372,18 @@ static void fun_add_qmap(const char *s, void *data, int num) logf (LOG_DEBUG, " %3d", (unsigned char) *s); } -static int scan_to_utf8 (iconv_t t, ucs4_t *from, size_t inlen, +static int scan_to_utf8 (yaz_iconv_t t, ucs4_t *from, size_t inlen, char *outbuf, size_t outbytesleft) { size_t inbytesleft = inlen * sizeof(ucs4_t); char *inbuf = (char*) from; size_t ret; - if (t == (iconv_t)(-1)) + if (t == 0) *outbuf++ = *from; /* ISO-8859-1 is OK here */ else { - ret = iconv (t, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + ret = yaz_iconv (t, &inbuf, &inbytesleft, &outbuf, &outbytesleft); if (ret == (size_t) (-1)) { yaz_log (LOG_WARN|LOG_ERRNO, "bad unicode sequence"); @@ -406,7 +395,7 @@ static int scan_to_utf8 (iconv_t t, ucs4_t *from, size_t inlen, } static int scan_string(char *s_native, - iconv_t t_unicode, iconv_t t_utf8, + yaz_iconv_t t_unicode, yaz_iconv_t t_utf8, void (*fun)(const char *c, void *data, int num), void *data, int *num) { @@ -415,16 +404,17 @@ static int scan_string(char *s_native, ucs4_t arg[512]; ucs4_t *s0, *s = arg; ucs4_t c, begin, end; - size_t i, j; + size_t i; - if (t_unicode != 
(iconv_t)(-1)) + if (t_unicode != 0) { char *outbuf = (char *) arg; char *inbuf = s_native; size_t outbytesleft = sizeof(arg)-4; size_t inbytesleft = strlen(s_native); size_t ret; - ret = iconv(t_unicode, &inbuf, &inbytesleft, &outbuf, &outbytesleft); + ret = yaz_iconv(t_unicode, &inbuf, &inbytesleft, + &outbuf, &outbytesleft); if (ret == (size_t)(-1)) return -1; i = (outbuf - (char*) arg)/sizeof(ucs4_t); @@ -496,17 +486,15 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, int errors = 0; int argc, num = (int) *CHR_BASE, i; NMEM nmem; - iconv_t t_unicode = (iconv_t)(-1); - iconv_t t_utf8 = (iconv_t)(-1); + yaz_iconv_t t_unicode = 0; + yaz_iconv_t t_utf8 = 0; unsigned endian = 31; const char *ucs4_native = "UCS-4"; if (*(char*) &endian == 31) /* little endian? */ ucs4_native = "UCS-4LE"; -#if HAVE_ICONV_H - t_utf8 = iconv_open ("UTF-8", ucs4_native); -#endif + t_utf8 = yaz_iconv_open ("UTF-8", ucs4_native); logf (LOG_DEBUG, "maptab %s open", name); if (!(f = yaz_fopen(tabpath, name, "r", tabroot))) { @@ -654,13 +642,9 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, } else if (!yaz_matchstr(argv[0], "encoding")) { -#if HAVE_ICONV_H - if (t_unicode != (iconv_t)(-1)) - iconv_close (t_unicode); - t_unicode = iconv_open (ucs4_native, argv[1]); -#else - logf (LOG_WARN, "Encoding ignored. iconv not installed"); -#endif + if (t_unicode != 0) + yaz_iconv_close (t_unicode); + t_unicode = yaz_iconv_open (ucs4_native, argv[1]); } else { @@ -674,12 +658,10 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, res = 0; } logf (LOG_DEBUG, "maptab %s close %d errors", name, errors); -#if HAVE_ICONV_H - if (t_utf8 != (iconv_t)(-1)) - iconv_close(t_utf8); - if (t_unicode != (iconv_t)(-1)) - iconv_close(t_unicode); -#endif + if (t_utf8 != 0) + yaz_iconv_close(t_utf8); + if (t_unicode != 0) + yaz_iconv_close(t_unicode); return res; } -- 1.7.10.4