X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=util%2Fcharmap.c;h=bfcd9635ef7a210ca6af23cfcf906fa79b23921c;hb=f00cbbbdc1df5478ce74c4a13efafc42257e4cc1;hp=48fdec974b635d2f2d28ad5de03d462f67540675;hpb=4eb3b54bb2ca9af74f39f000d3d40dba99ded887;p=idzebra-moved-to-github.git diff --git a/util/charmap.c b/util/charmap.c index 48fdec9..bfcd963 100644 --- a/util/charmap.c +++ b/util/charmap.c @@ -1,6 +1,6 @@ -/* $Id: charmap.c,v 1.33 2004-12-13 20:51:33 adam Exp $ - Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002,2003,2004 - Index Data Aps +/* $Id: charmap.c,v 1.36 2005-03-11 17:56:36 adam Exp $ + Copyright (C) 1995-2005 + Index Data ApS This file is part of the Zebra server. @@ -29,6 +29,7 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include #include +#include #include typedef unsigned ucs4_t; @@ -75,6 +76,15 @@ typedef struct chrwork } chrwork; /* + * Callback for equivalent stuff + */ +typedef struct +{ + NMEM nmem; + int no_eq; + char *eq[CHR_MAXEQUIV]; +} chr_equiv_work; +/* * Add an entry to the character map. */ static chr_t_entry *set_map_string(chr_t_entry *root, NMEM nmem, @@ -210,6 +220,20 @@ const char **chr_map_input(chrmaptab maptab, const char **from, int len, int fir return (const char **) (res->target); } +const char **chr_map_q_input(chrmaptab maptab, + const char **from, int len, int first) +{ + chr_t_entry *t = maptab->q_input; + chr_t_entry *res; + int len_tmp[2]; + + len_tmp[0] = len; + len_tmp[1] = -1; + if (!(res = find_entry_x(t, from, len_tmp, first))) + return 0; + return (const char **) (res->target); +} + const char *chr_map_output(chrmaptab maptab, const char **from, int len) { unsigned char c = ** (unsigned char **) from; @@ -398,6 +422,18 @@ static void fun_mkstring(const char *s, void *data, int num) } /* + * Create an unmodified string (scan_string handler). + */ +static void fun_add_equivalent_string(const char *s, void *data, int num) +{ + chr_equiv_work *arg = (chr_equiv_work *) data; + + if (arg->no_eq == CHR_MAXEQUIV) + return; + arg->eq[arg->no_eq++] = nmem_strdup(arg->nmem, s); +} + +/* * Add a map to the string contained in the argument. */ static void fun_add_map(const char *s, void *data, int num) @@ -412,21 +448,6 @@ static void fun_add_map(const char *s, void *data, int num) yaz_log (YLOG_DEBUG, " %3d", (unsigned char) *s); } -/* - * Add a query map to the string contained in the argument. - */ -static void fun_add_qmap(const char *s, void *data, int num) -{ - chrwork *arg = (chrwork *) data; - - assert(arg->map->q_input); - yaz_log (YLOG_DEBUG, "set qmap %.*s", (int) strlen(s), s); - set_map_string(arg->map->q_input, arg->map->nmem, s, - strlen(s), arg->string, 0); - for (s = arg->string; *s; s++) - yaz_log (YLOG_DEBUG, " %3d", (unsigned char) *s); -} - static int scan_to_utf8 (yaz_iconv_t t, ucs4_t *from, size_t inlen, char *outbuf, size_t outbytesleft) { @@ -689,29 +710,58 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, ++errors; } } - else if (!yaz_matchstr(argv[0], "qmap")) + else if (!yaz_matchstr(argv[0], "equivalent")) { - chrwork buf; + chr_equiv_work w; - if (argc != 3) + if (argc != 2) { - yaz_log(YLOG_FATAL, "charmap directive qmap requires 2 args"); + yaz_log(YLOG_FATAL, "equivalent requires 1 argument"); ++errors; } - buf.map = res; - buf.string[0] = '\0'; - if (scan_string(argv[2], t_unicode, t_utf8, - fun_mkstring, &buf, 0) < 0) + w.nmem = res->nmem; + w.no_eq = 0; + if (scan_string(argv[1], t_unicode, t_utf8, + fun_add_equivalent_string, &w, 0) < 0) { - yaz_log(YLOG_FATAL, "Bad qmap target"); + yaz_log(YLOG_FATAL, "equivalent: invalid string"); ++errors; } - if (scan_string(argv[1], t_unicode, t_utf8, - fun_add_qmap, &buf, 0) < 0) + else if (w.no_eq == 0) { - yaz_log(YLOG_FATAL, "Bad qmap source"); + yaz_log(YLOG_FATAL, "equivalent: no strings"); ++errors; } + else + { + char *result_str; + int i, slen = 5; + + /* determine length of regular expression */ + for (i = 0; inmem, slen + 5); + + /* build the regular expression */ + *result_str = '\0'; + slen = 0; + for (i = 0; iq_input, res->nmem, + w.eq[i], strlen(w.eq[i]), + result_str, 0); + } + } } else if (!yaz_matchstr(argv[0], "encoding")) {