X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=util%2Fcharmap.c;h=bfcd9635ef7a210ca6af23cfcf906fa79b23921c;hb=d8e960f5f1060148a5fffa7a767c72391662fbdc;hp=8a5d2b8bc78917baba6a17f4f3eebe5f5e4fe739;hpb=8add234f71c852fd95ca3aef168e3563265c93b9;p=idzebra-moved-to-github.git diff --git a/util/charmap.c b/util/charmap.c index 8a5d2b8..bfcd963 100644 --- a/util/charmap.c +++ b/util/charmap.c @@ -1,4 +1,4 @@ -/* $Id: charmap.c,v 1.35 2005-01-16 23:14:58 adam Exp $ +/* $Id: charmap.c,v 1.36 2005-03-11 17:56:36 adam Exp $ Copyright (C) 1995-2005 Index Data ApS @@ -76,6 +76,15 @@ typedef struct chrwork } chrwork; /* + * Callback for equivalent stuff + */ +typedef struct +{ + NMEM nmem; + int no_eq; + char *eq[CHR_MAXEQUIV]; +} chr_equiv_work; +/* * Add an entry to the character map. */ static chr_t_entry *set_map_string(chr_t_entry *root, NMEM nmem, @@ -211,6 +220,20 @@ const char **chr_map_input(chrmaptab maptab, const char **from, int len, int fir return (const char **) (res->target); } +const char **chr_map_q_input(chrmaptab maptab, + const char **from, int len, int first) +{ + chr_t_entry *t = maptab->q_input; + chr_t_entry *res; + int len_tmp[2]; + + len_tmp[0] = len; + len_tmp[1] = -1; + if (!(res = find_entry_x(t, from, len_tmp, first))) + return 0; + return (const char **) (res->target); +} + const char *chr_map_output(chrmaptab maptab, const char **from, int len) { unsigned char c = ** (unsigned char **) from; @@ -399,6 +422,18 @@ static void fun_mkstring(const char *s, void *data, int num) } /* + * Create an unmodified string (scan_string handler). + */ +static void fun_add_equivalent_string(const char *s, void *data, int num) +{ + chr_equiv_work *arg = (chr_equiv_work *) data; + + if (arg->no_eq == CHR_MAXEQUIV) + return; + arg->eq[arg->no_eq++] = nmem_strdup(arg->nmem, s); +} + +/* * Add a map to the string contained in the argument. */ static void fun_add_map(const char *s, void *data, int num) @@ -413,21 +448,6 @@ static void fun_add_map(const char *s, void *data, int num) yaz_log (YLOG_DEBUG, " %3d", (unsigned char) *s); } -/* - * Add a query map to the string contained in the argument. - */ -static void fun_add_qmap(const char *s, void *data, int num) -{ - chrwork *arg = (chrwork *) data; - - assert(arg->map->q_input); - yaz_log (YLOG_DEBUG, "set qmap %.*s", (int) strlen(s), s); - set_map_string(arg->map->q_input, arg->map->nmem, s, - strlen(s), arg->string, 0); - for (s = arg->string; *s; s++) - yaz_log (YLOG_DEBUG, " %3d", (unsigned char) *s); -} - static int scan_to_utf8 (yaz_iconv_t t, ucs4_t *from, size_t inlen, char *outbuf, size_t outbytesleft) { @@ -690,29 +710,58 @@ chrmaptab chrmaptab_create(const char *tabpath, const char *name, int map_only, ++errors; } } - else if (!yaz_matchstr(argv[0], "qmap")) + else if (!yaz_matchstr(argv[0], "equivalent")) { - chrwork buf; + chr_equiv_work w; - if (argc != 3) + if (argc != 2) { - yaz_log(YLOG_FATAL, "charmap directive qmap requires 2 args"); + yaz_log(YLOG_FATAL, "equivalent requires 1 argument"); ++errors; } - buf.map = res; - buf.string[0] = '\0'; - if (scan_string(argv[2], t_unicode, t_utf8, - fun_mkstring, &buf, 0) < 0) + w.nmem = res->nmem; + w.no_eq = 0; + if (scan_string(argv[1], t_unicode, t_utf8, + fun_add_equivalent_string, &w, 0) < 0) { - yaz_log(YLOG_FATAL, "Bad qmap target"); + yaz_log(YLOG_FATAL, "equivalent: invalid string"); ++errors; } - if (scan_string(argv[1], t_unicode, t_utf8, - fun_add_qmap, &buf, 0) < 0) + else if (w.no_eq == 0) { - yaz_log(YLOG_FATAL, "Bad qmap source"); + yaz_log(YLOG_FATAL, "equivalent: no strings"); ++errors; } + else + { + char *result_str; + int i, slen = 5; + + /* determine length of regular expression */ + for (i = 0; inmem, slen + 5); + + /* build the regular expression */ + *result_str = '\0'; + slen = 0; + for (i = 0; iq_input, res->nmem, + w.eq[i], strlen(w.eq[i]), + result_str, 0); + } + } } else if (!yaz_matchstr(argv[0], "encoding")) {