X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dfa%2Fdfa.c;h=276ca06a8b7e4f7a473d2300c3bf610e32c5d05a;hb=af7bc27d33843a8e667022f24589450c39f907f2;hp=4c7c864d072a888c166c9fa8579eee1eef8cd087;hpb=55a416d894dd90b1d5f083f6a9a280986f4b9774;p=idzebra-moved-to-github.git diff --git a/dfa/dfa.c b/dfa/dfa.c index 4c7c864..276ca06 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -1,10 +1,19 @@ /* - * Copyright (C) 1994, Index Data I/S + * Copyright (C) 1994-1996, Index Data I/S * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.11 1996-01-08 19:15:24 adam + * Revision 1.14 1996-10-29 13:57:22 adam + * Include of zebrautl.h instead of alexutil.h. + * + * Revision 1.13 1996/06/17 14:24:08 adam + * Bug fix: read_charset didn't handle character mapping. + * + * Revision 1.12 1996/06/04 10:20:02 adam + * Added support for character mapping. + * + * Revision 1.11 1996/01/08 19:15:24 adam * Allow single $ in expressions. * * Revision 1.10 1996/01/08 09:09:17 adam @@ -48,7 +57,7 @@ #include #include -#include +#include #include "dfap.h" #include "imalloc.h" @@ -402,6 +411,15 @@ static int read_charset (void) { if (!esc0 && ch0 == ']') break; + if (parse_info->cmap) + { + char **mapto, mapfrom[2]; + const char *mcp = mapfrom; + mapfrom[0] = ch0; + mapto = (*parse_info->cmap)(&mcp, 1); + assert (mapto); + ch0 = mapto[0][0]; + } add_BSet (parse_info->charset, look_chars, ch0); ch1 = nextchar_set (&esc1); if (!esc1 && ch1 == '-') @@ -413,6 +431,15 @@ static int read_charset (void) add_BSet (parse_info->charset, look_chars, '-'); break; } + if (parse_info->cmap) + { + char **mapto, mapfrom[2]; + const char *mcp = mapfrom; + mapfrom[0] = ch1; + mapto = (*parse_info->cmap) (&mcp, 1); + assert (mapto); + ch1 = mapto[0][0]; + } for (i=ch0; ++i<=ch1;) add_BSet (parse_info->charset, look_chars, i); ch0 = nextchar_set (&esc0); @@ -428,6 +455,30 @@ static int read_charset (void) return L_CHARS; } +static int map_l_char (void) +{ + char **mapto; + const char *cp0 = (const char *) (expr_ptr-1); + int i = 0, len = strlen(cp0); + + if (cp0[0] == 1 && cp0[1]) + { + expr_ptr++; + look_ch = cp0[1]; + return L_CHAR; + } + if (!parse_info->cmap) + return L_CHAR; + + mapto = (*parse_info->cmap) (&cp0, len); + assert (mapto); + + expr_ptr = (const unsigned char *) cp0; + look_ch = mapto[i][0]; + logf (LOG_DEBUG, "map from %c to %d", expr_ptr[-1], look_ch); + return L_CHAR; +} + static int lex_sub(void) { int esc; @@ -435,11 +486,11 @@ static int lex_sub(void) if (look_ch == '\"') { if (esc) - return L_CHAR; + return map_l_char (); inside_string = !inside_string; } else if (esc || inside_string) - return L_CHAR; + return map_l_char (); else if (look_ch == '[') return read_charset(); else @@ -452,7 +503,7 @@ static int lex_sub(void) --expr_ptr; return cc[1]; } - return L_CHAR; + return map_l_char (); } return 0; } @@ -978,6 +1029,7 @@ static struct DFA_parse *dfa_parse_init (void) parse_info->use_Tnode = parse_info->max_Tnode = 0; parse_info->charMap = NULL; parse_info->charMapSize = 0; + parse_info->cmap = NULL; return parse_info; } @@ -1032,6 +1084,11 @@ struct DFA *dfa_init (void) return dfa; } +void dfa_set_cmap (struct DFA *dfa, char **(*cmap)(const char **from, int len)) +{ + dfa->parse_info->cmap = cmap; +} + int dfa_parse (struct DFA *dfa, const char **pattern) { struct Tnode *top;