X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dfa%2Fdfa.c;h=d7ef3910e2c247db9ab9932600b568b2ad3e6b50;hb=c8bb72d81cc3496fdfc7143e6fa5216fdb1a60f9;hp=4d827e516214e46a9a141ac7a031f1f7a1514661;hpb=04fba3ff986bf55dcaca1ef958f2b118e9a6832d;p=idzebra-moved-to-github.git diff --git a/dfa/dfa.c b/dfa/dfa.c index 4d827e5..d7ef391 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -1,10 +1,35 @@ /* - * Copyright (C) 1994-1998, Index Data I/S + * Copyright (C) 1994-1999, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.19 1998-01-12 14:39:39 adam + * Revision 1.27 1999-07-15 12:05:32 adam + * Bug fix: Anyset (.) includes all 8-bit characters when charmap is defined. + * + * Revision 1.26 1999/05/26 07:49:12 adam + * C++ compilation. + * + * Revision 1.25 1999/02/02 14:50:05 adam + * Updated WIN32 code specific sections. Changed header. + * + * Revision 1.24 1998/10/28 10:48:55 adam + * Added type cast to prevent warning. + * + * Revision 1.23 1998/09/02 14:15:28 adam + * Zebra uses GNU Configure. + * + * Revision 1.22 1998/06/24 12:16:10 adam + * Support for relations on text operands. Open range support in + * DFA module (i.e. [-j], [g-]). + * + * Revision 1.21 1998/06/22 11:33:39 adam + * Added two type casts. + * + * Revision 1.20 1998/06/08 14:40:44 adam + * Fixed problem with signed character(s) in regular expressions. + * + * Revision 1.19 1998/01/12 14:39:39 adam * Fixed bug in term_Tnode. * * Revision 1.18 1997/09/29 09:05:17 adam @@ -417,18 +442,28 @@ static int read_charset (struct DFA_parse *parse_info) { if (!esc0 && ch0 == ']') break; - if (parse_info->cmap) - { - const char **mapto; - char mapfrom[2]; - const char *mcp = mapfrom; - mapfrom[0] = ch0; - mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1); - assert (mapto); - ch0 = mapto[0][0]; - } - add_BSet (parse_info->charset, parse_info->look_chars, ch0); - ch1 = nextchar_set (parse_info, &esc1); + if (!esc0 && ch0 == '-') + { + ch1 = ch0; + esc1 = esc0; + ch0 = 1; + add_BSet (parse_info->charset, parse_info->look_chars, ch0); + } + else + { + if (parse_info->cmap) + { + const char **mapto; + char mapfrom[2]; + const char *mcp = mapfrom; + mapfrom[0] = ch0; + mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1); + assert (mapto); + ch0 = mapto[0][0]; + } + add_BSet (parse_info->charset, parse_info->look_chars, ch0); + ch1 = nextchar_set (parse_info, &esc1); + } if (!esc1 && ch1 == '-') { int open_range = 0; @@ -484,7 +519,7 @@ static int map_l_char (struct DFA_parse *parse_info) if (cp0[0] == 1 && cp0[1]) { parse_info->expr_ptr++; - parse_info->look_ch = cp0[1]; + parse_info->look_ch = ((unsigned char *) cp0)[1]; return L_CHAR; } if (!parse_info->cmap) @@ -494,7 +529,7 @@ static int map_l_char (struct DFA_parse *parse_info) assert (mapto); parse_info->expr_ptr = (const unsigned char *) cp0; - parse_info->look_ch = mapto[i][0]; + parse_info->look_ch = ((unsigned char **) mapto)[i][0]; logf (LOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch); return L_CHAR; } @@ -517,7 +552,7 @@ static int lex_sub(struct DFA_parse *parse_info) { const int *cc; for (cc = parse_info->charMap; *cc; cc += 2) - if (*cc == parse_info->look_ch) + if (*cc == (int) (parse_info->look_ch)) { if (!cc[1]) --parse_info->expr_ptr; @@ -537,7 +572,7 @@ static const char *str_char (unsigned c) { static char s[6]; s[0] = '\\'; - if (c < 32) + if (c < 32 || c >= 127) switch (c) { case '\r': @@ -753,6 +788,7 @@ static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n) n->lastpos = mk_Set (poset); n->lastpos = add_Set (poset, n->lastpos, n->pos); if (debug_dfa_trav) + { if (n->u.ch[0] < 0) printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]); else if (n->u.ch[1] > n->u.ch[0]) @@ -766,6 +802,7 @@ static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n) } else out_char (n->u.ch[0]); + } } if (debug_dfa_trav) { @@ -836,10 +873,12 @@ static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas) for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) if (posar[i]->u.ch[1] >= char_1 && (c=posar[i]->u.ch[0]) < char_0) + { if (c < char_1) char_0 = char_1; else char_0 = c; + } if (char_0 > max_char) break; @@ -955,7 +994,8 @@ void dfa_parse_cmap_clean (struct DFA *d) if (!dfa->charMap) { dfa->charMapSize = 7; - dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap)); + dfa->charMap = (int *) + imalloc (dfa->charMapSize * sizeof(*dfa->charMap)); } dfa->charMap[0] = 0; } @@ -975,7 +1015,7 @@ void dfa_parse_cmap_new (struct DFA *d, const int *cmap) if (dfa->charMap) ifree (dfa->charMap); dfa->charMapSize = size; - dfa->charMap = imalloc (size * sizeof(*dfa->charMap)); + dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap)); } memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap)); } @@ -1015,7 +1055,7 @@ void dfa_parse_cmap_add (struct DFA *d, int from, int to) size = dfa->charMapSize; if (indx >= size) { - int *cn = imalloc ((size+16) * sizeof(*dfa->charMap)); + int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap)); memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap)); ifree (dfa->charMap); dfa->charMap = cn; @@ -1059,7 +1099,6 @@ static struct DFA_parse *dfa_parse_init (void) parse_info->anyset = mk_BSet (&parse_info->charset); res_BSet (parse_info->charset, parse_info->anyset); - add_BSet (parse_info->charset, parse_info->anyset, '\n'); com_BSet (parse_info->charset, parse_info->anyset); parse_info->use_Tnode = parse_info->max_Tnode = 0; parse_info->start = parse_info->end = NULL; @@ -1096,7 +1135,7 @@ static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk) if (debug_dfa_followpos) pr_followpos(parse_info); - init_DFA_states (&dfas, parse_info->poset, STATE_HASH); + init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH)); mk_dfa_tran (parse_info, dfas); if (debug_dfa_tran) pr_tran (parse_info, dfas); @@ -1112,7 +1151,7 @@ struct DFA *dfa_init (void) { struct DFA *dfa; - dfa = imalloc (sizeof(*dfa)); + dfa = (struct DFA *) imalloc (sizeof(*dfa)); dfa->parse_info = dfa_parse_init (); dfa->state_info = NULL; dfa->states = NULL; @@ -1135,6 +1174,13 @@ int dfa_parse (struct DFA *dfa, const char **pattern) assert (dfa); assert (dfa->parse_info); parse_info = dfa->parse_info; + + if (!parse_info->cmap) + { + res_BSet (parse_info->charset, parse_info->anyset); + add_BSet (parse_info->charset, parse_info->anyset, '\n'); + com_BSet (parse_info->charset, parse_info->anyset); + } do_parse (parse_info, pattern, &top); if (parse_info->err_code) return parse_info->err_code;