X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=dfa%2Fdfa.c;h=45e2493be3f2b8c6b62adf879195df5273779475;hp=8d41a37ca220075960aaefe73faef9a40522d410;hb=a825e22395e14761bcb2f88177d1a86f3da2843a;hpb=41194374db152313aa9a6c2d34053dc33aab180e diff --git a/dfa/dfa.c b/dfa/dfa.c index 8d41a37..45e2493 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -4,7 +4,23 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.1 1995-01-24 16:02:52 adam + * Revision 1.6 1995-10-16 09:31:25 adam + * Bug fix. + * + * Revision 1.5 1995/10/02 15:17:58 adam + * Bug fix in dfa_delete. + * + * Revision 1.4 1995/09/28 09:18:52 adam + * Removed various preprocessor defines. + * + * Revision 1.3 1995/09/04 12:33:26 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.2 1995/01/25 11:30:50 adam + * Simple error reporting when parsing regular expressions. + * Memory usage reduced. + * + * Revision 1.1 1995/01/24 16:02:52 adam * New private header file in dfa module (dfap.h). * Module no longer uses yacc to parse regular expressions. * @@ -16,7 +32,7 @@ #include #include -#include +#include #include "dfap.h" #include "imalloc.h" @@ -45,16 +61,18 @@ struct Tblock { /* Tnode bucket (block) */ struct Tnode *tarray; /* array of nodes in bucket */ }; -#define TADD 256 +#define TADD 64 +#define STATE_HASH 199 +#define POSET_CHUNK 100 int debug_dfa_trav = 0; int debug_dfa_tran = 0; int debug_dfa_followpos = 0; int dfa_verbose = 0; -int yydebug = 0; static struct DFA_parse *parse_info = NULL; +static int err_code; static int inside_string; static const unsigned char *expr_ptr; static unsigned short *ctrl_chars; @@ -116,11 +134,13 @@ static struct Tnode *expr_1 (void) { struct Tnode *t1, *t2, *tn; - t1 = expr_2 (); + if (!(t1 = expr_2 ())) + return t1; while (lookahead == L_ALT) { lex (); - t2 = expr_2 (); + if (!(t2 = expr_2 ())) + return t2; tn = mk_Tnode (); tn->pos = OR; @@ -134,7 +154,9 @@ static struct Tnode *expr_1 (void) static struct Tnode *expr_2 (void) { struct Tnode *t1, *t2, *tn; - t1 = expr_3 (); + + if (!(t1 = expr_3 ())) + return t1; while (lookahead == L_WILD || lookahead == L_ANYZ || lookahead == L_ANY || @@ -142,7 +164,8 @@ static struct Tnode *expr_2 (void) lookahead == L_CHAR || lookahead == L_CHARS) { - t2 = expr_3 (); + if (!(t2 = expr_3 ())) + return t2; tn = mk_Tnode (); tn->pos = CAT; @@ -157,7 +180,8 @@ static struct Tnode *expr_3 (void) { struct Tnode *t1, *tn; - t1 = expr_4 (); + if (!(t1 = expr_4 ())) + return t1; if (lookahead == L_CLOS0) { lex (); @@ -190,13 +214,17 @@ static struct Tnode *expr_3 (void) static struct Tnode *expr_4 (void) { struct Tnode *t1; + switch (lookahead) { case L_LP: lex (); - t1 = expr_1 (); + if (!(t1 = expr_1 ())) + return t1; if (lookahead == L_RP) lex (); + else + return NULL; break; case L_CHAR: t1 = mk_Tnode(); @@ -227,16 +255,15 @@ static struct Tnode *expr_4 (void) lex (); default: t1 = NULL; - assert (0); - lex (); } return t1; } -static int parse (dfap, s, cc) +static void do_parse (dfap, s, cc, tnp) struct DFA_parse *dfap; char **s; const unsigned short *cc; +struct Tnode **tnp; { int i; struct Tnode *t1, *t2; @@ -287,23 +314,37 @@ const unsigned short *cc; ctrl_chars[i] = 0; } parse_info = dfap; + err_code = 0; expr_ptr = (unsigned char *) *s; inside_string = 0; lex (); t1 = expr_1 (); - t2 = mk_Tnode(); - t2->pos = ++parse_info->position; - t2->u.ch[0] = -(++parse_info->rule); - - parse_info->top = mk_Tnode(); - parse_info->top->pos = CAT; - parse_info->top->u.p[0] = t1; - parse_info->top->u.p[1] = t2; - + if (t1 && lookahead == 0) + { + t2 = mk_Tnode(); + t2->pos = ++parse_info->position; + t2->u.ch[0] = -(++parse_info->rule); + + *tnp = mk_Tnode(); + (*tnp)->pos = CAT; + (*tnp)->u.p[0] = t1; + (*tnp)->u.p[1] = t2; + } + else + { + if (!err_code) + { + if (lookahead == L_RP) + err_code = DFA_ERR_RP; + else if (lookahead == L_LP) + err_code = DFA_ERR_LP; + else + err_code = DFA_ERR_SYNTAX; + } + } *s = (char *) expr_ptr; ifree (ctrl_chars); - return 0; } static int nextchar (int *esc) @@ -338,27 +379,37 @@ static int nextchar (int *esc) } } +static int nextchar_set (int *esc) +{ + if (*expr_ptr == ' ') + { + *esc = 0; + return *expr_ptr++; + } + return nextchar (esc); +} + static int read_charset (void) { int i, ch0, ch1, esc0, esc1, cc = 0; look_chars = mk_BSet (&parse_info->charset); res_BSet (parse_info->charset, look_chars); - ch0 = nextchar (&esc0); + ch0 = nextchar_set (&esc0); if (!esc0 && ch0 == '^') { cc = 1; - ch0 = nextchar (&esc0); + ch0 = nextchar_set (&esc0); } while (ch0 != 0) { if (!esc0 && ch0 == ']') break; add_BSet (parse_info->charset, look_chars, ch0); - ch1 = nextchar (&esc1); + ch1 = nextchar_set (&esc1); if (!esc1 && ch1 == '-') { - if ((ch1 = nextchar (&esc1)) == 0) + if ((ch1 = nextchar_set (&esc1)) == 0) break; if (!esc1 && ch1 == ']') { @@ -367,7 +418,7 @@ static int read_charset (void) } for (i=ch0; ++i<=ch1;) add_BSet (parse_info->charset, look_chars, i); - ch0 = nextchar (&esc0); + ch0 = nextchar_set (&esc0); } else { @@ -715,27 +766,28 @@ static void mk_dfa_tran (struct DFA_states *dfas) { char_0 = max_char+1; for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) - if (posar[i]->u.ch[1] >= char_1) - if ((c=posar[i]->u.ch[0]) < char_0) - if (c < char_1) - char_0 = char_1; - else - char_0 = c; + if (posar[i]->u.ch[1] >= char_1 + && (c=posar[i]->u.ch[0]) < char_0) + if (c < char_1) + char_0 = char_1; + else + char_0 = c; - char_1 = max_char; if (char_0 > max_char) break; + + char_1 = max_char; + tran_set = mk_Set (poset); for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) - if ((c=posar[i]->u.ch[1]) >= char_0) - if (posar[i]->u.ch[0] <= char_0) - { - if (c < char_1) - char_1 = c; - tran_set = union_Set (poset, tran_set, followpos[i]); - } - else if (c <= char_1) - char_1 = c-1; + { + if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1) + char_1 = c - 1; /* forward chunk */ + else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1) + char_1 = c; /* backward chunk */ + if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0) + tran_set = union_Set (poset, tran_set, followpos[i]); + } if (tran_set) { add_DFA_state (dfas, &tran_set, &dfa_to); @@ -865,7 +917,7 @@ static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk) if (debug_dfa_followpos) pr_followpos(); - init_DFA_states (&dfas, poset, 401); + init_DFA_states (&dfas, poset, STATE_HASH); mk_dfa_tran (dfas); if (debug_dfa_tran) pr_tran (dfas); @@ -890,14 +942,14 @@ struct DFA *dfa_init (void) int dfa_parse (struct DFA *dfa, char **pattern) { - int i; + struct Tnode *top; assert (dfa); - i = parse (dfa->parse_info, pattern, dfa_thompson_chars); - if (i) - return i; + do_parse (dfa->parse_info, pattern, dfa_thompson_chars, &top); + if (err_code) + return err_code; if ( !dfa->parse_info->root ) - dfa->parse_info->root = dfa->parse_info->top; + dfa->parse_info->root = top; else { struct Tnode *n; @@ -905,7 +957,7 @@ int dfa_parse (struct DFA *dfa, char **pattern) n = mk_Tnode (); n->pos = OR; n->u.p[0] = dfa->parse_info->root; - n->u.p[1] = dfa->parse_info->top; + n->u.p[1] = top; dfa->parse_info->root = n; } return 0; @@ -915,7 +967,7 @@ void dfa_mkstate (struct DFA *dfa) { assert (dfa); - dfa->state_info = mk_dfas (dfa->parse_info, 200); + dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK); dfa->no_states = dfa->state_info->no; dfa->states = dfa->state_info->sortarray; rm_dfa_parse (&dfa->parse_info); @@ -927,7 +979,8 @@ void dfa_delete (struct DFA **dfap) assert (*dfap); if ((*dfap)->parse_info) rm_dfa_parse (&(*dfap)->parse_info); - rm_DFA_states (&(*dfap)->state_info); + if ((*dfap)->state_info) + rm_DFA_states (&(*dfap)->state_info); ifree (*dfap); *dfap = NULL; }