From 4461019c0de49358856cb1c84ca162395b247f16 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Wed, 25 Jan 1995 11:30:50 +0000 Subject: [PATCH] Simple error reporting when parsing regular expressions. Memory usage reduced. --- dfa/dfa.c | 108 ++++++++++++++++++++++++++++++++++++++------------------ dfa/dfap.h | 7 ++-- dfa/lexer.c | 10 +++++- dfa/readfile.c | 7 +++- dfa/states.c | 16 ++++++--- 5 files changed, 106 insertions(+), 42 deletions(-) diff --git a/dfa/dfa.c b/dfa/dfa.c index 8d41a37..0104852 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.1 1995-01-24 16:02:52 adam + * Revision 1.2 1995-01-25 11:30:50 adam + * Simple error reporting when parsing regular expressions. + * Memory usage reduced. + * + * Revision 1.1 1995/01/24 16:02:52 adam * New private header file in dfa module (dfap.h). * Module no longer uses yacc to parse regular expressions. * @@ -45,7 +49,9 @@ struct Tblock { /* Tnode bucket (block) */ struct Tnode *tarray; /* array of nodes in bucket */ }; -#define TADD 256 +#define TADD 64 +#define STATE_HASH 199 +#define POSET_CHUNK 100 int debug_dfa_trav = 0; int debug_dfa_tran = 0; @@ -55,6 +61,7 @@ int yydebug = 0; static struct DFA_parse *parse_info = NULL; +static int err_code; static int inside_string; static const unsigned char *expr_ptr; static unsigned short *ctrl_chars; @@ -116,11 +123,13 @@ static struct Tnode *expr_1 (void) { struct Tnode *t1, *t2, *tn; - t1 = expr_2 (); + if (!(t1 = expr_2 ())) + return t1; while (lookahead == L_ALT) { lex (); - t2 = expr_2 (); + if (!(t2 = expr_2 ())) + return t2; tn = mk_Tnode (); tn->pos = OR; @@ -134,7 +143,9 @@ static struct Tnode *expr_1 (void) static struct Tnode *expr_2 (void) { struct Tnode *t1, *t2, *tn; - t1 = expr_3 (); + + if (!(t1 = expr_3 ())) + return t1; while (lookahead == L_WILD || lookahead == L_ANYZ || lookahead == L_ANY || @@ -142,7 +153,8 @@ static struct Tnode *expr_2 (void) lookahead == L_CHAR || lookahead == L_CHARS) { - t2 = expr_3 (); + if (!(t2 = expr_3 ())) + return t2; tn = mk_Tnode (); tn->pos = CAT; @@ -157,7 +169,8 @@ static struct Tnode *expr_3 (void) { struct Tnode *t1, *tn; - t1 = expr_4 (); + if (!(t1 = expr_4 ())) + return t1; if (lookahead == L_CLOS0) { lex (); @@ -190,13 +203,17 @@ static struct Tnode *expr_3 (void) static struct Tnode *expr_4 (void) { struct Tnode *t1; + switch (lookahead) { case L_LP: lex (); - t1 = expr_1 (); + if (!(t1 = expr_1 ())) + return t1; if (lookahead == L_RP) lex (); + else + return NULL; break; case L_CHAR: t1 = mk_Tnode(); @@ -227,16 +244,15 @@ static struct Tnode *expr_4 (void) lex (); default: t1 = NULL; - assert (0); - lex (); } return t1; } -static int parse (dfap, s, cc) +static void do_parse (dfap, s, cc, tnp) struct DFA_parse *dfap; char **s; const unsigned short *cc; +struct Tnode **tnp; { int i; struct Tnode *t1, *t2; @@ -287,23 +303,37 @@ const unsigned short *cc; ctrl_chars[i] = 0; } parse_info = dfap; + err_code = 0; expr_ptr = (unsigned char *) *s; inside_string = 0; lex (); t1 = expr_1 (); - t2 = mk_Tnode(); - t2->pos = ++parse_info->position; - t2->u.ch[0] = -(++parse_info->rule); - - parse_info->top = mk_Tnode(); - parse_info->top->pos = CAT; - parse_info->top->u.p[0] = t1; - parse_info->top->u.p[1] = t2; - + if (t1 && lookahead == 0) + { + t2 = mk_Tnode(); + t2->pos = ++parse_info->position; + t2->u.ch[0] = -(++parse_info->rule); + + *tnp = mk_Tnode(); + (*tnp)->pos = CAT; + (*tnp)->u.p[0] = t1; + (*tnp)->u.p[1] = t2; + } + else + { + if (!err_code) + { + if (lookahead == L_RP) + err_code = DFA_ERR_RP; + else if (lookahead == L_LP) + err_code = DFA_ERR_LP; + else + err_code = DFA_ERR_SYNTAX; + } + } *s = (char *) expr_ptr; ifree (ctrl_chars); - return 0; } static int nextchar (int *esc) @@ -338,27 +368,37 @@ static int nextchar (int *esc) } } +static int nextchar_set (int *esc) +{ + if (*expr_ptr == ' ') + { + *esc = 0; + return *expr_ptr++; + } + return nextchar (esc); +} + static int read_charset (void) { int i, ch0, ch1, esc0, esc1, cc = 0; look_chars = mk_BSet (&parse_info->charset); res_BSet (parse_info->charset, look_chars); - ch0 = nextchar (&esc0); + ch0 = nextchar_set (&esc0); if (!esc0 && ch0 == '^') { cc = 1; - ch0 = nextchar (&esc0); + ch0 = nextchar_set (&esc0); } while (ch0 != 0) { if (!esc0 && ch0 == ']') break; add_BSet (parse_info->charset, look_chars, ch0); - ch1 = nextchar (&esc1); + ch1 = nextchar_set (&esc1); if (!esc1 && ch1 == '-') { - if ((ch1 = nextchar (&esc1)) == 0) + if ((ch1 = nextchar_set (&esc1)) == 0) break; if (!esc1 && ch1 == ']') { @@ -367,7 +407,7 @@ static int read_charset (void) } for (i=ch0; ++i<=ch1;) add_BSet (parse_info->charset, look_chars, i); - ch0 = nextchar (&esc0); + ch0 = nextchar_set (&esc0); } else { @@ -865,7 +905,7 @@ static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk) if (debug_dfa_followpos) pr_followpos(); - init_DFA_states (&dfas, poset, 401); + init_DFA_states (&dfas, poset, STATE_HASH); mk_dfa_tran (dfas); if (debug_dfa_tran) pr_tran (dfas); @@ -890,14 +930,14 @@ struct DFA *dfa_init (void) int dfa_parse (struct DFA *dfa, char **pattern) { - int i; + struct Tnode *top; assert (dfa); - i = parse (dfa->parse_info, pattern, dfa_thompson_chars); - if (i) - return i; + do_parse (dfa->parse_info, pattern, dfa_thompson_chars, &top); + if (err_code) + return err_code; if ( !dfa->parse_info->root ) - dfa->parse_info->root = dfa->parse_info->top; + dfa->parse_info->root = top; else { struct Tnode *n; @@ -905,7 +945,7 @@ int dfa_parse (struct DFA *dfa, char **pattern) n = mk_Tnode (); n->pos = OR; n->u.p[0] = dfa->parse_info->root; - n->u.p[1] = dfa->parse_info->top; + n->u.p[1] = top; dfa->parse_info->root = n; } return 0; @@ -915,7 +955,7 @@ void dfa_mkstate (struct DFA *dfa) { assert (dfa); - dfa->state_info = mk_dfas (dfa->parse_info, 200); + dfa->state_info = mk_dfas (dfa->parse_info, POSET_CHUNK); dfa->no_states = dfa->state_info->no; dfa->states = dfa->state_info->sortarray; rm_dfa_parse (&dfa->parse_info); diff --git a/dfa/dfap.h b/dfa/dfap.h index d58d9f4..b158d84 100644 --- a/dfa/dfap.h +++ b/dfa/dfap.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dfap.h,v $ - * Revision 1.1 1995-01-24 16:02:53 adam + * Revision 1.2 1995-01-25 11:30:50 adam + * Simple error reporting when parsing regular expressions. + * Memory usage reduced. + * + * Revision 1.1 1995/01/24 16:02:53 adam * New private header file in dfa module (dfap.h). * Module no longer uses yacc to parse regular expressions. * @@ -17,7 +21,6 @@ struct DFA_parse { struct Tnode *root; /* root of regular syntax tree */ - struct Tnode *top; /* regular tree top (returned by parse_dfa) */ int position; /* no of positions so far */ int rule; /* no of rules so far */ BSetHandle *charset; /* character set type */ diff --git a/dfa/lexer.c b/dfa/lexer.c index 2424444..85fa2e5 100644 --- a/dfa/lexer.c +++ b/dfa/lexer.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: lexer.c,v $ - * Revision 1.5 1995-01-24 16:00:22 adam + * Revision 1.6 1995-01-25 11:30:51 adam + * Simple error reporting when parsing regular expressions. + * Memory usage reduced. + * + * Revision 1.5 1995/01/24 16:00:22 adam * Added -ansi to CFLAGS. * Some changes to the dfa module. * @@ -141,6 +145,10 @@ int main (int argc, char **argv) if (i) return i; dfa_mkstate (dfa); + +#ifdef MEMDEBUG + imemstat(); +#endif } dfa_delete (&dfa); #ifdef MEMDEBUG diff --git a/dfa/readfile.c b/dfa/readfile.c index 4fd9cc9..7983ca6 100644 --- a/dfa/readfile.c +++ b/dfa/readfile.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: readfile.c,v $ - * Revision 1.3 1995-01-24 16:00:22 adam + * Revision 1.4 1995-01-25 11:30:51 adam + * Simple error reporting when parsing regular expressions. + * Memory usage reduced. + * + * Revision 1.3 1995/01/24 16:00:22 adam * Added -ansi to CFLAGS. * Some changes to the dfa module. * @@ -104,6 +108,7 @@ static void read_rules (struct DFA *dfa) { fprintf (stderr, "%s #%d: regular expression syntax error\n", inf_name, line_no); + assert (0); err_no++; } else diff --git a/dfa/states.c b/dfa/states.c index 289d824..a3f6532 100644 --- a/dfa/states.c +++ b/dfa/states.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: states.c,v $ - * Revision 1.2 1995-01-24 16:00:23 adam + * Revision 1.3 1995-01-25 11:30:51 adam + * Simple error reporting when parsing regular expressions. + * Memory usage reduced. + * + * Revision 1.2 1995/01/24 16:00:23 adam * Added -ansi to CFLAGS. * Some changes to the dfa module. * @@ -23,8 +27,8 @@ #include "dfap.h" #include "imalloc.h" -#define DFA_CHUNK 200 -#define TRAN_CHUNK 800 +#define DFA_CHUNK 40 +#define TRAN_CHUNK 100 int init_DFA_states (struct DFA_states **dfasp, SetType st, int hash) { @@ -67,7 +71,8 @@ int rm_DFA_states (struct DFA_states **dfasp) struct DFA_trans *tm, *tm1; assert (dfas); - ifree (dfas->hasharray); + if (dfas->hasharray) + ifree (dfas->hasharray); ifree (dfas->sortarray); for (tm=dfas->transmem; tm; tm=tm1) @@ -95,6 +100,7 @@ int add_DFA_state (struct DFA_states *dfas, Set *s, struct DFA_state **sp) assert (dfas); assert (*s); + assert (dfas->hasharray); sip = dfas->hasharray + (hash_Set (dfas->st, *s) % dfas->hash); for (si = *sip; si; si=si->link) if (eq_Set (dfas->st, si->set, *s)) @@ -184,4 +190,6 @@ void sort_DFA_states (struct DFA_states *dfas) imalloc (sizeof(struct DFA_state *)*dfas->no); for (s = dfas->marked; s; s=s->next) dfas->sortarray[s->no] = s; + ifree (dfas->hasharray); + dfas->hasharray = NULL; } -- 1.7.10.4