From e27ce02d4d96ac2b8220134c837c53cfef8eba23 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 8 Jan 1996 09:09:16 +0000 Subject: [PATCH] Function dfa_parse got 'const' string argument. New functions to define char mappings made public. --- dfa/agrep.c | 24 +++++- dfa/dfa.c | 231 ++++++++++++++++++++++++++++++++++---------------------- dfa/dfap.h | 8 +- dfa/grepper.c | 8 +- dfa/readfile.c | 16 ++-- 5 files changed, 186 insertions(+), 101 deletions(-) diff --git a/dfa/agrep.c b/dfa/agrep.c index 405104d..119f7ed 100644 --- a/dfa/agrep.c +++ b/dfa/agrep.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: agrep.c,v $ - * Revision 1.7 1995-10-16 09:31:24 adam + * Revision 1.8 1996-01-08 09:09:16 adam + * Function dfa_parse got 'const' string argument. + * New functions to define char mappings made public. + * + * Revision 1.7 1995/10/16 09:31:24 adam * Bug fix. * * Revision 1.6 1995/09/28 09:18:51 adam @@ -174,6 +178,7 @@ struct DFA_state **dfaar; char *p; int i; unsigned char c; + int start_line = 1; while (1) { @@ -183,7 +188,8 @@ struct DFA_state **dfaar; p = inf_ptr; do { - if ((s = dfaar[t->to])->rule_no) + if ((s = dfaar[t->to])->rule_no && + (start_line || s->rule_nno)) { inf_ptr = prline (inf_ptr); c = '\n'; @@ -200,6 +206,7 @@ struct DFA_state **dfaar; } if (c == '\n') { + start_line = 1; ++line_no; if (inf_ptr == inf_flsh) { @@ -213,6 +220,8 @@ struct DFA_state **dfaar; } } } + else + start_line = 0; } return 0; } @@ -238,12 +247,21 @@ int main (argc, argv) int argc; char **argv; { - char *pattern = NULL; + const char *pattern = NULL; char outbuf[BUFSIZ]; int fd, i, no = 0; struct DFA *dfa = dfa_init(); prog = *argv; + if (argc < 2) + { + fprintf (stderr, "usage: agrep [options] pattern file..\n"); + fprintf (stderr, " -v dfa verbose\n"); + fprintf (stderr, " -n show lines\n"); + fprintf (stderr, " -d debug\n"); + fprintf (stderr, " -V show version\n"); + exit (1); + } setbuf (stdout, outbuf); i = agrep_options (argc, argv); if (i) diff --git a/dfa/dfa.c b/dfa/dfa.c index 0a61093..2553249 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.9 1995-12-06 12:24:58 adam + * Revision 1.10 1996-01-08 09:09:17 adam + * Function dfa_parse got 'const' string argument. + * New functions to define char mappings made public. + * + * Revision 1.9 1995/12/06 12:24:58 adam * Removed verbatim mode code. * * Revision 1.8 1995/12/06 09:09:58 adam @@ -84,7 +88,6 @@ static struct DFA_parse *parse_info = NULL; static int err_code; static int inside_string; static const unsigned char *expr_ptr; -static unsigned short *ctrl_chars; static struct Tnode **posar; static SetType poset; @@ -267,58 +270,12 @@ static struct Tnode *expr_4 (void) return t1; } -static void do_parse (struct DFA_parse *dfap, char **s, - const unsigned short *cc, struct Tnode **tnp) +static void do_parse (struct DFA_parse *dfap, const char **s, struct Tnode **tnp) { - int i; int anchor_flag = 0; + int start_anchor_flag = 0; struct Tnode *t1, *t2, *tn; - for (i=0; cc[i]; i +=2) - ; - ctrl_chars = imalloc ((i+2) * sizeof(*ctrl_chars)); - for (i=0; (ctrl_chars[i] = cc[i]); i ++) - switch (cc[++i]) - { - case '(': - ctrl_chars[i] = L_LP; - break; - case ')': - ctrl_chars[i] = L_RP; - break; - case '.': - ctrl_chars[i] = L_ANY; - break; - case '|': - ctrl_chars[i] = L_ALT; - break; - case '?': - ctrl_chars[i] = L_QUEST; - break; - case '+': - ctrl_chars[i] = L_CLOS1; - break; - case '*': - ctrl_chars[i] = L_CLOS0; - break; - case '$': - ctrl_chars[i] = L_END; - break; - case '^': - ctrl_chars[i] = L_START; - break; - case '!': - ctrl_chars[i] = L_ANY; - break; - case '#': - ctrl_chars[i] = L_ANYZ; - break; - case '@': - ctrl_chars[i] = L_WILD; - break; - default: - ctrl_chars[i] = 0; - } parse_info = dfap; err_code = 0; expr_ptr = (unsigned char *) *s; @@ -327,10 +284,7 @@ static void do_parse (struct DFA_parse *dfap, char **s, lex (); if (lookahead == L_START) { - t2 = mk_Tnode (); - t2->pos = ++parse_info->position; - t2->u.ch[1] = t2->u.ch[0] = '\n'; - anchor_flag = 1; + start_anchor_flag = 1; lex (); } t1 = expr_1 (); @@ -362,7 +316,7 @@ static void do_parse (struct DFA_parse *dfap, char **s, t2 = mk_Tnode(); t2->pos = ++parse_info->position; t2->u.ch[0] = -(++parse_info->rule); - t2->u.ch[1] = anchor_flag; + t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule); *tnp = mk_Tnode(); (*tnp)->pos = CAT; @@ -382,13 +336,12 @@ static void do_parse (struct DFA_parse *dfap, char **s, } } *s = (char *) expr_ptr; - ifree (ctrl_chars); } static int nextchar (int *esc) { *esc = 0; - if (*expr_ptr == '\0' || isspace(*expr_ptr)) + if (*expr_ptr == '\0') return 0; else if (*expr_ptr != '\\') return *expr_ptr++; @@ -471,32 +424,9 @@ static int read_charset (void) return L_CHARS; } -unsigned short dfa_thompson_chars[] = -{ - '*', '*', - '+', '+', - '|', '|', - '^', '^', - '$', '$', - '?', '?', - '.', '.', - '(', '(', - ')', ')', - 0 -}; - -unsigned short dfa_ccl_chars[] = -{ - '#', '#', - '!', '!', - '?', '@', - 0 -}; - static int lex_sub(void) { int esc; - const unsigned short *cc; while ((look_ch = nextchar (&esc)) != 0) if (look_ch == '\"') { @@ -508,13 +438,18 @@ static int lex_sub(void) return L_CHAR; else if (look_ch == '[') return read_charset(); - else if (look_ch == ' ') - return 0; else { - for (cc = ctrl_chars; *cc; cc += 2) + const int *cc; + if (look_ch == '/') + logf (LOG_DEBUG, "xxxx / xxx"); + for (cc = parse_info->charMap; *cc; cc += 2) if (*cc == look_ch) + { + if (!cc[1]) + --expr_ptr; return cc[1]; + } return L_CHAR; } return 0; @@ -741,7 +676,7 @@ static void dfa_trav (struct Tnode *n) n->lastpos = add_Set (poset, n->lastpos, n->pos); if (debug_dfa_trav) if (n->u.ch[0] < 0) - printf ("#%d", -n->u.ch[0]); + printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]); else if (n->u.ch[1] > n->u.ch[0]) { putchar ('['); @@ -775,7 +710,7 @@ static void init_followpos (void) static void mk_dfa_tran (struct DFA_states *dfas) { - int i, c; + int i, j, c; int max_char; short *pos, *pos_i; Set tran_set; @@ -793,14 +728,21 @@ static void mk_dfa_tran (struct DFA_states *dfas) while ((dfa_from = get_DFA_state (dfas))) { pos_i = pos; - i = 0; + j = i = 0; for (tran_set = dfa_from->set; tran_set; tran_set = tran_set->next) if ((c = posar[tran_set->value]->u.ch[0]) >= 0 && c <= max_char) *pos_i++ = tran_set->value; - else if (c < 0 && (i == 0 || c > i)) - i = c; + else if (c < 0) + { + if (i == 0 || c > i) + i = c; + c = posar[tran_set->value]->u.ch[1]; + if (j == 0 || c > j) + j = c; + } *pos_i = -1; dfa_from->rule_no = -i; + dfa_from->rule_nno = -j; for (char_1 = 0; char_1 <= max_char; char_1++) { @@ -852,6 +794,9 @@ static void pr_tran (struct DFA_states *dfas) printf ("DFA state %d", no); if (s->rule_no) printf ("#%d", s->rule_no); + if (s->rule_nno) + printf (" #%d", s->rule_nno); + putchar (':'); pr_Set (poset, s->set); prev_no = -1; @@ -915,6 +860,106 @@ static void pr_followpos (void) putchar ('\n'); } +void dfa_parse_cmap_clean (struct DFA *d) +{ + struct DFA_parse *dfa = d->parse_info; + + assert (dfa); + if (!dfa->charMap) + { + dfa->charMapSize = 7; + dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap)); + } + dfa->charMap[0] = 0; +} + +void dfa_parse_cmap_new (struct DFA *d, const int *cmap) +{ + struct DFA_parse *dfa = d->parse_info; + const int *cp; + int size; + + assert (dfa); + for (cp = cmap; *cp; cp += 2) + ; + size = cp - cmap + 1; + if (size > dfa->charMapSize) + { + if (dfa->charMap) + ifree (dfa->charMap); + dfa->charMapSize = size; + dfa->charMap = imalloc (size * sizeof(*dfa->charMap)); + } + memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap)); +} + +void dfa_parse_cmap_del (struct DFA *d, int from) +{ + struct DFA_parse *dfa = d->parse_info; + int *cc; + + assert (dfa); + for (cc = dfa->charMap; *cc; cc += 2) + if (*cc == from) + { + while ((cc[0] = cc[2])) + { + cc[1] = cc[3]; + cc += 2; + } + break; + } +} + +void dfa_parse_cmap_add (struct DFA *d, int from, int to) +{ + struct DFA_parse *dfa = d->parse_info; + int *cc; + int indx, size; + + assert (dfa); + for (cc = dfa->charMap; *cc; cc += 2) + if (*cc == from) + { + cc[1] = to; + return ; + } + indx = cc - dfa->charMap; + size = dfa->charMapSize; + if (indx >= size) + { + int *cn = imalloc ((size+16) * sizeof(*dfa->charMap)); + memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap)); + ifree (dfa->charMap); + dfa->charMap = cn; + dfa->charMapSize = size+16; + } + dfa->charMap[indx] = from; + dfa->charMap[indx+1] = to; + dfa->charMap[indx+2] = 0; +} + +void dfa_parse_cmap_thompson (struct DFA *d) +{ + static int thompson_chars[] = + { + '*', L_CLOS0, + '+', L_CLOS1, + '|', L_ALT, + '^', L_START, + '$', L_END, + '?', L_QUEST, + '.', L_ANY, + '(', L_LP, + ')', L_RP, + ' ', 0, + '\t', 0, + '\n', 0, + 0 + }; + dfa_parse_cmap_new (d, thompson_chars); +} + static struct DFA_parse *dfa_parse_init (void) { parse_info = (struct DFA_parse *) imalloc (sizeof (struct DFA_parse)); @@ -929,6 +974,8 @@ static struct DFA_parse *dfa_parse_init (void) add_BSet (parse_info->charset, parse_info->anyset, '\n'); com_BSet (parse_info->charset, parse_info->anyset); parse_info->use_Tnode = parse_info->max_Tnode = 0; + parse_info->charMap = NULL; + parse_info->charMapSize = 0; return parse_info; } @@ -938,6 +985,7 @@ static void rm_dfa_parse (struct DFA_parse **dfap) assert (parse_info); term_Tnode(); rm_BSetHandle (&parse_info->charset); + ifree (parse_info->charMap); ifree (parse_info); *dfap = NULL; } @@ -953,6 +1001,7 @@ static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk) poset = mk_SetType (poset_chunk); init_pos(); init_followpos(); + assert (parse_info->root); dfa_trav (parse_info->root); if (debug_dfa_followpos) @@ -977,15 +1026,17 @@ struct DFA *dfa_init (void) dfa->parse_info = dfa_parse_init (); dfa->state_info = NULL; dfa->states = NULL; + dfa_parse_cmap_thompson (dfa); return dfa; } -int dfa_parse (struct DFA *dfa, char **pattern) +int dfa_parse (struct DFA *dfa, const char **pattern) { struct Tnode *top; assert (dfa); - do_parse (dfa->parse_info, pattern, dfa_thompson_chars, &top); + assert (dfa->parse_info); + do_parse (dfa->parse_info, pattern, &top); if (err_code) return err_code; if ( !dfa->parse_info->root ) diff --git a/dfa/dfap.h b/dfa/dfap.h index b158d84..2b7dbda 100644 --- a/dfa/dfap.h +++ b/dfa/dfap.h @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dfap.h,v $ - * Revision 1.2 1995-01-25 11:30:50 adam + * Revision 1.3 1996-01-08 09:09:19 adam + * Function dfa_parse got 'const' string argument. + * New functions to define char mappings made public. + * + * Revision 1.2 1995/01/25 11:30:50 adam * Simple error reporting when parsing regular expressions. * Memory usage reduced. * @@ -29,6 +33,8 @@ struct DFA_parse { int max_Tnode; /* allocated Tnodes */ struct Tblock *start; /* start block of Tnodes */ struct Tblock *end; /* end block of Tnodes */ + int *charMap; + int charMapSize; }; typedef struct DFA_stateb_ { diff --git a/dfa/grepper.c b/dfa/grepper.c index 14bd7e9..cefb2f6 100644 --- a/dfa/grepper.c +++ b/dfa/grepper.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: grepper.c,v $ - * Revision 1.5 1995-09-04 12:33:26 adam + * Revision 1.6 1996-01-08 09:09:20 adam + * Function dfa_parse got 'const' string argument. + * New functions to define char mappings made public. + * + * Revision 1.5 1995/09/04 12:33:26 adam * Various cleanup. YAZ util used instead. * * Revision 1.4 1995/01/24 16:00:21 adam @@ -350,7 +354,7 @@ int main (int argc, char **argv) int ret; int range = 0; char *arg; - char *pattern = NULL; + const char *pattern = NULL; int no_files = 0; struct DFA *dfa = dfa_init(); diff --git a/dfa/readfile.c b/dfa/readfile.c index d28bbd2..9a83df3 100644 --- a/dfa/readfile.c +++ b/dfa/readfile.c @@ -4,7 +4,11 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: readfile.c,v $ - * Revision 1.5 1995-09-04 12:33:27 adam + * Revision 1.6 1996-01-08 09:09:21 adam + * Function dfa_parse got 'const' string argument. + * New functions to define char mappings made public. + * + * Revision 1.5 1995/09/04 12:33:27 adam * Various cleanup. YAZ util used instead. * * Revision 1.4 1995/01/25 11:30:51 adam @@ -88,6 +92,7 @@ static void read_defs (void) static void read_rules (struct DFA *dfa) { char *s; + const char *sc; int i; int no = 0; @@ -106,7 +111,8 @@ static void read_rules (struct DFA *dfa) /* preprocess regular expression */ prep (&s); /* now parse regular expression */ - i = dfa_parse (dfa, &s); + sc = s; + i = dfa_parse (dfa, &sc); if (i) { fprintf (stderr, "%s #%d: regular expression syntax error\n", @@ -121,9 +127,9 @@ static void read_rules (struct DFA *dfa) no++; fprintf (outf, "\tcase %d:\n#line %d\n\t\t", no, line_no); } - while (*s == '\t' || *s == ' ') - s++; - fputs (s, outf); + while (*sc == '\t' || *sc == ' ') + sc++; + fputs (sc, outf); } } fputs ("\tYY_BREAK\n\t}\n}\n", outf); -- 1.7.10.4