X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dfa%2Fdfa.c;h=0a610934f0c38326e92231fd5546062373006a23;hb=e39158d4c6147865c129d2524f1b910e4e7921ed;hp=0104852780d919fecee39ec7c8da834245be5af3;hpb=4461019c0de49358856cb1c84ca162395b247f16;p=idzebra-moved-to-github.git diff --git a/dfa/dfa.c b/dfa/dfa.c index 0104852..0a61093 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -4,7 +4,28 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.2 1995-01-25 11:30:50 adam + * Revision 1.9 1995-12-06 12:24:58 adam + * Removed verbatim mode code. + * + * Revision 1.8 1995/12/06 09:09:58 adam + * Work on left and right anchors. + * + * Revision 1.7 1995/11/27 09:23:02 adam + * New berbatim hook in regular expressions. "[]n ..". + * + * Revision 1.6 1995/10/16 09:31:25 adam + * Bug fix. + * + * Revision 1.5 1995/10/02 15:17:58 adam + * Bug fix in dfa_delete. + * + * Revision 1.4 1995/09/28 09:18:52 adam + * Removed various preprocessor defines. + * + * Revision 1.3 1995/09/04 12:33:26 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.2 1995/01/25 11:30:50 adam * Simple error reporting when parsing regular expressions. * Memory usage reduced. * @@ -20,7 +41,7 @@ #include #include -#include +#include #include "dfap.h" #include "imalloc.h" @@ -57,7 +78,6 @@ int debug_dfa_trav = 0; int debug_dfa_tran = 0; int debug_dfa_followpos = 0; int dfa_verbose = 0; -int yydebug = 0; static struct DFA_parse *parse_info = NULL; @@ -82,7 +102,6 @@ static void add_follow (Set lastpos, Set firstpos), dfa_trav (struct Tnode *n), init_followpos (void), - mk_dfa_tran (struct DFA_states *dfas), pr_tran (struct DFA_states *dfas), pr_verbose (struct DFA_states *dfas), pr_followpos (void), @@ -217,7 +236,7 @@ static struct Tnode *expr_4 (void) break; case L_CHAR: t1 = mk_Tnode(); - t1->pos = ++(parse_info->position); + t1->pos = ++parse_info->position; t1->u.ch[1] = t1->u.ch[0] = look_ch; lex (); break; @@ -248,14 +267,12 @@ static struct Tnode *expr_4 (void) return t1; } -static void do_parse (dfap, s, cc, tnp) -struct DFA_parse *dfap; -char **s; -const unsigned short *cc; -struct Tnode **tnp; +static void do_parse (struct DFA_parse *dfap, char **s, + const unsigned short *cc, struct Tnode **tnp) { int i; - struct Tnode *t1, *t2; + int anchor_flag = 0; + struct Tnode *t1, *t2, *tn; for (i=0; cc[i]; i +=2) ; @@ -308,12 +325,44 @@ struct Tnode **tnp; inside_string = 0; lex (); + if (lookahead == L_START) + { + t2 = mk_Tnode (); + t2->pos = ++parse_info->position; + t2->u.ch[1] = t2->u.ch[0] = '\n'; + anchor_flag = 1; + lex (); + } t1 = expr_1 (); - if (t1 && lookahead == 0) + if (anchor_flag) + { + tn = mk_Tnode (); + tn->pos = CAT; + tn->u.p[0] = t2; + tn->u.p[1] = t1; + t1 = tn; + } + if (lookahead == L_END && t1) + { + t2 = mk_Tnode (); + t2->pos = ++parse_info->position; + t2->u.ch[1] = t2->u.ch[0] = '\n'; + + tn = mk_Tnode (); + tn->pos = CAT; + tn->u.p[0] = t1; + tn->u.p[1] = t2; + t1 = tn; + + anchor_flag |= 2; + lex (); + } + if (lookahead == 0 && t1) { t2 = mk_Tnode(); t2->pos = ++parse_info->position; t2->u.ch[0] = -(++parse_info->rule); + t2->u.ch[1] = anchor_flag; *tnp = mk_Tnode(); (*tnp)->pos = CAT; @@ -348,9 +397,11 @@ static int nextchar (int *esc) { case '\r': case '\n': - case '\t': case '\0': return '\\'; + case '\t': + ++expr_ptr; + return ' '; case 'n': ++expr_ptr; return '\n'; @@ -459,7 +510,7 @@ static int lex_sub(void) return read_charset(); else if (look_ch == ' ') return 0; - else + else { for (cc = ctrl_chars; *cc; cc += 2) if (*cc == look_ch) @@ -755,27 +806,28 @@ static void mk_dfa_tran (struct DFA_states *dfas) { char_0 = max_char+1; for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) - if (posar[i]->u.ch[1] >= char_1) - if ((c=posar[i]->u.ch[0]) < char_0) - if (c < char_1) - char_0 = char_1; - else - char_0 = c; + if (posar[i]->u.ch[1] >= char_1 + && (c=posar[i]->u.ch[0]) < char_0) + if (c < char_1) + char_0 = char_1; + else + char_0 = c; - char_1 = max_char; if (char_0 > max_char) break; + + char_1 = max_char; + tran_set = mk_Set (poset); for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) - if ((c=posar[i]->u.ch[1]) >= char_0) - if (posar[i]->u.ch[0] <= char_0) - { - if (c < char_1) - char_1 = c; - tran_set = union_Set (poset, tran_set, followpos[i]); - } - else if (c <= char_1) - char_1 = c-1; + { + if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1) + char_1 = c - 1; /* forward chunk */ + else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1) + char_1 = c; /* backward chunk */ + if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0) + tran_set = union_Set (poset, tran_set, followpos[i]); + } if (tran_set) { add_DFA_state (dfas, &tran_set, &dfa_to); @@ -967,7 +1019,8 @@ void dfa_delete (struct DFA **dfap) assert (*dfap); if ((*dfap)->parse_info) rm_dfa_parse (&(*dfap)->parse_info); - rm_DFA_states (&(*dfap)->state_info); + if ((*dfap)->state_info) + rm_DFA_states (&(*dfap)->state_info); ifree (*dfap); *dfap = NULL; }