X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=dfa%2Fdfa.c;h=368d275a33f1f84968ea10896dc685aecb489254;hb=e76077754edc95fd0760dc2eaf7154b6b54d30a1;hp=0104852780d919fecee39ec7c8da834245be5af3;hpb=4461019c0de49358856cb1c84ca162395b247f16;p=idzebra-moved-to-github.git diff --git a/dfa/dfa.c b/dfa/dfa.c index 0104852..368d275 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -4,7 +4,22 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.c,v $ - * Revision 1.2 1995-01-25 11:30:50 adam + * Revision 1.7 1995-11-27 09:23:02 adam + * New berbatim hook in regular expressions. "[]n ..". + * + * Revision 1.6 1995/10/16 09:31:25 adam + * Bug fix. + * + * Revision 1.5 1995/10/02 15:17:58 adam + * Bug fix in dfa_delete. + * + * Revision 1.4 1995/09/28 09:18:52 adam + * Removed various preprocessor defines. + * + * Revision 1.3 1995/09/04 12:33:26 adam + * Various cleanup. YAZ util used instead. + * + * Revision 1.2 1995/01/25 11:30:50 adam * Simple error reporting when parsing regular expressions. * Memory usage reduced. * @@ -20,7 +35,7 @@ #include #include -#include +#include #include "dfap.h" #include "imalloc.h" @@ -57,13 +72,13 @@ int debug_dfa_trav = 0; int debug_dfa_tran = 0; int debug_dfa_followpos = 0; int dfa_verbose = 0; -int yydebug = 0; static struct DFA_parse *parse_info = NULL; static int err_code; static int inside_string; static const unsigned char *expr_ptr; +static int expr_verbatim; static unsigned short *ctrl_chars; static struct Tnode **posar; @@ -305,6 +320,7 @@ struct Tnode **tnp; parse_info = dfap; err_code = 0; expr_ptr = (unsigned char *) *s; + expr_verbatim = 0; inside_string = 0; lex (); @@ -341,8 +357,30 @@ static int nextchar (int *esc) *esc = 0; if (*expr_ptr == '\0' || isspace(*expr_ptr)) return 0; - else if (*expr_ptr != '\\') + else if (*expr_ptr != '\\' || expr_verbatim) + { + if (*expr_ptr == '[' && expr_ptr[1] == ']' && !expr_verbatim) + { + int i = 2; + int val = 0; + while (expr_ptr[i] >= '0' && expr_ptr[i] <= '9') + val = val*10 + expr_ptr[i++]-'0'; + if (i > 2) + { + if (expr_ptr[i] == ' ') + i++; + expr_verbatim = val; + expr_ptr += i; + } + } + if (expr_verbatim) + { + assert (expr_verbatim > 0); + *esc = 1; + --expr_verbatim; + } return *expr_ptr++; + } *esc = 1; switch (*++expr_ptr) { @@ -459,7 +497,7 @@ static int lex_sub(void) return read_charset(); else if (look_ch == ' ') return 0; - else + else { for (cc = ctrl_chars; *cc; cc += 2) if (*cc == look_ch) @@ -755,27 +793,28 @@ static void mk_dfa_tran (struct DFA_states *dfas) { char_0 = max_char+1; for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) - if (posar[i]->u.ch[1] >= char_1) - if ((c=posar[i]->u.ch[0]) < char_0) - if (c < char_1) - char_0 = char_1; - else - char_0 = c; + if (posar[i]->u.ch[1] >= char_1 + && (c=posar[i]->u.ch[0]) < char_0) + if (c < char_1) + char_0 = char_1; + else + char_0 = c; - char_1 = max_char; if (char_0 > max_char) break; + + char_1 = max_char; + tran_set = mk_Set (poset); for (pos_i = pos; (i = *pos_i) != -1; ++pos_i) - if ((c=posar[i]->u.ch[1]) >= char_0) - if (posar[i]->u.ch[0] <= char_0) - { - if (c < char_1) - char_1 = c; - tran_set = union_Set (poset, tran_set, followpos[i]); - } - else if (c <= char_1) - char_1 = c-1; + { + if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1) + char_1 = c - 1; /* forward chunk */ + else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1) + char_1 = c; /* backward chunk */ + if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0) + tran_set = union_Set (poset, tran_set, followpos[i]); + } if (tran_set) { add_DFA_state (dfas, &tran_set, &dfa_to); @@ -967,7 +1006,8 @@ void dfa_delete (struct DFA **dfap) assert (*dfap); if ((*dfap)->parse_info) rm_dfa_parse (&(*dfap)->parse_info); - rm_DFA_states (&(*dfap)->state_info); + if ((*dfap)->state_info) + rm_DFA_states (&(*dfap)->state_info); ifree (*dfap); *dfap = NULL; }