-/*
- * Copyright (C) 1994-1996, Index Data I/S
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: dfa.c,v $
- * Revision 1.15 1997-02-10 10:19:20 adam
- * Added facility for open character sets, eg [a-].
- *
- * Revision 1.14 1996/10/29 13:57:22 adam
- * Include of zebrautl.h instead of alexutil.h.
- *
- * Revision 1.13 1996/06/17 14:24:08 adam
- * Bug fix: read_charset didn't handle character mapping.
- *
- * Revision 1.12 1996/06/04 10:20:02 adam
- * Added support for character mapping.
- *
- * Revision 1.11 1996/01/08 19:15:24 adam
- * Allow single $ in expressions.
- *
- * Revision 1.10 1996/01/08 09:09:17 adam
- * Function dfa_parse got 'const' string argument.
- * New functions to define char mappings made public.
- *
- * Revision 1.9 1995/12/06 12:24:58 adam
- * Removed verbatim mode code.
- *
- * Revision 1.8 1995/12/06 09:09:58 adam
- * Work on left and right anchors.
- *
- * Revision 1.7 1995/11/27 09:23:02 adam
- * New berbatim hook in regular expressions. "[]n ..".
- *
- * Revision 1.6 1995/10/16 09:31:25 adam
- * Bug fix.
- *
- * Revision 1.5 1995/10/02 15:17:58 adam
- * Bug fix in dfa_delete.
- *
- * Revision 1.4 1995/09/28 09:18:52 adam
- * Removed various preprocessor defines.
- *
- * Revision 1.3 1995/09/04 12:33:26 adam
- * Various cleanup. YAZ util used instead.
- *
- * Revision 1.2 1995/01/25 11:30:50 adam
- * Simple error reporting when parsing regular expressions.
- * Memory usage reduced.
- *
- * Revision 1.1 1995/01/24 16:02:52 adam
- * New private header file in dfa module (dfap.h).
- * Module no longer uses yacc to parse regular expressions.
- *
- */
+/* $Id: dfa.c,v 1.33 2005-01-15 21:45:42 adam Exp $
+ Copyright (C) 1995-2005
+ Index Data ApS
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra. If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
#include <stdio.h>
#include <assert.h>
} u;
unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */
unsigned nullable : 1;
- Set firstpos; /* first positions */
- Set lastpos; /* last positions */
+ DFASet firstpos; /* first positions */
+ DFASet lastpos; /* last positions */
};
struct Tblock { /* Tnode bucket (block) */
int debug_dfa_followpos = 0;
int dfa_verbose = 0;
-static struct DFA_parse *parse_info = NULL;
-
-static int err_code;
-static int inside_string;
-static const unsigned char *expr_ptr;
-static struct Tnode **posar;
-
-static SetType poset;
-static Set *followpos;
-
-static struct Tnode *mk_Tnode (void);
-static struct Tnode *mk_Tnode_cset (BSet charset);
-static void term_Tnode (void);
+static struct Tnode *mk_Tnode (struct DFA_parse *parse_info);
+static struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info,
+ BSet charset);
+static void term_Tnode (struct DFA_parse *parse_info);
static void
- del_followpos (void),
- init_pos (void),
- del_pos (void),
- mk_dfa_tran (struct DFA_states *dfas),
- add_follow (Set lastpos, Set firstpos),
- dfa_trav (struct Tnode *n),
- init_followpos (void),
- pr_tran (struct DFA_states *dfas),
- pr_verbose (struct DFA_states *dfas),
- pr_followpos (void),
+ del_followpos (struct DFA_parse *parse_info),
+ init_pos (struct DFA_parse *parse_info),
+ del_pos (struct DFA_parse *parse_info),
+ mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
+ add_follow (struct DFA_parse *parse_info, DFASet lastpos, DFASet firstpos),
+ dfa_trav (struct DFA_parse *parse_info, struct Tnode *n),
+ init_followpos (struct DFA_parse *parse_info),
+ pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas),
+ pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas),
+ pr_followpos (struct DFA_parse *parse_info),
out_char (int c),
- lex (void);
+ lex (struct DFA_parse *parse_info);
static int
- nextchar (int *esc),
- read_charset (void);
+ nextchar (struct DFA_parse *parse_info, int *esc),
+ read_charset (struct DFA_parse *parse_info);
static const char
*str_char (unsigned c);
#define L_END 12
#define L_START 13
-static int lookahead;
-static unsigned look_ch;
-static BSet look_chars;
-static struct Tnode *expr_1 (void),
- *expr_2 (void),
- *expr_3 (void),
- *expr_4 (void);
+static struct Tnode *expr_1 (struct DFA_parse *parse_info),
+ *expr_2 (struct DFA_parse *parse_info),
+ *expr_3 (struct DFA_parse *parse_info),
+ *expr_4 (struct DFA_parse *parse_info);
-static struct Tnode *expr_1 (void)
+static struct Tnode *expr_1 (struct DFA_parse *parse_info)
{
struct Tnode *t1, *t2, *tn;
- if (!(t1 = expr_2 ()))
+ if (!(t1 = expr_2 (parse_info)))
return t1;
- while (lookahead == L_ALT)
+ while (parse_info->lookahead == L_ALT)
{
- lex ();
- if (!(t2 = expr_2 ()))
+ lex (parse_info);
+ if (!(t2 = expr_2 (parse_info)))
return t2;
- tn = mk_Tnode ();
+ tn = mk_Tnode (parse_info);
tn->pos = OR;
tn->u.p[0] = t1;
tn->u.p[1] = t2;
return t1;
}
-static struct Tnode *expr_2 (void)
+static struct Tnode *expr_2 (struct DFA_parse *parse_info)
{
struct Tnode *t1, *t2, *tn;
- if (!(t1 = expr_3 ()))
+ if (!(t1 = expr_3 (parse_info)))
return t1;
- while (lookahead == L_WILD ||
- lookahead == L_ANYZ ||
- lookahead == L_ANY ||
- lookahead == L_LP ||
- lookahead == L_CHAR ||
- lookahead == L_CHARS)
+ while (parse_info->lookahead == L_WILD ||
+ parse_info->lookahead == L_ANYZ ||
+ parse_info->lookahead == L_ANY ||
+ parse_info->lookahead == L_LP ||
+ parse_info->lookahead == L_CHAR ||
+ parse_info->lookahead == L_CHARS)
{
- if (!(t2 = expr_3 ()))
+ if (!(t2 = expr_3 (parse_info)))
return t2;
- tn = mk_Tnode ();
+ tn = mk_Tnode (parse_info);
tn->pos = CAT;
tn->u.p[0] = t1;
tn->u.p[1] = t2;
return t1;
}
-static struct Tnode *expr_3 (void)
+static struct Tnode *expr_3 (struct DFA_parse *parse_info)
{
struct Tnode *t1, *tn;
- if (!(t1 = expr_4 ()))
+ if (!(t1 = expr_4 (parse_info)))
return t1;
- if (lookahead == L_CLOS0)
+ if (parse_info->lookahead == L_CLOS0)
{
- lex ();
- tn = mk_Tnode ();
+ lex (parse_info);
+ tn = mk_Tnode (parse_info);
tn->pos = STAR;
tn->u.p[0] = t1;
t1 = tn;
}
- else if (lookahead == L_CLOS1)
+ else if (parse_info->lookahead == L_CLOS1)
{
- lex ();
- tn = mk_Tnode ();
+ lex (parse_info);
+ tn = mk_Tnode (parse_info);
tn->pos = PLUS;
tn->u.p[0] = t1;
t1 = tn;
}
- else if (lookahead == L_QUEST)
+ else if (parse_info->lookahead == L_QUEST)
{
- lex ();
- tn = mk_Tnode();
+ lex (parse_info);
+ tn = mk_Tnode(parse_info);
tn->pos = OR;
tn->u.p[0] = t1;
- tn->u.p[1] = mk_Tnode();
+ tn->u.p[1] = mk_Tnode(parse_info);
tn->u.p[1]->pos = EPSILON;
t1 = tn;
}
return t1;
}
-static struct Tnode *expr_4 (void)
+static struct Tnode *expr_4 (struct DFA_parse *parse_info)
{
struct Tnode *t1;
- switch (lookahead)
+ switch (parse_info->lookahead)
{
case L_LP:
- lex ();
- if (!(t1 = expr_1 ()))
+ lex (parse_info);
+ if (!(t1 = expr_1 (parse_info)))
return t1;
- if (lookahead == L_RP)
- lex ();
+ if (parse_info->lookahead == L_RP)
+ lex (parse_info);
else
return NULL;
break;
case L_CHAR:
- t1 = mk_Tnode();
+ t1 = mk_Tnode(parse_info);
t1->pos = ++parse_info->position;
- t1->u.ch[1] = t1->u.ch[0] = look_ch;
- lex ();
+ t1->u.ch[1] = t1->u.ch[0] = parse_info->look_ch;
+ lex (parse_info);
break;
case L_CHARS:
- t1 = mk_Tnode_cset (look_chars);
- lex ();
+ t1 = mk_Tnode_cset (parse_info, parse_info->look_chars);
+ lex (parse_info);
break;
case L_ANY:
- t1 = mk_Tnode_cset (parse_info->anyset);
- lex ();
+ t1 = mk_Tnode_cset (parse_info, parse_info->anyset);
+ lex (parse_info);
break;
case L_ANYZ:
- t1 = mk_Tnode();
+ t1 = mk_Tnode(parse_info);
t1->pos = OR;
- t1->u.p[0] = mk_Tnode_cset (parse_info->anyset);
- t1->u.p[1] = mk_Tnode();
+ t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
+ t1->u.p[1] = mk_Tnode(parse_info);
t1->u.p[1]->pos = EPSILON;
- lex ();
+ lex (parse_info);
break;
case L_WILD:
- t1 = mk_Tnode();
+ t1 = mk_Tnode(parse_info);
t1->pos = STAR;
- t1->u.p[0] = mk_Tnode_cset (parse_info->anyset);
- lex ();
+ t1->u.p[0] = mk_Tnode_cset (parse_info, parse_info->anyset);
+ lex (parse_info);
default:
t1 = NULL;
}
return t1;
}
-static void do_parse (struct DFA_parse *dfap, const char **s,
+static void do_parse (struct DFA_parse *parse_info, const char **s,
struct Tnode **tnp)
{
int start_anchor_flag = 0;
struct Tnode *t1, *t2, *tn;
- parse_info = dfap;
- err_code = 0;
- expr_ptr = (const unsigned char *) *s;
+ parse_info->err_code = 0;
+ parse_info->expr_ptr = * (const unsigned char **) s;
- inside_string = 0;
- lex ();
- if (lookahead == L_START)
+ parse_info->inside_string = 0;
+ lex (parse_info);
+ if (parse_info->lookahead == L_START)
{
start_anchor_flag = 1;
- lex ();
+ lex (parse_info);
}
- if (lookahead == L_END)
+ if (parse_info->lookahead == L_END)
{
- t1 = mk_Tnode ();
+ t1 = mk_Tnode (parse_info);
t1->pos = ++parse_info->position;
t1->u.ch[1] = t1->u.ch[0] = '\n';
- lex ();
+ lex (parse_info);
}
else
{
- t1 = expr_1 ();
- if (t1 && lookahead == L_END)
+ t1 = expr_1 (parse_info);
+ if (t1 && parse_info->lookahead == L_END)
{
- t2 = mk_Tnode ();
+ t2 = mk_Tnode (parse_info);
t2->pos = ++parse_info->position;
t2->u.ch[1] = t2->u.ch[0] = '\n';
- tn = mk_Tnode ();
+ tn = mk_Tnode (parse_info);
tn->pos = CAT;
tn->u.p[0] = t1;
tn->u.p[1] = t2;
t1 = tn;
- lex ();
+ lex (parse_info);
}
}
- if (t1 && lookahead == 0)
+ if (t1 && parse_info->lookahead == 0)
{
- t2 = mk_Tnode();
+ t2 = mk_Tnode(parse_info);
t2->pos = ++parse_info->position;
t2->u.ch[0] = -(++parse_info->rule);
t2->u.ch[1] = start_anchor_flag ? 0 : -(parse_info->rule);
- *tnp = mk_Tnode();
+ *tnp = mk_Tnode(parse_info);
(*tnp)->pos = CAT;
(*tnp)->u.p[0] = t1;
(*tnp)->u.p[1] = t2;
}
else
{
- if (!err_code)
+ if (!parse_info->err_code)
{
- if (lookahead == L_RP)
- err_code = DFA_ERR_RP;
- else if (lookahead == L_LP)
- err_code = DFA_ERR_LP;
+ if (parse_info->lookahead == L_RP)
+ parse_info->err_code = DFA_ERR_RP;
+ else if (parse_info->lookahead == L_LP)
+ parse_info->err_code = DFA_ERR_LP;
else
- err_code = DFA_ERR_SYNTAX;
+ parse_info->err_code = DFA_ERR_SYNTAX;
}
}
- *s = (const char *) expr_ptr;
+ *s = (const char *) parse_info->expr_ptr;
}
-static int nextchar (int *esc)
+static int nextchar (struct DFA_parse *parse_info, int *esc)
{
*esc = 0;
- if (*expr_ptr == '\0')
+ if (*parse_info->expr_ptr == '\0')
return 0;
- else if (*expr_ptr != '\\')
- return *expr_ptr++;
+ else if (*parse_info->expr_ptr != '\\')
+ return *parse_info->expr_ptr++;
*esc = 1;
- switch (*++expr_ptr)
+ switch (*++parse_info->expr_ptr)
{
case '\r':
case '\n':
case '\0':
return '\\';
case '\t':
- ++expr_ptr;
+ ++parse_info->expr_ptr;
return ' ';
case 'n':
- ++expr_ptr;
+ ++parse_info->expr_ptr;
return '\n';
case 't':
- ++expr_ptr;
+ ++parse_info->expr_ptr;
return '\t';
case 'r':
- ++expr_ptr;
+ ++parse_info->expr_ptr;
return '\r';
case 'f':
- ++expr_ptr;
+ ++parse_info->expr_ptr;
return '\f';
default:
- return *expr_ptr++;
+ return *parse_info->expr_ptr++;
}
}
-static int nextchar_set (int *esc)
+static int nextchar_set (struct DFA_parse *parse_info, int *esc)
{
- if (*expr_ptr == ' ')
+ if (*parse_info->expr_ptr == ' ')
{
*esc = 0;
- return *expr_ptr++;
+ return *parse_info->expr_ptr++;
}
- return nextchar (esc);
+ return nextchar (parse_info, esc);
}
-static int read_charset (void)
+static int read_charset (struct DFA_parse *parse_info)
{
int i, ch0, ch1, esc0, esc1, cc = 0;
- look_chars = mk_BSet (&parse_info->charset);
- res_BSet (parse_info->charset, look_chars);
+ parse_info->look_chars = mk_BSet (&parse_info->charset);
+ res_BSet (parse_info->charset, parse_info->look_chars);
- ch0 = nextchar_set (&esc0);
+ ch0 = nextchar_set (parse_info, &esc0);
if (!esc0 && ch0 == '^')
{
cc = 1;
- ch0 = nextchar_set (&esc0);
+ ch0 = nextchar_set (parse_info, &esc0);
}
while (ch0 != 0)
{
if (!esc0 && ch0 == ']')
break;
- if (parse_info->cmap)
- {
- char **mapto, mapfrom[2];
- const char *mcp = mapfrom;
- mapfrom[0] = ch0;
- mapto = (*parse_info->cmap)(&mcp, 1);
- assert (mapto);
- ch0 = mapto[0][0];
- }
- add_BSet (parse_info->charset, look_chars, ch0);
- ch1 = nextchar_set (&esc1);
+ if (!esc0 && ch0 == '-')
+ {
+ ch1 = ch0;
+ esc1 = esc0;
+ ch0 = 1;
+ add_BSet (parse_info->charset, parse_info->look_chars, ch0);
+ }
+ else
+ {
+ if (parse_info->cmap)
+ {
+ const char **mapto;
+ char mapfrom[2];
+ const char *mcp = mapfrom;
+ mapfrom[0] = ch0;
+ mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1);
+ assert (mapto);
+ ch0 = mapto[0][0];
+ }
+ add_BSet (parse_info->charset, parse_info->look_chars, ch0);
+ ch1 = nextchar_set (parse_info, &esc1);
+ }
if (!esc1 && ch1 == '-')
{
int open_range = 0;
- if ((ch1 = nextchar_set (&esc1)) == 0)
+ if ((ch1 = nextchar_set (parse_info, &esc1)) == 0)
break;
#if DFA_OPEN_RANGE
if (!esc1 && ch1 == ']')
#else
if (!esc1 && ch1 == ']')
{
- add_BSet (parse_info->charset, look_chars, '-');
+ add_BSet (parse_info->charset, parse_info->look_chars, '-');
break;
}
#endif
if (!open_range && parse_info->cmap)
{
- char **mapto, mapfrom[2];
+ const char **mapto;
+ char mapfrom[2];
const char *mcp = mapfrom;
mapfrom[0] = ch1;
- mapto = (*parse_info->cmap) (&mcp, 1);
+ mapto = (*parse_info->cmap) (parse_info->cmap_data, &mcp, 1);
assert (mapto);
ch1 = mapto[0][0];
}
for (i=ch0; ++i<=ch1;)
- add_BSet (parse_info->charset, look_chars, i);
+ add_BSet (parse_info->charset, parse_info->look_chars, i);
if (!open_range)
- ch0 = nextchar_set (&esc0);
+ ch0 = nextchar_set (parse_info, &esc0);
else
break;
}
}
}
if (cc)
- com_BSet (parse_info->charset, look_chars);
+ com_BSet (parse_info->charset, parse_info->look_chars);
return L_CHARS;
}
-static int map_l_char (void)
+static int map_l_char (struct DFA_parse *parse_info)
{
- char **mapto;
- const char *cp0 = (const char *) (expr_ptr-1);
+ const char **mapto;
+ const char *cp0 = (const char *) (parse_info->expr_ptr-1);
int i = 0, len = strlen(cp0);
if (cp0[0] == 1 && cp0[1])
{
- expr_ptr++;
- look_ch = cp0[1];
+ parse_info->expr_ptr++;
+ parse_info->look_ch = ((unsigned char *) cp0)[1];
return L_CHAR;
}
if (!parse_info->cmap)
return L_CHAR;
- mapto = (*parse_info->cmap) (&cp0, len);
+ mapto = (*parse_info->cmap) (parse_info->cmap_data, &cp0, len);
assert (mapto);
- expr_ptr = (const unsigned char *) cp0;
- look_ch = mapto[i][0];
- logf (LOG_DEBUG, "map from %c to %d", expr_ptr[-1], look_ch);
+ parse_info->expr_ptr = (const unsigned char *) cp0;
+ parse_info->look_ch = ((unsigned char **) mapto)[i][0];
+ yaz_log (YLOG_DEBUG, "map from %c to %d", parse_info->expr_ptr[-1], parse_info->look_ch);
return L_CHAR;
}
-static int lex_sub(void)
+static int lex_sub(struct DFA_parse *parse_info)
{
int esc;
- while ((look_ch = nextchar (&esc)) != 0)
- if (look_ch == '\"')
+ while ((parse_info->look_ch = nextchar (parse_info, &esc)) != 0)
+ if (parse_info->look_ch == '\"')
{
if (esc)
- return map_l_char ();
- inside_string = !inside_string;
+ return map_l_char (parse_info);
+ parse_info->inside_string = !parse_info->inside_string;
}
- else if (esc || inside_string)
- return map_l_char ();
- else if (look_ch == '[')
- return read_charset();
+ else if (esc || parse_info->inside_string)
+ return map_l_char (parse_info);
+ else if (parse_info->look_ch == '[')
+ return read_charset(parse_info);
else
{
const int *cc;
for (cc = parse_info->charMap; *cc; cc += 2)
- if (*cc == look_ch)
+ if (*cc == (int) (parse_info->look_ch))
{
if (!cc[1])
- --expr_ptr;
+ --parse_info->expr_ptr;
return cc[1];
}
- return map_l_char ();
+ return map_l_char (parse_info);
}
return 0;
}
-static void lex (void)
+static void lex (struct DFA_parse *parse_info)
{
- lookahead = lex_sub ();
+ parse_info->lookahead = lex_sub (parse_info);
}
static const char *str_char (unsigned c)
{
static char s[6];
s[0] = '\\';
- if (c < 32)
+ if (c < 32 || c >= 127)
switch (c)
{
case '\r':
printf ("%s", str_char (c));
}
-static void term_Tnode (void)
+static void term_Tnode (struct DFA_parse *parse_info)
{
struct Tblock *t, *t1;
for (t = parse_info->start; (t1 = t);)
}
}
-static struct Tnode *mk_Tnode (void)
+static struct Tnode *mk_Tnode (struct DFA_parse *parse_info)
{
struct Tblock *tnew;
return parse_info->end->tarray+(parse_info->use_Tnode++ % TADD);
}
-struct Tnode *mk_Tnode_cset (BSet charset)
+struct Tnode *mk_Tnode_cset (struct DFA_parse *parse_info, BSet charset)
{
- struct Tnode *tn1, *tn0 = mk_Tnode();
+ struct Tnode *tn1, *tn0 = mk_Tnode(parse_info);
int ch1, ch0 = trav_BSet (parse_info->charset, charset, 0);
if (ch0 == -1)
tn0->pos = EPSILON;
tn0->u.ch[1] = ch1-1;
while ((ch0=trav_BSet (parse_info->charset, charset, ch1)) != -1)
{
- tn1 = mk_Tnode();
+ tn1 = mk_Tnode(parse_info);
tn1->pos = OR;
tn1->u.p[0] = tn0;
tn0 = tn1;
- tn1 = tn0->u.p[1] = mk_Tnode();
+ tn1 = tn0->u.p[1] = mk_Tnode(parse_info);
tn1->u.ch[0] = ch0;
tn1->pos = ++(parse_info->position);
if ((ch1 = travi_BSet (parse_info->charset, charset,
return tn0;
}
-static void del_followpos (void)
+static void del_followpos (struct DFA_parse *parse_info)
{
- ifree (followpos);
+ ifree (parse_info->followpos);
}
-static void init_pos (void)
+static void init_pos (struct DFA_parse *parse_info)
{
- posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
+ parse_info->posar = (struct Tnode **) imalloc (sizeof(struct Tnode*)
* (1+parse_info->position));
}
-static void del_pos (void)
+static void del_pos (struct DFA_parse *parse_info)
{
- ifree (posar);
+ ifree (parse_info->posar);
}
-static void add_follow (Set lastpos, Set firstpos)
+static void add_follow (struct DFA_parse *parse_info,
+ DFASet lastpos, DFASet firstpos)
{
while (lastpos)
{
- followpos[lastpos->value] =
- union_Set (poset, followpos[lastpos->value], firstpos);
+ parse_info->followpos[lastpos->value] =
+ union_DFASet (parse_info->poset,
+ parse_info->followpos[lastpos->value], firstpos);
lastpos = lastpos->next;
}
}
-static void dfa_trav (struct Tnode *n)
+static void dfa_trav (struct DFA_parse *parse_info, struct Tnode *n)
{
+ struct Tnode **posar = parse_info->posar;
+ DFASetType poset = parse_info->poset;
+
switch (n->pos)
{
case CAT:
- dfa_trav (n->u.p[0]);
- dfa_trav (n->u.p[1]);
+ dfa_trav (parse_info, n->u.p[0]);
+ dfa_trav (parse_info, n->u.p[1]);
n->nullable = n->u.p[0]->nullable & n->u.p[1]->nullable;
- n->firstpos = mk_Set (poset);
- n->firstpos = union_Set (poset, n->firstpos, n->u.p[0]->firstpos);
+ n->firstpos = mk_DFASet (poset);
+ n->firstpos = union_DFASet (poset, n->firstpos, n->u.p[0]->firstpos);
if (n->u.p[0]->nullable)
- n->firstpos = union_Set (poset, n->firstpos, n->u.p[1]->firstpos);
- n->lastpos = mk_Set (poset);
- n->lastpos = union_Set (poset, n->lastpos, n->u.p[1]->lastpos);
+ n->firstpos = union_DFASet (poset, n->firstpos, n->u.p[1]->firstpos);
+ n->lastpos = mk_DFASet (poset);
+ n->lastpos = union_DFASet (poset, n->lastpos, n->u.p[1]->lastpos);
if (n->u.p[1]->nullable)
- n->lastpos = union_Set (poset, n->lastpos, n->u.p[0]->lastpos);
+ n->lastpos = union_DFASet (poset, n->lastpos, n->u.p[0]->lastpos);
- add_follow (n->u.p[0]->lastpos, n->u.p[1]->firstpos);
+ add_follow (parse_info, n->u.p[0]->lastpos, n->u.p[1]->firstpos);
- n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
- n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
- n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
- n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
+ n->u.p[0]->firstpos = rm_DFASet (poset, n->u.p[0]->firstpos);
+ n->u.p[0]->lastpos = rm_DFASet (poset, n->u.p[0]->lastpos);
+ n->u.p[1]->firstpos = rm_DFASet (poset, n->u.p[1]->firstpos);
+ n->u.p[1]->lastpos = rm_DFASet (poset, n->u.p[1]->lastpos);
if (debug_dfa_trav)
printf ("CAT");
break;
case OR:
- dfa_trav (n->u.p[0]);
- dfa_trav (n->u.p[1]);
+ dfa_trav (parse_info, n->u.p[0]);
+ dfa_trav (parse_info, n->u.p[1]);
n->nullable = n->u.p[0]->nullable | n->u.p[1]->nullable;
- n->firstpos = merge_Set (poset, n->u.p[0]->firstpos,
+ n->firstpos = merge_DFASet (poset, n->u.p[0]->firstpos,
n->u.p[1]->firstpos);
- n->lastpos = merge_Set (poset, n->u.p[0]->lastpos,
+ n->lastpos = merge_DFASet (poset, n->u.p[0]->lastpos,
n->u.p[1]->lastpos);
- n->u.p[0]->firstpos = rm_Set (poset, n->u.p[0]->firstpos);
- n->u.p[0]->lastpos = rm_Set (poset, n->u.p[0]->lastpos);
- n->u.p[1]->firstpos = rm_Set (poset, n->u.p[1]->firstpos);
- n->u.p[1]->lastpos = rm_Set (poset, n->u.p[1]->lastpos);
+ n->u.p[0]->firstpos = rm_DFASet (poset, n->u.p[0]->firstpos);
+ n->u.p[0]->lastpos = rm_DFASet (poset, n->u.p[0]->lastpos);
+ n->u.p[1]->firstpos = rm_DFASet (poset, n->u.p[1]->firstpos);
+ n->u.p[1]->lastpos = rm_DFASet (poset, n->u.p[1]->lastpos);
if (debug_dfa_trav)
printf ("OR");
break;
case PLUS:
- dfa_trav (n->u.p[0]);
+ dfa_trav (parse_info, n->u.p[0]);
n->nullable = n->u.p[0]->nullable;
n->firstpos = n->u.p[0]->firstpos;
n->lastpos = n->u.p[0]->lastpos;
- add_follow (n->lastpos, n->firstpos);
+ add_follow (parse_info, n->lastpos, n->firstpos);
if (debug_dfa_trav)
printf ("PLUS");
break;
case STAR:
- dfa_trav (n->u.p[0]);
+ dfa_trav (parse_info, n->u.p[0]);
n->nullable = 1;
n->firstpos = n->u.p[0]->firstpos;
n->lastpos = n->u.p[0]->lastpos;
- add_follow (n->lastpos, n->firstpos);
+ add_follow (parse_info, n->lastpos, n->firstpos);
if (debug_dfa_trav)
printf ("STAR");
break;
case EPSILON:
n->nullable = 1;
- n->lastpos = mk_Set (poset);
- n->firstpos = mk_Set (poset);
+ n->lastpos = mk_DFASet (poset);
+ n->firstpos = mk_DFASet (poset);
if (debug_dfa_trav)
printf ("EPSILON");
break;
default:
posar[n->pos] = n;
n->nullable = 0;
- n->firstpos = mk_Set (poset);
- n->firstpos = add_Set (poset, n->firstpos, n->pos);
- n->lastpos = mk_Set (poset);
- n->lastpos = add_Set (poset, n->lastpos, n->pos);
+ n->firstpos = mk_DFASet (poset);
+ n->firstpos = add_DFASet (poset, n->firstpos, n->pos);
+ n->lastpos = mk_DFASet (poset);
+ n->lastpos = add_DFASet (poset, n->lastpos, n->pos);
if (debug_dfa_trav)
+ {
if (n->u.ch[0] < 0)
printf ("#%d (n#%d)", -n->u.ch[0], -n->u.ch[1]);
else if (n->u.ch[1] > n->u.ch[0])
}
else
out_char (n->u.ch[0]);
+ }
}
if (debug_dfa_trav)
{
printf ("\n nullable : %c\n", n->nullable ? '1' : '0');
printf (" firstpos :");
- pr_Set (poset, n->firstpos);
+ pr_DFASet (poset, n->firstpos);
printf (" lastpos :");
- pr_Set (poset, n->lastpos);
+ pr_DFASet (poset, n->lastpos);
}
}
-static void init_followpos (void)
+static void init_followpos (struct DFA_parse *parse_info)
{
- Set *fa;
+ DFASet *fa;
int i;
- followpos = fa = (Set *) imalloc (sizeof(Set) * (1+parse_info->position));
+ parse_info->followpos = fa =
+ (DFASet *) imalloc (sizeof(DFASet) * (1+parse_info->position));
for (i = parse_info->position+1; --i >= 0; fa++)
- *fa = mk_Set (poset);
+ *fa = mk_DFASet (parse_info->poset);
}
-static void mk_dfa_tran (struct DFA_states *dfas)
+static void mk_dfa_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
{
int i, j, c;
int max_char;
short *pos, *pos_i;
- Set tran_set;
+ DFASet tran_set;
int char_0, char_1;
struct DFA_state *dfa_from, *dfa_to;
+ struct Tnode **posar;
+ DFASetType poset;
+ DFASet *followpos;
assert (parse_info);
+
+ posar = parse_info->posar;
max_char = parse_info->charset->size;
pos = (short *) imalloc (sizeof(*pos) * (parse_info->position+1));
- tran_set = cp_Set (poset, parse_info->root->firstpos);
+ tran_set = cp_DFASet (parse_info->poset, parse_info->root->firstpos);
i = add_DFA_state (dfas, &tran_set, &dfa_from);
assert (i);
dfa_from->rule_no = 0;
+ poset = parse_info->poset;
+ followpos = parse_info->followpos;
while ((dfa_from = get_DFA_state (dfas)))
{
pos_i = pos;
for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
if (posar[i]->u.ch[1] >= char_1
&& (c=posar[i]->u.ch[0]) < char_0)
+ {
if (c < char_1)
char_0 = char_1;
else
char_0 = c;
+ }
if (char_0 > max_char)
break;
char_1 = max_char;
- tran_set = mk_Set (poset);
+ tran_set = mk_DFASet (poset);
for (pos_i = pos; (i = *pos_i) != -1; ++pos_i)
{
if ((c=posar[i]->u.ch[0]) > char_0 && c <= char_1)
else if ((c=posar[i]->u.ch[1]) >= char_0 && c < char_1)
char_1 = c; /* backward chunk */
if (posar[i]->u.ch[1] >= char_0 && posar[i]->u.ch[0] <= char_0)
- tran_set = union_Set (poset, tran_set, followpos[i]);
+ tran_set = union_DFASet (poset, tran_set, followpos[i]);
}
if (tran_set)
{
sort_DFA_states (dfas);
}
-static void pr_tran (struct DFA_states *dfas)
+static void pr_tran (struct DFA_parse *parse_info, struct DFA_states *dfas)
{
struct DFA_state *s;
struct DFA_tran *tran;
printf (" #%d", s->rule_nno);
putchar (':');
- pr_Set (poset, s->set);
+ pr_DFASet (parse_info->poset, s->set);
prev_no = -1;
for (i=s->tran_no, tran=s->trans; --i >= 0; tran++)
{
}
}
-static void pr_verbose (struct DFA_states *dfas)
+static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas)
{
long i, j;
int k;
k = inf_BSetHandle (parse_info->charset, &i, &j);
printf ("%ld/%ld character sets, %d bytes each\n",
i/k, j/k, k*sizeof(BSetWord));
- k = inf_SetType (poset, &i, &j);
+ k = inf_DFASetType (parse_info->poset, &i, &j);
printf ("%ld/%ld poset items, %d bytes each\n", i, j, k);
printf ("%d DFA states\n", dfas->no);
}
-static void pr_followpos (void)
+static void pr_followpos (struct DFA_parse *parse_info)
{
+ struct Tnode **posar = parse_info->posar;
int i;
printf ("\nfollowsets:\n");
for (i=1; i <= parse_info->position; i++)
{
printf ("%3d:", i);
- pr_Set (poset, followpos[i]);
+ pr_DFASet (parse_info->poset, parse_info->followpos[i]);
putchar ('\t');
if (posar[i]->u.ch[0] < 0)
if (!dfa->charMap)
{
dfa->charMapSize = 7;
- dfa->charMap = imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
+ dfa->charMap = (int *)
+ imalloc (dfa->charMapSize * sizeof(*dfa->charMap));
}
dfa->charMap[0] = 0;
}
if (dfa->charMap)
ifree (dfa->charMap);
dfa->charMapSize = size;
- dfa->charMap = imalloc (size * sizeof(*dfa->charMap));
+ dfa->charMap = (int *) imalloc (size * sizeof(*dfa->charMap));
}
memcpy (dfa->charMap, cmap, size * sizeof(*dfa->charMap));
}
size = dfa->charMapSize;
if (indx >= size)
{
- int *cn = imalloc ((size+16) * sizeof(*dfa->charMap));
+ int *cn = (int *) imalloc ((size+16) * sizeof(*dfa->charMap));
memcpy (cn, dfa->charMap, indx*sizeof(*dfa->charMap));
ifree (dfa->charMap);
dfa->charMap = cn;
static struct DFA_parse *dfa_parse_init (void)
{
- parse_info = (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
+ struct DFA_parse *parse_info =
+ (struct DFA_parse *) imalloc (sizeof (struct DFA_parse));
parse_info->charset = mk_BSetHandle (255, 20);
parse_info->position = 0;
parse_info->anyset = mk_BSet (&parse_info->charset);
res_BSet (parse_info->charset, parse_info->anyset);
- add_BSet (parse_info->charset, parse_info->anyset, '\n');
com_BSet (parse_info->charset, parse_info->anyset);
parse_info->use_Tnode = parse_info->max_Tnode = 0;
+ parse_info->start = parse_info->end = NULL;
parse_info->charMap = NULL;
parse_info->charMapSize = 0;
parse_info->cmap = NULL;
static void rm_dfa_parse (struct DFA_parse **dfap)
{
- parse_info = *dfap;
+ struct DFA_parse *parse_info = *dfap;
assert (parse_info);
- term_Tnode();
+ term_Tnode(parse_info);
rm_BSetHandle (&parse_info->charset);
ifree (parse_info->charMap);
ifree (parse_info);
static struct DFA_states *mk_dfas (struct DFA_parse *dfap, int poset_chunk)
{
struct DFA_states *dfas;
+ struct DFA_parse *parse_info = dfap;
assert (poset_chunk > 10);
assert (dfap);
- parse_info = dfap;
- poset = mk_SetType (poset_chunk);
- init_pos();
- init_followpos();
+ parse_info->poset = mk_DFASetType (poset_chunk);
+ init_pos(parse_info);
+ init_followpos(parse_info);
assert (parse_info->root);
- dfa_trav (parse_info->root);
+ dfa_trav (parse_info, parse_info->root);
if (debug_dfa_followpos)
- pr_followpos();
- init_DFA_states (&dfas, poset, STATE_HASH);
- mk_dfa_tran (dfas);
+ pr_followpos(parse_info);
+ init_DFA_states (&dfas, parse_info->poset, (int) (STATE_HASH));
+ mk_dfa_tran (parse_info, dfas);
if (debug_dfa_tran)
- pr_tran (dfas);
+ {
+ pr_tran (parse_info, dfas);
+ }
if (dfa_verbose)
- pr_verbose (dfas);
- del_pos();
- del_followpos();
- rm_SetType (poset);
+ pr_verbose (parse_info, dfas);
+ del_pos(parse_info);
+ del_followpos(parse_info);
+ rm_DFASetType (parse_info->poset);
return dfas;
}
{
struct DFA *dfa;
- dfa = imalloc (sizeof(*dfa));
+ dfa = (struct DFA *) imalloc (sizeof(*dfa));
dfa->parse_info = dfa_parse_init ();
dfa->state_info = NULL;
dfa->states = NULL;
return dfa;
}
-void dfa_set_cmap (struct DFA *dfa, char **(*cmap)(const char **from, int len))
+void dfa_set_cmap (struct DFA *dfa, void *vp,
+ const char **(*cmap)(void *vp, const char **from, int len))
{
dfa->parse_info->cmap = cmap;
+ dfa->parse_info->cmap_data = vp;
}
int dfa_parse (struct DFA *dfa, const char **pattern)
{
struct Tnode *top;
+ struct DFA_parse *parse_info;
assert (dfa);
assert (dfa->parse_info);
- do_parse (dfa->parse_info, pattern, &top);
- if (err_code)
- return err_code;
+ parse_info = dfa->parse_info;
+
+ if (!parse_info->cmap)
+ {
+ res_BSet (parse_info->charset, parse_info->anyset);
+ add_BSet (parse_info->charset, parse_info->anyset, '\n');
+ com_BSet (parse_info->charset, parse_info->anyset);
+ }
+ do_parse (parse_info, pattern, &top);
+ if (parse_info->err_code)
+ return parse_info->err_code;
if ( !dfa->parse_info->root )
dfa->parse_info->root = top;
else
{
struct Tnode *n;
- n = mk_Tnode ();
+ n = mk_Tnode (parse_info);
n->pos = OR;
n->u.p[0] = dfa->parse_info->root;
n->u.p[1] = top;