X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=dfa%2Fdfa.c;h=b20650f41de2fc9f4ce703ca2226a92e7e3c615d;hp=5c3d376a7ef0c9d0b643e4eb8b47628dcb8f5b21;hb=732870c555c7d32c5d2b6a4914c6fa7232eb26b2;hpb=deff57cfa9d9b39c4a4f1c9b82a64c6e61d821a4 diff --git a/dfa/dfa.c b/dfa/dfa.c index 5c3d376..b20650f 100644 --- a/dfa/dfa.c +++ b/dfa/dfa.c @@ -1,8 +1,5 @@ -/* $Id: dfa.c,v 1.34 2005-03-30 09:25:23 adam Exp $ - Copyright (C) 1995-2005 - Index Data ApS - -This file is part of the Zebra server. +/* This file is part of the Zebra server. + Copyright (C) 1994-2011 Index Data Zebra is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free @@ -15,12 +12,15 @@ FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with Zebra; see the file LICENSE.zebra. If not, write to the -Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA -02111-1307, USA. +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ +#if HAVE_CONFIG_H +#include +#endif #include #include @@ -32,8 +32,6 @@ Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA #include "dfap.h" #include "imalloc.h" -#define DFA_OPEN_RANGE 1 - #define CAT 16000 #define OR 16001 #define STAR 16002 @@ -357,7 +355,7 @@ static int nextchar_set (struct DFA_parse *parse_info, int *esc) static int read_charset (struct DFA_parse *parse_info) { - int i, ch0, ch1, esc0, esc1, cc = 0; + int i, ch0, esc0, cc = 0; parse_info->look_chars = mk_BSet (&parse_info->charset); res_BSet (parse_info->charset, parse_info->look_chars); @@ -367,8 +365,13 @@ static int read_charset (struct DFA_parse *parse_info) cc = 1; ch0 = nextchar_set (parse_info, &esc0); } + /** + ch0 is last met character + ch1 is "next" char + */ while (ch0 != 0) { + int ch1, esc1; if (!esc0 && ch0 == ']') break; if (!esc0 && ch0 == '-') @@ -380,16 +383,23 @@ static int read_charset (struct DFA_parse *parse_info) } else { - if (parse_info->cmap) - { - const char **mapto; - char mapfrom[2]; - const char *mcp = mapfrom; - mapfrom[0] = ch0; - mapto = (*parse_info->cmap)(parse_info->cmap_data, &mcp, 1); - assert (mapto); - ch0 = mapto[0][0]; - } + if (ch0 == 1) + { + ch0 = nextchar(parse_info, &esc0); + } + else + { + if (parse_info->cmap) + { + const char **mapto; + char mapfrom[2]; + const char *mcp = mapfrom; + mapfrom[0] = ch0; + mapto = parse_info->cmap(parse_info->cmap_data, &mcp, 1); + assert (mapto); + ch0 = mapto[0][0]; + } + } add_BSet (parse_info->charset, parse_info->look_chars, ch0); ch1 = nextchar_set (parse_info, &esc1); } @@ -398,20 +408,16 @@ static int read_charset (struct DFA_parse *parse_info) int open_range = 0; if ((ch1 = nextchar_set (parse_info, &esc1)) == 0) break; -#if DFA_OPEN_RANGE if (!esc1 && ch1 == ']') { ch1 = 255; open_range = 1; } -#else - if (!esc1 && ch1 == ']') + else if (ch1 == 1) { - add_BSet (parse_info->charset, parse_info->look_chars, '-'); - break; + ch1 = nextchar(parse_info, &esc1); } -#endif - if (!open_range && parse_info->cmap) + else if (parse_info->cmap) { const char **mapto; char mapfrom[2]; @@ -421,12 +427,12 @@ static int read_charset (struct DFA_parse *parse_info) assert (mapto); ch1 = mapto[0][0]; } - for (i=ch0; ++i<=ch1;) + for (i = ch0; ++i <= ch1;) add_BSet (parse_info->charset, parse_info->look_chars, i); - if (!open_range) - ch0 = nextchar_set (parse_info, &esc0); - else + + if (open_range) break; + ch0 = nextchar_set (parse_info, &esc0); } else { @@ -876,11 +882,11 @@ static void pr_verbose (struct DFA_parse *parse_info, struct DFA_states *dfas) { long i, j; int k; - printf ("%d/%d tree nodes used, %d bytes each\n", - parse_info->use_Tnode, parse_info->max_Tnode, sizeof (struct Tnode)); + printf ("%d/%d tree nodes used, %ld bytes each\n", + parse_info->use_Tnode, parse_info->max_Tnode, (long) sizeof (struct Tnode)); k = inf_BSetHandle (parse_info->charset, &i, &j); - printf ("%ld/%ld character sets, %d bytes each\n", - i/k, j/k, k*sizeof(BSetWord)); + printf ("%ld/%ld character sets, %ld bytes each\n", + i/k, j/k, (long) k*sizeof(BSetWord)); k = inf_DFASetType (parse_info->poset, &i, &j); printf ("%ld/%ld poset items, %d bytes each\n", i, j, k); printf ("%d DFA states\n", dfas->no); @@ -1026,9 +1032,12 @@ static struct DFA_parse *dfa_parse_init (void) parse_info->rule = 0; parse_info->root = NULL; + /* initialize the anyset which by default does not include \n */ parse_info->anyset = mk_BSet (&parse_info->charset); res_BSet (parse_info->charset, parse_info->anyset); + add_BSet (parse_info->charset, parse_info->anyset, '\n'); com_BSet (parse_info->charset, parse_info->anyset); + parse_info->use_Tnode = parse_info->max_Tnode = 0; parse_info->start = parse_info->end = NULL; parse_info->charMap = NULL; @@ -1090,6 +1099,11 @@ struct DFA *dfa_init (void) return dfa; } +void dfa_anyset_includes_nl(struct DFA *dfa) +{ + add_BSet (dfa->parse_info->charset, dfa->parse_info->anyset, '\n'); +} + void dfa_set_cmap (struct DFA *dfa, void *vp, const char **(*cmap)(void *vp, const char **from, int len)) { @@ -1097,6 +1111,11 @@ void dfa_set_cmap (struct DFA *dfa, void *vp, dfa->parse_info->cmap_data = vp; } +int dfa_get_last_rule (struct DFA *dfa) +{ + return dfa->parse_info->rule; +} + int dfa_parse (struct DFA *dfa, const char **pattern) { struct Tnode *top; @@ -1106,12 +1125,6 @@ int dfa_parse (struct DFA *dfa, const char **pattern) assert (dfa->parse_info); parse_info = dfa->parse_info; - if (!parse_info->cmap) - { - res_BSet (parse_info->charset, parse_info->anyset); - add_BSet (parse_info->charset, parse_info->anyset, '\n'); - com_BSet (parse_info->charset, parse_info->anyset); - } do_parse (parse_info, pattern, &top); if (parse_info->err_code) return parse_info->err_code; @@ -1151,3 +1164,12 @@ void dfa_delete (struct DFA **dfap) ifree (*dfap); *dfap = NULL; } +/* + * Local variables: + * c-basic-offset: 4 + * c-file-style: "Stroustrup" + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +