X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=include%2Fdfa.h;h=a0bb90f3be39c5818131684e364a9e0ae0705a92;hp=0d58314e8ea536e12449e1312d999e5b65d1a694;hb=3c5f6226f97612c0d6ac40591f600587c5ffa858;hpb=769e5c9b42bf87531296013fac0af819af9228ee diff --git a/include/dfa.h b/include/dfa.h index 0d58314..a0bb90f 100644 --- a/include/dfa.h +++ b/include/dfa.h @@ -1,10 +1,36 @@ /* - * Copyright (C) 1994, Index Data I/S + * Copyright (C) 1994-1999, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: dfa.h,v $ - * Revision 1.1 1994-09-26 10:17:43 adam + * Revision 1.9 1999-02-02 14:50:31 adam + * Updated WIN32 code specific sections. Changed header. + * + * Revision 1.8 1997/09/18 08:59:18 adam + * Extra generic handle for the character mapping routines. + * + * Revision 1.7 1997/09/05 15:29:59 adam + * Changed prototype for chr_map_input - added const. + * Added support for C++, headers uses extern "C" for public definitions. + * + * Revision 1.6 1996/06/04 10:20:10 adam + * Added support for character mapping. + * + * Revision 1.5 1996/01/08 09:09:48 adam + * Function dfa_parse got 'const' string argument. + * + * Revision 1.4 1995/01/25 11:31:04 adam + * Simple error reporting when parsing regular expressions. + * + * Revision 1.3 1995/01/24 16:01:30 adam + * Added -ansi to CFLAGS. + * New functions and change of data structures. + * + * Revision 1.2 1994/09/26 16:31:23 adam + * Minor changes. xmalloc declares xcalloc now. + * + * Revision 1.1 1994/09/26 10:17:43 adam * Dfa-module header files. * */ @@ -14,100 +40,52 @@ #include #include -#define CAT 16000 -#define OR 16001 -#define STAR 16002 -#define PLUS 16003 -#define EPSILON 16004 - -typedef struct Tnode_ { - union { - struct Tnode_ *p[2]; /* CAT,OR,STAR,PLUS (left,right) */ - short ch[2]; /* if ch[0] >= 0 then this Tnode represents */ - /* a character in range [ch[0]..ch[1]] */ - /* otherwise ch[0] represents */ - /* accepting no (-acceptno) */ - } u; - unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */ - unsigned nullable : 1; - Set firstpos; /* first positions */ - Set lastpos; /* last positions */ -} Tnode; -typedef struct Tblock_ { /* Tnode bucket (block) */ - struct Tblock_ *next; /* pointer to next bucket */ - Tnode *tarray; /* array of nodes in bucket */ -} Tblock; - -typedef struct { - Tnode *root; /* root of regular syntax tree */ - Tnode *top; /* regular tree top (returned by parse_dfa) */ - int position; /* no of positions so far */ - int rule; /* no of rules so far */ - BSetHandle *charset; /* character set type */ - BSet anyset; /* character recognized by `.' */ - int use_Tnode; /* used Tnodes */ - int max_Tnode; /* allocated Tnodes */ - Tblock *start; /* start block of Tnodes */ - Tblock *end; /* end block of Tnodes */ -} DFA; +#ifdef __cplusplus +extern "C" { +#endif -typedef struct { +struct DFA_tran { unsigned char ch[2]; /* transition on ch[0] <= c <= ch[1] to */ unsigned short to; /* this state */ -} DFA_tran; +}; -typedef struct DFA_trans_ { - struct DFA_trans_ *next; /* next DFA transition block */ - DFA_tran *tran_block; /* pointer to transitions */ +struct DFA_trans { + struct DFA_trans *next; /* next DFA transition block */ + struct DFA_tran *tran_block; /* pointer to transitions */ int ptr; /* index of next transition in tran_block */ int size; /* allocated size of tran_block */ -} DFA_trans; +}; -typedef struct DFA_state_ { - struct DFA_state_ *next; /* next entry in free/unmarked/marked list */ - struct DFA_state_ *link; /* link to next entry in hash chain */ - DFA_tran *trans; /* transition list */ +struct DFA_state { + struct DFA_state *next; /* next entry in free/unmarked/marked list */ + struct DFA_state *link; /* link to next entry in hash chain */ + struct DFA_tran *trans; /* transition list */ Set set; /* set of positions (important nfa states) */ short no; /* no of this state */ short tran_no; /* no of transitions to other states */ short rule_no; /* if non-zero, this holds accept rule no */ -} DFA_state; + short rule_nno; /* accept rule no - except start rules */ +}; -typedef struct DFA_stateb_ { - struct DFA_stateb_ *next; - DFA_state *state_block; -} DFA_stateb; +struct DFA { + int no_states; + struct DFA_state **states; + struct DFA_states *state_info; + struct DFA_parse *parse_info; +}; -typedef struct { - DFA_state *freelist; /* chain of unused (but allocated) DFA states */ - DFA_state *unmarked; /* chain of unmarked DFA states */ - DFA_state *marked; /* chain of marked DFA states */ - DFA_stateb *statemem; /* state memory */ - int no; /* no of states (unmarked+marked) */ - SetType st; /* Position set type */ - int hash; /* no hash entries in hasharray */ - DFA_state **hasharray; /* hash pointers */ - DFA_state **sortarray; /* sorted DFA states */ - DFA_trans *transmem; /* transition memory */ -} DFA_states; - -DFA * init_dfa (void); -void rm_dfa (DFA **dfap); -int parse_dfa (DFA *, char **, const unsigned short *); -DFA_states* mk_dfas (DFA *, int poset_chunk); -void rm_dfas (DFA_states **dfas); +struct DFA *dfa_init (void); +void dfa_set_cmap (struct DFA *dfa, void *vp, + const char **(*cmap)(void *vp, const char **from, int len)); +int dfa_parse (struct DFA *, const char **); +void dfa_mkstate (struct DFA *); +void dfa_delete (struct DFA **); -Tnode * mk_Tnode (void); -Tnode * mk_Tnode_cset (BSet charset); -void term_Tnode (void); - -int init_DFA_states (DFA_states **dfasp, SetType st, int hash); -int rm_DFA_states (DFA_states **dfasp); -int add_DFA_state (DFA_states *dfas, Set *s, DFA_state **sp); -DFA_state *get_DFA_state (DFA_states *dfas); -void sort_DFA_states (DFA_states *dfas); -void add_DFA_tran (DFA_states *, DFA_state *, int, int, int); +void dfa_parse_cmap_clean (struct DFA *d); +void dfa_parse_cmap_new (struct DFA *d, const int *cmap); +void dfa_parse_cmap_del (struct DFA *d, int from); +void dfa_parse_cmap_add (struct DFA *d, int from, int to); extern int debug_dfa_trav; extern int debug_dfa_tran; @@ -115,8 +93,29 @@ extern int debug_dfa_followpos; extern int dfa_verbose; extern unsigned short - thompson_chars[], - brief_chars[], - grep_chars[], - ccl_chars[]; + dfa_thompson_chars[], + dfa_ccl_chars[]; + +#define L_LP 1 +#define L_RP 2 +#define L_CHAR 3 +#define L_CHARS 4 +#define L_ANY 5 +#define L_ALT 6 +#define L_ANYZ 7 +#define L_WILD 8 +#define L_QUEST 9 +#define L_CLOS1 10 +#define L_CLOS0 11 +#define L_END 12 +#define L_START 13 + +#define DFA_ERR_SYNTAX 1 +#define DFA_ERR_LP 2 +#define DFA_ERR_RP 3 + +#ifdef __cplusplus +} +#endif + #endif