X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=include%2Fdfa.h;h=eceb0a6bf3bde0dfd72672cff43ec6e7ebbc80a3;hp=0d58314e8ea536e12449e1312d999e5b65d1a694;hb=e1be1f5267e2be257664ded166b6890e4f24db83;hpb=769e5c9b42bf87531296013fac0af819af9228ee diff --git a/include/dfa.h b/include/dfa.h index 0d58314..eceb0a6 100644 --- a/include/dfa.h +++ b/include/dfa.h @@ -1,113 +1,78 @@ -/* - * Copyright (C) 1994, Index Data I/S - * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: dfa.h,v $ - * Revision 1.1 1994-09-26 10:17:43 adam - * Dfa-module header files. - * - */ +/* $Id: dfa.h,v 1.16 2007-01-15 20:08:24 adam Exp $ + Copyright (C) 1995-2007 + Index Data ApS + +This file is part of the Zebra server. + +Zebra is free software; you can redistribute it and/or modify it under +the terms of the GNU General Public License as published by the Free +Software Foundation; either version 2, or (at your option) any later +version. + +Zebra is distributed in the hope that it will be useful, but WITHOUT ANY +WARRANTY; without even the implied warranty of MERCHANTABILITY or +FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License +for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + +*/ #ifndef DFA_H #define DFA_H #include -#include -#define CAT 16000 -#define OR 16001 -#define STAR 16002 -#define PLUS 16003 -#define EPSILON 16004 - -typedef struct Tnode_ { - union { - struct Tnode_ *p[2]; /* CAT,OR,STAR,PLUS (left,right) */ - short ch[2]; /* if ch[0] >= 0 then this Tnode represents */ - /* a character in range [ch[0]..ch[1]] */ - /* otherwise ch[0] represents */ - /* accepting no (-acceptno) */ - } u; - unsigned pos : 15; /* CAT/OR/STAR/EPSILON or non-neg. position */ - unsigned nullable : 1; - Set firstpos; /* first positions */ - Set lastpos; /* last positions */ -} Tnode; - -typedef struct Tblock_ { /* Tnode bucket (block) */ - struct Tblock_ *next; /* pointer to next bucket */ - Tnode *tarray; /* array of nodes in bucket */ -} Tblock; - -typedef struct { - Tnode *root; /* root of regular syntax tree */ - Tnode *top; /* regular tree top (returned by parse_dfa) */ - int position; /* no of positions so far */ - int rule; /* no of rules so far */ - BSetHandle *charset; /* character set type */ - BSet anyset; /* character recognized by `.' */ - int use_Tnode; /* used Tnodes */ - int max_Tnode; /* allocated Tnodes */ - Tblock *start; /* start block of Tnodes */ - Tblock *end; /* end block of Tnodes */ -} DFA; - -typedef struct { +#include + +#include + +YAZ_BEGIN_CDECL + +struct DFA_tran { unsigned char ch[2]; /* transition on ch[0] <= c <= ch[1] to */ unsigned short to; /* this state */ -} DFA_tran; +}; -typedef struct DFA_trans_ { - struct DFA_trans_ *next; /* next DFA transition block */ - DFA_tran *tran_block; /* pointer to transitions */ +struct DFA_trans { + struct DFA_trans *next; /* next DFA transition block */ + struct DFA_tran *tran_block; /* pointer to transitions */ int ptr; /* index of next transition in tran_block */ int size; /* allocated size of tran_block */ -} DFA_trans; +}; -typedef struct DFA_state_ { - struct DFA_state_ *next; /* next entry in free/unmarked/marked list */ - struct DFA_state_ *link; /* link to next entry in hash chain */ - DFA_tran *trans; /* transition list */ - Set set; /* set of positions (important nfa states) */ +struct DFA_state { + struct DFA_state *next; /* next entry in free/unmarked/marked list */ + struct DFA_state *link; /* link to next entry in hash chain */ + struct DFA_tran *trans; /* transition list */ + DFASet set; /* set of positions (important nfa states) */ short no; /* no of this state */ short tran_no; /* no of transitions to other states */ short rule_no; /* if non-zero, this holds accept rule no */ -} DFA_state; - -typedef struct DFA_stateb_ { - struct DFA_stateb_ *next; - DFA_state *state_block; -} DFA_stateb; - -typedef struct { - DFA_state *freelist; /* chain of unused (but allocated) DFA states */ - DFA_state *unmarked; /* chain of unmarked DFA states */ - DFA_state *marked; /* chain of marked DFA states */ - DFA_stateb *statemem; /* state memory */ - int no; /* no of states (unmarked+marked) */ - SetType st; /* Position set type */ - int hash; /* no hash entries in hasharray */ - DFA_state **hasharray; /* hash pointers */ - DFA_state **sortarray; /* sorted DFA states */ - DFA_trans *transmem; /* transition memory */ -} DFA_states; - -DFA * init_dfa (void); -void rm_dfa (DFA **dfap); -int parse_dfa (DFA *, char **, const unsigned short *); -DFA_states* mk_dfas (DFA *, int poset_chunk); -void rm_dfas (DFA_states **dfas); - -Tnode * mk_Tnode (void); -Tnode * mk_Tnode_cset (BSet charset); -void term_Tnode (void); - -int init_DFA_states (DFA_states **dfasp, SetType st, int hash); -int rm_DFA_states (DFA_states **dfasp); -int add_DFA_state (DFA_states *dfas, Set *s, DFA_state **sp); -DFA_state *get_DFA_state (DFA_states *dfas); -void sort_DFA_states (DFA_states *dfas); -void add_DFA_tran (DFA_states *, DFA_state *, int, int, int); + short rule_nno; /* accept rule no - except start rules */ +}; + +struct DFA { + int no_states; + struct DFA_state **states; + struct DFA_states *state_info; + struct DFA_parse *parse_info; +}; + +struct DFA *dfa_init (void); +void dfa_anyset_includes_nl(struct DFA *dfa); +void dfa_set_cmap (struct DFA *dfa, void *vp, + const char **(*cmap)(void *vp, const char **from, int len)); +int dfa_parse (struct DFA *, const char **); +void dfa_mkstate (struct DFA *); +void dfa_delete (struct DFA **); +int dfa_get_last_rule (struct DFA *); + +void dfa_parse_cmap_clean (struct DFA *d); +void dfa_parse_cmap_new (struct DFA *d, const int *cmap); +void dfa_parse_cmap_del (struct DFA *d, int from); +void dfa_parse_cmap_add (struct DFA *d, int from, int to); extern int debug_dfa_trav; extern int debug_dfa_tran; @@ -115,8 +80,35 @@ extern int debug_dfa_followpos; extern int dfa_verbose; extern unsigned short - thompson_chars[], - brief_chars[], - grep_chars[], - ccl_chars[]; + dfa_thompson_chars[], + dfa_ccl_chars[]; + +#define L_LP 1 +#define L_RP 2 +#define L_CHAR 3 +#define L_CHARS 4 +#define L_ANY 5 +#define L_ALT 6 +#define L_ANYZ 7 +#define L_WILD 8 +#define L_QUEST 9 +#define L_CLOS1 10 +#define L_CLOS0 11 +#define L_END 12 +#define L_START 13 + +#define DFA_ERR_SYNTAX 1 +#define DFA_ERR_LP 2 +#define DFA_ERR_RP 3 + +YAZ_END_CDECL + #endif +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ +