From ead74d0c3b9d76204494553c61854812eb69bbc7 Mon Sep 17 00:00:00 2001 From: Adam Dickmeiss Date: Mon, 26 Sep 1994 10:16:52 +0000 Subject: [PATCH] First version of dfa module in alex. This version uses yacc to parse regular expressions. This should be hand-made instead. --- dfa/Makefile | 58 +++++++++++ dfa/agrep.c | 295 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ dfa/bset.c | 264 ++++++++++++++++++++++++++++++++++++++++++++++++++ dfa/imalloc.c | 121 +++++++++++++++++++++++ dfa/imalloc.h | 37 +++++++ dfa/lexer.c | 145 ++++++++++++++++++++++++++++ dfa/lexer.h | 18 ++++ dfa/readfile.c | 164 +++++++++++++++++++++++++++++++ dfa/set.c | 244 ++++++++++++++++++++++++++++++++++++++++++++++ dfa/states.c | 176 +++++++++++++++++++++++++++++++++ 10 files changed, 1522 insertions(+) create mode 100644 dfa/Makefile create mode 100644 dfa/agrep.c create mode 100644 dfa/bset.c create mode 100644 dfa/imalloc.c create mode 100644 dfa/imalloc.h create mode 100644 dfa/lexer.c create mode 100644 dfa/lexer.h create mode 100644 dfa/readfile.c create mode 100644 dfa/set.c create mode 100644 dfa/states.c diff --git a/dfa/Makefile b/dfa/Makefile new file mode 100644 index 0000000..d1b05b8 --- /dev/null +++ b/dfa/Makefile @@ -0,0 +1,58 @@ +# Copyright (C) 1994, Index Data I/S +# All rights reserved. +# Sebastian Hammer, Adam Dickmeiss +# $Id: Makefile,v 1.1 1994-09-26 10:16:52 adam Exp $ + +SHELL=/bin/sh +INCLUDE=-I../include +TPROG1=agrep +TPROG2=lexer +CFLAGS=-g -Wall -pedantic +DEFS=$(INCLUDE) -DYACC -DYYDEBUG=1 -DMEMDEBUG=1 +LIB=../lib/dfa.a +PO = regexp.o imalloc.o states.o set.o bset.o +CPP=cc -E + +all: $(LIB) + +$(TPROG1): $(TPROG1).o $(LIB) + $(CC) $(CFLAGS) -o $(TPROG1) $(TPROG1).o $(LIB) ../lib/util.a + +$(TPROG2): $(TPROG2).o readfile.o $(LIB) + $(CC) $(CFLAGS) -o $(TPROG2) $(TPROG2).o readfile.o \ + $(LIB) ../lib/util.a + +$(LIB): $(PO) + rm -f $(LIB) + ar qc $(LIB) $(PO) + ranlib $(LIB) + +.c.o: + $(CC) -c $(DEFS) $(CFLAGS) $< + +.y.o: + $(YACC) $(YFLAGS) $< + $(CC) -c $(DEFS) $(CFLAGS) y.tab.c + mv y.tab.o $*.o + +clean: + rm -f *.[oa] $(TPROG1) $(TPROG2) core mon.out gmon.out errlist y.tab.c + +depend: depend2 + +depend1: + mv Makefile Makefile.tmp + $(YACC) $(YFLAGS) regexp.y + sed '/^#Depend/q' Makefile + $(CPP) -M $(DEFS) *.c |sed 's/y\.tab\.o/regexp.o/g' >>Makefile + -rm Makefile.tmp + +depend2: + $(YACC) $(YFLAGS) regexp.y + $(CPP) -M $(DEFS) *.c |sed 's/y\.tab\.o/regexp.o/g' >.depend + +ifeq (.depend,$(wildcard .depend)) +include .depend +endif + +#Depend --- DOT NOT DELETE THIS LINE diff --git a/dfa/agrep.c b/dfa/agrep.c new file mode 100644 index 0000000..758c865 --- /dev/null +++ b/dfa/agrep.c @@ -0,0 +1,295 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: agrep.c,v $ + * Revision 1.1 1994-09-26 10:16:52 adam + * First version of dfa module in alex. This version uses yacc to parse + * regular expressions. This should be hand-made instead. + * + */ +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +#include +#include "imalloc.h" +#include "dfa.h" + +#ifndef O_BINARY +#define O_BINARY 0 +#endif + +static char *prog; + +void error( const char *format, ... ) +{ + va_list argptr; + va_start( argptr, format ); + fprintf( stderr, "%s error: ", prog ); + (void) vfprintf( stderr, format, argptr ); + putc( '\n', stderr ); + exit( 1 ); +} + +#ifdef YYDEBUG +#ifdef YACC +extern int yydebug; +#else +extern int alexdebug; +#endif +#endif + +static int show_lines = 0; + +int agrep_options( argc, argv ) +int argc; +char **argv; +{ + while( --argc > 0 ) + if( **++argv == '-' ) + while( *++*argv ) + { + switch( **argv ) + { +#ifdef __STDC__ + case 'V': + fprintf( stderr, "%s: %s %s\n", prog, __DATE__, __TIME__ ); + continue; +#endif + case 'v': + dfa_verbose = 1; + continue; + case 'n': + show_lines = 1; + continue; +#ifdef YYDEBUG + case 't': +#ifdef YACC + yydebug = 1; +#else + alexdebug = 1; +#endif + continue; +#endif + case 'd': + switch( *++*argv ) + { + case 's': + debug_dfa_tran = 1; + break; + case 't': + debug_dfa_trav = 1; + break; + case 'f': + debug_dfa_followpos = 1; + break; + default: + --*argv; + debug_dfa_tran = 1; + debug_dfa_followpos = 1; + debug_dfa_trav = 1; + } + continue; + default: + fprintf( stderr, "%s: unknown option `-%s'\n", prog, *argv ); + return 1; + } + break; + } + return 0; +} + +#define INF_BUF_SIZE 32768U +static char *inf_buf; +static char *inf_ptr, *inf_flsh; +static int inf_eof, line_no; + +static int inf_flush( fd ) +int fd; +{ + char *p; + unsigned b, r; + + r = (unsigned) (inf_buf+INF_BUF_SIZE - inf_ptr); /* no of `wrap' bytes */ + if( r ) + memcpy( inf_buf, inf_ptr, r ); + inf_ptr = p = inf_buf + r; + b = INF_BUF_SIZE - r; + do + if( (r = read( fd, p, b ) ) == (unsigned) -1 ) + return -1; + else if( r ) + p += r; + else + { + *p++ = '\n'; + inf_eof = 1; + break; + } + while( (b -= r) > 0 ); + while( p != inf_buf && *--p != '\n' ) + ; + while( p != inf_buf && *--p != '\n' ) + ; + inf_flsh = p+1; + return 0; +} + +static char *prline( p ) +char *p; +{ + char *p0; + + --p; + while( p != inf_buf && p[-1] != '\n' ) + --p; + p0 = p; + while( *p++ != '\n' ) + ; + p[-1] = '\0'; + if( show_lines ) + printf( "%5d:\t%s\n", line_no, p0 ); + else + puts( p0 ); + p[-1] = '\n'; + return p; +} + +static int go( fd, dfaar ) +int fd; +DFA_state **dfaar; +{ + DFA_state *s = dfaar[0]; + DFA_tran *t; + char *p; + int i; + unsigned char c; + + while( 1 ) + { + for( c = *inf_ptr++, t=s->trans, i=s->tran_no; --i >= 0; t++ ) + if( c >= t->ch[0] && c <= t->ch[1] ) + { + p = inf_ptr; + do + { + if( (s = dfaar[t->to] )->rule_no ) + { + inf_ptr = prline( inf_ptr ); + c = '\n'; + break; + } + for( t=s->trans, i=s->tran_no; --i >= 0; t++ ) + if( (unsigned) *p >= t->ch[0] + && (unsigned) *p <= t->ch[1] ) + break; + p++; + } while( i >= 0 ); + s = dfaar[0]; + break; + } + if( c == '\n' ) + { + ++line_no; + if( inf_ptr == inf_flsh ) + { + if( inf_eof ) + break; + ++line_no; + if( inf_flush( fd ) ) + { + fprintf( stderr, "%s: read error\n", prog ); + return -1; + } + } + } + } + return 0; +} + +int agrep( dfas, fd ) +DFA_states *dfas; +int fd; +{ + inf_buf = imalloc( sizeof(char)*INF_BUF_SIZE ); + inf_eof = 0; + inf_ptr = inf_buf+INF_BUF_SIZE; + inf_flush( fd ); + line_no = 1; + + go( fd, dfas->sortarray); + + ifree( inf_buf ); + return 0; +} + + +int main( argc, argv ) +int argc; +char **argv; +{ + char *pattern = NULL; + char outbuf[BUFSIZ]; + int fd, i, no = 0; + DFA *dfa = init_dfa(); + DFA_states *dfas; + + prog = *argv; +#ifdef YYDEBUG +#ifdef YACC + yydebug = 0; +#else + alexdebug = 0; +#endif +#endif + setbuf( stdout, outbuf ); + i = agrep_options( argc, argv ); + if( i ) + return i; + while( --argc > 0 ) + if( **++argv != '-' && **argv ) + if( !pattern ) + { + pattern = *argv; + i = parse_dfa( dfa, &pattern, grep_chars ); + if( i || *pattern ) + { + fprintf( stderr, "%s: illegal pattern\n", prog ); + return 1; + } + dfa->root = dfa->top; + dfas = mk_dfas( dfa, 200 ); + rm_dfa( &dfa ); + } + else + { + ++no; + fd = open( *argv, O_RDONLY | O_BINARY); + if( fd == -1 ) + { + fprintf( stderr, "%s: couldn't open `%s'\n", prog, *argv ); + return 1; + } + i = agrep( dfas, fd ); + close( fd ); + if( i ) + return i; + } + if( !no ) + { + fprintf( stderr, "%s: no files specified\n", prog ); + return 2; + } + fflush(stdout); + return 0; +} diff --git a/dfa/bset.c b/dfa/bset.c new file mode 100644 index 0000000..db02832 --- /dev/null +++ b/dfa/bset.c @@ -0,0 +1,264 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: bset.c,v $ + * Revision 1.1 1994-09-26 10:16:53 adam + * First version of dfa module in alex. This version uses yacc to parse + * regular expressions. This should be hand-made instead. + * + */ +#include +#include + +#include +#include + +#include +#include +#include "imalloc.h" + +#define GET_BIT(s,m) (s[(m)/(sizeof(BSetWord)*8)]&(1<<(m&(sizeof(BSetWord)*8-1)))) +#define SET_BIT(s,m) (s[(m)/(sizeof(BSetWord)*8)]|=(1<<(m&(sizeof(BSetWord)*8-1)))) + +BSetHandle *mk_BSetHandle (int size, int chunk) +{ + int wsize = 1+size/(sizeof(BSetWord)*8); + BSetHandle *sh; + + if( chunk <= 1 ) + chunk = 32; + sh = (BSetHandle *) imalloc( sizeof(BSetHandle) + + chunk*sizeof(BSetWord)*wsize ); + + sh->size = size; + sh->wsize = wsize; + sh->chunk = chunk * wsize; + sh->offset = 0; + sh->setchain = NULL; + return sh; +} + +void rm_BSetHandle (BSetHandle **shp) +{ + BSetHandle *sh, *sh1; + + assert( shp ); + sh = *shp; + assert( sh ); + while( sh ) + { + sh1 = sh->setchain; + ifree( sh ); + sh = sh1; + } +} + +int inf_BSetHandle (BSetHandle *sh, long *used, long *allocated) +{ + int wsize; + + assert( sh ); + *used = 0; + *allocated = 0; + wsize = sh->wsize; + do + { + *used += sh->offset; + *allocated += sh->chunk; + } while( (sh = sh->setchain) ); + return wsize; +} + +BSet mk_BSet (BSetHandle **shp) +{ + BSetHandle *sh, *sh1; + unsigned off; + assert( shp ); + sh = *shp; + assert( sh ); + + off = sh->offset; + if( (off + sh->wsize) > sh->chunk ) + { + sh1 = (BSetHandle *) imalloc( sizeof(BSetHandle ) + + sh->chunk*sizeof(BSetWord) ); + sh1->size = sh->size; + sh1->wsize = sh->wsize; + sh1->chunk = sh->chunk; + off = sh1->offset = 0; + sh1->setchain = sh; + sh = *shp = sh1; + } + sh->offset = off + sh->wsize; + return sh->setarray + off; +} + +void add_BSet (BSetHandle *sh, BSet dst, unsigned member) +{ + assert( dst ); + assert( sh ); + assert( member <= sh->size ); + SET_BIT(dst, member); +} + +unsigned test_BSet (BSetHandle *sh, BSet src, unsigned member) +{ + assert( src ); + assert( sh ); + assert( member <= sh->size ); + return GET_BIT( src , member) != 0; +} + +BSet cp_BSet (BSetHandle *sh, BSet dst, BSet src) +{ + assert( sh ); + assert( dst ); + assert( src ); + memcpy( dst, src, sh->wsize * sizeof(BSetWord)); + return dst; +} + +void res_BSet (BSetHandle *sh, BSet dst) +{ + assert( dst ); + memset( dst, 0, sh->wsize * sizeof(BSetWord)); +} + +void union_BSet (BSetHandle *sh, BSet dst, BSet src) +{ + int i; + assert( sh ); + assert( dst ); + assert( src ); + for( i=sh->wsize; --i >= 0; ) + *dst++ |= *src++; +} + +unsigned hash_BSet (BSetHandle *sh, BSet src) +{ + int i; + unsigned s = 0; + assert( sh ); + assert( src ); + for( i=sh->wsize; --i >= 0; ) + s += *src++; + return s; +} + +void com_BSet (BSetHandle *sh, BSet dst) +{ + int i; + assert( sh ); + assert( dst ); + for( i=sh->wsize; --i >= 0; dst++ ) + *dst = ~*dst; +} + +int eq_BSet (BSetHandle *sh, BSet dst, BSet src) +{ + int i; + assert( sh ); + assert( dst ); + assert( src ); + for( i=sh->wsize; --i >= 0; ) + if( *dst++ != *src++ ) + return 0; + return 1; +} + +int trav_BSet (BSetHandle *sh, BSet src, unsigned member) +{ + int i = sh->size - member; + BSetWord *sw = src+member/(sizeof(BSetWord)*8); + unsigned b = member & (sizeof(BSetWord)*8-1); + while(i >= 0) + if( b == 0 && *sw == 0 ) + { + member += sizeof(BSetWord)*8; + ++sw; + i -= sizeof(BSetWord)*8; + } + else if( *sw & (1<size - member; + BSetWord *sw = src+member/(sizeof(BSetWord)*8); + unsigned b = member & (sizeof(BSetWord)*8-1); + while(i >= 0) + if( b == 0 && *sw == (BSetWord) ~ 0 ) + { + member += sizeof(BSetWord)*8; + ++sw; + i -= sizeof(BSetWord)*8; + } + else if( (*sw & (1< +#include +#include + +#include +#include "imalloc.h" + +#ifdef MEMDEBUG +#define MAG1 0x8fe1 +#define MAG2 0x91 +#define MAG3 0xee + +long alloc = 0L; +long max_alloc = 0L; +int alloc_calls = 0; +int free_calls = 0; +#endif + +void *imalloc (size_t size) +{ +#ifdef MEMDEBUG + size_t words = (4*sizeof(unsigned) -1 + size)/sizeof(unsigned); + char *p = (char *)malloc( words*sizeof(unsigned) ); + if( !p ) + log (LOG_FATAL, "No memory: imalloc(%u); c/f %d/%d; %ld/%ld", + size, alloc_calls, free_calls, alloc, max_alloc ); + *((unsigned *)p) = size; + ((unsigned *)p)[1] = MAG1; + p += sizeof(unsigned)*2; + size[(unsigned char *) p] = MAG2; + size[(unsigned char *) p+1] = MAG3; + if( (alloc+=size) > max_alloc ) + max_alloc = alloc; + ++alloc_calls; + return (void *) p; +#else + void *p = (void *)malloc( size ); + if( !p ) + log (LOG_FATAL, "Out of memory (imalloc)" ); + return p; +#endif +} + +void *icalloc (size_t size) +{ +#ifdef MEMDEBUG + unsigned words = (4*sizeof(unsigned) -1 + size)/sizeof(unsigned); + char *p = (char *) calloc( words*sizeof(unsigned), 1 ); + if( !p ) + log (LOG_FATAL, "No memory: icalloc(%u); c/f %d/%d; %ld/%ld", + size, alloc_calls, free_calls, alloc, max_alloc ); + ((unsigned *)p)[0] = size; + ((unsigned *)p)[1] = MAG1; + p += sizeof(unsigned)*2; + size[(unsigned char *) p] = MAG2; + size[(unsigned char *) p+1] = MAG3; + if( (alloc+=size) > max_alloc ) + max_alloc = alloc; + ++alloc_calls; + return (void *)p; +#else + void p = (void) calloc( size, 1 ); + if( !p ) + log (LOG_FATAL, "Out of memory (icalloc)" ); + return p; +#endif +} + +#ifdef MEMDEBUG +void i_free (void *p) +{ + size_t size; + if( !p ) + return; + ++free_calls; + size = (-2)[(unsigned *) p]; + if( (-1)[(unsigned *) p] != MAG1 ) + log (LOG_FATAL,"Internal: ifree(%u) magic 1 corrupted", size ); + if( size[(unsigned char *) p] != MAG2 ) + log (LOG_FATAL,"Internal: ifree(%u) magic 2 corrupted", size ); + if( (size+1)[(unsigned char *) p] != MAG3 ) + log (LOG_FATAL,"Internal: ifree(%u) magic 3 corrupted", size ); + alloc -= size; + if( alloc < 0L ) + log (LOG_FATAL,"Internal: ifree(%u) negative alloc.", size ); + free( (unsigned *) p-2 ); +} +#else +#ifndef ANSI +void i_free (void *p) +{ + if (p) + free( p ); +} +#endif +#endif + +#ifdef MEMDEBUG +void imemstat (void) +{ + fprintf( stdout, "imalloc: calls malloc/free %d/%d, ", + alloc_calls, free_calls ); + if( alloc ) + fprintf( stdout, "memory cur/max %ld/%ld : unreleased", + alloc, max_alloc ); + else + fprintf( stdout, "memory max %ld", max_alloc ); + fputc( '\n', stdout ); +} +#endif diff --git a/dfa/imalloc.h b/dfa/imalloc.h new file mode 100644 index 0000000..6f58960 --- /dev/null +++ b/dfa/imalloc.h @@ -0,0 +1,37 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: imalloc.h,v $ + * Revision 1.1 1994-09-26 10:16:54 adam + * First version of dfa module in alex. This version uses yacc to parse + * regular expressions. This should be hand-made instead. + * + */ + +void *imalloc (size_t); +void *icalloc (size_t); + +#ifdef MEMDEBUG +void i_free (void *); +void imemstat (void); + +extern long alloc; +extern long max_alloc; +extern int alloc_calls; +extern int free_calls; +#define ifree i_free + +#else + +#ifdef ANSI +#define ifree free +#else +void i_free (void *); +#define ifree i_free +#endif + +#endif + + diff --git a/dfa/lexer.c b/dfa/lexer.c new file mode 100644 index 0000000..98337d1 --- /dev/null +++ b/dfa/lexer.c @@ -0,0 +1,145 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: lexer.c,v $ + * Revision 1.1 1994-09-26 10:16:55 adam + * First version of dfa module in alex. This version uses yacc to parse + * regular expressions. This should be hand-made instead. + * + * + * Adam Dickmeiss. 1992-1993 + * This module is actually very old... + */ +#include +#include + +#include +#include +#include + +#include +#include +#include "imalloc.h" +#include "lexer.h" + +static char *prog; + + +void error( const char *format, ... ) +{ + va_list argptr; + va_start( argptr, format ); + fprintf( stderr, "%s error: ", prog ); + (void) vfprintf( stderr, format, argptr ); + putc( '\n', stderr ); + exit( 1 ); +} + +#ifdef YACC +extern int yydebug; +#else +extern int alexdebug; +#endif +int ccluse = 0; + +static int alex_options (int argc, char **argv); + +static int alex_options (int argc, char **argv) +{ + while( --argc > 0 ) + if( **++argv == '-' ) + while( *++*argv ) + { + switch( **argv ) + { +#ifdef __STDC__ + case 'V': + fprintf( stderr, "%s: %s %s\n", prog, __DATE__, __TIME__ ); + continue; +#endif + case 'v': + dfa_verbose = 1; + continue; + case 't': +#ifdef YACC + yydebug = 1; +#else + alexdebug = 1; +#endif + continue; + case 'c': + ccluse = 1; + continue; + case 'd': + switch( *++*argv ) + { + case 's': + debug_dfa_tran = 1; + break; + case 't': + debug_dfa_trav = 1; + break; + case 'f': + debug_dfa_followpos = 1; + break; + default: + --*argv; + debug_dfa_tran = 1; + debug_dfa_followpos = 1; + debug_dfa_trav = 1; + } + continue; + default: + fprintf( stderr, "%s: unknown option `-%s'\n", prog, *argv ); + return 1; + } + break; + } + return 0; +} + +int main (int argc, char **argv ) +{ + int i, no = 0; + DFA *dfa; + DFA_states *dfas; + + prog = *argv; +#ifdef YACC + yydebug = 0; +#else + alexdebug = 0; +#endif + i = alex_options( argc, argv ); + if( i ) + return i; + + if( argc < 2 ) + { + fprintf( stderr, "%s: usage %s -cVvt -d[stf]\n", prog, prog ); + return 1; + } + else while( --argc > 0 ) + if( **++argv != '-' && **argv ) + { + ++no; + i = read_file( *argv, &dfa ); + if( i ) + return i; + dfas = mk_dfas( dfa, 2000 ); + rm_dfa( &dfa ); + rm_dfas( &dfas ); + } +#ifdef MEMDEBUG + imemstat(); +#endif + if( !no ) + { + fprintf( stderr, "%s: no files specified\n", prog ); + return 2; + } + return 0; +} + diff --git a/dfa/lexer.h b/dfa/lexer.h new file mode 100644 index 0000000..1178063 --- /dev/null +++ b/dfa/lexer.h @@ -0,0 +1,18 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: lexer.h,v $ + * Revision 1.1 1994-09-26 10:16:55 adam + * First version of dfa module in alex. This version uses yacc to parse + * regular expressions. This should be hand-made instead. + * + */ + + +int read_file ( const char *, DFA ** ); +void error ( const char *, ... ); + +extern int ccluse; + diff --git a/dfa/readfile.c b/dfa/readfile.c new file mode 100644 index 0000000..cd3f6e4 --- /dev/null +++ b/dfa/readfile.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: readfile.c,v $ + * Revision 1.1 1994-09-26 10:16:56 adam + * First version of dfa module in alex. This version uses yacc to parse + * regular expressions. This should be hand-made instead. + * + */ +#include +#include + +#include +#include +#include + +#include +#include +#include "lexer.h" + +#define MAXLINE 512 + +static FILE *inf; +static FILE *outf; +static const char *inf_name; +static int line_no; +static int err_no; + +static void + prep (char **s), + read_defs (void), + read_rules (DFA **dfap), + read_tail (void); + +static char + *read_line (); + +static void prep (char **s) +{ + static char expr_buf[MAXLINE+1]; + char *dst = expr_buf; + const char *src = *s; + int c; + + while( (c = *src++) ) + *dst++ = c; + + *dst = '\0'; + *s = expr_buf; +} + +static char *read_line (void) +{ + static char linebuf[MAXLINE+1]; + ++line_no; + return fgets( linebuf, MAXLINE, inf ); +} + +static void read_defs (void) +{ + const char *s; + while( (s=read_line()) ) + { + if( *s == '%' && s[1] == '%' ) + return; + else if( *s == '\0' || isspace( *s ) ) + fputs( s, outf ); + } + error( "missing rule section" ); +} + +static void read_rules (DFA **dfap) +{ + char *s; + Tnode *n; + int i; + DFA *dfa; + + *dfap = dfa = init_dfa(); + fputs( "\n#ifndef YY_BREAK\n#define YY_BREAK break;\n#endif\n", outf ); + fputs( "void lexact( int no )\n{\n", outf ); + fputs( "\tswitch( no )\n\t{\n", outf ); + dfa->root = NULL; + while( (s=read_line()) ) + { + if( *s == '%' && s[1] == '%' ) + break; + else if( *s == '\0' || isspace( *s ) ) + /* copy rest of line to output */ + fputs( s, outf ); + else + { + /* preprocess regular expression */ + prep( &s ); + /* now parse regular expression */ + if (ccluse) + i = parse_dfa( dfa, &s, ccl_chars ); + else + i = parse_dfa( dfa, &s, thompson_chars ); + + if( dfa->rule > 1 ) + fputs( "\t\tYY_BREAK\n", outf ); + fprintf( outf, "\tcase %d:\n#line %d\n\t\t", dfa->rule, line_no ); + if( i ) + { + fprintf( stderr, "%s #%d: regular expression syntax error\n", + inf_name, line_no ); + err_no++; + } + else if( !dfa->root ) + dfa->root = dfa->top; + else + { + n = mk_Tnode(); + n->pos = OR; + n->u.p[0] = dfa->root; + n->u.p[1] = dfa->top; + dfa->root = n; + } + while( *s == '\t' || *s == ' ' ) + s++; + fputs( s, outf ); + } + } + fputs( "\tYY_BREAK\n\t}\n}\n", outf ); + if( !dfa->root ) + error( "no regular expressions in rule section" ); +} + +static void read_tail (void) +{ + const char *s; + while( (s=read_line()) ) + fputs( s, outf ); +} + +int read_file (const char *s, DFA **dfap) +{ + inf_name = s; + if( !(inf=fopen( s,"r" )) ) + { + error( "cannot open `%s'", s ); + return -1; + } + + if( !(outf=fopen( "lex.yy.c", "w" )) ) + { + error( "cannot open `%s'", "lex.yy.c" ); + return -2; + } + + line_no = 0; + err_no = 0; + + read_defs(); + read_rules( dfap ); + read_tail(); + + fclose( outf ); + fclose( inf ); + return err_no; +} diff --git a/dfa/set.c b/dfa/set.c new file mode 100644 index 0000000..f567f29 --- /dev/null +++ b/dfa/set.c @@ -0,0 +1,244 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: set.c,v $ + * Revision 1.1 1994-09-26 10:16:57 adam + * First version of dfa module in alex. This version uses yacc to parse + * regular expressions. This should be hand-made instead. + * + */ +#include +#include + +#include +#include + +#include +#include +#include "imalloc.h" + + +static void rm_SetElement (SetType st, SetElement *p); +static Set mk_SetElement (SetType st, int n); + +SetType mk_SetType (int chunk) +{ + SetType st; + + assert( chunk > 8 && chunk < 8000 ); + + st = (SetType) imalloc( sizeof(*st) ); + assert( st ); + + st->alloclist = st->freelist = NULL; + st->used = 0; + st->chunk = chunk; + return st; +} + +int inf_SetType (SetType st, long *used, long *allocated) +{ + Set s; + assert( st ); + *used = st->used; + *allocated = 0; + for( s = st->alloclist; s; s = s->next ) + *allocated += st->chunk; + return sizeof( SetElement ); +} + +SetType rm_SetType (SetType st) +{ + Set s, s1; + for( s = st->alloclist; (s1 = s); ) + { + s = s->next; + ifree( s1 ); + } + ifree( st ); + return NULL; +} + +Set mk_Set (SetType st) +{ + assert( st ); + return NULL; +} + +static Set mk_SetElement (SetType st, int n) +{ + Set s; + int i; + assert( st ); + + assert( st->chunk > 8 ); + if( ! st->freelist ) + { + s = (Set) imalloc( sizeof(*s) * (1+st->chunk) ); + assert( s ); + s->next = st->alloclist; + st->alloclist = s; + st->freelist = ++s; + for( i=st->chunk; --i > 0; s++ ) + s->next = s+1; + s->next = NULL; + } + st->used++; + s = st->freelist; + st->freelist = s->next; + s->value = n; + return s; +} + +static void rm_SetElement (SetType st, SetElement *p) +{ + assert( st ); + assert( st->used > 0 ); + p->next = st->freelist; + st->freelist = p; + st->used--; +} + +Set rm_Set (SetType st, Set s) +{ + Set s1 = s; + int i = 1; + + if( s ) + { + while( s->next ) + { + s = s->next; + ++i; + } + s->next = st->freelist; + st->freelist = s1; + st->used -= i; + assert( st->used >= 0 ); + } + return NULL; +} + +Set add_Set (SetType st, Set s, int n) +{ + SetElement dummy; + Set p = &dummy, new; + p->next = s; + while( p->next && p->next->value < n ) + p = p->next; + assert( p ); + if( !(p->next && p->next->value == n ) ) + { + new = mk_SetElement( st, n ); + new->next = p->next; + p->next = new; + } + return dummy.next; +} + +Set union_Set (SetType st, Set s1, Set s2) +{ + SetElement dummy; + Set p; + assert( st ); + + for( p = &dummy; s1 && s2; ) + if( s1->value < s2->value ) + { + p = p->next = s1; + s1 = s1->next; + } + else if( s1->value > s2->value ) + { + p = p->next = mk_SetElement( st, s2->value ); + s2 = s2->next; + } + else + { + p = p->next = s1; + s1 = s1->next; + s2 = s2->next; + } + if( s1 ) + p->next = s1; + else + { + while( s2 ) + { + p = p->next = mk_SetElement( st, s2->value ); + s2 = s2->next; + } + p->next = NULL; + } + return dummy.next; +} + +Set cp_Set (SetType st, Set s) +{ + return merge_Set( st, s, NULL ); +} + +Set merge_Set (SetType st, Set s1, Set s2) +{ + SetElement dummy; + Set p; + assert( st ); + for( p = &dummy; s1 && s2; p = p->next ) + if( s1->value < s2->value ) + { + p->next = mk_SetElement( st, s1->value ); + s1 = s1->next; + } + else if( s1->value > s2->value ) + { + p->next = mk_SetElement( st, s2->value ); + s2 = s2->next; + } + else + { + p->next = mk_SetElement( st, s1->value ); + s1 = s1->next; + s2 = s2->next; + } + if( !s1 ) + s1 = s2; + while( s1 ) + { + p = p->next = mk_SetElement( st, s1->value ); + s1 = s1->next; + } + p->next = NULL; + return dummy.next; +} + +void pr_Set (SetType st, Set s) +{ + assert( st ); + while( s ) + { + printf( " %d", s->value ); + s = s->next; + } + putchar( '\n' ); +} + +unsigned hash_Set (SetType st, Set s) +{ + unsigned n = 0; + while( s ) + { + n += 11*s->value; + s = s->next; + } + return n; +} + +int eq_Set (SetType st, Set s1, Set s2) +{ + for( ; s1 && s2; s1=s1->next, s2=s2->next ) + if( s1->value != s2->value ) + return 0; + return s1 == s2; +} diff --git a/dfa/states.c b/dfa/states.c new file mode 100644 index 0000000..8c450c2 --- /dev/null +++ b/dfa/states.c @@ -0,0 +1,176 @@ +/* + * Copyright (C) 1994, Index Data I/S + * All rights reserved. + * Sebastian Hammer, Adam Dickmeiss + * + * $Log: states.c,v $ + * Revision 1.1 1994-09-26 10:16:58 adam + * First version of dfa module in alex. This version uses yacc to parse + * regular expressions. This should be hand-made instead. + * + */ +#include +#include + +#include +#include + +#include +#include +#include "imalloc.h" + +#define DFA_CHUNK 200 +#define TRAN_CHUNK 800 + +int init_DFA_states (DFA_states **dfasp, SetType st, int hash) +{ + DFA_states *dfas; + DFA_trans *tm; + int i; + + dfas = (DFA_states *) imalloc( sizeof(DFA_states) ); + assert( dfas ); + dfas->hasharray = (DFA_state **) imalloc( sizeof(DFA_state*) * hash ); + assert( dfas->hasharray ); + *dfasp = dfas; + dfas->freelist = dfas->unmarked = dfas->marked = NULL; + dfas->statemem = NULL; + dfas->hash = hash; + dfas->st = st; + dfas->no = 0; + + dfas->transmem = tm = (DFA_trans *) imalloc( sizeof(DFA_trans) ); + assert( tm ); + tm->next = NULL; + tm->size = TRAN_CHUNK; + tm->ptr = 0; + tm->tran_block = (DFA_tran *) imalloc( sizeof(DFA_tran) * tm->size ); + assert( tm->tran_block ); + + dfas->sortarray = NULL; + for( i=0; ihash; i++ ) + dfas->hasharray[i] = NULL; + return 0; +} + +int rm_DFA_states (DFA_states **dfasp) +{ + DFA_states *dfas = *dfasp; + DFA_stateb *sm, *sm1; + DFA_trans *tm, *tm1; + + assert( dfas ); + ifree( dfas->hasharray ); + ifree( dfas->sortarray ); + + for( tm=dfas->transmem; tm; tm=tm1 ) + { + ifree( tm->tran_block ); + tm1 = tm->next; + ifree( tm ); + } + for( sm=dfas->statemem; sm; sm=sm1 ) + { + ifree( sm->state_block ); + sm1 = sm->next; + ifree( sm ); + } + ifree( dfas ); + *dfasp = NULL; + return 0; +} + +int add_DFA_state (DFA_states *dfas, Set *s, DFA_state **sp) +{ + int i; + DFA_state *si, **sip; + DFA_stateb *sb; + + assert( dfas ); + assert( *s ); + sip = dfas->hasharray + (hash_Set( dfas->st, *s ) % dfas->hash); + for( si = *sip; si; si=si->link ) + if( eq_Set( dfas->st, si->set, *s ) ) + { + *sp = si; + *s = rm_Set( dfas->st, *s ); + return 0; + } + if( !dfas->freelist ) + { + sb = (DFA_stateb *) imalloc( sizeof(*sb) ); + sb->next = dfas->statemem; + dfas->statemem = sb; + sb->state_block = si = dfas->freelist = + (DFA_state *) imalloc( sizeof(DFA_state)*DFA_CHUNK); + for( i = 0; inext = si+1; + si->next = NULL; + } + + si = dfas->freelist; + dfas->freelist = si->next; + + si->next = dfas->unmarked; + dfas->unmarked = si; + + si->link = *sip; + *sip = si; + + si->no = (dfas->no)++; + si->tran_no = 0; + si->set = *s; + *s = NULL; + *sp = si; + return 1; +} + +void add_DFA_tran (DFA_states *dfas, DFA_state *s, int ch0, int ch1, int to) +{ + DFA_trans *tm; + DFA_tran *t; + + tm = dfas->transmem; + if( tm->ptr == tm->size ) + { + tm = (DFA_trans *) imalloc( sizeof(DFA_trans) ); + assert( tm ); + tm->next = dfas->transmem; + dfas->transmem = tm; + tm->size = s->tran_no >= TRAN_CHUNK ? s->tran_no+8 : TRAN_CHUNK; + tm->tran_block = (DFA_tran *) imalloc( sizeof(DFA_tran) * tm->size ); + assert( tm->tran_block ); + if( s->tran_no ) + memcpy( tm->tran_block, s->trans, s->tran_no*sizeof( DFA_tran ) ); + tm->ptr = s->tran_no; + s->trans = tm->tran_block; + } + s->tran_no++; + t = tm->tran_block + tm->ptr++; + t->ch[0] = ch0; + t->ch[1] = ch1; + t->to = to; +} + +DFA_state *get_DFA_state (DFA_states *dfas) +{ + DFA_state *si; + assert( dfas ); + if( !(si = dfas->unmarked) ) + return NULL; + dfas->unmarked = si->next; + si->next = dfas->marked; + dfas->marked = si; + si->trans = dfas->transmem->tran_block + dfas->transmem->ptr; + + return si; +} + +void sort_DFA_states (DFA_states *dfas) +{ + DFA_state *s; + assert( dfas ); + dfas->sortarray = (DFA_state **) imalloc( sizeof(DFA_state *)*dfas->no ); + for( s = dfas->marked; s; s=s->next ) + dfas->sortarray[s->no] = s; +} -- 1.7.10.4