X-Git-Url: http://git.indexdata.com/?p=idzebra-moved-to-github.git;a=blobdiff_plain;f=recctrl%2Fregxread.c;h=74e1f5c7a7a6643e542ccc19fa3c2e58dcad656e;hp=17ce5d98a1d523d76b69314ceec664e001c15651;hb=4415da5dbbba04e50d4524347486d60113ed569c;hpb=77405f9b10e80d5c0d08db19b299e9bf8d96387b diff --git a/recctrl/regxread.c b/recctrl/regxread.c index 17ce5d9..74e1f5c 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,10 +1,52 @@ /* - * Copyright (C) 1994-1996, Index Data I/S + * Copyright (C) 1994-1999, Index Data * All rights reserved. * Sebastian Hammer, Adam Dickmeiss * * $Log: regxread.c,v $ - * Revision 1.10 1997-10-31 12:36:12 adam + * Revision 1.23 1999-05-20 12:57:18 adam + * Implemented TCL filter. Updated recctrl system. + * + * Revision 1.22 1998/11/03 16:07:13 adam + * Yet another fix. + * + * Revision 1.21 1998/11/03 15:43:39 adam + * Fixed bug introduced by previous commit. + * + * Revision 1.20 1998/11/03 14:51:28 adam + * Changed code so that it creates as few data1 nodes as possible. + * + * Revision 1.19 1998/11/03 10:22:39 adam + * Fixed memory leak that could occur for when large data1 node were + * concatenated. Data-type data1_nodes may have multiple nodes. + * + * Revision 1.18 1998/10/15 13:11:47 adam + * Added support for option -record for "end element". When specified + * end element will mark end-of-record when at outer-level. + * + * Revision 1.17 1998/07/01 10:13:51 adam + * Minor fix. + * + * Revision 1.16 1998/06/30 15:15:09 adam + * Tags are trimmed: white space removed before- and after the tag. + * + * Revision 1.15 1998/06/30 12:55:45 adam + * Bug fix. + * + * Revision 1.14 1998/03/05 08:41:00 adam + * Implemented rule contexts. + * + * Revision 1.13 1997/12/12 06:33:58 adam + * Fixed bug that showed up when multiple filter where used. + * Made one routine thread-safe. + * + * Revision 1.12 1997/11/18 10:03:24 adam + * Member num_children removed from data1_node. + * + * Revision 1.11 1997/11/06 11:41:01 adam + * Implemented "begin variant" for the sgml.regx filter. + * + * Revision 1.10 1997/10/31 12:36:12 adam * Minor change that avoids compiler warning. * * Revision 1.9 1997/09/29 09:02:49 adam @@ -119,12 +161,17 @@ #include #include #include +#include #include #include #include #include "grsread.h" +#if HAVE_TCL_H +#include +#endif + #define REGX_DEBUG 0 #define F_WIN_EOF 2000000000 @@ -136,6 +183,8 @@ #define REGX_BEGIN 3 #define REGX_END 4 #define REGX_CODE 5 +#define REGX_CONTEXT 6 +#define REGX_INIT 7 struct regxCode { char *str; @@ -163,40 +212,77 @@ struct lexRule { struct lexRule *next; }; -struct lexTrans { +struct lexContext { + char *name; struct DFA *dfa; struct lexRule *rules; struct lexRuleInfo **fastRule; int ruleNo; + int initFlag; + + struct lexRuleAction *beginActionList; + struct lexRuleAction *endActionList; + struct lexRuleAction *initActionList; + struct lexContext *next; +}; + +struct lexConcatBuf { + int len; + int max; + char *buf; }; struct lexSpec { char *name; - struct lexTrans trans; + struct lexContext *context; + + struct lexContext **context_stack; + int context_stack_size; + int context_stack_top; + int lineNo; NMEM m; data1_handle dh; +#if HAVE_TCL_H + Tcl_Interp *tcl_interp; +#endif void *f_win_fh; void (*f_win_ef)(void *, off_t); - int f_win_start; - int f_win_end; - int f_win_size; - char *f_win_buf; + int f_win_start; /* first byte of buffer is this file offset */ + int f_win_end; /* last byte of buffer is this offset - 1 */ + int f_win_size; /* size of buffer */ + char *f_win_buf; /* buffer itself */ int (*f_win_rf)(void *, char *, size_t); off_t (*f_win_sf)(void *, off_t); - struct lexRuleAction *beginActionList; - struct lexRuleAction *endActionList; + struct lexConcatBuf **concatBuf; + int maxLevel; + data1_node **d1_stack; + int d1_level; + int stop_flag; + + int *arg_start; + int *arg_end; + int arg_no; + int ptr; }; +struct lexSpecs { + struct lexSpec *spec; +}; static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos, int *size) { - int i, r, off; + int i, r, off = start_pos - spec->f_win_start; - if (start_pos < spec->f_win_start || start_pos >= spec->f_win_end) + if (off >= 0 && end_pos <= spec->f_win_end) + { + *size = end_pos - start_pos; + return spec->f_win_buf + off; + } + if (off < 0 || start_pos >= spec->f_win_end) { (*spec->f_win_sf)(spec->f_win_fh, start_pos); spec->f_win_start = start_pos; @@ -211,12 +297,6 @@ static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos, *size = end_pos - start_pos; return spec->f_win_buf; } - if (end_pos <= spec->f_win_end) - { - *size = end_pos - start_pos; - return spec->f_win_buf + (start_pos - spec->f_win_start); - } - off = start_pos - spec->f_win_start; for (i = 0; if_win_end - start_pos; i++) spec->f_win_buf[i] = spec->f_win_buf[i + off]; r = (*spec->f_win_rf)(spec->f_win_fh, @@ -282,22 +362,6 @@ static struct DFA *lexSpecDFA (void) return dfa; } -static struct lexSpec *lexSpecMk (const char *name) -{ - struct lexSpec *p; - - p = xmalloc (sizeof(*p)); - p->name = xmalloc (strlen(name)+1); - strcpy (p->name, name); - p->trans.dfa = lexSpecDFA (); - p->trans.rules = NULL; - p->trans.fastRule = NULL; - p->beginActionList = NULL; - p->endActionList = NULL; - p->f_win_buf = NULL; - return p; -} - static void actionListDel (struct lexRuleAction **rap) { struct lexRuleAction *ra1, *ra; @@ -319,26 +383,102 @@ static void actionListDel (struct lexRuleAction **rap) *rap = NULL; } -static void lexSpecDel (struct lexSpec **pp) +static struct lexContext *lexContextCreate (const char *name) +{ + struct lexContext *p = xmalloc (sizeof(*p)); + + p->name = xstrdup (name); + p->ruleNo = 1; + p->initFlag = 0; + p->dfa = lexSpecDFA (); + p->rules = NULL; + p->fastRule = NULL; + p->beginActionList = NULL; + p->endActionList = NULL; + p->initActionList = NULL; + p->next = NULL; + return p; +} + +static void lexContextDestroy (struct lexContext *p) { - struct lexSpec *p; struct lexRule *rp, *rp1; - assert (pp); - p = *pp; - if (!p) - return ; - dfa_delete (&p->trans.dfa); - xfree (p->name); - xfree (p->trans.fastRule); - for (rp = p->trans.rules; rp; rp = rp1) + xfree (p->fastRule); + for (rp = p->rules; rp; rp = rp1) { + rp1 = rp->next; actionListDel (&rp->info.actionList); xfree (rp); } actionListDel (&p->beginActionList); actionListDel (&p->endActionList); + xfree (p->name); + xfree (p); +} + +static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh) +{ + struct lexSpec *p; + int i; + + p = xmalloc (sizeof(*p)); + p->name = xmalloc (strlen(name)+1); + strcpy (p->name, name); + +#if HAVE_TCL_H + p->tcl_interp = 0; +#endif + p->dh = dh; + p->context = NULL; + p->context_stack_size = 100; + p->context_stack = xmalloc (sizeof(*p->context_stack) * + p->context_stack_size); + p->f_win_buf = NULL; + + p->maxLevel = 128; + p->concatBuf = xmalloc (sizeof(*p->concatBuf) * p->maxLevel); + for (i = 0; i < p->maxLevel; i++) + { + p->concatBuf[i] = xmalloc (sizeof(**p->concatBuf)); + p->concatBuf[i]->len = p->concatBuf[i]->max = 0; + p->concatBuf[i]->buf = 0; + } + p->d1_stack = xmalloc (sizeof(*p->d1_stack) * p->maxLevel); + p->d1_level = 0; + return p; +} + +static void lexSpecDestroy (struct lexSpec **pp) +{ + struct lexSpec *p; + struct lexContext *lt; + int i; + + assert (pp); + p = *pp; + if (!p) + return ; + + for (i = 0; i < p->maxLevel; i++) + xfree (p->concatBuf[i]); + xfree (p->concatBuf); + + lt = p->context; + while (lt) + { + struct lexContext *lt_next = lt->next; + lexContextDestroy (lt); + lt = lt_next; + } +#if HAVE_TCL_H + if (p->tcl_interp) + Tcl_DeleteInterp (p->tcl_interp); +#endif + xfree (p->name); xfree (p->f_win_buf); + xfree (p->context_stack); + xfree (p->d1_stack); xfree (p); *pp = NULL; } @@ -384,9 +524,8 @@ static int readParseToken (const char **cpp, int *len) cmd[i] = *cp + 'a' - 'A'; else break; - if (i > sizeof(cmd)-2) - break; - i++; + if (i < sizeof(cmd)-2) + i++; cp++; } cmd[i] = '\0'; @@ -406,6 +545,10 @@ static int readParseToken (const char **cpp, int *len) return REGX_END; else if (!strcmp (cmd, "body")) return REGX_BODY; + else if (!strcmp (cmd, "context")) + return REGX_CONTEXT; + else if (!strcmp (cmd, "init")) + return REGX_INIT; else { logf (LOG_WARN, "bad command %s", cmd); @@ -419,6 +562,7 @@ static int actionListMk (struct lexSpec *spec, const char *s, { int r, tok, len; int bodyMark = 0; + const char *s0; while ((tok = readParseToken (&s, &len))) { @@ -439,19 +583,23 @@ static int actionListMk (struct lexSpec *spec, const char *s, (*ap)->u.pattern.body = bodyMark; bodyMark = 0; (*ap)->u.pattern.dfa = lexSpecDFA (); + s0 = s; r = dfa_parse ((*ap)->u.pattern.dfa, &s); if (r || *s != '/') { xfree (*ap); *ap = NULL; - logf (LOG_WARN, "regular expression error. r=%d", r); + logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0); return -1; } dfa_mkstate ((*ap)->u.pattern.dfa); s++; break; case REGX_BEGIN: - logf (LOG_WARN, "cannot use begin here"); + logf (LOG_WARN, "cannot use BEGIN here"); + continue; + case REGX_INIT: + logf (LOG_WARN, "cannot use INIT here"); continue; case REGX_END: *ap = xmalloc (sizeof(**ap)); @@ -466,24 +614,51 @@ static int actionListMk (struct lexSpec *spec, const char *s, int readOneSpec (struct lexSpec *spec, const char *s) { - int tok, len; + int len, r, tok; + struct lexRule *rp; + struct lexContext *lc; tok = readParseToken (&s, &len); - if (tok == REGX_BEGIN) + if (tok == REGX_CONTEXT) { - actionListDel (&spec->beginActionList); - actionListMk (spec, s, &spec->beginActionList); + char context_name[32]; + tok = readParseToken (&s, &len); + if (tok != REGX_CODE) + { + logf (LOG_WARN, "missing name after CONTEXT keyword"); + return 0; + } + if (len > 31) + len = 31; + memcpy (context_name, s, len); + context_name[len] = '\0'; + lc = lexContextCreate (context_name); + lc->next = spec->context; + spec->context = lc; + return 0; } - else if (tok == REGX_END) + if (!spec->context) + spec->context = lexContextCreate ("main"); + + switch (tok) { - actionListDel (&spec->endActionList); - actionListMk (spec, s, &spec->endActionList); - } - else if (tok == REGX_PATTERN) - { - int r; - struct lexRule *rp; - r = dfa_parse (spec->trans.dfa, &s); + case REGX_BEGIN: + actionListDel (&spec->context->beginActionList); + actionListMk (spec, s, &spec->context->beginActionList); + break; + case REGX_END: + actionListDel (&spec->context->endActionList); + actionListMk (spec, s, &spec->context->endActionList); + break; + case REGX_INIT: + actionListDel (&spec->context->initActionList); + actionListMk (spec, s, &spec->context->initActionList); + break; + case REGX_PATTERN: +#if REGX_DEBUG + logf (LOG_DEBUG, "rule %d %s", spec->context->ruleNo, s); +#endif + r = dfa_parse (spec->context->dfa, &s); if (r) { logf (LOG_WARN, "regular expression error. r=%d", r); @@ -496,9 +671,9 @@ int readOneSpec (struct lexSpec *spec, const char *s) } s++; rp = xmalloc (sizeof(*rp)); - rp->info.no = spec->trans.ruleNo++; - rp->next = spec->trans.rules; - spec->trans.rules = rp; + rp->info.no = spec->context->ruleNo++; + rp->next = spec->context->rules; + spec->context->rules = rp; actionListMk (spec, s, &rp->info.actionList); } return 0; @@ -506,9 +681,9 @@ int readOneSpec (struct lexSpec *spec, const char *s) int readFileSpec (struct lexSpec *spec) { + struct lexContext *lc; char *lineBuf; int lineSize = 512; - struct lexRule *rp; int c, i, errors = 0; FILE *spec_inf; @@ -523,7 +698,6 @@ int readFileSpec (struct lexSpec *spec) return -1; } spec->lineNo = 0; - spec->trans.ruleNo = 1; c = getc (spec_inf); while (c != EOF) { @@ -564,137 +738,267 @@ int readFileSpec (struct lexSpec *spec) } fclose (spec_inf); xfree (lineBuf); - spec->trans.fastRule = xmalloc (sizeof(*spec->trans.fastRule) * - spec->trans.ruleNo); - for (i = 0; itrans.ruleNo; i++) - spec->trans.fastRule[i] = NULL; - for (rp = spec->trans.rules; rp; rp = rp->next) - spec->trans.fastRule[rp->info.no] = &rp->info; - if (errors) - return -1; + #if 0 debug_dfa_trav = 1; debug_dfa_tran = 1; debug_dfa_followpos = 1; dfa_verbose = 1; #endif - dfa_mkstate (spec->trans.dfa); + for (lc = spec->context; lc; lc = lc->next) + { + struct lexRule *rp; + lc->fastRule = xmalloc (sizeof(*lc->fastRule) * lc->ruleNo); + for (i = 0; i < lc->ruleNo; i++) + lc->fastRule[i] = NULL; + for (rp = lc->rules; rp; rp = rp->next) + lc->fastRule[rp->info.no] = &rp->info; + dfa_mkstate (lc->dfa); + } + if (errors) + return -1; + return 0; } +#if 0 static struct lexSpec *curLexSpec = NULL; - -static void destroy_data (struct data1_node *n) -{ - assert (n->which == DATA1N_data); - xfree (n->u.data.data); -} +#endif static void execData (struct lexSpec *spec, - data1_node **d1_stack, int *d1_level, const char *ebuf, int elen, int formatted_text) { struct data1_node *res, *parent; + int org_len; if (elen == 0) /* shouldn't happen, but it does! */ return ; #if REGX_DEBUG if (elen > 40) - logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15); + logf (LOG_DEBUG, "data (%d bytes) %.15s ... %.*s", elen, + ebuf, 15, ebuf + elen-15); else if (elen > 0) - logf (LOG_DEBUG, "execData %.*s", elen, ebuf); + logf (LOG_DEBUG, "data (%d bytes) %.*s", elen, elen, ebuf); else - logf (LOG_DEBUG, "execData len=%d", elen); + logf (LOG_DEBUG, "data (%d bytes)", elen); #endif - if (*d1_level <= 1) + if (spec->d1_level <= 1) return; - parent = d1_stack[*d1_level -1]; + parent = spec->d1_stack[spec->d1_level -1]; assert (parent); - if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data) + + if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data) + org_len = res->u.data.len; + else { - if (elen + res->u.data.len <= DATA1_LOCALDATA) - memcpy (res->u.data.data + res->u.data.len, ebuf, elen); - else - { - char *nb = xmalloc (elen + res->u.data.len); - memcpy (nb, res->u.data.data, res->u.data.len); - memcpy (nb + res->u.data.len, ebuf, elen); - if (res->u.data.len > DATA1_LOCALDATA) - xfree (res->u.data.data); - res->u.data.data = nb; - res->destroy = destroy_data; - } - res->u.data.len += elen; + org_len = 0; + + res = data1_mk_node (spec->dh, spec->m); + res->parent = parent; + res->which = DATA1N_data; + res->u.data.what = DATA1I_text; + res->u.data.len = 0; + res->u.data.formatted_text = formatted_text; +#if 0 + if (elen > DATA1_LOCALDATA) + res->u.data.data = nmem_malloc (spec->m, elen); + else + res->u.data.data = res->lbuf; + memcpy (res->u.data.data, ebuf, elen); +#else + res->u.data.data = 0; +#endif + res->root = parent->root; + + parent->last_child = res; + if (spec->d1_stack[spec->d1_level]) + spec->d1_stack[spec->d1_level]->next = res; + else + parent->child = res; + spec->d1_stack[spec->d1_level] = res; } - else + if (org_len + elen >= spec->concatBuf[spec->d1_level]->max) { - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_data; - res->u.data.what = DATA1I_text; - res->u.data.len = elen; - res->u.data.formatted_text = formatted_text; - if (elen > DATA1_LOCALDATA) - { - res->u.data.data = xmalloc (elen); - res->destroy = destroy_data; - } - else - res->u.data.data = res->lbuf; - memcpy (res->u.data.data, ebuf, elen); - res->root = parent->root; - - parent->num_children++; - parent->last_child = res; - if (d1_stack[*d1_level]) - d1_stack[*d1_level]->next = res; - else - parent->child = res; - d1_stack[*d1_level] = res; + char *old_buf, *new_buf; + + spec->concatBuf[spec->d1_level]->max = org_len + elen + 256; + new_buf = xmalloc (spec->concatBuf[spec->d1_level]->max); + if ((old_buf = spec->concatBuf[spec->d1_level]->buf)) + { + memcpy (new_buf, old_buf, org_len); + xfree (old_buf); + } + spec->concatBuf[spec->d1_level]->buf = new_buf; } + assert (spec->concatBuf[spec->d1_level]); + memcpy (spec->concatBuf[spec->d1_level]->buf + org_len, ebuf, elen); + res->u.data.len += elen; } static void execDataP (struct lexSpec *spec, - data1_node **d1_stack, int *d1_level, const char *ebuf, int elen, int formatted_text) { - execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text); + execData (spec, ebuf, elen, formatted_text); +} + +static void tagDataRelease (struct lexSpec *spec) +{ + data1_node *res; + + if ((res = spec->d1_stack[spec->d1_level]) && + res->which == DATA1N_data && + res->u.data.what == DATA1I_text) + { + assert (!res->u.data.data); + assert (res->u.data.len > 0); + if (res->u.data.len > DATA1_LOCALDATA) + res->u.data.data = nmem_malloc (spec->m, res->u.data.len); + else + res->u.data.data = res->lbuf; + memcpy (res->u.data.data, spec->concatBuf[spec->d1_level]->buf, + res->u.data.len); + } +} + +static void variantBegin (struct lexSpec *spec, + const char *class_str, int class_len, + const char *type_str, int type_len, + const char *value_str, int value_len) +{ + struct data1_node *parent = spec->d1_stack[spec->d1_level -1]; + char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL]; + data1_vartype *tp; + int i; + data1_node *res; + + if (spec->d1_level == 0) + { + logf (LOG_WARN, "in variant begin. No record type defined"); + return ; + } + if (class_len >= DATA1_MAX_SYMBOL) + class_len = DATA1_MAX_SYMBOL-1; + memcpy (tclass, class_str, class_len); + tclass[class_len] = '\0'; + + if (type_len >= DATA1_MAX_SYMBOL) + type_len = DATA1_MAX_SYMBOL-1; + memcpy (ttype, type_str, type_len); + ttype[type_len] = '\0'; + +#if REGX_DEBUG + logf (LOG_DEBUG, "variant begin %s %s (%d)", tclass, ttype, + spec->d1_level); +#endif + + if (!(tp = + data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset, + tclass, ttype))) + return; + + if (parent->which != DATA1N_variant) + { + res = data1_mk_node (spec->dh, spec->m); + res->parent = parent; + res->which = DATA1N_variant; + res->u.variant.type = 0; + res->u.variant.value = 0; + res->root = parent->root; + + parent->last_child = res; + if (spec->d1_stack[spec->d1_level]) + { + tagDataRelease (spec); + spec->d1_stack[spec->d1_level]->next = res; + } + else + parent->child = res; + spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[++(spec->d1_level)] = NULL; + } + for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--) + if (spec->d1_stack[i]->u.variant.type == tp) + { + spec->d1_level = i; + break; + } + +#if REGX_DEBUG + logf (LOG_DEBUG, "variant node (%d)", spec->d1_level); +#endif + parent = spec->d1_stack[spec->d1_level-1]; + res = data1_mk_node (spec->dh, spec->m); + res->parent = parent; + res->which = DATA1N_variant; + res->root = parent->root; + res->u.variant.type = tp; + + if (value_len >= DATA1_LOCALDATA) + value_len =DATA1_LOCALDATA-1; + memcpy (res->lbuf, value_str, value_len); + res->lbuf[value_len] = '\0'; + + res->u.variant.value = res->lbuf; + + parent->last_child = res; + if (spec->d1_stack[spec->d1_level]) + { + tagDataRelease (spec); + spec->d1_stack[spec->d1_level]->next = res; + } + else + parent->child = res; + spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[++(spec->d1_level)] = NULL; } +static void tagStrip (const char **tag, int *len) +{ + int i; + + for (i = *len; i > 0 && isspace((*tag)[i-1]); --i) + ; + *len = i; + for (i = 0; i < *len && isspace((*tag)[i]); i++) + ; + *tag += i; + *len -= i; +} static void tagBegin (struct lexSpec *spec, - data1_node **d1_stack, int *d1_level, const char *tag, int len) { - struct data1_node *parent = d1_stack[*d1_level -1]; + struct data1_node *parent = spec->d1_stack[spec->d1_level -1]; data1_element *elem = NULL; data1_node *partag = get_parent_tag(spec->dh, parent); data1_node *res; data1_element *e = NULL; int localtag = 0; - if (*d1_level == 0) + if (spec->d1_level == 0) { logf (LOG_WARN, "in element begin. No record type defined"); return ; } - + tagStrip (&tag, &len); + res = data1_mk_node (spec->dh, spec->m); res->parent = parent; res->which = DATA1N_tag; - res->u.tag.tag = res->lbuf; res->u.tag.get_bytes = -1; if (len >= DATA1_LOCALDATA) - len = DATA1_LOCALDATA-1; + res->u.tag.tag = nmem_malloc (spec->m, len+1); + else + res->u.tag.tag = res->lbuf; memcpy (res->u.tag.tag, tag, len); res->u.tag.tag[len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "tag begin %s (%d)", res->u.tag.tag, *d1_level); + logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); #endif if (parent->which == DATA1N_variant) return ; @@ -702,38 +1006,46 @@ static void tagBegin (struct lexSpec *spec, if (!(e = partag->u.tag.element)) localtag = 1; - elem = data1_getelementbytagname (spec->dh, d1_stack[0]->u.root.absyn, + elem = data1_getelementbytagname (spec->dh, + spec->d1_stack[0]->u.root.absyn, e, res->u.tag.tag); - res->u.tag.element = elem; res->u.tag.node_selected = 0; res->u.tag.make_variantlist = 0; res->u.tag.no_data_requested = 0; res->root = parent->root; - parent->num_children++; + parent->last_child = res; - if (d1_stack[*d1_level]) - d1_stack[*d1_level]->next = res; + if (spec->d1_stack[spec->d1_level]) + { + tagDataRelease (spec); + spec->d1_stack[spec->d1_level]->next = res; + } else parent->child = res; - d1_stack[*d1_level] = res; - d1_stack[++(*d1_level)] = NULL; + spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[++(spec->d1_level)] = NULL; } -static void tagEnd (struct lexSpec *spec, - data1_node **d1_stack, int *d1_level, +static void tagEnd (struct lexSpec *spec, int min_level, const char *tag, int len) { - while (*d1_level > 1) + tagStrip (&tag, &len); + while (spec->d1_level > min_level) { - (*d1_level)--; - if (!tag || - (strlen(d1_stack[*d1_level]->u.tag.tag) == (size_t) len && - !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len))) + tagDataRelease (spec); + (spec->d1_level)--; + if (spec->d1_level == 0) + break; + if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) && + (!tag || + (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) == + (size_t) len && + !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len)))) break; } #if REGX_DEBUG - logf (LOG_DEBUG, "tag end (%d)", *d1_level); + logf (LOG_DEBUG, "end tag (%d)", spec->d1_level); #endif } @@ -745,10 +1057,10 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, struct DFA_tran *t; unsigned char c; unsigned char c_prev = 0; - int ptr = *pptr; - int start_ptr = *pptr; - int last_rule = 0; - int last_ptr = 0; + int ptr = *pptr; /* current pointer */ + int start_ptr = *pptr; /* first char of match */ + int last_ptr = 0; /* last char of match */ + int last_rule = 0; /* rule number of current match */ int i; while (1) @@ -805,7 +1117,6 @@ static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr, } static int execTok (struct lexSpec *spec, const char **src, - int arg_no, int *arg_start, int *arg_end, const char **tokBuf, int *tokLen) { const char *s = *src; @@ -820,16 +1131,17 @@ static int execTok (struct lexSpec *spec, const char **src, s++; while (*s >= '0' && *s <= '9') n = n*10 + (*s++ -'0'); - if (arg_no == 0) + if (spec->arg_no == 0) { *tokBuf = ""; *tokLen = 0; } else { - if (n >= arg_no) - n = arg_no-1; - *tokBuf = f_win_get (spec, arg_start[n], arg_end[n], tokLen); + if (n >= spec->arg_no) + n = spec->arg_no-1; + *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n], + tokLen); } } else if (*s == '\"') @@ -867,10 +1179,8 @@ static int execTok (struct lexSpec *spec, const char **src, return 2; } -static char *regxStrz (const char *src, int len) +static char *regxStrz (const char *src, int len, char *str) { - static char str[64]; - if (len > 63) len = 63; memcpy (str, src, len); @@ -878,42 +1188,247 @@ static char *regxStrz (const char *src, int len) return str; } -static int execCode (struct lexSpec *spec, - int arg_no, int *arg_start, int *arg_end, int *pptr, - struct regxCode *code, - data1_node **d1_stack, int *d1_level) +#if HAVE_TCL_H +static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, + int argc, char **argv) +{ + struct lexSpec *spec = clientData; + if (argc < 2) + return TCL_ERROR; + if (!strcmp(argv[1], "record") && argc == 3) + { + char *absynName = argv[2]; + data1_absyn *absyn; + +#if REGX_DEBUG + logf (LOG_DEBUG, "begin record %s", absynName); +#endif + if (!(absyn = data1_get_absyn (spec->dh, absynName))) + logf (LOG_WARN, "Unknown tagset: %s", absynName); + else + { + data1_node *res; + + res = data1_mk_node (spec->dh, spec->m); + res->which = DATA1N_root; + res->u.root.type = absynName; + res->u.root.absyn = absyn; + res->root = res; + + spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[++(spec->d1_level)] = NULL; + } + } + else if (!strcmp(argv[1], "element") && argc == 3) + { + tagBegin (spec, argv[2], strlen(argv[2])); + } + else if (!strcmp (argv[1], "variant") && argc == 5) + { + variantBegin (spec, argv[2], strlen(argv[2]), + argv[3], strlen(argv[3]), + argv[4], strlen(argv[4])); + } + else if (!strcmp (argv[1], "context") && argc == 3) + { + struct lexContext *lc = spec->context; +#if REGX_DEBUG + logf (LOG_DEBUG, "begin context %s",argv[2]); +#endif + while (lc && strcmp (argv[2], lc->name)) + lc = lc->next; + if (lc) + { + spec->context_stack[++(spec->context_stack_top)] = lc; + } + else + logf (LOG_WARN, "unknown context %s", argv[2]); + } + else + return TCL_ERROR; + return TCL_OK; +} + +static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, + int argc, char **argv) +{ + struct lexSpec *spec = clientData; + if (argc < 2) + return TCL_ERROR; + + if (!strcmp (argv[1], "record")) + { + while (spec->d1_level) + { + tagDataRelease (spec); + (spec->d1_level)--; + } +#if REGX_DEBUG + logf (LOG_DEBUG, "end record"); +#endif + spec->stop_flag = 1; + } + else if (!strcmp (argv[1], "element")) + { + int min_level = 1; + char *element = 0; + if (!strcmp(argv[2], "-record")) + { + min_level = 0; + if (argc == 4) + element = argv[3]; + } + else + { + if (argc == 3) + element = argv[2]; + } + tagEnd (spec, min_level, element, (element ? strlen(element) : 0)); + if (spec->d1_level == 0) + { +#if REGX_DEBUG + logf (LOG_DEBUG, "end element end records"); +#endif + spec->stop_flag = 1; + } + } + else if (!strcmp (argv[1], "context")) + { +#if REGX_DEBUG + logf (LOG_DEBUG, "end context"); +#endif + if (spec->context_stack_top) + (spec->context_stack_top)--; + } + else + return TCL_ERROR; + return TCL_OK; +} + +static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, + int argc, char **argv) +{ + int argi = 1; + int textFlag = 0; + const char *element = 0; + struct lexSpec *spec = clientData; + + while (argi < argc) + { + if (!strcmp("-text", argv[argi])) + { + textFlag = 1; + argi++; + } + else if (!strcmp("-element", argv[argi])) + { + argi++; + if (argi < argc) + element = argv[argi++]; + } + else + break; + } + if (element) + tagBegin (spec, element, strlen(element)); + + while (argi < argc) + { + execData (spec, argv[argi], strlen(argv[argi]), textFlag); + argi++; + } + if (element) + tagEnd (spec, 1, NULL, 0); + return TCL_OK; +} + +static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp, + int argc, char **argv) +{ + struct lexSpec *spec = clientData; + int argi = 1; + int offset = 0; + int no; + + while (argi < argc) + { + if (!strcmp("-offset", argv[argi])) + { + argi++; + if (argi < argc) + { + offset = atoi(argv[argi]); + argi++; + } + } + else + break; + } + if (argi != argc-1) + return TCL_ERROR; + no = atoi(argv[argi]); + if (no >= spec->arg_no) + no = spec->arg_no - 1; + spec->ptr = spec->arg_start[no] + offset; + return TCL_OK; +} + +static void execTcl (struct lexSpec *spec, struct regxCode *code) +{ + int i; + for (i = 0; i < spec->arg_no; i++) + { + char var_name[10], *var_buf; + int var_len, ch; + + sprintf (var_name, "%d", i); + var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i], + &var_len); + if (var_buf) + { + ch = var_buf[var_len]; + var_buf[var_len] = '\0'; + Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0); + var_buf[var_len] = ch; + } + } + Tcl_Eval (spec->tcl_interp, code->str); +} +/* HAVE_TCL_H */ +#endif + +static void execCode (struct lexSpec *spec, struct regxCode *code) { const char *s = code->str; int cmd_len, r; - int returnCode = 1; const char *cmd_str; - r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); while (r) { - char *p; + char *p, ptmp[64]; if (r == 1) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); continue; } - p = regxStrz (cmd_str, cmd_len); + p = regxStrz (cmd_str, cmd_len, ptmp); if (!strcmp (p, "begin")) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) + { + logf (LOG_WARN, "missing keyword after 'begin'"); continue; - p = regxStrz (cmd_str, cmd_len); + } + p = regxStrz (cmd_str, cmd_len, ptmp); if (!strcmp (p, "record")) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) continue; - if (*d1_level == 0) + if (spec->d1_level == 0) { static char absynName[64]; data1_absyn *absyn; @@ -938,82 +1453,147 @@ static int execCode (struct lexSpec *spec, res->u.root.absyn = absyn; res->root = res; - d1_stack[*d1_level] = res; - d1_stack[++(*d1_level)] = NULL; + spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[++(spec->d1_level)] = NULL; } } - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); } else if (!strcmp (p, "element")) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) continue; - tagBegin (spec, d1_stack, d1_level, cmd_str, cmd_len); - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); - } + tagBegin (spec, cmd_str, cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else if (!strcmp (p, "variant")) + { + int class_len; + const char *class_str = NULL; + int type_len; + const char *type_str = NULL; + int value_len; + const char *value_str = NULL; + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + continue; + class_str = cmd_str; + class_len = cmd_len; + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + continue; + type_str = cmd_str; + type_len = cmd_len; + + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + continue; + value_str = cmd_str; + value_len = cmd_len; + + variantBegin (spec, class_str, class_len, + type_str, type_len, value_str, value_len); + + + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else if (!strcmp (p, "context")) + { + if (r > 1) + { + struct lexContext *lc = spec->context; + r = execTok (spec, &s, &cmd_str, &cmd_len); + p = regxStrz (cmd_str, cmd_len, ptmp); +#if REGX_DEBUG + logf (LOG_DEBUG, "begin context %s", p); +#endif + while (lc && strcmp (p, lc->name)) + lc = lc->next; + if (lc) + spec->context_stack[++(spec->context_stack_top)] = lc; + else + logf (LOG_WARN, "unknown context %s", p); + + } + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else + { + logf (LOG_WARN, "bad keyword '%s' after begin", p); + } } else if (!strcmp (p, "end")) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); - if (r > 1) - { - p = regxStrz (cmd_str, cmd_len); - if (!strcmp (p, "record")) - { - *d1_level = 0; - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); + if (r < 2) + { + logf (LOG_WARN, "missing keyword after 'end'"); + continue; + } + p = regxStrz (cmd_str, cmd_len, ptmp); + if (!strcmp (p, "record")) + { + while (spec->d1_level) + { + tagDataRelease (spec); + (spec->d1_level)--; + } + r = execTok (spec, &s, &cmd_str, &cmd_len); #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_DEBUG, "end record"); #endif - returnCode = 0; + spec->stop_flag = 1; + } + else if (!strcmp (p, "element")) + { + int min_level = 1; + while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) + { + if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len)) + min_level = 0; } - else if (!strcmp (p, "element")) + if (r > 2) + { + tagEnd (spec, min_level, cmd_str, cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else + tagEnd (spec, min_level, NULL, 0); + if (spec->d1_level == 0) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); -#if 0 - if (*d1_level == 1) - { - *d1_level = 0; - returnCode = 0; - } +#if REGX_DEBUG + logf (LOG_DEBUG, "end element end records"); #endif - if (r > 2) - { - tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len); - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); - } - else - tagEnd (spec, d1_stack, d1_level, NULL, 0); + spec->stop_flag = 1; } - else - logf (LOG_WARN, "missing record/element/variant"); - } - else - logf (LOG_WARN, "missing record/element/variant"); - } + + } + else if (!strcmp (p, "context")) + { +#if REGX_DEBUG + logf (LOG_DEBUG, "end context"); +#endif + if (spec->context_stack_top) + (spec->context_stack_top)--; + r = execTok (spec, &s, &cmd_str, &cmd_len); + } + else + logf (LOG_WARN, "bad keyword '%s' after end", p); + } else if (!strcmp (p, "data")) { int textFlag = 0; int element_len; const char *element_str = NULL; - while ((r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len)) == 3) + while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3) { if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len)) textFlag = 1; else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len)) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &element_str, &element_len); + r = execTok (spec, &s, &element_str, &element_len); if (r < 2) break; } @@ -1027,35 +1607,30 @@ static int execCode (struct lexSpec *spec, continue; } if (element_str) - tagBegin (spec, d1_stack, d1_level, element_str, element_len); + tagBegin (spec, element_str, element_len); do { - execData (spec, d1_stack, d1_level, cmd_str, cmd_len, - textFlag); - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + execData (spec, cmd_str, cmd_len,textFlag); + r = execTok (spec, &s, &cmd_str, &cmd_len); } while (r > 1); if (element_str) - tagEnd (spec, d1_stack, d1_level, NULL, 0); + tagEnd (spec, 1, NULL, 0); } else if (!strcmp (p, "unread")) { int no, offset; - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len)) { - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); if (r < 2) { logf (LOG_WARN, "missing number after -offset"); continue; } - p = regxStrz (cmd_str, cmd_len); + p = regxStrz (cmd_str, cmd_len, ptmp); offset = atoi (p); - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); } else offset = 0; @@ -1072,35 +1647,48 @@ static int execCode (struct lexSpec *spec, else { no = *cmd_str - '0'; - if (no >= arg_no) - no = arg_no - 1; - *pptr = arg_start[no] + offset; + if (no >= spec->arg_no) + no = spec->arg_no - 1; + spec->ptr = spec->arg_start[no] + offset; } - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); } + else if (!strcmp (p, "context")) + { + if (r > 1) + { + struct lexContext *lc = spec->context; + r = execTok (spec, &s, &cmd_str, &cmd_len); + p = regxStrz (cmd_str, cmd_len, ptmp); + + while (lc && strcmp (p, lc->name)) + lc = lc->next; + if (lc) + spec->context_stack[spec->context_stack_top] = lc; + else + logf (LOG_WARN, "unknown context %s", p); + + } + r = execTok (spec, &s, &cmd_str, &cmd_len); + } else { - logf (LOG_WARN, "unknown code command: %.*s", cmd_len, cmd_str); - r = execTok (spec, &s, arg_no, arg_start, arg_end, - &cmd_str, &cmd_len); + logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str); + r = execTok (spec, &s, &cmd_str, &cmd_len); continue; } if (r > 1) { logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str); do { - r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, - &cmd_len); + r = execTok (spec, &s, &cmd_str, &cmd_len); } while (r > 1); } } - return returnCode; } static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, - data1_node **d1_stack, int *d1_level, int start_ptr, int *pptr) { int sptr; @@ -1108,8 +1696,12 @@ static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, int arg_end[20]; int arg_no = 1; + if (!ap) + return 1; arg_start[0] = start_ptr; arg_end[0] = *pptr; + spec->arg_start = arg_start; + spec->arg_end = arg_end; while (ap) { @@ -1147,9 +1739,19 @@ static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, arg_no++; break; case REGX_CODE: - if (!execCode (spec, arg_no, arg_start, arg_end, pptr, - ap->u.code, d1_stack, d1_level)) - return 0; + spec->arg_no = arg_no; + spec->ptr = *pptr; +#if HAVE_TCL_H + if (spec->tcl_interp) + execTcl(spec, ap->u.code); + else + execCode (spec, ap->u.code); +#else + execCode (spec, ap->u.code); +#endif + *pptr = spec->ptr; + if (spec->stop_flag) + return 0; break; case REGX_END: arg_start[arg_no] = *pptr; @@ -1162,58 +1764,62 @@ static int execAction (struct lexSpec *spec, struct lexRuleAction *ap, return 1; } -static int execRule (struct lexSpec *spec, struct lexTrans *trans, - data1_node **d1_stack, int *d1_level, +static int execRule (struct lexSpec *spec, struct lexContext *context, int ruleNo, int start_ptr, int *pptr) { #if REGX_DEBUG - logf (LOG_DEBUG, "execRule %d", ruleNo); + logf (LOG_DEBUG, "exec rule %d", ruleNo); #endif - return execAction (spec, trans->fastRule[ruleNo]->actionList, - d1_stack, d1_level, start_ptr, pptr); + return execAction (spec, context->fastRule[ruleNo]->actionList, + start_ptr, pptr); } -data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans, - data1_node **d1_stack, int *d1_level, - int *ptr) +data1_node *lexNode (struct lexSpec *spec, int *ptr) { - struct DFA_state *state = trans->dfa->states[0]; + struct lexContext *context = spec->context_stack[spec->context_stack_top]; + struct DFA_state *state = context->dfa->states[0]; struct DFA_tran *t; unsigned char c; unsigned char c_prev = '\n'; int i; - int last_rule = 0; - int last_ptr = *ptr; - int start_ptr = *ptr; - int skip_ptr = *ptr; + int last_rule = 0; /* rule number of current match */ + int last_ptr = *ptr; /* last char of match */ + int start_ptr = *ptr; /* first char of match */ + int skip_ptr = *ptr; /* first char of run */ while (1) { c = f_win_advance (spec, ptr); if (*ptr == F_WIN_EOF) { + /* end of file met */ if (last_rule) { + /* there was a match */ if (skip_ptr < start_ptr) { + /* deal with chars that didn't match */ int size; char *buf; buf = f_win_get (spec, skip_ptr, start_ptr, &size); - execDataP (spec, d1_stack, d1_level, buf, size, 0); + execDataP (spec, buf, size, 0); } + /* restore pointer */ *ptr = last_ptr; - if (!execRule (spec, trans, d1_stack, d1_level, last_rule, - start_ptr, ptr)) - break; + /* execute rule */ + if (!execRule (spec, context, last_rule, start_ptr, ptr)) + break; + /* restore skip pointer */ skip_ptr = *ptr; last_rule = 0; } else if (skip_ptr < *ptr) { + /* deal with chars that didn't match */ int size; char *buf; buf = f_win_get (spec, skip_ptr, *ptr, &size); - execDataP (spec, d1_stack, d1_level, buf, size, 0); + execDataP (spec, buf, size, 0); } if (*ptr == F_WIN_EOF) break; @@ -1227,14 +1833,15 @@ data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans, { if (skip_ptr < start_ptr) { + /* deal with chars that didn't match */ int size; char *buf; buf = f_win_get (spec, skip_ptr, start_ptr, &size); - execDataP (spec, d1_stack, d1_level, buf, size, 0); + execDataP (spec, buf, size, 0); } + /* restore pointer */ *ptr = last_ptr; - if (!execRule (spec, trans, d1_stack, d1_level, last_rule, - start_ptr, ptr)) + if (!execRule (spec, context, last_rule, start_ptr, ptr)) { if (spec->f_win_ef && *ptr != F_WIN_EOF) { @@ -1245,9 +1852,10 @@ data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans, } return NULL; } + context = spec->context_stack[spec->context_stack_top]; skip_ptr = *ptr; last_rule = 0; - start_ptr = *ptr; + last_ptr = start_ptr = *ptr; if (start_ptr > 0) { --start_ptr; @@ -1259,12 +1867,12 @@ data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans, c_prev = f_win_advance (spec, &start_ptr); *ptr = start_ptr; } - state = trans->dfa->states[0]; + state = context->dfa->states[0]; break; } else if (c >= t->ch[0] && c <= t->ch[1]) { /* transition ... */ - state = trans->dfa->states[t->to]; + state = context->dfa->states[t->to]; if (state->rule_no) { if (c_prev == '\n') @@ -1286,55 +1894,158 @@ data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans, return NULL; } -static data1_node *lexRoot (struct lexSpec *spec, off_t offset) +static data1_node *lexRoot (struct lexSpec *spec, off_t offset, + const char *context_name) { - data1_node *d1_stack[512]; - int d1_level = 0; + struct lexContext *lt = spec->context; int ptr = offset; - d1_stack[d1_level] = NULL; - if (spec->beginActionList) - execAction (spec, spec->beginActionList, - d1_stack, &d1_level, 0, &ptr); - lexNode (spec, &spec->trans, d1_stack, &d1_level, &ptr); - if (spec->endActionList) - execAction (spec, spec->endActionList, - d1_stack, &d1_level, ptr, &ptr); - return *d1_stack; + spec->stop_flag = 0; + spec->d1_level = 0; + spec->context_stack_top = 0; + while (lt) + { + if (!strcmp (lt->name, context_name)) + break; + lt = lt->next; + } + if (!lt) + { + logf (LOG_WARN, "cannot find context %s", context_name); + return NULL; + } + spec->context_stack[spec->context_stack_top] = lt; + spec->d1_stack[spec->d1_level] = NULL; +#if 1 + if (!lt->initFlag) + { + lt->initFlag = 1; + execAction (spec, lt->initActionList, ptr, &ptr); + } +#endif + execAction (spec, lt->beginActionList, ptr, &ptr); + lexNode (spec, &ptr); + while (spec->d1_level) + { + tagDataRelease (spec); + (spec->d1_level)--; + } + execAction (spec, lt->endActionList, ptr, &ptr); + return spec->d1_stack[0]; +} + +void grs_destroy(void *clientData) +{ + struct lexSpecs *specs = clientData; + if (specs->spec) + { + lexSpecDestroy(&specs->spec); + } + xfree (specs); +} + +void *grs_init(void) +{ + struct lexSpecs *specs = xmalloc (sizeof(*specs)); + specs->spec = 0; + return specs; } data1_node *grs_read_regx (struct grs_read_info *p) { int res; - data1_node *n; + struct lexSpecs *specs = p->clientData; + struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG logf (LOG_DEBUG, "grs_read_regx"); #endif - if (!curLexSpec || strcmp (curLexSpec->name, p->type)) + if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { - if (curLexSpec) - lexSpecDel (&curLexSpec); - curLexSpec = lexSpecMk (p->type); - curLexSpec->dh = p->dh; - res = readFileSpec (curLexSpec); + if (*curLexSpec) + lexSpecDestroy (curLexSpec); + *curLexSpec = lexSpecCreate (p->type, p->dh); + res = readFileSpec (*curLexSpec); if (res) { - lexSpecDel (&curLexSpec); + lexSpecDestroy (curLexSpec); return NULL; } } + (*curLexSpec)->dh = p->dh; if (!p->offset) { - curLexSpec->f_win_start = 0; - curLexSpec->f_win_end = 0; - curLexSpec->f_win_rf = p->readf; - curLexSpec->f_win_sf = p->seekf; - curLexSpec->f_win_fh = p->fh; - curLexSpec->f_win_ef = p->endf; - curLexSpec->f_win_size = 500000; + (*curLexSpec)->f_win_start = 0; + (*curLexSpec)->f_win_end = 0; + (*curLexSpec)->f_win_rf = p->readf; + (*curLexSpec)->f_win_sf = p->seekf; + (*curLexSpec)->f_win_fh = p->fh; + (*curLexSpec)->f_win_ef = p->endf; + (*curLexSpec)->f_win_size = 500000; } - curLexSpec->m = p->mem; - n = lexRoot (curLexSpec, p->offset); - return n; + (*curLexSpec)->m = p->mem; + return lexRoot (*curLexSpec, p->offset, "main"); } + +static struct recTypeGrs regx_type = { + "regx", + grs_init, + grs_destroy, + grs_read_regx +}; + +RecTypeGrs recTypeGrs_regx = ®x_type; + +#if HAVE_TCL_H +data1_node *grs_read_tcl (struct grs_read_info *p) +{ + int res; + struct lexSpecs *specs = p->clientData; + struct lexSpec **curLexSpec = &specs->spec; + +#if REGX_DEBUG + logf (LOG_DEBUG, "grs_read_tcl"); +#endif + if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) + { + Tcl_Interp *tcl_interp; + if (*curLexSpec) + lexSpecDestroy (curLexSpec); + *curLexSpec = lexSpecCreate (p->type, p->dh); + tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp(); + Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0); + Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0); + Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0); + Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread, + *curLexSpec, 0); + res = readFileSpec (*curLexSpec); + if (res) + { + lexSpecDestroy (curLexSpec); + return NULL; + } + } + (*curLexSpec)->dh = p->dh; + if (!p->offset) + { + (*curLexSpec)->f_win_start = 0; + (*curLexSpec)->f_win_end = 0; + (*curLexSpec)->f_win_rf = p->readf; + (*curLexSpec)->f_win_sf = p->seekf; + (*curLexSpec)->f_win_fh = p->fh; + (*curLexSpec)->f_win_ef = p->endf; + (*curLexSpec)->f_win_size = 500000; + } + (*curLexSpec)->m = p->mem; + return lexRoot (*curLexSpec, p->offset, "main"); +} + +static struct recTypeGrs tcl_type = { + "tcl", + grs_init, + grs_destroy, + grs_read_tcl +}; + +RecTypeGrs recTypeGrs_tcl = &tcl_type; +#endif