X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=5d9ce2f115acd6fbcfadd60f91cdcc62fb2a3674;hb=d697b6568abc53f9b3b83abb42ca545195dae3dc;hp=bb26858b7fe941dc9913d4a5d4e1d60e83b62a91;hpb=0a50c507e32a11d9ef0d1f8458b02590044f25d5;p=idzebra-moved-to-github.git diff --git a/recctrl/regxread.c b/recctrl/regxread.c index bb26858..5d9ce2f 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,179 +1,25 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2002, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss - * - * $Log: regxread.c,v $ - * Revision 1.24 1999-05-21 11:08:46 adam - * Tcl filter attempts to read .tflt. Improvements to configure - * script so that it reads uninstalled Tcl source. - * - * Revision 1.23 1999/05/20 12:57:18 adam - * Implemented TCL filter. Updated recctrl system. - * - * Revision 1.22 1998/11/03 16:07:13 adam - * Yet another fix. - * - * Revision 1.21 1998/11/03 15:43:39 adam - * Fixed bug introduced by previous commit. - * - * Revision 1.20 1998/11/03 14:51:28 adam - * Changed code so that it creates as few data1 nodes as possible. - * - * Revision 1.19 1998/11/03 10:22:39 adam - * Fixed memory leak that could occur for when large data1 node were - * concatenated. Data-type data1_nodes may have multiple nodes. - * - * Revision 1.18 1998/10/15 13:11:47 adam - * Added support for option -record for "end element". When specified - * end element will mark end-of-record when at outer-level. - * - * Revision 1.17 1998/07/01 10:13:51 adam - * Minor fix. - * - * Revision 1.16 1998/06/30 15:15:09 adam - * Tags are trimmed: white space removed before- and after the tag. - * - * Revision 1.15 1998/06/30 12:55:45 adam - * Bug fix. - * - * Revision 1.14 1998/03/05 08:41:00 adam - * Implemented rule contexts. - * - * Revision 1.13 1997/12/12 06:33:58 adam - * Fixed bug that showed up when multiple filter where used. - * Made one routine thread-safe. - * - * Revision 1.12 1997/11/18 10:03:24 adam - * Member num_children removed from data1_node. - * - * Revision 1.11 1997/11/06 11:41:01 adam - * Implemented "begin variant" for the sgml.regx filter. - * - * Revision 1.10 1997/10/31 12:36:12 adam - * Minor change that avoids compiler warning. - * - * Revision 1.9 1997/09/29 09:02:49 adam - * Fixed small bug (introduced by previous commit). - * - * Revision 1.8 1997/09/17 12:19:22 adam - * Zebra version corresponds to YAZ version 1.4. - * Changed Zebra server so that it doesn't depend on global common_resource. - * - * Revision 1.7 1997/07/15 16:33:07 adam - * Check for zero length in execData. - * - * Revision 1.6 1997/02/24 10:41:51 adam - * Cleanup of code and commented out the "end element-end-record" code. - * - * Revision 1.5 1997/02/19 16:22:33 adam - * Fixed "end element" to terminate record in outer-most level. - * - * Revision 1.4 1997/02/12 20:42:58 adam - * Changed some log messages. - * - * Revision 1.3 1996/11/08 14:05:33 adam - * Bug fix: data1 node member u.tag.get_bytes weren't initialized. - * - * Revision 1.2 1996/10/29 14:02:09 adam - * Doesn't use the global data1_tabpath (from YAZ). Instead the function - * data1_get_tabpath is used. - * - * Revision 1.1 1996/10/11 10:57:30 adam - * New module recctrl. Used to manage records (extract/retrieval). - * - * Revision 1.24 1996/06/17 14:25:31 adam - * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG. - * - * Revision 1.23 1996/06/04 10:19:00 adam - * Minor changes - removed include of ctype.h. - * - * Revision 1.22 1996/06/03 15:23:13 adam - * Bug fix: /../ BODY /../ - pattern didn't match EOF. - * - * Revision 1.21 1996/05/14 16:58:38 adam - * Minor change. - * - * Revision 1.20 1996/05/01 13:46:36 adam - * First work on multiple records in one file. - * New option, -offset, to the "unread" command in the filter module. - * - * Revision 1.19 1996/02/12 16:18:20 adam - * Yet another bug fix in implementation of unread command. - * - * Revision 1.18 1996/02/12 16:07:54 adam - * Bug fix in new unread command. - * - * Revision 1.17 1996/02/12 15:56:11 adam - * New code command: unread. - * - * Revision 1.16 1996/01/17 14:57:51 adam - * Prototype changed for reader functions in extract/retrieve. File - * is identified by 'void *' instead of 'int. - * - * Revision 1.15 1996/01/08 19:15:47 adam - * New input filter that works! - * - * Revision 1.14 1996/01/08 09:10:38 adam - * Yet another complete rework on this module. - * - * Revision 1.13 1995/12/15 17:21:50 adam - * This version is able to set data.formatted_text in data1-nodes. - * - * Revision 1.12 1995/12/15 16:20:10 adam - * The filter files (*.flt) are read from the path given by data1_tabpath. - * - * Revision 1.11 1995/12/15 12:35:16 adam - * Better logging. - * - * Revision 1.10 1995/12/15 10:35:36 adam - * Misc. bug fixes. - * - * Revision 1.9 1995/12/14 16:38:48 adam - * Completely new attempt to make regular expression parsing. - * - * Revision 1.8 1995/12/13 17:16:59 adam - * Small changes. - * - * Revision 1.7 1995/12/13 16:51:58 adam - * Modified to set last_child in data1_nodes. - * Uses destroy handler to free up data text nodes. - * - * Revision 1.6 1995/12/13 13:45:37 quinn - * Changed data1 to use nmem. - * - * Revision 1.5 1995/12/11 09:12:52 adam - * The rec_get function returns NULL if record doesn't exist - will - * happen in the server if the result set records have been deleted since - * the creation of the set (i.e. the search). - * The server saves a result temporarily if it is 'volatile', i.e. the - * set is register dependent. - * - * Revision 1.4 1995/12/05 16:57:40 adam - * More work on regular patterns. - * - * Revision 1.3 1995/12/05 09:37:09 adam - * One malloc was renamed to xmalloc. - * - * Revision 1.2 1995/12/04 17:59:24 adam - * More work on regular expression conversion. - * - * Revision 1.1 1995/12/04 14:25:30 adam - * Started work on regular expression parsed input to structured records. * + * $Id: regxread.c,v 1.42 2002-05-07 11:05:20 adam Exp $ */ #include #include #include #include -#include +#include #include #include #include "grsread.h" #if HAVE_TCL_H #include + +#if MAJOR_VERSION >= 8 +#define HAVE_TCL_OBJECTS +#endif #endif #define REGX_DEBUG 0 @@ -192,6 +38,9 @@ struct regxCode { char *str; +#if HAVE_TCL_OBJECTS + Tcl_Obj *tcl_obj; +#endif }; struct lexRuleAction { @@ -231,7 +80,6 @@ struct lexContext { }; struct lexConcatBuf { - int len; int max; char *buf; }; @@ -260,7 +108,7 @@ struct lexSpec { int (*f_win_rf)(void *, char *, size_t); off_t (*f_win_sf)(void *, off_t); - struct lexConcatBuf **concatBuf; + struct lexConcatBuf *concatBuf; int maxLevel; data1_node **d1_stack; int d1_level; @@ -292,7 +140,7 @@ static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos, spec->f_win_start = start_pos; if (!spec->f_win_buf) - spec->f_win_buf = xmalloc (spec->f_win_size); + spec->f_win_buf = (char *) xmalloc (spec->f_win_size); *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf, spec->f_win_size); spec->f_win_end = spec->f_win_start + *size; @@ -338,6 +186,10 @@ static void regxCodeDel (struct regxCode **pp) struct regxCode *p = *pp; if (p) { +#if HAVE_TCL_OBJECTS + if (p->tcl_obj) + Tcl_DecrRefCount (p->tcl_obj); +#endif xfree (p->str); xfree (p); *pp = NULL; @@ -348,10 +200,15 @@ static void regxCodeMk (struct regxCode **pp, const char *buf, int len) { struct regxCode *p; - p = xmalloc (sizeof(*p)); - p->str = xmalloc (len+1); + p = (struct regxCode *) xmalloc (sizeof(*p)); + p->str = (char *) xmalloc (len+1); memcpy (p->str, buf, len); p->str[len] = '\0'; +#if HAVE_TCL_OBJECTS + p->tcl_obj = Tcl_NewStringObj ((char *) buf, len); + if (p->tcl_obj) + Tcl_IncrRefCount (p->tcl_obj); +#endif *pp = p; } @@ -389,7 +246,7 @@ static void actionListDel (struct lexRuleAction **rap) static struct lexContext *lexContextCreate (const char *name) { - struct lexContext *p = xmalloc (sizeof(*p)); + struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p)); p->name = xstrdup (name); p->ruleNo = 1; @@ -408,6 +265,7 @@ static void lexContextDestroy (struct lexContext *p) { struct lexRule *rp, *rp1; + dfa_delete (&p->dfa); xfree (p->fastRule); for (rp = p->rules; rp; rp = rp1) { @@ -417,6 +275,7 @@ static void lexContextDestroy (struct lexContext *p) } actionListDel (&p->beginActionList); actionListDel (&p->endActionList); + actionListDel (&p->initActionList); xfree (p->name); xfree (p); } @@ -426,8 +285,8 @@ static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh) struct lexSpec *p; int i; - p = xmalloc (sizeof(*p)); - p->name = xmalloc (strlen(name)+1); + p = (struct lexSpec *) xmalloc (sizeof(*p)); + p->name = (char *) xmalloc (strlen(name)+1); strcpy (p->name, name); #if HAVE_TCL_H @@ -436,19 +295,19 @@ static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh) p->dh = dh; p->context = NULL; p->context_stack_size = 100; - p->context_stack = xmalloc (sizeof(*p->context_stack) * - p->context_stack_size); + p->context_stack = (struct lexContext **) + xmalloc (sizeof(*p->context_stack) * p->context_stack_size); p->f_win_buf = NULL; p->maxLevel = 128; - p->concatBuf = xmalloc (sizeof(*p->concatBuf) * p->maxLevel); + p->concatBuf = (struct lexConcatBuf *) + xmalloc (sizeof(*p->concatBuf) * p->maxLevel); for (i = 0; i < p->maxLevel; i++) { - p->concatBuf[i] = xmalloc (sizeof(**p->concatBuf)); - p->concatBuf[i]->len = p->concatBuf[i]->max = 0; - p->concatBuf[i]->buf = 0; + p->concatBuf[i].max = 0; + p->concatBuf[i].buf = 0; } - p->d1_stack = xmalloc (sizeof(*p->d1_stack) * p->maxLevel); + p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel); p->d1_level = 0; return p; } @@ -465,7 +324,7 @@ static void lexSpecDestroy (struct lexSpec **pp) return ; for (i = 0; i < p->maxLevel; i++) - xfree (p->concatBuf[i]); + xfree (p->concatBuf[i].buf); xfree (p->concatBuf); lt = p->context; @@ -475,7 +334,7 @@ static void lexSpecDestroy (struct lexSpec **pp) lexContextDestroy (lt); lt = lt_next; } -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS if (p->tcl_interp) Tcl_DeleteInterp (p->tcl_interp); #endif @@ -493,7 +352,7 @@ static int readParseToken (const char **cpp, int *len) char cmd[32]; int i, level; - while (*cp == ' ' || *cp == '\t' || *cp == '\n') + while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r') cp++; switch (*cp) { @@ -528,7 +387,7 @@ static int readParseToken (const char **cpp, int *len) cmd[i] = *cp + 'a' - 'A'; else break; - if (i < sizeof(cmd)-2) + if (i < (int) sizeof(cmd)-2) i++; cp++; } @@ -537,7 +396,8 @@ static int readParseToken (const char **cpp, int *len) { logf (LOG_WARN, "bad character %d %c", *cp, *cp); cp++; - while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n') + while (*cp && *cp != ' ' && *cp != '\t' && + *cp != '\n' && *cp != '\r') cp++; *cpp = cp; return 0; @@ -576,13 +436,13 @@ static int actionListMk (struct lexSpec *spec, const char *s, bodyMark = 1; continue; case REGX_CODE: - *ap = xmalloc (sizeof(**ap)); + *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap)); (*ap)->which = tok; regxCodeMk (&(*ap)->u.code, s, len); s += len+1; break; case REGX_PATTERN: - *ap = xmalloc (sizeof(**ap)); + *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap)); (*ap)->which = tok; (*ap)->u.pattern.body = bodyMark; bodyMark = 0; @@ -606,7 +466,7 @@ static int actionListMk (struct lexSpec *spec, const char *s, logf (LOG_WARN, "cannot use INIT here"); continue; case REGX_END: - *ap = xmalloc (sizeof(**ap)); + *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap)); (*ap)->which = tok; break; } @@ -660,7 +520,7 @@ int readOneSpec (struct lexSpec *spec, const char *s) break; case REGX_PATTERN: #if REGX_DEBUG - logf (LOG_DEBUG, "rule %d %s", spec->context->ruleNo, s); + logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s); #endif r = dfa_parse (spec->context->dfa, &s); if (r) @@ -674,7 +534,7 @@ int readOneSpec (struct lexSpec *spec, const char *s) return -1; } s++; - rp = xmalloc (sizeof(*rp)); + rp = (struct lexRule *) xmalloc (sizeof(*rp)); rp->info.no = spec->context->ruleNo++; rp->next = spec->context->rules; spec->context->rules = rp; @@ -686,49 +546,40 @@ int readOneSpec (struct lexSpec *spec, const char *s) int readFileSpec (struct lexSpec *spec) { struct lexContext *lc; - char *lineBuf; - int lineSize = 512; int c, i, errors = 0; FILE *spec_inf = 0; + WRBUF lineBuf; + char fname[256]; - lineBuf = xmalloc (1+lineSize); #if HAVE_TCL_H if (spec->tcl_interp) { - sprintf (lineBuf, "%s.tflt", spec->name); - spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), lineBuf, "r"); + sprintf (fname, "%s.tflt", spec->name); + spec_inf = data1_path_fopen (spec->dh, fname, "r"); } #endif if (!spec_inf) { - sprintf (lineBuf, "%s.flt", spec->name); - spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), lineBuf, "r"); + sprintf (fname, "%s.flt", spec->name); + spec_inf = data1_path_fopen (spec->dh, fname, "r"); } if (!spec_inf) { logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name); - xfree (lineBuf); - return -1; - } - logf (LOG_LOG, "reading regx filter %s.flt", lineBuf); - sprintf (lineBuf, "%s.flt", spec->name); - if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), - lineBuf, "r"))) - { - logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name); - xfree (lineBuf); return -1; } + logf (LOG_LOG, "reading regx filter %s", fname); #if HAVE_TCL_H if (spec->tcl_interp) logf (LOG_LOG, "Tcl enabled"); #endif + lineBuf = wrbuf_alloc(); spec->lineNo = 0; c = getc (spec_inf); while (c != EOF) { - int off = 0; - if (c == '#' || c == '\n' || c == ' ' || c == '\t') + wrbuf_rewind (lineBuf); + if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r') { while (c != '\n' && c != EOF) c = getc (spec_inf); @@ -739,12 +590,14 @@ int readFileSpec (struct lexSpec *spec) else { int addLine = 0; - - lineBuf[off++] = c; + while (1) { int c1 = c; + wrbuf_putc(lineBuf, c); c = getc (spec_inf); + while (c == '\r') + c = getc (spec_inf); if (c == EOF) break; if (c1 == '\n') @@ -753,17 +606,14 @@ int readFileSpec (struct lexSpec *spec) break; addLine++; } - lineBuf[off] = c; - if (off < lineSize) - off++; } - lineBuf[off] = '\0'; - readOneSpec (spec, lineBuf); + wrbuf_putc(lineBuf, '\0'); + readOneSpec (spec, wrbuf_buf(lineBuf)); spec->lineNo += addLine; } } fclose (spec_inf); - xfree (lineBuf); + wrbuf_free(lineBuf, 1); #if 0 debug_dfa_trav = 1; @@ -774,7 +624,8 @@ int readFileSpec (struct lexSpec *spec) for (lc = spec->context; lc; lc = lc->next) { struct lexRule *rp; - lc->fastRule = xmalloc (sizeof(*lc->fastRule) * lc->ruleNo); + lc->fastRule = (struct lexRuleInfo **) + xmalloc (sizeof(*lc->fastRule) * lc->ruleNo); for (i = 0; i < lc->ruleNo; i++) lc->fastRule[i] = NULL; for (rp = lc->rules; rp; rp = rp->next) @@ -801,12 +652,12 @@ static void execData (struct lexSpec *spec, return ; #if REGX_DEBUG if (elen > 40) - logf (LOG_DEBUG, "data (%d bytes) %.15s ... %.*s", elen, + logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen, ebuf, 15, ebuf + elen-15); else if (elen > 0) - logf (LOG_DEBUG, "data (%d bytes) %.*s", elen, elen, ebuf); + logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf); else - logf (LOG_DEBUG, "data (%d bytes)", elen); + logf (LOG_LOG, "data (%d bytes)", elen); #endif if (spec->d1_level <= 1) @@ -821,45 +672,30 @@ static void execData (struct lexSpec *spec, { org_len = 0; - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_data; + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent); res->u.data.what = DATA1I_text; res->u.data.len = 0; res->u.data.formatted_text = formatted_text; -#if 0 - if (elen > DATA1_LOCALDATA) - res->u.data.data = nmem_malloc (spec->m, elen); - else - res->u.data.data = res->lbuf; - memcpy (res->u.data.data, ebuf, elen); -#else res->u.data.data = 0; -#endif - res->root = parent->root; - parent->last_child = res; if (spec->d1_stack[spec->d1_level]) spec->d1_stack[spec->d1_level]->next = res; - else - parent->child = res; spec->d1_stack[spec->d1_level] = res; } - if (org_len + elen >= spec->concatBuf[spec->d1_level]->max) + if (org_len + elen >= spec->concatBuf[spec->d1_level].max) { char *old_buf, *new_buf; - spec->concatBuf[spec->d1_level]->max = org_len + elen + 256; - new_buf = xmalloc (spec->concatBuf[spec->d1_level]->max); - if ((old_buf = spec->concatBuf[spec->d1_level]->buf)) + spec->concatBuf[spec->d1_level].max = org_len + elen + 256; + new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max); + if ((old_buf = spec->concatBuf[spec->d1_level].buf)) { memcpy (new_buf, old_buf, org_len); xfree (old_buf); } - spec->concatBuf[spec->d1_level]->buf = new_buf; + spec->concatBuf[spec->d1_level].buf = new_buf; } - assert (spec->concatBuf[spec->d1_level]); - memcpy (spec->concatBuf[spec->d1_level]->buf + org_len, ebuf, elen); + memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen); res->u.data.len += elen; } @@ -880,10 +716,10 @@ static void tagDataRelease (struct lexSpec *spec) assert (!res->u.data.data); assert (res->u.data.len > 0); if (res->u.data.len > DATA1_LOCALDATA) - res->u.data.data = nmem_malloc (spec->m, res->u.data.len); + res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len); else res->u.data.data = res->lbuf; - memcpy (res->u.data.data, spec->concatBuf[spec->d1_level]->buf, + memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf, res->u.data.len); } } @@ -915,7 +751,7 @@ static void variantBegin (struct lexSpec *spec, ttype[type_len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "variant begin %s %s (%d)", tclass, ttype, + logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype, spec->d1_level); #endif @@ -926,21 +762,9 @@ static void variantBegin (struct lexSpec *spec, if (parent->which != DATA1N_variant) { - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_variant; - res->u.variant.type = 0; - res->u.variant.value = 0; - res->root = parent->root; - - parent->last_child = res; + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent); if (spec->d1_stack[spec->d1_level]) - { tagDataRelease (spec); - spec->d1_stack[spec->d1_level]->next = res; - } - else - parent->child = res; spec->d1_stack[spec->d1_level] = res; spec->d1_stack[++(spec->d1_level)] = NULL; } @@ -952,13 +776,10 @@ static void variantBegin (struct lexSpec *spec, } #if REGX_DEBUG - logf (LOG_DEBUG, "variant node (%d)", spec->d1_level); + logf (LOG_LOG, "variant node (%d)", spec->d1_level); #endif parent = spec->d1_stack[spec->d1_level-1]; - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_variant; - res->root = parent->root; + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent); res->u.variant.type = tp; if (value_len >= DATA1_LOCALDATA) @@ -968,14 +789,8 @@ static void variantBegin (struct lexSpec *spec, res->u.variant.value = res->lbuf; - parent->last_child = res; if (spec->d1_stack[spec->d1_level]) - { tagDataRelease (spec); - spec->d1_stack[spec->d1_level]->next = res; - } - else - parent->child = res; spec->d1_stack[spec->d1_level] = res; spec->d1_stack[++(spec->d1_level)] = NULL; } @@ -996,9 +811,9 @@ static void tagStrip (const char **tag, int *len) static void tagBegin (struct lexSpec *spec, const char *tag, int len) { - struct data1_node *parent = spec->d1_stack[spec->d1_level -1]; + struct data1_node *parent; data1_element *elem = NULL; - data1_node *partag = get_parent_tag(spec->dh, parent); + data1_node *partag; data1_node *res; data1_element *e = NULL; int localtag = 0; @@ -1009,14 +824,14 @@ static void tagBegin (struct lexSpec *spec, return ; } tagStrip (&tag, &len); + + parent = spec->d1_stack[spec->d1_level -1]; + partag = get_parent_tag(spec->dh, parent); - res = data1_mk_node (spec->dh, spec->m); - res->parent = parent; - res->which = DATA1N_tag; - res->u.tag.get_bytes = -1; + res = data1_mk_node2 (spec->dh, spec->m, DATA1N_tag, parent); if (len >= DATA1_LOCALDATA) - res->u.tag.tag = nmem_malloc (spec->m, len+1); + res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1); else res->u.tag.tag = res->lbuf; @@ -1024,7 +839,7 @@ static void tagBegin (struct lexSpec *spec, res->u.tag.tag[len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); + logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); #endif if (parent->which == DATA1N_variant) return ; @@ -1036,19 +851,9 @@ static void tagBegin (struct lexSpec *spec, spec->d1_stack[0]->u.root.absyn, e, res->u.tag.tag); res->u.tag.element = elem; - res->u.tag.node_selected = 0; - res->u.tag.make_variantlist = 0; - res->u.tag.no_data_requested = 0; - res->root = parent->root; - parent->last_child = res; if (spec->d1_stack[spec->d1_level]) - { tagDataRelease (spec); - spec->d1_stack[spec->d1_level]->next = res; - } - else - parent->child = res; spec->d1_stack[spec->d1_level] = res; spec->d1_stack[++(spec->d1_level)] = NULL; } @@ -1071,7 +876,7 @@ static void tagEnd (struct lexSpec *spec, int min_level, break; } #if REGX_DEBUG - logf (LOG_DEBUG, "end tag (%d)", spec->d1_level); + logf (LOG_LOG, "end tag (%d)", spec->d1_level); #endif } @@ -1188,7 +993,8 @@ static int execTok (struct lexSpec *spec, const char **src, else if (*s == '-') { *tokBuf = s++; - while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';') + while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' && + *s != ';') s++; *tokLen = s - *tokBuf; *src = s; @@ -1197,7 +1003,8 @@ static int execTok (struct lexSpec *spec, const char **src, else { *tokBuf = s++; - while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';') + while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' && + *s != ';') s++; *tokLen = s - *tokBuf; } @@ -1218,32 +1025,21 @@ static char *regxStrz (const char *src, int len, char *str) static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, int argc, char **argv) { - struct lexSpec *spec = clientData; + struct lexSpec *spec = (struct lexSpec *) clientData; if (argc < 2) return TCL_ERROR; if (!strcmp(argv[1], "record") && argc == 3) { char *absynName = argv[2]; - data1_absyn *absyn; + data1_node *res; #if REGX_DEBUG - logf (LOG_DEBUG, "begin record %s", absynName); + logf (LOG_LOG, "begin record %s", absynName); #endif - if (!(absyn = data1_get_absyn (spec->dh, absynName))) - logf (LOG_WARN, "Unknown tagset: %s", absynName); - else - { - data1_node *res; - - res = data1_mk_node (spec->dh, spec->m); - res->which = DATA1N_root; - res->u.root.type = absynName; - res->u.root.absyn = absyn; - res->root = res; - - spec->d1_stack[spec->d1_level] = res; - spec->d1_stack[++(spec->d1_level)] = NULL; - } + res = data1_mk_root (spec->dh, spec->m, absynName); + + spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[++(spec->d1_level)] = NULL; } else if (!strcmp(argv[1], "element") && argc == 3) { @@ -1259,7 +1055,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, { struct lexContext *lc = spec->context; #if REGX_DEBUG - logf (LOG_DEBUG, "begin context %s",argv[2]); + logf (LOG_LOG, "begin context %s",argv[2]); #endif while (lc && strcmp (argv[2], lc->name)) lc = lc->next; @@ -1278,7 +1074,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, int argc, char **argv) { - struct lexSpec *spec = clientData; + struct lexSpec *spec = (struct lexSpec *) clientData; if (argc < 2) return TCL_ERROR; @@ -1290,7 +1086,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, (spec->d1_level)--; } #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_LOG, "end record"); #endif spec->stop_flag = 1; } @@ -1298,22 +1094,20 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, { int min_level = 1; char *element = 0; - if (!strcmp(argv[2], "-record")) + if (argc >= 3 && !strcmp(argv[2], "-record")) { min_level = 0; if (argc == 4) element = argv[3]; } else - { if (argc == 3) element = argv[2]; - } tagEnd (spec, min_level, element, (element ? strlen(element) : 0)); if (spec->d1_level == 0) { #if REGX_DEBUG - logf (LOG_DEBUG, "end element end records"); + logf (LOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1321,7 +1115,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, else if (!strcmp (argv[1], "context")) { #if REGX_DEBUG - logf (LOG_DEBUG, "end context"); + logf (LOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1337,7 +1131,7 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, int argi = 1; int textFlag = 0; const char *element = 0; - struct lexSpec *spec = clientData; + struct lexSpec *spec = (struct lexSpec *) clientData; while (argi < argc) { @@ -1360,7 +1154,14 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, while (argi < argc) { +#if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0) + Tcl_DString ds; + char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds); + execData (spec, native, strlen(native), textFlag); + Tcl_DStringFree (&ds); +#else execData (spec, argv[argi], strlen(argv[argi]), textFlag); +#endif argi++; } if (element) @@ -1371,7 +1172,7 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp, int argc, char **argv) { - struct lexSpec *spec = clientData; + struct lexSpec *spec = (struct lexSpec *) clientData; int argi = 1; int offset = 0; int no; @@ -1402,6 +1203,7 @@ static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp, static void execTcl (struct lexSpec *spec, struct regxCode *code) { int i; + int ret; for (i = 0; i < spec->arg_no; i++) { char var_name[10], *var_buf; @@ -1418,7 +1220,19 @@ static void execTcl (struct lexSpec *spec, struct regxCode *code) var_buf[var_len] = ch; } } - Tcl_Eval (spec->tcl_interp, code->str); +#if HAVE_TCL_OBJECTS + ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj); +#else + ret = Tcl_GlobalEval (spec->tcl_interp, code->str); +#endif + if (ret != TCL_OK) + { + const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0); + logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s", + spec->tcl_interp->errorLine, + spec->tcl_interp->result, + err ? err : "[NO ERRORINFO]"); + } } /* HAVE_TCL_H */ #endif @@ -1457,31 +1271,19 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) if (spec->d1_level == 0) { static char absynName[64]; - data1_absyn *absyn; + data1_node *res; if (cmd_len > 63) cmd_len = 63; memcpy (absynName, cmd_str, cmd_len); absynName[cmd_len] = '\0'; - #if REGX_DEBUG - logf (LOG_DEBUG, "begin record %s", absynName); + logf (LOG_LOG, "begin record %s", absynName); #endif - if (!(absyn = data1_get_absyn (spec->dh, absynName))) - logf (LOG_WARN, "Unknown tagset: %s", absynName); - else - { - data1_node *res; - - res = data1_mk_node (spec->dh, spec->m); - res->which = DATA1N_root; - res->u.root.type = absynName; - res->u.root.absyn = absyn; - res->root = res; - - spec->d1_stack[spec->d1_level] = res; - spec->d1_stack[++(spec->d1_level)] = NULL; - } + res = data1_mk_root (spec->dh, spec->m, absynName); + + spec->d1_stack[spec->d1_level] = res; + spec->d1_stack[++(spec->d1_level)] = NULL; } r = execTok (spec, &s, &cmd_str, &cmd_len); } @@ -1532,7 +1334,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); p = regxStrz (cmd_str, cmd_len, ptmp); #if REGX_DEBUG - logf (LOG_DEBUG, "begin context %s", p); + logf (LOG_LOG, "begin context %s", p); #endif while (lc && strcmp (p, lc->name)) lc = lc->next; @@ -1567,7 +1369,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } r = execTok (spec, &s, &cmd_str, &cmd_len); #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_LOG, "end record"); #endif spec->stop_flag = 1; } @@ -1589,7 +1391,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) if (spec->d1_level == 0) { #if REGX_DEBUG - logf (LOG_DEBUG, "end element end records"); + logf (LOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1598,7 +1400,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) else if (!strcmp (p, "context")) { #if REGX_DEBUG - logf (LOG_DEBUG, "end context"); + logf (LOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1794,7 +1596,7 @@ static int execRule (struct lexSpec *spec, struct lexContext *context, int ruleNo, int start_ptr, int *pptr) { #if REGX_DEBUG - logf (LOG_DEBUG, "exec rule %d", ruleNo); + logf (LOG_LOG, "exec rule %d", ruleNo); #endif return execAction (spec, context->fastRule[ruleNo]->actionList, start_ptr, pptr); @@ -1872,7 +1674,7 @@ data1_node *lexNode (struct lexSpec *spec, int *ptr) if (spec->f_win_ef && *ptr != F_WIN_EOF) { #if REGX_DEBUG - logf (LOG_DEBUG, "regx: endf ptr=%d", *ptr); + logf (LOG_LOG, "regx: endf ptr=%d", *ptr); #endif (*spec->f_win_ef)(spec->f_win_fh, *ptr); } @@ -1962,7 +1764,7 @@ static data1_node *lexRoot (struct lexSpec *spec, off_t offset, void grs_destroy(void *clientData) { - struct lexSpecs *specs = clientData; + struct lexSpecs *specs = (struct lexSpecs *) clientData; if (specs->spec) { lexSpecDestroy(&specs->spec); @@ -1972,7 +1774,7 @@ void grs_destroy(void *clientData) void *grs_init(void) { - struct lexSpecs *specs = xmalloc (sizeof(*specs)); + struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs)); specs->spec = 0; return specs; } @@ -1980,11 +1782,11 @@ void *grs_init(void) data1_node *grs_read_regx (struct grs_read_info *p) { int res; - struct lexSpecs *specs = p->clientData; + struct lexSpecs *specs = (struct lexSpecs *) p->clientData; struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_DEBUG, "grs_read_regx"); + logf (LOG_LOG, "grs_read_regx"); #endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { @@ -2026,11 +1828,11 @@ RecTypeGrs recTypeGrs_regx = ®x_type; data1_node *grs_read_tcl (struct grs_read_info *p) { int res; - struct lexSpecs *specs = p->clientData; + struct lexSpecs *specs = (struct lexSpecs *) p->clientData; struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_DEBUG, "grs_read_tcl"); + logf (LOG_LOG, "grs_read_tcl"); #endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { @@ -2038,7 +1840,9 @@ data1_node *grs_read_tcl (struct grs_read_info *p) if (*curLexSpec) lexSpecDestroy (curLexSpec); *curLexSpec = lexSpecCreate (p->type, p->dh); + Tcl_FindExecutable(""); tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp(); + Tcl_Init(tcl_interp); Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);