X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=d24be824a12b2424d7792f3d7589c374086032af;hb=01ddc55fd5a59535e229c09440cfdadccadf3555;hp=76f8e59d785022c1f6567698ce568ecf32407760;hpb=4d6d0d754cc1f0b1f7f814a2268e679d26b890f7;p=idzebra-moved-to-github.git diff --git a/recctrl/regxread.c b/recctrl/regxread.c index 76f8e59..d24be82 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -4,7 +4,23 @@ * Sebastian Hammer, Adam Dickmeiss * * $Log: regxread.c,v $ - * Revision 1.27 1999-06-28 13:25:40 quinn + * Revision 1.32 1999-09-07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.31 1999/07/14 13:05:29 adam + * Tcl filter works with objects when TCL is version 8 or later; filter + * works with strings otherwise (slow). + * + * Revision 1.30 1999/07/14 10:55:28 adam + * Fixed memory leak. + * + * Revision 1.29 1999/07/12 07:27:54 adam + * Improved speed of Tcl processing. Fixed one memory leak. + * + * Revision 1.28 1999/07/06 12:26:04 adam + * Fixed filters so that MS-DOS CR is ignored. + * + * Revision 1.27 1999/06/28 13:25:40 quinn * Improved diagnostics for Tcl * * Revision 1.26 1999/05/26 07:49:14 adam @@ -183,6 +199,10 @@ #if HAVE_TCL_H #include + +#if MAJOR_VERSION >= 8 +#define HAVE_TCL_OBJECTS +#endif #endif #define REGX_DEBUG 0 @@ -201,6 +221,9 @@ struct regxCode { char *str; +#if HAVE_TCL_OBJECTS + Tcl_Obj *tcl_obj; +#endif }; struct lexRuleAction { @@ -240,7 +263,6 @@ struct lexContext { }; struct lexConcatBuf { - int len; int max; char *buf; }; @@ -269,7 +291,7 @@ struct lexSpec { int (*f_win_rf)(void *, char *, size_t); off_t (*f_win_sf)(void *, off_t); - struct lexConcatBuf **concatBuf; + struct lexConcatBuf *concatBuf; int maxLevel; data1_node **d1_stack; int d1_level; @@ -347,6 +369,10 @@ static void regxCodeDel (struct regxCode **pp) struct regxCode *p = *pp; if (p) { +#if HAVE_TCL_OBJECTS + if (p->tcl_obj) + Tcl_DecrRefCount (p->tcl_obj); +#endif xfree (p->str); xfree (p); *pp = NULL; @@ -361,6 +387,11 @@ static void regxCodeMk (struct regxCode **pp, const char *buf, int len) p->str = (char *) xmalloc (len+1); memcpy (p->str, buf, len); p->str[len] = '\0'; +#if HAVE_TCL_OBJECTS + p->tcl_obj = Tcl_NewStringObj ((char *) buf, len); + if (p->tcl_obj) + Tcl_IncrRefCount (p->tcl_obj); +#endif *pp = p; } @@ -417,6 +448,7 @@ static void lexContextDestroy (struct lexContext *p) { struct lexRule *rp, *rp1; + dfa_delete (&p->dfa); xfree (p->fastRule); for (rp = p->rules; rp; rp = rp1) { @@ -426,6 +458,7 @@ static void lexContextDestroy (struct lexContext *p) } actionListDel (&p->beginActionList); actionListDel (&p->endActionList); + actionListDel (&p->initActionList); xfree (p->name); xfree (p); } @@ -450,14 +483,12 @@ static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh) p->f_win_buf = NULL; p->maxLevel = 128; - p->concatBuf = (struct lexConcatBuf **) + p->concatBuf = (struct lexConcatBuf *) xmalloc (sizeof(*p->concatBuf) * p->maxLevel); for (i = 0; i < p->maxLevel; i++) { - p->concatBuf[i] = (struct lexConcatBuf *) - xmalloc (sizeof(**p->concatBuf)); - p->concatBuf[i]->len = p->concatBuf[i]->max = 0; - p->concatBuf[i]->buf = 0; + p->concatBuf[i].max = 0; + p->concatBuf[i].buf = 0; } p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel); p->d1_level = 0; @@ -476,7 +507,7 @@ static void lexSpecDestroy (struct lexSpec **pp) return ; for (i = 0; i < p->maxLevel; i++) - xfree (p->concatBuf[i]); + xfree (p->concatBuf[i].buf); xfree (p->concatBuf); lt = p->context; @@ -486,7 +517,7 @@ static void lexSpecDestroy (struct lexSpec **pp) lexContextDestroy (lt); lt = lt_next; } -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS if (p->tcl_interp) Tcl_DeleteInterp (p->tcl_interp); #endif @@ -504,7 +535,7 @@ static int readParseToken (const char **cpp, int *len) char cmd[32]; int i, level; - while (*cp == ' ' || *cp == '\t' || *cp == '\n') + while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r') cp++; switch (*cp) { @@ -548,7 +579,8 @@ static int readParseToken (const char **cpp, int *len) { logf (LOG_WARN, "bad character %d %c", *cp, *cp); cp++; - while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n') + while (*cp && *cp != ' ' && *cp != '\t' && + *cp != '\n' && *cp != '\r') cp++; *cpp = cp; return 0; @@ -697,41 +729,40 @@ int readOneSpec (struct lexSpec *spec, const char *s) int readFileSpec (struct lexSpec *spec) { struct lexContext *lc; - char *lineBuf; - int lineSize = 512; int c, i, errors = 0; FILE *spec_inf = 0; + WRBUF lineBuf; + char fname[256]; - lineBuf = (char *) xmalloc (1+lineSize); #if HAVE_TCL_H if (spec->tcl_interp) { - sprintf (lineBuf, "%s.tflt", spec->name); - spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), lineBuf, "r"); + sprintf (fname, "%s.tflt", spec->name); + spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), fname, "r"); } #endif if (!spec_inf) { - sprintf (lineBuf, "%s.flt", spec->name); - spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), lineBuf, "r"); + sprintf (fname, "%s.flt", spec->name); + spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), fname, "r"); } if (!spec_inf) { logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name); - xfree (lineBuf); return -1; } - logf (LOG_LOG, "reading regx filter %s", lineBuf); + logf (LOG_LOG, "reading regx filter %s", fname); #if HAVE_TCL_H if (spec->tcl_interp) logf (LOG_LOG, "Tcl enabled"); #endif + lineBuf = wrbuf_alloc(); spec->lineNo = 0; c = getc (spec_inf); while (c != EOF) { - int off = 0; - if (c == '#' || c == '\n' || c == ' ' || c == '\t') + wrbuf_rewind (lineBuf); + if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r') { while (c != '\n' && c != EOF) c = getc (spec_inf); @@ -742,12 +773,14 @@ int readFileSpec (struct lexSpec *spec) else { int addLine = 0; - - lineBuf[off++] = c; + while (1) { int c1 = c; + wrbuf_putc(lineBuf, c); c = getc (spec_inf); + while (c == '\r') + c = getc (spec_inf); if (c == EOF) break; if (c1 == '\n') @@ -756,17 +789,14 @@ int readFileSpec (struct lexSpec *spec) break; addLine++; } - lineBuf[off] = c; - if (off < lineSize) - off++; } - lineBuf[off] = '\0'; - readOneSpec (spec, lineBuf); + wrbuf_putc(lineBuf, '\0'); + readOneSpec (spec, wrbuf_buf(lineBuf)); spec->lineNo += addLine; } } fclose (spec_inf); - xfree (lineBuf); + wrbuf_free(lineBuf, 1); #if 0 debug_dfa_trav = 1; @@ -849,21 +879,20 @@ static void execData (struct lexSpec *spec, parent->child = res; spec->d1_stack[spec->d1_level] = res; } - if (org_len + elen >= spec->concatBuf[spec->d1_level]->max) + if (org_len + elen >= spec->concatBuf[spec->d1_level].max) { char *old_buf, *new_buf; - spec->concatBuf[spec->d1_level]->max = org_len + elen + 256; - new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level]->max); - if ((old_buf = spec->concatBuf[spec->d1_level]->buf)) + spec->concatBuf[spec->d1_level].max = org_len + elen + 256; + new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max); + if ((old_buf = spec->concatBuf[spec->d1_level].buf)) { memcpy (new_buf, old_buf, org_len); xfree (old_buf); } - spec->concatBuf[spec->d1_level]->buf = new_buf; + spec->concatBuf[spec->d1_level].buf = new_buf; } - assert (spec->concatBuf[spec->d1_level]); - memcpy (spec->concatBuf[spec->d1_level]->buf + org_len, ebuf, elen); + memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen); res->u.data.len += elen; } @@ -887,7 +916,7 @@ static void tagDataRelease (struct lexSpec *spec) res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len); else res->u.data.data = res->lbuf; - memcpy (res->u.data.data, spec->concatBuf[spec->d1_level]->buf, + memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf, res->u.data.len); } } @@ -1000,9 +1029,9 @@ static void tagStrip (const char **tag, int *len) static void tagBegin (struct lexSpec *spec, const char *tag, int len) { - struct data1_node *parent = spec->d1_stack[spec->d1_level -1]; + struct data1_node *parent; data1_element *elem = NULL; - data1_node *partag = get_parent_tag(spec->dh, parent); + data1_node *partag; data1_node *res; data1_element *e = NULL; int localtag = 0; @@ -1013,6 +1042,9 @@ static void tagBegin (struct lexSpec *spec, return ; } tagStrip (&tag, &len); + + parent = spec->d1_stack[spec->d1_level -1]; + partag = get_parent_tag(spec->dh, parent); res = data1_mk_node (spec->dh, spec->m); res->parent = parent; @@ -1192,7 +1224,8 @@ static int execTok (struct lexSpec *spec, const char **src, else if (*s == '-') { *tokBuf = s++; - while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';') + while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' && + *s != ';') s++; *tokLen = s - *tokBuf; *src = s; @@ -1201,7 +1234,8 @@ static int execTok (struct lexSpec *spec, const char **src, else { *tokBuf = s++; - while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';') + while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' && + *s != ';') s++; *tokLen = s - *tokBuf; } @@ -1404,6 +1438,7 @@ static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp, static void execTcl (struct lexSpec *spec, struct regxCode *code) { int i; + int ret; for (i = 0; i < spec->arg_no; i++) { char var_name[10], *var_buf; @@ -1420,7 +1455,12 @@ static void execTcl (struct lexSpec *spec, struct regxCode *code) var_buf[var_len] = ch; } } - if (Tcl_Eval (spec->tcl_interp, code->str) != TCL_OK) +#if HAVE_TCL_OBJECTS + ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj); +#else + ret = Tcl_GlobalEval (spec->tcl_interp, code->str); +#endif + if (ret != TCL_OK) { const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0); logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s", @@ -1428,7 +1468,6 @@ static void execTcl (struct lexSpec *spec, struct regxCode *code) spec->tcl_interp->result, err ? err : "[NO ERRORINFO]"); } - } /* HAVE_TCL_H */ #endif