X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=73744fc44fc1b54485d8bac51235ee3684abd77e;hb=7a49c3db444b475f63722c3da03e15c6db93f1a9;hp=fc5f56460d9830e06fc18d22e73eba98469da981;hpb=4c32050b877fd85e68771370d63f9a96dd7aa5a3;p=idzebra-moved-to-github.git diff --git a/recctrl/regxread.c b/recctrl/regxread.c index fc5f564..73744fc 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,10 +1,29 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2001, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss * * $Log: regxread.c,v $ - * Revision 1.29 1999-07-12 07:27:54 adam + * Revision 1.35 2001-03-29 21:31:31 adam + * Fixed "record begin" for Tcl filter. + * + * Revision 1.34 2000/11/29 14:24:01 adam + * Script configure uses yaz pthreads options. Added locking for + * zebra_register_{lock,unlock}. + * + * Revision 1.33 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.32 1999/09/07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.31 1999/07/14 13:05:29 adam + * Tcl filter works with objects when TCL is version 8 or later; filter + * works with strings otherwise (slow). + * + * Revision 1.30 1999/07/14 10:55:28 adam + * Fixed memory leak. + * + * Revision 1.29 1999/07/12 07:27:54 adam * Improved speed of Tcl processing. Fixed one memory leak. * * Revision 1.28 1999/07/06 12:26:04 adam @@ -182,13 +201,17 @@ #include #include -#include +#include #include #include #include "grsread.h" #if HAVE_TCL_H #include + +#if MAJOR_VERSION >= 8 +#define HAVE_TCL_OBJECTS +#endif #endif #define REGX_DEBUG 0 @@ -207,7 +230,7 @@ struct regxCode { char *str; -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS Tcl_Obj *tcl_obj; #endif }; @@ -249,7 +272,6 @@ struct lexContext { }; struct lexConcatBuf { - int len; int max; char *buf; }; @@ -278,7 +300,7 @@ struct lexSpec { int (*f_win_rf)(void *, char *, size_t); off_t (*f_win_sf)(void *, off_t); - struct lexConcatBuf **concatBuf; + struct lexConcatBuf *concatBuf; int maxLevel; data1_node **d1_stack; int d1_level; @@ -356,7 +378,7 @@ static void regxCodeDel (struct regxCode **pp) struct regxCode *p = *pp; if (p) { -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS if (p->tcl_obj) Tcl_DecrRefCount (p->tcl_obj); #endif @@ -374,7 +396,7 @@ static void regxCodeMk (struct regxCode **pp, const char *buf, int len) p->str = (char *) xmalloc (len+1); memcpy (p->str, buf, len); p->str[len] = '\0'; -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS p->tcl_obj = Tcl_NewStringObj ((char *) buf, len); if (p->tcl_obj) Tcl_IncrRefCount (p->tcl_obj); @@ -470,14 +492,12 @@ static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh) p->f_win_buf = NULL; p->maxLevel = 128; - p->concatBuf = (struct lexConcatBuf **) + p->concatBuf = (struct lexConcatBuf *) xmalloc (sizeof(*p->concatBuf) * p->maxLevel); for (i = 0; i < p->maxLevel; i++) { - p->concatBuf[i] = (struct lexConcatBuf *) - xmalloc (sizeof(**p->concatBuf)); - p->concatBuf[i]->len = p->concatBuf[i]->max = 0; - p->concatBuf[i]->buf = 0; + p->concatBuf[i].max = 0; + p->concatBuf[i].buf = 0; } p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel); p->d1_level = 0; @@ -496,7 +516,7 @@ static void lexSpecDestroy (struct lexSpec **pp) return ; for (i = 0; i < p->maxLevel; i++) - xfree (p->concatBuf[i]); + xfree (p->concatBuf[i].buf); xfree (p->concatBuf); lt = p->context; @@ -506,7 +526,7 @@ static void lexSpecDestroy (struct lexSpec **pp) lexContextDestroy (lt); lt = lt_next; } -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS if (p->tcl_interp) Tcl_DeleteInterp (p->tcl_interp); #endif @@ -692,7 +712,7 @@ int readOneSpec (struct lexSpec *spec, const char *s) break; case REGX_PATTERN: #if REGX_DEBUG - logf (LOG_DEBUG, "rule %d %s", spec->context->ruleNo, s); + logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s); #endif r = dfa_parse (spec->context->dfa, &s); if (r) @@ -824,12 +844,12 @@ static void execData (struct lexSpec *spec, return ; #if REGX_DEBUG if (elen > 40) - logf (LOG_DEBUG, "data (%d bytes) %.15s ... %.*s", elen, + logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen, ebuf, 15, ebuf + elen-15); else if (elen > 0) - logf (LOG_DEBUG, "data (%d bytes) %.*s", elen, elen, ebuf); + logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf); else - logf (LOG_DEBUG, "data (%d bytes)", elen); + logf (LOG_LOG, "data (%d bytes)", elen); #endif if (spec->d1_level <= 1) @@ -868,21 +888,20 @@ static void execData (struct lexSpec *spec, parent->child = res; spec->d1_stack[spec->d1_level] = res; } - if (org_len + elen >= spec->concatBuf[spec->d1_level]->max) + if (org_len + elen >= spec->concatBuf[spec->d1_level].max) { char *old_buf, *new_buf; - spec->concatBuf[spec->d1_level]->max = org_len + elen + 256; - new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level]->max); - if ((old_buf = spec->concatBuf[spec->d1_level]->buf)) + spec->concatBuf[spec->d1_level].max = org_len + elen + 256; + new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max); + if ((old_buf = spec->concatBuf[spec->d1_level].buf)) { memcpy (new_buf, old_buf, org_len); xfree (old_buf); } - spec->concatBuf[spec->d1_level]->buf = new_buf; + spec->concatBuf[spec->d1_level].buf = new_buf; } - assert (spec->concatBuf[spec->d1_level]); - memcpy (spec->concatBuf[spec->d1_level]->buf + org_len, ebuf, elen); + memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen); res->u.data.len += elen; } @@ -906,7 +925,7 @@ static void tagDataRelease (struct lexSpec *spec) res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len); else res->u.data.data = res->lbuf; - memcpy (res->u.data.data, spec->concatBuf[spec->d1_level]->buf, + memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf, res->u.data.len); } } @@ -938,7 +957,7 @@ static void variantBegin (struct lexSpec *spec, ttype[type_len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "variant begin %s %s (%d)", tclass, ttype, + logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype, spec->d1_level); #endif @@ -975,7 +994,7 @@ static void variantBegin (struct lexSpec *spec, } #if REGX_DEBUG - logf (LOG_DEBUG, "variant node (%d)", spec->d1_level); + logf (LOG_LOG, "variant node (%d)", spec->d1_level); #endif parent = spec->d1_stack[spec->d1_level-1]; res = data1_mk_node (spec->dh, spec->m); @@ -1019,9 +1038,9 @@ static void tagStrip (const char **tag, int *len) static void tagBegin (struct lexSpec *spec, const char *tag, int len) { - struct data1_node *parent = spec->d1_stack[spec->d1_level -1]; + struct data1_node *parent; data1_element *elem = NULL; - data1_node *partag = get_parent_tag(spec->dh, parent); + data1_node *partag; data1_node *res; data1_element *e = NULL; int localtag = 0; @@ -1032,11 +1051,12 @@ static void tagBegin (struct lexSpec *spec, return ; } tagStrip (&tag, &len); + + parent = spec->d1_stack[spec->d1_level -1]; + partag = get_parent_tag(spec->dh, parent); - res = data1_mk_node (spec->dh, spec->m); + res = data1_mk_node_type (spec->dh, spec->m, DATA1N_tag); res->parent = parent; - res->which = DATA1N_tag; - res->u.tag.get_bytes = -1; if (len >= DATA1_LOCALDATA) res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1); @@ -1047,7 +1067,7 @@ static void tagBegin (struct lexSpec *spec, res->u.tag.tag[len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); + logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); #endif if (parent->which == DATA1N_variant) return ; @@ -1059,9 +1079,6 @@ static void tagBegin (struct lexSpec *spec, spec->d1_stack[0]->u.root.absyn, e, res->u.tag.tag); res->u.tag.element = elem; - res->u.tag.node_selected = 0; - res->u.tag.make_variantlist = 0; - res->u.tag.no_data_requested = 0; res->root = parent->root; parent->last_child = res; @@ -1094,7 +1111,7 @@ static void tagEnd (struct lexSpec *spec, int min_level, break; } #if REGX_DEBUG - logf (LOG_DEBUG, "end tag (%d)", spec->d1_level); + logf (LOG_LOG, "end tag (%d)", spec->d1_level); #endif } @@ -1252,7 +1269,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, data1_absyn *absyn; #if REGX_DEBUG - logf (LOG_DEBUG, "begin record %s", absynName); + logf (LOG_LOG, "begin record %s", absynName); #endif if (!(absyn = data1_get_absyn (spec->dh, absynName))) logf (LOG_WARN, "Unknown tagset: %s", absynName); @@ -1262,7 +1279,8 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, res = data1_mk_node (spec->dh, spec->m); res->which = DATA1N_root; - res->u.root.type = absynName; + res->u.root.type = + data1_insert_string(spec->dh, res, spec->m, absynName); res->u.root.absyn = absyn; res->root = res; @@ -1284,7 +1302,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, { struct lexContext *lc = spec->context; #if REGX_DEBUG - logf (LOG_DEBUG, "begin context %s",argv[2]); + logf (LOG_LOG, "begin context %s",argv[2]); #endif while (lc && strcmp (argv[2], lc->name)) lc = lc->next; @@ -1315,7 +1333,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, (spec->d1_level)--; } #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_LOG, "end record"); #endif spec->stop_flag = 1; } @@ -1336,7 +1354,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, if (spec->d1_level == 0) { #if REGX_DEBUG - logf (LOG_DEBUG, "end element end records"); + logf (LOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1344,7 +1362,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, else if (!strcmp (argv[1], "context")) { #if REGX_DEBUG - logf (LOG_DEBUG, "end context"); + logf (LOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1442,10 +1460,11 @@ static void execTcl (struct lexSpec *spec, struct regxCode *code) var_buf[var_len] = ch; } } - if (code->tcl_obj) - ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj); - else - ret = Tcl_GlobalEval (spec->tcl_interp, code->str); +#if HAVE_TCL_OBJECTS + ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj); +#else + ret = Tcl_GlobalEval (spec->tcl_interp, code->str); +#endif if (ret != TCL_OK) { const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0); @@ -1500,7 +1519,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) absynName[cmd_len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "begin record %s", absynName); + logf (LOG_LOG, "begin record %s", absynName); #endif if (!(absyn = data1_get_absyn (spec->dh, absynName))) logf (LOG_WARN, "Unknown tagset: %s", absynName); @@ -1567,7 +1586,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); p = regxStrz (cmd_str, cmd_len, ptmp); #if REGX_DEBUG - logf (LOG_DEBUG, "begin context %s", p); + logf (LOG_LOG, "begin context %s", p); #endif while (lc && strcmp (p, lc->name)) lc = lc->next; @@ -1602,7 +1621,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } r = execTok (spec, &s, &cmd_str, &cmd_len); #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_LOG, "end record"); #endif spec->stop_flag = 1; } @@ -1624,7 +1643,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) if (spec->d1_level == 0) { #if REGX_DEBUG - logf (LOG_DEBUG, "end element end records"); + logf (LOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1633,7 +1652,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) else if (!strcmp (p, "context")) { #if REGX_DEBUG - logf (LOG_DEBUG, "end context"); + logf (LOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1829,7 +1848,7 @@ static int execRule (struct lexSpec *spec, struct lexContext *context, int ruleNo, int start_ptr, int *pptr) { #if REGX_DEBUG - logf (LOG_DEBUG, "exec rule %d", ruleNo); + logf (LOG_LOG, "exec rule %d", ruleNo); #endif return execAction (spec, context->fastRule[ruleNo]->actionList, start_ptr, pptr); @@ -1907,7 +1926,7 @@ data1_node *lexNode (struct lexSpec *spec, int *ptr) if (spec->f_win_ef && *ptr != F_WIN_EOF) { #if REGX_DEBUG - logf (LOG_DEBUG, "regx: endf ptr=%d", *ptr); + logf (LOG_LOG, "regx: endf ptr=%d", *ptr); #endif (*spec->f_win_ef)(spec->f_win_fh, *ptr); } @@ -2019,7 +2038,7 @@ data1_node *grs_read_regx (struct grs_read_info *p) struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_DEBUG, "grs_read_regx"); + logf (LOG_LOG, "grs_read_regx"); #endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { @@ -2065,7 +2084,7 @@ data1_node *grs_read_tcl (struct grs_read_info *p) struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_DEBUG, "grs_read_tcl"); + logf (LOG_LOG, "grs_read_tcl"); #endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) {