X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=6fdbd89b2ce47bd18af85a7bc04fd4922f903b54;hb=5adb31268b057741850de38eafd42baf8dc947ea;hp=0662a1a0e847188c4d707cfba8e51d69f49a2ec2;hpb=0a76e10d0d8914522c3d54723250f8141e7ad13f;p=idzebra-moved-to-github.git diff --git a/recctrl/regxread.c b/recctrl/regxread.c index 0662a1a..6fdbd89 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,10 +1,32 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2001, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss * * $Log: regxread.c,v $ - * Revision 1.30 1999-07-14 10:55:28 adam + * Revision 1.37 2001-05-29 08:51:59 adam + * More fixes for character encodings. + * + * Revision 1.36 2001/05/22 21:02:26 adam + * Fixes for Tcl UTF8 character handling. + * + * Revision 1.35 2001/03/29 21:31:31 adam + * Fixed "record begin" for Tcl filter. + * + * Revision 1.34 2000/11/29 14:24:01 adam + * Script configure uses yaz pthreads options. Added locking for + * zebra_register_{lock,unlock}. + * + * Revision 1.33 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.32 1999/09/07 07:19:21 adam + * Work on character mapping. Implemented replace rules. + * + * Revision 1.31 1999/07/14 13:05:29 adam + * Tcl filter works with objects when TCL is version 8 or later; filter + * works with strings otherwise (slow). + * + * Revision 1.30 1999/07/14 10:55:28 adam * Fixed memory leak. * * Revision 1.29 1999/07/12 07:27:54 adam @@ -185,13 +207,17 @@ #include #include -#include +#include #include #include #include "grsread.h" #if HAVE_TCL_H #include + +#if MAJOR_VERSION >= 8 +#define HAVE_TCL_OBJECTS +#endif #endif #define REGX_DEBUG 0 @@ -210,7 +236,7 @@ struct regxCode { char *str; -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS Tcl_Obj *tcl_obj; #endif }; @@ -358,7 +384,7 @@ static void regxCodeDel (struct regxCode **pp) struct regxCode *p = *pp; if (p) { -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS if (p->tcl_obj) Tcl_DecrRefCount (p->tcl_obj); #endif @@ -376,7 +402,7 @@ static void regxCodeMk (struct regxCode **pp, const char *buf, int len) p->str = (char *) xmalloc (len+1); memcpy (p->str, buf, len); p->str[len] = '\0'; -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS p->tcl_obj = Tcl_NewStringObj ((char *) buf, len); if (p->tcl_obj) Tcl_IncrRefCount (p->tcl_obj); @@ -506,7 +532,7 @@ static void lexSpecDestroy (struct lexSpec **pp) lexContextDestroy (lt); lt = lt_next; } -#if HAVE_TCL_H +#if HAVE_TCL_OBJECTS if (p->tcl_interp) Tcl_DeleteInterp (p->tcl_interp); #endif @@ -692,7 +718,7 @@ int readOneSpec (struct lexSpec *spec, const char *s) break; case REGX_PATTERN: #if REGX_DEBUG - logf (LOG_DEBUG, "rule %d %s", spec->context->ruleNo, s); + logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s); #endif r = dfa_parse (spec->context->dfa, &s); if (r) @@ -824,12 +850,12 @@ static void execData (struct lexSpec *spec, return ; #if REGX_DEBUG if (elen > 40) - logf (LOG_DEBUG, "data (%d bytes) %.15s ... %.*s", elen, + logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen, ebuf, 15, ebuf + elen-15); else if (elen > 0) - logf (LOG_DEBUG, "data (%d bytes) %.*s", elen, elen, ebuf); + logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf); else - logf (LOG_DEBUG, "data (%d bytes)", elen); + logf (LOG_LOG, "data (%d bytes)", elen); #endif if (spec->d1_level <= 1) @@ -937,7 +963,7 @@ static void variantBegin (struct lexSpec *spec, ttype[type_len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "variant begin %s %s (%d)", tclass, ttype, + logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype, spec->d1_level); #endif @@ -974,7 +1000,7 @@ static void variantBegin (struct lexSpec *spec, } #if REGX_DEBUG - logf (LOG_DEBUG, "variant node (%d)", spec->d1_level); + logf (LOG_LOG, "variant node (%d)", spec->d1_level); #endif parent = spec->d1_stack[spec->d1_level-1]; res = data1_mk_node (spec->dh, spec->m); @@ -1018,9 +1044,9 @@ static void tagStrip (const char **tag, int *len) static void tagBegin (struct lexSpec *spec, const char *tag, int len) { - struct data1_node *parent = spec->d1_stack[spec->d1_level -1]; + struct data1_node *parent; data1_element *elem = NULL; - data1_node *partag = get_parent_tag(spec->dh, parent); + data1_node *partag; data1_node *res; data1_element *e = NULL; int localtag = 0; @@ -1031,11 +1057,12 @@ static void tagBegin (struct lexSpec *spec, return ; } tagStrip (&tag, &len); + + parent = spec->d1_stack[spec->d1_level -1]; + partag = get_parent_tag(spec->dh, parent); - res = data1_mk_node (spec->dh, spec->m); + res = data1_mk_node_type (spec->dh, spec->m, DATA1N_tag); res->parent = parent; - res->which = DATA1N_tag; - res->u.tag.get_bytes = -1; if (len >= DATA1_LOCALDATA) res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1); @@ -1046,7 +1073,7 @@ static void tagBegin (struct lexSpec *spec, res->u.tag.tag[len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); + logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); #endif if (parent->which == DATA1N_variant) return ; @@ -1058,9 +1085,6 @@ static void tagBegin (struct lexSpec *spec, spec->d1_stack[0]->u.root.absyn, e, res->u.tag.tag); res->u.tag.element = elem; - res->u.tag.node_selected = 0; - res->u.tag.make_variantlist = 0; - res->u.tag.no_data_requested = 0; res->root = parent->root; parent->last_child = res; @@ -1093,7 +1117,7 @@ static void tagEnd (struct lexSpec *spec, int min_level, break; } #if REGX_DEBUG - logf (LOG_DEBUG, "end tag (%d)", spec->d1_level); + logf (LOG_LOG, "end tag (%d)", spec->d1_level); #endif } @@ -1251,7 +1275,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, data1_absyn *absyn; #if REGX_DEBUG - logf (LOG_DEBUG, "begin record %s", absynName); + logf (LOG_LOG, "begin record %s", absynName); #endif if (!(absyn = data1_get_absyn (spec->dh, absynName))) logf (LOG_WARN, "Unknown tagset: %s", absynName); @@ -1261,7 +1285,8 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, res = data1_mk_node (spec->dh, spec->m); res->which = DATA1N_root; - res->u.root.type = absynName; + res->u.root.type = + data1_insert_string(spec->dh, res, spec->m, absynName); res->u.root.absyn = absyn; res->root = res; @@ -1283,7 +1308,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, { struct lexContext *lc = spec->context; #if REGX_DEBUG - logf (LOG_DEBUG, "begin context %s",argv[2]); + logf (LOG_LOG, "begin context %s",argv[2]); #endif while (lc && strcmp (argv[2], lc->name)) lc = lc->next; @@ -1314,7 +1339,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, (spec->d1_level)--; } #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_LOG, "end record"); #endif spec->stop_flag = 1; } @@ -1335,7 +1360,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, if (spec->d1_level == 0) { #if REGX_DEBUG - logf (LOG_DEBUG, "end element end records"); + logf (LOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1343,7 +1368,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, else if (!strcmp (argv[1], "context")) { #if REGX_DEBUG - logf (LOG_DEBUG, "end context"); + logf (LOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1382,7 +1407,14 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, while (argi < argc) { +#if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0) + Tcl_DString ds; + char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds); + execData (spec, native, strlen(native), textFlag); + Tcl_DStringFree (&ds); +#else execData (spec, argv[argi], strlen(argv[argi]), textFlag); +#endif argi++; } if (element) @@ -1441,10 +1473,11 @@ static void execTcl (struct lexSpec *spec, struct regxCode *code) var_buf[var_len] = ch; } } - if (code->tcl_obj) - ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj); - else - ret = Tcl_GlobalEval (spec->tcl_interp, code->str); +#if HAVE_TCL_OBJECTS + ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj); +#else + ret = Tcl_GlobalEval (spec->tcl_interp, code->str); +#endif if (ret != TCL_OK) { const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0); @@ -1499,7 +1532,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) absynName[cmd_len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "begin record %s", absynName); + logf (LOG_LOG, "begin record %s", absynName); #endif if (!(absyn = data1_get_absyn (spec->dh, absynName))) logf (LOG_WARN, "Unknown tagset: %s", absynName); @@ -1566,7 +1599,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); p = regxStrz (cmd_str, cmd_len, ptmp); #if REGX_DEBUG - logf (LOG_DEBUG, "begin context %s", p); + logf (LOG_LOG, "begin context %s", p); #endif while (lc && strcmp (p, lc->name)) lc = lc->next; @@ -1601,7 +1634,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } r = execTok (spec, &s, &cmd_str, &cmd_len); #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_LOG, "end record"); #endif spec->stop_flag = 1; } @@ -1623,7 +1656,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) if (spec->d1_level == 0) { #if REGX_DEBUG - logf (LOG_DEBUG, "end element end records"); + logf (LOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1632,7 +1665,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) else if (!strcmp (p, "context")) { #if REGX_DEBUG - logf (LOG_DEBUG, "end context"); + logf (LOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1828,7 +1861,7 @@ static int execRule (struct lexSpec *spec, struct lexContext *context, int ruleNo, int start_ptr, int *pptr) { #if REGX_DEBUG - logf (LOG_DEBUG, "exec rule %d", ruleNo); + logf (LOG_LOG, "exec rule %d", ruleNo); #endif return execAction (spec, context->fastRule[ruleNo]->actionList, start_ptr, pptr); @@ -1906,7 +1939,7 @@ data1_node *lexNode (struct lexSpec *spec, int *ptr) if (spec->f_win_ef && *ptr != F_WIN_EOF) { #if REGX_DEBUG - logf (LOG_DEBUG, "regx: endf ptr=%d", *ptr); + logf (LOG_LOG, "regx: endf ptr=%d", *ptr); #endif (*spec->f_win_ef)(spec->f_win_fh, *ptr); } @@ -2018,7 +2051,7 @@ data1_node *grs_read_regx (struct grs_read_info *p) struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_DEBUG, "grs_read_regx"); + logf (LOG_LOG, "grs_read_regx"); #endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { @@ -2064,7 +2097,7 @@ data1_node *grs_read_tcl (struct grs_read_info *p) struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_DEBUG, "grs_read_tcl"); + logf (LOG_LOG, "grs_read_tcl"); #endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { @@ -2072,7 +2105,9 @@ data1_node *grs_read_tcl (struct grs_read_info *p) if (*curLexSpec) lexSpecDestroy (curLexSpec); *curLexSpec = lexSpecCreate (p->type, p->dh); + Tcl_FindExecutable(""); tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp(); + Tcl_Init(tcl_interp); Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);