X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=6fdbd89b2ce47bd18af85a7bc04fd4922f903b54;hb=5adb31268b057741850de38eafd42baf8dc947ea;hp=d24be824a12b2424d7792f3d7589c374086032af;hpb=01ddc55fd5a59535e229c09440cfdadccadf3555;p=idzebra-moved-to-github.git diff --git a/recctrl/regxread.c b/recctrl/regxread.c index d24be82..6fdbd89 100644 --- a/recctrl/regxread.c +++ b/recctrl/regxread.c @@ -1,10 +1,25 @@ /* - * Copyright (C) 1994-1999, Index Data + * Copyright (C) 1994-2001, Index Data * All rights reserved. - * Sebastian Hammer, Adam Dickmeiss * * $Log: regxread.c,v $ - * Revision 1.32 1999-09-07 07:19:21 adam + * Revision 1.37 2001-05-29 08:51:59 adam + * More fixes for character encodings. + * + * Revision 1.36 2001/05/22 21:02:26 adam + * Fixes for Tcl UTF8 character handling. + * + * Revision 1.35 2001/03/29 21:31:31 adam + * Fixed "record begin" for Tcl filter. + * + * Revision 1.34 2000/11/29 14:24:01 adam + * Script configure uses yaz pthreads options. Added locking for + * zebra_register_{lock,unlock}. + * + * Revision 1.33 1999/11/30 13:48:04 adam + * Improved installation. Updated for inclusion of YAZ header files. + * + * Revision 1.32 1999/09/07 07:19:21 adam * Work on character mapping. Implemented replace rules. * * Revision 1.31 1999/07/14 13:05:29 adam @@ -192,7 +207,7 @@ #include #include -#include +#include #include #include #include "grsread.h" @@ -703,7 +718,7 @@ int readOneSpec (struct lexSpec *spec, const char *s) break; case REGX_PATTERN: #if REGX_DEBUG - logf (LOG_DEBUG, "rule %d %s", spec->context->ruleNo, s); + logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s); #endif r = dfa_parse (spec->context->dfa, &s); if (r) @@ -835,12 +850,12 @@ static void execData (struct lexSpec *spec, return ; #if REGX_DEBUG if (elen > 40) - logf (LOG_DEBUG, "data (%d bytes) %.15s ... %.*s", elen, + logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen, ebuf, 15, ebuf + elen-15); else if (elen > 0) - logf (LOG_DEBUG, "data (%d bytes) %.*s", elen, elen, ebuf); + logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf); else - logf (LOG_DEBUG, "data (%d bytes)", elen); + logf (LOG_LOG, "data (%d bytes)", elen); #endif if (spec->d1_level <= 1) @@ -948,7 +963,7 @@ static void variantBegin (struct lexSpec *spec, ttype[type_len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "variant begin %s %s (%d)", tclass, ttype, + logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype, spec->d1_level); #endif @@ -985,7 +1000,7 @@ static void variantBegin (struct lexSpec *spec, } #if REGX_DEBUG - logf (LOG_DEBUG, "variant node (%d)", spec->d1_level); + logf (LOG_LOG, "variant node (%d)", spec->d1_level); #endif parent = spec->d1_stack[spec->d1_level-1]; res = data1_mk_node (spec->dh, spec->m); @@ -1046,10 +1061,8 @@ static void tagBegin (struct lexSpec *spec, parent = spec->d1_stack[spec->d1_level -1]; partag = get_parent_tag(spec->dh, parent); - res = data1_mk_node (spec->dh, spec->m); + res = data1_mk_node_type (spec->dh, spec->m, DATA1N_tag); res->parent = parent; - res->which = DATA1N_tag; - res->u.tag.get_bytes = -1; if (len >= DATA1_LOCALDATA) res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1); @@ -1060,7 +1073,7 @@ static void tagBegin (struct lexSpec *spec, res->u.tag.tag[len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); + logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level); #endif if (parent->which == DATA1N_variant) return ; @@ -1072,9 +1085,6 @@ static void tagBegin (struct lexSpec *spec, spec->d1_stack[0]->u.root.absyn, e, res->u.tag.tag); res->u.tag.element = elem; - res->u.tag.node_selected = 0; - res->u.tag.make_variantlist = 0; - res->u.tag.no_data_requested = 0; res->root = parent->root; parent->last_child = res; @@ -1107,7 +1117,7 @@ static void tagEnd (struct lexSpec *spec, int min_level, break; } #if REGX_DEBUG - logf (LOG_DEBUG, "end tag (%d)", spec->d1_level); + logf (LOG_LOG, "end tag (%d)", spec->d1_level); #endif } @@ -1265,7 +1275,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, data1_absyn *absyn; #if REGX_DEBUG - logf (LOG_DEBUG, "begin record %s", absynName); + logf (LOG_LOG, "begin record %s", absynName); #endif if (!(absyn = data1_get_absyn (spec->dh, absynName))) logf (LOG_WARN, "Unknown tagset: %s", absynName); @@ -1275,7 +1285,8 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, res = data1_mk_node (spec->dh, spec->m); res->which = DATA1N_root; - res->u.root.type = absynName; + res->u.root.type = + data1_insert_string(spec->dh, res, spec->m, absynName); res->u.root.absyn = absyn; res->root = res; @@ -1297,7 +1308,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp, { struct lexContext *lc = spec->context; #if REGX_DEBUG - logf (LOG_DEBUG, "begin context %s",argv[2]); + logf (LOG_LOG, "begin context %s",argv[2]); #endif while (lc && strcmp (argv[2], lc->name)) lc = lc->next; @@ -1328,7 +1339,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, (spec->d1_level)--; } #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_LOG, "end record"); #endif spec->stop_flag = 1; } @@ -1349,7 +1360,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, if (spec->d1_level == 0) { #if REGX_DEBUG - logf (LOG_DEBUG, "end element end records"); + logf (LOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1357,7 +1368,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp, else if (!strcmp (argv[1], "context")) { #if REGX_DEBUG - logf (LOG_DEBUG, "end context"); + logf (LOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1396,7 +1407,14 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp, while (argi < argc) { +#if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0) + Tcl_DString ds; + char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds); + execData (spec, native, strlen(native), textFlag); + Tcl_DStringFree (&ds); +#else execData (spec, argv[argi], strlen(argv[argi]), textFlag); +#endif argi++; } if (element) @@ -1514,7 +1532,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) absynName[cmd_len] = '\0'; #if REGX_DEBUG - logf (LOG_DEBUG, "begin record %s", absynName); + logf (LOG_LOG, "begin record %s", absynName); #endif if (!(absyn = data1_get_absyn (spec->dh, absynName))) logf (LOG_WARN, "Unknown tagset: %s", absynName); @@ -1581,7 +1599,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) r = execTok (spec, &s, &cmd_str, &cmd_len); p = regxStrz (cmd_str, cmd_len, ptmp); #if REGX_DEBUG - logf (LOG_DEBUG, "begin context %s", p); + logf (LOG_LOG, "begin context %s", p); #endif while (lc && strcmp (p, lc->name)) lc = lc->next; @@ -1616,7 +1634,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) } r = execTok (spec, &s, &cmd_str, &cmd_len); #if REGX_DEBUG - logf (LOG_DEBUG, "end record"); + logf (LOG_LOG, "end record"); #endif spec->stop_flag = 1; } @@ -1638,7 +1656,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) if (spec->d1_level == 0) { #if REGX_DEBUG - logf (LOG_DEBUG, "end element end records"); + logf (LOG_LOG, "end element end records"); #endif spec->stop_flag = 1; } @@ -1647,7 +1665,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code) else if (!strcmp (p, "context")) { #if REGX_DEBUG - logf (LOG_DEBUG, "end context"); + logf (LOG_LOG, "end context"); #endif if (spec->context_stack_top) (spec->context_stack_top)--; @@ -1843,7 +1861,7 @@ static int execRule (struct lexSpec *spec, struct lexContext *context, int ruleNo, int start_ptr, int *pptr) { #if REGX_DEBUG - logf (LOG_DEBUG, "exec rule %d", ruleNo); + logf (LOG_LOG, "exec rule %d", ruleNo); #endif return execAction (spec, context->fastRule[ruleNo]->actionList, start_ptr, pptr); @@ -1921,7 +1939,7 @@ data1_node *lexNode (struct lexSpec *spec, int *ptr) if (spec->f_win_ef && *ptr != F_WIN_EOF) { #if REGX_DEBUG - logf (LOG_DEBUG, "regx: endf ptr=%d", *ptr); + logf (LOG_LOG, "regx: endf ptr=%d", *ptr); #endif (*spec->f_win_ef)(spec->f_win_fh, *ptr); } @@ -2033,7 +2051,7 @@ data1_node *grs_read_regx (struct grs_read_info *p) struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_DEBUG, "grs_read_regx"); + logf (LOG_LOG, "grs_read_regx"); #endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { @@ -2079,7 +2097,7 @@ data1_node *grs_read_tcl (struct grs_read_info *p) struct lexSpec **curLexSpec = &specs->spec; #if REGX_DEBUG - logf (LOG_DEBUG, "grs_read_tcl"); + logf (LOG_LOG, "grs_read_tcl"); #endif if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type)) { @@ -2087,7 +2105,9 @@ data1_node *grs_read_tcl (struct grs_read_info *p) if (*curLexSpec) lexSpecDestroy (curLexSpec); *curLexSpec = lexSpecCreate (p->type, p->dh); + Tcl_FindExecutable(""); tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp(); + Tcl_Init(tcl_interp); Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0); Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);