/*
- * Copyright (C) 1994-1999, Index Data
+ * Copyright (C) 1994-2002, Index Data
* All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: regxread.c,v $
- * Revision 1.30 1999-07-14 10:55:28 adam
- * Fixed memory leak.
- *
- * Revision 1.29 1999/07/12 07:27:54 adam
- * Improved speed of Tcl processing. Fixed one memory leak.
- *
- * Revision 1.28 1999/07/06 12:26:04 adam
- * Fixed filters so that MS-DOS CR is ignored.
- *
- * Revision 1.27 1999/06/28 13:25:40 quinn
- * Improved diagnostics for Tcl
- *
- * Revision 1.26 1999/05/26 07:49:14 adam
- * C++ compilation.
- *
- * Revision 1.25 1999/05/25 12:33:32 adam
- * Fixed bug in Tcl filter.
- *
- * Revision 1.24 1999/05/21 11:08:46 adam
- * Tcl filter attempts to read <filt>.tflt. Improvements to configure
- * script so that it reads uninstalled Tcl source.
- *
- * Revision 1.23 1999/05/20 12:57:18 adam
- * Implemented TCL filter. Updated recctrl system.
- *
- * Revision 1.22 1998/11/03 16:07:13 adam
- * Yet another fix.
- *
- * Revision 1.21 1998/11/03 15:43:39 adam
- * Fixed bug introduced by previous commit.
- *
- * Revision 1.20 1998/11/03 14:51:28 adam
- * Changed code so that it creates as few data1 nodes as possible.
- *
- * Revision 1.19 1998/11/03 10:22:39 adam
- * Fixed memory leak that could occur for when large data1 node were
- * concatenated. Data-type data1_nodes may have multiple nodes.
- *
- * Revision 1.18 1998/10/15 13:11:47 adam
- * Added support for option -record for "end element". When specified
- * end element will mark end-of-record when at outer-level.
- *
- * Revision 1.17 1998/07/01 10:13:51 adam
- * Minor fix.
- *
- * Revision 1.16 1998/06/30 15:15:09 adam
- * Tags are trimmed: white space removed before- and after the tag.
- *
- * Revision 1.15 1998/06/30 12:55:45 adam
- * Bug fix.
- *
- * Revision 1.14 1998/03/05 08:41:00 adam
- * Implemented rule contexts.
- *
- * Revision 1.13 1997/12/12 06:33:58 adam
- * Fixed bug that showed up when multiple filter where used.
- * Made one routine thread-safe.
- *
- * Revision 1.12 1997/11/18 10:03:24 adam
- * Member num_children removed from data1_node.
- *
- * Revision 1.11 1997/11/06 11:41:01 adam
- * Implemented "begin variant" for the sgml.regx filter.
- *
- * Revision 1.10 1997/10/31 12:36:12 adam
- * Minor change that avoids compiler warning.
- *
- * Revision 1.9 1997/09/29 09:02:49 adam
- * Fixed small bug (introduced by previous commit).
- *
- * Revision 1.8 1997/09/17 12:19:22 adam
- * Zebra version corresponds to YAZ version 1.4.
- * Changed Zebra server so that it doesn't depend on global common_resource.
- *
- * Revision 1.7 1997/07/15 16:33:07 adam
- * Check for zero length in execData.
- *
- * Revision 1.6 1997/02/24 10:41:51 adam
- * Cleanup of code and commented out the "end element-end-record" code.
- *
- * Revision 1.5 1997/02/19 16:22:33 adam
- * Fixed "end element" to terminate record in outer-most level.
- *
- * Revision 1.4 1997/02/12 20:42:58 adam
- * Changed some log messages.
- *
- * Revision 1.3 1996/11/08 14:05:33 adam
- * Bug fix: data1 node member u.tag.get_bytes weren't initialized.
- *
- * Revision 1.2 1996/10/29 14:02:09 adam
- * Doesn't use the global data1_tabpath (from YAZ). Instead the function
- * data1_get_tabpath is used.
- *
- * Revision 1.1 1996/10/11 10:57:30 adam
- * New module recctrl. Used to manage records (extract/retrieval).
- *
- * Revision 1.24 1996/06/17 14:25:31 adam
- * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
- *
- * Revision 1.23 1996/06/04 10:19:00 adam
- * Minor changes - removed include of ctype.h.
- *
- * Revision 1.22 1996/06/03 15:23:13 adam
- * Bug fix: /../ BODY /../ - pattern didn't match EOF.
- *
- * Revision 1.21 1996/05/14 16:58:38 adam
- * Minor change.
- *
- * Revision 1.20 1996/05/01 13:46:36 adam
- * First work on multiple records in one file.
- * New option, -offset, to the "unread" command in the filter module.
- *
- * Revision 1.19 1996/02/12 16:18:20 adam
- * Yet another bug fix in implementation of unread command.
- *
- * Revision 1.18 1996/02/12 16:07:54 adam
- * Bug fix in new unread command.
- *
- * Revision 1.17 1996/02/12 15:56:11 adam
- * New code command: unread.
- *
- * Revision 1.16 1996/01/17 14:57:51 adam
- * Prototype changed for reader functions in extract/retrieve. File
- * is identified by 'void *' instead of 'int.
- *
- * Revision 1.15 1996/01/08 19:15:47 adam
- * New input filter that works!
- *
- * Revision 1.14 1996/01/08 09:10:38 adam
- * Yet another complete rework on this module.
- *
- * Revision 1.13 1995/12/15 17:21:50 adam
- * This version is able to set data.formatted_text in data1-nodes.
- *
- * Revision 1.12 1995/12/15 16:20:10 adam
- * The filter files (*.flt) are read from the path given by data1_tabpath.
- *
- * Revision 1.11 1995/12/15 12:35:16 adam
- * Better logging.
- *
- * Revision 1.10 1995/12/15 10:35:36 adam
- * Misc. bug fixes.
- *
- * Revision 1.9 1995/12/14 16:38:48 adam
- * Completely new attempt to make regular expression parsing.
- *
- * Revision 1.8 1995/12/13 17:16:59 adam
- * Small changes.
- *
- * Revision 1.7 1995/12/13 16:51:58 adam
- * Modified to set last_child in data1_nodes.
- * Uses destroy handler to free up data text nodes.
- *
- * Revision 1.6 1995/12/13 13:45:37 quinn
- * Changed data1 to use nmem.
- *
- * Revision 1.5 1995/12/11 09:12:52 adam
- * The rec_get function returns NULL if record doesn't exist - will
- * happen in the server if the result set records have been deleted since
- * the creation of the set (i.e. the search).
- * The server saves a result temporarily if it is 'volatile', i.e. the
- * set is register dependent.
- *
- * Revision 1.4 1995/12/05 16:57:40 adam
- * More work on regular patterns.
- *
- * Revision 1.3 1995/12/05 09:37:09 adam
- * One malloc was renamed to xmalloc.
- *
- * Revision 1.2 1995/12/04 17:59:24 adam
- * More work on regular expression conversion.
- *
- * Revision 1.1 1995/12/04 14:25:30 adam
- * Started work on regular expression parsed input to structured records.
*
+ * $Id: regxread.c,v 1.40 2002-05-03 13:50:25 adam Exp $
*/
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <ctype.h>
-#include <tpath.h>
+#include <yaz/tpath.h>
#include <zebrautl.h>
#include <dfa.h>
#include "grsread.h"
#if HAVE_TCL_H
#include <tcl.h>
+
+/* Enable the Tcl_Obj code paths for Tcl 8 and later.  tcl.h publishes the
+ * library version as TCL_MAJOR_VERSION (the same macro the Tcl 8.1+ check
+ * further down uses); an unprefixed MAJOR_VERSION would be an undefined
+ * identifier here and evaluate to 0, silently disabling the object API. */
+#if TCL_MAJOR_VERSION >= 8
+/* Defined to 1, not empty, because the rest of the file tests it with #if. */
+#define HAVE_TCL_OBJECTS 1
+#endif
#endif
#define REGX_DEBUG 0
struct regxCode {
char *str;
-#if HAVE_TCL_H
+#if HAVE_TCL_OBJECTS /* tcl_obj exists only when HAVE_TCL_OBJECTS is set, not merely when tcl.h is available */
Tcl_Obj *tcl_obj;
#endif
};
struct regxCode *p = *pp;
if (p)
{
-#if HAVE_TCL_H
+#if HAVE_TCL_OBJECTS
if (p->tcl_obj)
Tcl_DecrRefCount (p->tcl_obj);
#endif
p->str = (char *) xmalloc (len+1);
memcpy (p->str, buf, len);
p->str[len] = '\0';
-#if HAVE_TCL_H
+#if HAVE_TCL_OBJECTS
p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
if (p->tcl_obj)
Tcl_IncrRefCount (p->tcl_obj);
lexContextDestroy (lt);
lt = lt_next;
}
-#if HAVE_TCL_H
+#if HAVE_TCL_OBJECTS
if (p->tcl_interp)
Tcl_DeleteInterp (p->tcl_interp);
#endif
break;
case REGX_PATTERN:
#if REGX_DEBUG
- logf (LOG_DEBUG, "rule %d %s", spec->context->ruleNo, s);
+ logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
#endif
r = dfa_parse (spec->context->dfa, &s);
if (r)
if (spec->tcl_interp)
{
sprintf (fname, "%s.tflt", spec->name);
- spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), fname, "r");
+ spec_inf = data1_path_fopen (spec->dh, fname, "r");
}
#endif
if (!spec_inf)
{
sprintf (fname, "%s.flt", spec->name);
- spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), fname, "r");
+ spec_inf = data1_path_fopen (spec->dh, fname, "r");
}
if (!spec_inf)
{
return ;
#if REGX_DEBUG
if (elen > 40)
- logf (LOG_DEBUG, "data (%d bytes) %.15s ... %.*s", elen,
+ logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen,
ebuf, 15, ebuf + elen-15);
else if (elen > 0)
- logf (LOG_DEBUG, "data (%d bytes) %.*s", elen, elen, ebuf);
+ logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf);
else
- logf (LOG_DEBUG, "data (%d bytes)", elen);
+ logf (LOG_LOG, "data (%d bytes)", elen);
#endif
if (spec->d1_level <= 1)
{
org_len = 0;
- res = data1_mk_node (spec->dh, spec->m);
- res->parent = parent;
- res->which = DATA1N_data;
+ res = data1_mk_node (spec->dh, spec->m, DATA1N_data, parent);
res->u.data.what = DATA1I_text;
res->u.data.len = 0;
res->u.data.formatted_text = formatted_text;
-#if 0
- if (elen > DATA1_LOCALDATA)
- res->u.data.data = nmem_malloc (spec->m, elen);
- else
- res->u.data.data = res->lbuf;
- memcpy (res->u.data.data, ebuf, elen);
-#else
res->u.data.data = 0;
-#endif
- res->root = parent->root;
- parent->last_child = res;
if (spec->d1_stack[spec->d1_level])
spec->d1_stack[spec->d1_level]->next = res;
- else
- parent->child = res;
spec->d1_stack[spec->d1_level] = res;
}
if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
ttype[type_len] = '\0';
#if REGX_DEBUG
- logf (LOG_DEBUG, "variant begin %s %s (%d)", tclass, ttype,
+ logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype,
spec->d1_level);
#endif
if (parent->which != DATA1N_variant)
{
- res = data1_mk_node (spec->dh, spec->m);
- res->parent = parent;
- res->which = DATA1N_variant;
- res->u.variant.type = 0;
- res->u.variant.value = 0;
- res->root = parent->root;
-
- parent->last_child = res;
+ res = data1_mk_node (spec->dh, spec->m, DATA1N_variant, parent);
if (spec->d1_stack[spec->d1_level])
- {
tagDataRelease (spec);
- spec->d1_stack[spec->d1_level]->next = res;
- }
- else
- parent->child = res;
spec->d1_stack[spec->d1_level] = res;
spec->d1_stack[++(spec->d1_level)] = NULL;
}
}
#if REGX_DEBUG
- logf (LOG_DEBUG, "variant node (%d)", spec->d1_level);
+ logf (LOG_LOG, "variant node (%d)", spec->d1_level);
#endif
parent = spec->d1_stack[spec->d1_level-1];
- res = data1_mk_node (spec->dh, spec->m);
- res->parent = parent;
- res->which = DATA1N_variant;
- res->root = parent->root;
+ res = data1_mk_node (spec->dh, spec->m, DATA1N_variant, parent);
res->u.variant.type = tp;
if (value_len >= DATA1_LOCALDATA)
res->u.variant.value = res->lbuf;
- parent->last_child = res;
if (spec->d1_stack[spec->d1_level])
- {
tagDataRelease (spec);
- spec->d1_stack[spec->d1_level]->next = res;
- }
- else
- parent->child = res;
spec->d1_stack[spec->d1_level] = res;
spec->d1_stack[++(spec->d1_level)] = NULL;
}
static void tagBegin (struct lexSpec *spec,
const char *tag, int len)
{
- struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
+ struct data1_node *parent;
data1_element *elem = NULL;
- data1_node *partag = get_parent_tag(spec->dh, parent);
+ data1_node *partag;
data1_node *res;
data1_element *e = NULL;
int localtag = 0;
return ;
}
tagStrip (&tag, &len);
+
+ parent = spec->d1_stack[spec->d1_level -1];
+ partag = get_parent_tag(spec->dh, parent);
- res = data1_mk_node (spec->dh, spec->m);
- res->parent = parent;
- res->which = DATA1N_tag;
- res->u.tag.get_bytes = -1;
+ res = data1_mk_node (spec->dh, spec->m, DATA1N_tag, parent);
if (len >= DATA1_LOCALDATA)
res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1);
res->u.tag.tag[len] = '\0';
#if REGX_DEBUG
- logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level);
+ logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level);
#endif
if (parent->which == DATA1N_variant)
return ;
spec->d1_stack[0]->u.root.absyn,
e, res->u.tag.tag);
res->u.tag.element = elem;
- res->u.tag.node_selected = 0;
- res->u.tag.make_variantlist = 0;
- res->u.tag.no_data_requested = 0;
- res->root = parent->root;
- parent->last_child = res;
if (spec->d1_stack[spec->d1_level])
- {
tagDataRelease (spec);
- spec->d1_stack[spec->d1_level]->next = res;
- }
- else
- parent->child = res;
spec->d1_stack[spec->d1_level] = res;
spec->d1_stack[++(spec->d1_level)] = NULL;
}
break;
}
#if REGX_DEBUG
- logf (LOG_DEBUG, "end tag (%d)", spec->d1_level);
+ logf (LOG_LOG, "end tag (%d)", spec->d1_level);
#endif
}
{
char *absynName = argv[2];
data1_absyn *absyn;
+ data1_node *res;
#if REGX_DEBUG
- logf (LOG_DEBUG, "begin record %s", absynName);
+ logf (LOG_LOG, "begin record %s", absynName);
#endif
- if (!(absyn = data1_get_absyn (spec->dh, absynName)))
- logf (LOG_WARN, "Unknown tagset: %s", absynName);
- else
- {
- data1_node *res;
-
- res = data1_mk_node (spec->dh, spec->m);
- res->which = DATA1N_root;
- res->u.root.type = absynName;
- res->u.root.absyn = absyn;
- res->root = res;
-
- spec->d1_stack[spec->d1_level] = res;
- spec->d1_stack[++(spec->d1_level)] = NULL;
- }
+ absyn = data1_get_absyn (spec->dh, absynName);
+
+ res = data1_mk_node (spec->dh, spec->m);
+ res->which = DATA1N_root;
+ res->u.root.type =
+ data1_insert_string(spec->dh, res, spec->m, absynName);
+ res->u.root.absyn = absyn;
+ res->root = res;
+
+ spec->d1_stack[spec->d1_level] = res;
+ spec->d1_stack[++(spec->d1_level)] = NULL;
}
else if (!strcmp(argv[1], "element") && argc == 3)
{
{
struct lexContext *lc = spec->context;
#if REGX_DEBUG
- logf (LOG_DEBUG, "begin context %s",argv[2]);
+ logf (LOG_LOG, "begin context %s",argv[2]);
#endif
while (lc && strcmp (argv[2], lc->name))
lc = lc->next;
(spec->d1_level)--;
}
#if REGX_DEBUG
- logf (LOG_DEBUG, "end record");
+ logf (LOG_LOG, "end record");
#endif
spec->stop_flag = 1;
}
if (spec->d1_level == 0)
{
#if REGX_DEBUG
- logf (LOG_DEBUG, "end element end records");
+ logf (LOG_LOG, "end element end records");
#endif
spec->stop_flag = 1;
}
else if (!strcmp (argv[1], "context"))
{
#if REGX_DEBUG
- logf (LOG_DEBUG, "end context");
+ logf (LOG_LOG, "end context");
#endif
if (spec->context_stack_top)
(spec->context_stack_top)--;
while (argi < argc)
{
+#if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
+ Tcl_DString ds;
+ char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
+ execData (spec, native, strlen(native), textFlag);
+ Tcl_DStringFree (&ds);
+#else
execData (spec, argv[argi], strlen(argv[argi]), textFlag);
+#endif
argi++;
}
if (element)
var_buf[var_len] = ch;
}
}
- if (code->tcl_obj)
- ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
- else
- ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
+#if HAVE_TCL_OBJECTS
+ ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
+#else
+ ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
+#endif
if (ret != TCL_OK)
{
const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
{
static char absynName[64];
data1_absyn *absyn;
+ data1_node *res;
if (cmd_len > 63)
cmd_len = 63;
absynName[cmd_len] = '\0';
#if REGX_DEBUG
- logf (LOG_DEBUG, "begin record %s", absynName);
+ logf (LOG_LOG, "begin record %s", absynName);
#endif
- if (!(absyn = data1_get_absyn (spec->dh, absynName)))
- logf (LOG_WARN, "Unknown tagset: %s", absynName);
- else
- {
- data1_node *res;
-
- res = data1_mk_node (spec->dh, spec->m);
- res->which = DATA1N_root;
- res->u.root.type = absynName;
- res->u.root.absyn = absyn;
- res->root = res;
-
- spec->d1_stack[spec->d1_level] = res;
- spec->d1_stack[++(spec->d1_level)] = NULL;
- }
+ absyn = data1_get_absyn (spec->dh, absynName);
+
+ res = data1_mk_node (spec->dh, spec->m, DATA1N_root, 0);
+ res->u.root.type = absynName;
+ res->u.root.absyn = absyn;
+
+ spec->d1_stack[spec->d1_level] = res;
+ spec->d1_stack[++(spec->d1_level)] = NULL;
}
r = execTok (spec, &s, &cmd_str, &cmd_len);
}
r = execTok (spec, &s, &cmd_str, &cmd_len);
p = regxStrz (cmd_str, cmd_len, ptmp);
#if REGX_DEBUG
- logf (LOG_DEBUG, "begin context %s", p);
+ logf (LOG_LOG, "begin context %s", p);
#endif
while (lc && strcmp (p, lc->name))
lc = lc->next;
}
r = execTok (spec, &s, &cmd_str, &cmd_len);
#if REGX_DEBUG
- logf (LOG_DEBUG, "end record");
+ logf (LOG_LOG, "end record");
#endif
spec->stop_flag = 1;
}
if (spec->d1_level == 0)
{
#if REGX_DEBUG
- logf (LOG_DEBUG, "end element end records");
+ logf (LOG_LOG, "end element end records");
#endif
spec->stop_flag = 1;
}
else if (!strcmp (p, "context"))
{
#if REGX_DEBUG
- logf (LOG_DEBUG, "end context");
+ logf (LOG_LOG, "end context");
#endif
if (spec->context_stack_top)
(spec->context_stack_top)--;
int ruleNo, int start_ptr, int *pptr)
{
#if REGX_DEBUG
- logf (LOG_DEBUG, "exec rule %d", ruleNo);
+ logf (LOG_LOG, "exec rule %d", ruleNo);
#endif
return execAction (spec, context->fastRule[ruleNo]->actionList,
start_ptr, pptr);
if (spec->f_win_ef && *ptr != F_WIN_EOF)
{
#if REGX_DEBUG
- logf (LOG_DEBUG, "regx: endf ptr=%d", *ptr);
+ logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
#endif
(*spec->f_win_ef)(spec->f_win_fh, *ptr);
}
struct lexSpec **curLexSpec = &specs->spec;
#if REGX_DEBUG
- logf (LOG_DEBUG, "grs_read_regx");
+ logf (LOG_LOG, "grs_read_regx");
#endif
if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
{
struct lexSpec **curLexSpec = &specs->spec;
#if REGX_DEBUG
- logf (LOG_DEBUG, "grs_read_tcl");
+ logf (LOG_LOG, "grs_read_tcl");
#endif
if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
{
if (*curLexSpec)
lexSpecDestroy (curLexSpec);
*curLexSpec = lexSpecCreate (p->type, p->dh);
+ Tcl_FindExecutable("");
tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
+ Tcl_Init(tcl_interp);
Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);