Minor

[idzebra-moved-to-github.git] / recctrl / regxread.c
diff --git a/recctrl/regxread.c b/recctrl/regxread.c

index b7204dd..f56738d 100644 (file)
--- a/recctrl/regxread.c
+++ b/recctrl/regxread.c
@@ -1,182 +1,42 @@
-/*
- * Copyright (C) 1994-1999, Index Data
- * All rights reserved.
- * Sebastian Hammer, Adam Dickmeiss
- *
- * $Log: regxread.c,v $
- * Revision 1.25  1999-05-25 12:33:32  adam
- * Fixed bug in Tcl filter.
- *
- * Revision 1.24  1999/05/21 11:08:46  adam
- * Tcl filter attempts to read <filt>.tflt. Improvements to configure
- * script so that it reads uninstalled Tcl source.
- *
- * Revision 1.23  1999/05/20 12:57:18  adam
- * Implemented TCL filter. Updated recctrl system.
- *
- * Revision 1.22  1998/11/03 16:07:13  adam
- * Yet another fix.
- *
- * Revision 1.21  1998/11/03 15:43:39  adam
- * Fixed bug introduced by previous commit.
- *
- * Revision 1.20  1998/11/03 14:51:28  adam
- * Changed code so that it creates as few data1 nodes as possible.
- *
- * Revision 1.19  1998/11/03 10:22:39  adam
- * Fixed memory leak that could occur for when large data1 node were
- * concatenated. Data-type data1_nodes may have multiple nodes.
- *
- * Revision 1.18  1998/10/15 13:11:47  adam
- * Added support for option -record for "end element". When specified
- * end element will mark end-of-record when at outer-level.
- *
- * Revision 1.17  1998/07/01 10:13:51  adam
- * Minor fix.
- *
- * Revision 1.16  1998/06/30 15:15:09  adam
- * Tags are trimmed: white space removed before- and after the tag.
- *
- * Revision 1.15  1998/06/30 12:55:45  adam
- * Bug fix.
- *
- * Revision 1.14  1998/03/05 08:41:00  adam
- * Implemented rule contexts.
- *
- * Revision 1.13  1997/12/12 06:33:58  adam
- * Fixed bug that showed up when multiple filter where used.
- * Made one routine thread-safe.
- *
- * Revision 1.12  1997/11/18 10:03:24  adam
- * Member num_children removed from data1_node.
- *
- * Revision 1.11  1997/11/06 11:41:01  adam
- * Implemented "begin variant" for the sgml.regx filter.
- *
- * Revision 1.10  1997/10/31 12:36:12  adam
- * Minor change that avoids compiler warning.
- *
- * Revision 1.9  1997/09/29 09:02:49  adam
- * Fixed small bug (introduced by previous commit).
- *
- * Revision 1.8  1997/09/17 12:19:22  adam
- * Zebra version corresponds to YAZ version 1.4.
- * Changed Zebra server so that it doesn't depend on global common_resource.
- *
- * Revision 1.7  1997/07/15 16:33:07  adam
- * Check for zero length in execData.
- *
- * Revision 1.6  1997/02/24 10:41:51  adam
- * Cleanup of code and commented out the "end element-end-record" code.
- *
- * Revision 1.5  1997/02/19 16:22:33  adam
- * Fixed "end element" to terminate record in outer-most level.
- *
- * Revision 1.4  1997/02/12 20:42:58  adam
- * Changed some log messages.
- *
- * Revision 1.3  1996/11/08 14:05:33  adam
- * Bug fix: data1 node member u.tag.get_bytes weren't initialized.
- *
- * Revision 1.2  1996/10/29  14:02:09  adam
- * Doesn't use the global data1_tabpath (from YAZ). Instead the function
- * data1_get_tabpath is used.
- *
- * Revision 1.1  1996/10/11 10:57:30  adam
- * New module recctrl. Used to manage records (extract/retrieval).
- *
- * Revision 1.24  1996/06/17 14:25:31  adam
- * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
- *
- * Revision 1.23  1996/06/04 10:19:00  adam
- * Minor changes - removed include of ctype.h.
- *
- * Revision 1.22  1996/06/03  15:23:13  adam
- * Bug fix: /../ BODY /../ - pattern didn't match EOF.
- *
- * Revision 1.21  1996/05/14  16:58:38  adam
- * Minor change.
- *
- * Revision 1.20  1996/05/01  13:46:36  adam
- * First work on multiple records in one file.
- * New option, -offset, to the "unread" command in the filter module.
- *
- * Revision 1.19  1996/02/12  16:18:20  adam
- * Yet another bug fix in implementation of unread command.
- *
- * Revision 1.18  1996/02/12  16:07:54  adam
- * Bug fix in new unread command.
- *
- * Revision 1.17  1996/02/12  15:56:11  adam
- * New code command: unread.
- *
- * Revision 1.16  1996/01/17  14:57:51  adam
- * Prototype changed for reader functions in extract/retrieve. File
- *  is identified by 'void *' instead of 'int.
- *
- * Revision 1.15  1996/01/08  19:15:47  adam
- * New input filter that works!
- *
- * Revision 1.14  1996/01/08  09:10:38  adam
- * Yet another complete rework on this module.
- *
- * Revision 1.13  1995/12/15  17:21:50  adam
- * This version is able to set data.formatted_text in data1-nodes.
- *
- * Revision 1.12  1995/12/15  16:20:10  adam
- * The filter files (*.flt) are read from the path given by data1_tabpath.
- *
- * Revision 1.11  1995/12/15  12:35:16  adam
- * Better logging.
- *
- * Revision 1.10  1995/12/15  10:35:36  adam
- * Misc. bug fixes.
- *
- * Revision 1.9  1995/12/14  16:38:48  adam
- * Completely new attempt to make regular expression parsing.
- *
- * Revision 1.8  1995/12/13  17:16:59  adam
- * Small changes.
- *
- * Revision 1.7  1995/12/13  16:51:58  adam
- * Modified to set last_child in data1_nodes.
- * Uses destroy handler to free up data text nodes.
- *
- * Revision 1.6  1995/12/13  13:45:37  quinn
- * Changed data1 to use nmem.
- *
- * Revision 1.5  1995/12/11  09:12:52  adam
- * The rec_get function returns NULL if record doesn't exist - will
- * happen in the server if the result set records have been deleted since
- * the creation of the set (i.e. the search).
- * The server saves a result temporarily if it is 'volatile', i.e. the
- * set is register dependent.
- *
- * Revision 1.4  1995/12/05  16:57:40  adam
- * More work on regular patterns.
- *
- * Revision 1.3  1995/12/05  09:37:09  adam
- * One malloc was renamed to xmalloc.
- *
- * Revision 1.2  1995/12/04  17:59:24  adam
- * More work on regular expression conversion.
- *
- * Revision 1.1  1995/12/04  14:25:30  adam
- * Started work on regular expression parsed input to structured records.
- *
- */
+/* $Id: regxread.c,v 1.46 2002-09-24 19:41:00 adam Exp $
+   Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
+   Index Data Aps
+
+This file is part of the Zebra server.
+
+Zebra is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free
+Software Foundation; either version 2, or (at your option) any later
+version.
+
+Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
+WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
+for more details.
+
+You should have received a copy of the GNU General Public License
+along with Zebra; see the file LICENSE.zebra.  If not, write to the
+Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
+02111-1307, USA.
+*/
+
+
  #include <stdio.h>
  #include <assert.h>
  #include <string.h>
  #include <ctype.h>
  
-#include <tpath.h>
+#include <yaz/tpath.h>
  #include <zebrautl.h>
  #include <dfa.h>
  #include "grsread.h"
  
  #if HAVE_TCL_H
  #include <tcl.h>
+
+#if TCL_MAJOR_VERSION >= 8  /* same Tcl version macro that cmd_tcl_data tests; MAJOR_VERSION was never set, so this gate never fired */
+#define HAVE_TCL_OBJECTS 1  /* needs a value: the macro is tested with #if, not #ifdef */
+#endif
  #endif
  
  #define REGX_DEBUG 0
@@ -195,6 +55,9 @@
  
  struct regxCode {
      char *str;
+#if HAVE_TCL_OBJECTS
+    Tcl_Obj *tcl_obj;
+#endif
  };
  
  struct lexRuleAction {
@@ -234,7 +97,6 @@ struct lexContext {
  };
  
  struct lexConcatBuf {
-    int len;
      int max;
      char *buf;
  };
@@ -263,7 +125,7 @@ struct lexSpec {
      int (*f_win_rf)(void *, char *, size_t);
      off_t (*f_win_sf)(void *, off_t);
  
-    struct lexConcatBuf **concatBuf;
+    struct lexConcatBuf *concatBuf;
      int maxLevel;
      data1_node **d1_stack;
      int d1_level;
@@ -295,7 +157,7 @@ static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
          spec->f_win_start = start_pos;
  
          if (!spec->f_win_buf)
-            spec->f_win_buf = xmalloc (spec->f_win_size);
+            spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
          *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
                                    spec->f_win_size);
          spec->f_win_end = spec->f_win_start + *size;
@@ -341,6 +203,10 @@ static void regxCodeDel (struct regxCode **pp)
      struct regxCode *p = *pp;
      if (p)
      {
+#if HAVE_TCL_OBJECTS
+       if (p->tcl_obj)
+           Tcl_DecrRefCount (p->tcl_obj);
+#endif
          xfree (p->str); 
          xfree (p);
          *pp = NULL;
@@ -351,10 +217,15 @@ static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
  {
      struct regxCode *p;
  
-    p = xmalloc (sizeof(*p));
-    p->str = xmalloc (len+1);
+    p = (struct regxCode *) xmalloc (sizeof(*p));
+    p->str = (char *) xmalloc (len+1);
      memcpy (p->str, buf, len);
      p->str[len] = '\0';
+#if HAVE_TCL_OBJECTS
+    p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
+    if (p->tcl_obj)
+       Tcl_IncrRefCount (p->tcl_obj);
+#endif
      *pp = p;
  }
  
@@ -392,7 +263,7 @@ static void actionListDel (struct lexRuleAction **rap)
  
  static struct lexContext *lexContextCreate (const char *name)
  {
-    struct lexContext *p = xmalloc (sizeof(*p));
+    struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
  
      p->name = xstrdup (name);
      p->ruleNo = 1;
@@ -411,6 +282,7 @@ static void lexContextDestroy (struct lexContext *p)
  {
      struct lexRule *rp, *rp1;
  
+    dfa_delete (&p->dfa);
      xfree (p->fastRule);
      for (rp = p->rules; rp; rp = rp1)
      {
@@ -420,6 +292,7 @@ static void lexContextDestroy (struct lexContext *p)
      }
      actionListDel (&p->beginActionList);
      actionListDel (&p->endActionList);
+    actionListDel (&p->initActionList);
      xfree (p->name);
      xfree (p);
  }
@@ -429,8 +302,8 @@ static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
      struct lexSpec *p;
      int i;
      
-    p = xmalloc (sizeof(*p));
-    p->name = xmalloc (strlen(name)+1);
+    p = (struct lexSpec *) xmalloc (sizeof(*p));
+    p->name = (char *) xmalloc (strlen(name)+1);
      strcpy (p->name, name);
  
  #if HAVE_TCL_H
@@ -439,19 +312,19 @@ static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
      p->dh = dh;
      p->context = NULL;
      p->context_stack_size = 100;
-    p->context_stack = xmalloc (sizeof(*p->context_stack) *
-                               p->context_stack_size);
+    p->context_stack = (struct lexContext **)
+       xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
      p->f_win_buf = NULL;
  
      p->maxLevel = 128;
-    p->concatBuf = xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
+    p->concatBuf = (struct lexConcatBuf *)
+       xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
      for (i = 0; i < p->maxLevel; i++)
      {
-       p->concatBuf[i] = xmalloc (sizeof(**p->concatBuf));
-       p->concatBuf[i]->len = p->concatBuf[i]->max = 0;
-       p->concatBuf[i]->buf = 0;
+       p->concatBuf[i].max = 0;
+       p->concatBuf[i].buf = 0;
      }
-    p->d1_stack = xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
+    p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
      p->d1_level = 0;
      return p;
  }
@@ -468,7 +341,7 @@ static void lexSpecDestroy (struct lexSpec **pp)
          return ;
  
      for (i = 0; i < p->maxLevel; i++)
-       xfree (p->concatBuf[i]);
+       xfree (p->concatBuf[i].buf);
      xfree (p->concatBuf);
  
      lt = p->context;
@@ -478,7 +351,7 @@ static void lexSpecDestroy (struct lexSpec **pp)
         lexContextDestroy (lt);
         lt = lt_next;
      }
-#if HAVE_TCL_H
+#if HAVE_TCL_OBJECTS
      if (p->tcl_interp)
         Tcl_DeleteInterp (p->tcl_interp);
  #endif
@@ -496,7 +369,7 @@ static int readParseToken (const char **cpp, int *len)
      char cmd[32];
      int i, level;
  
-    while (*cp == ' ' || *cp == '\t' || *cp == '\n')
+    while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
          cp++;
      switch (*cp)
      {
@@ -531,7 +404,7 @@ static int readParseToken (const char **cpp, int *len)
                  cmd[i] = *cp + 'a' - 'A';
              else
                  break;
-            if (i < sizeof(cmd)-2)
+            if (i < (int) sizeof(cmd)-2)
                 i++;
              cp++;
          }
@@ -540,7 +413,8 @@ static int readParseToken (const char **cpp, int *len)
          {
              logf (LOG_WARN, "bad character %d %c", *cp, *cp);
              cp++;
-            while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
+            while (*cp && *cp != ' ' && *cp != '\t' &&
+                   *cp != '\n' && *cp != '\r')
                  cp++;
              *cpp = cp;
              return 0;
@@ -579,13 +453,13 @@ static int actionListMk (struct lexSpec *spec, const char *s,
              bodyMark = 1;
              continue;
          case REGX_CODE:
-            *ap = xmalloc (sizeof(**ap));
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
              (*ap)->which = tok;
              regxCodeMk (&(*ap)->u.code, s, len);
              s += len+1;
              break;
          case REGX_PATTERN:
-            *ap = xmalloc (sizeof(**ap));
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
              (*ap)->which = tok;
              (*ap)->u.pattern.body = bodyMark;
              bodyMark = 0;
@@ -609,7 +483,7 @@ static int actionListMk (struct lexSpec *spec, const char *s,
              logf (LOG_WARN, "cannot use INIT here");
              continue;
          case REGX_END:
-            *ap = xmalloc (sizeof(**ap));
+            *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
              (*ap)->which = tok;
              break;
          }
@@ -663,7 +537,7 @@ int readOneSpec (struct lexSpec *spec, const char *s)
         break;
      case REGX_PATTERN:
  #if REGX_DEBUG
-       logf (LOG_DEBUG, "rule %d %s", spec->context->ruleNo, s);
+       logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
  #endif
          r = dfa_parse (spec->context->dfa, &s);
          if (r)
@@ -677,7 +551,7 @@ int readOneSpec (struct lexSpec *spec, const char *s)
              return -1;
          }
          s++;
-        rp = xmalloc (sizeof(*rp));
+        rp = (struct lexRule *) xmalloc (sizeof(*rp));
          rp->info.no = spec->context->ruleNo++;
          rp->next = spec->context->rules;
          spec->context->rules = rp;
@@ -689,41 +563,40 @@ int readOneSpec (struct lexSpec *spec, const char *s)
  int readFileSpec (struct lexSpec *spec)
  {
      struct lexContext *lc;
-    char *lineBuf;
-    int lineSize = 512;
      int c, i, errors = 0;
      FILE *spec_inf = 0;
+    WRBUF lineBuf;
+    char fname[256];
  
-    lineBuf = xmalloc (1+lineSize);
  #if HAVE_TCL_H
      if (spec->tcl_interp)
      {
-       sprintf (lineBuf, "%s.tflt", spec->name);
-       spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), lineBuf, "r");
+       sprintf (fname, "%s.tflt", spec->name);
+       spec_inf = data1_path_fopen (spec->dh, fname, "r");
      }
  #endif
      if (!spec_inf)
      {
-       sprintf (lineBuf, "%s.flt", spec->name);
-       spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), lineBuf, "r");
+       sprintf (fname, "%s.flt", spec->name);
+       spec_inf = data1_path_fopen (spec->dh, fname, "r");
      }
      if (!spec_inf)
      {
          logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
-        xfree (lineBuf);
          return -1;
      }
-    logf (LOG_LOG, "reading regx filter %s", lineBuf);
+    logf (LOG_LOG, "reading regx filter %s", fname);
  #if HAVE_TCL_H
      if (spec->tcl_interp)
         logf (LOG_LOG, "Tcl enabled");
  #endif
+    lineBuf = wrbuf_alloc();
      spec->lineNo = 0;
      c = getc (spec_inf);
      while (c != EOF)
      {
-        int off = 0;
-        if (c == '#' || c == '\n' || c == ' ' || c == '\t')
+       wrbuf_rewind (lineBuf);
+        if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
          {
              while (c != '\n' && c != EOF)
                  c = getc (spec_inf);
@@ -734,12 +607,14 @@ int readFileSpec (struct lexSpec *spec)
          else
          {
              int addLine = 0;
-
-            lineBuf[off++] = c;
+           
              while (1)
              {
                  int c1 = c;
+               wrbuf_putc(lineBuf, c);
                  c = getc (spec_inf);
+               while (c == '\r')
+                   c = getc (spec_inf);
                  if (c == EOF)
                      break;
                  if (c1 == '\n')
@@ -748,17 +623,14 @@ int readFileSpec (struct lexSpec *spec)
                          break;
                      addLine++;
                  }
-                lineBuf[off] = c;
-                if (off < lineSize)
-                    off++;
              }
-            lineBuf[off] = '\0';
-            readOneSpec (spec, lineBuf);
+           wrbuf_putc(lineBuf, '\0');
+            readOneSpec (spec, wrbuf_buf(lineBuf));
              spec->lineNo += addLine;
          }
      }
      fclose (spec_inf);
-    xfree (lineBuf);
+    wrbuf_free(lineBuf, 1);
  
  #if 0
      debug_dfa_trav = 1;
@@ -769,7 +641,8 @@ int readFileSpec (struct lexSpec *spec)
      for (lc = spec->context; lc; lc = lc->next)
      {
         struct lexRule *rp;
-       lc->fastRule = xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
+       lc->fastRule = (struct lexRuleInfo **)
+           xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
         for (i = 0; i < lc->ruleNo; i++)
             lc->fastRule[i] = NULL;
         for (rp = lc->rules; rp; rp = rp->next)
@@ -796,12 +669,12 @@ static void execData (struct lexSpec *spec,
         return ;
  #if REGX_DEBUG
      if (elen > 40)
-        logf (LOG_DEBUG, "data (%d bytes) %.15s ... %.*s", elen,
+        logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen,
               ebuf, 15, ebuf + elen-15);
      else if (elen > 0)
-        logf (LOG_DEBUG, "data (%d bytes) %.*s", elen, elen, ebuf);
+        logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf);
      else 
-        logf (LOG_DEBUG, "data (%d bytes)", elen);
+        logf (LOG_LOG, "data (%d bytes)", elen);
  #endif
          
      if (spec->d1_level <= 1)
@@ -816,45 +689,30 @@ static void execData (struct lexSpec *spec,
      {
         org_len = 0;
  
-       res = data1_mk_node (spec->dh, spec->m);
-       res->parent = parent;
-       res->which = DATA1N_data;
+       res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
         res->u.data.what = DATA1I_text;
         res->u.data.len = 0;
         res->u.data.formatted_text = formatted_text;
-#if 0
-       if (elen > DATA1_LOCALDATA)
-           res->u.data.data = nmem_malloc (spec->m, elen);
-       else
-           res->u.data.data = res->lbuf;
-       memcpy (res->u.data.data, ebuf, elen);
-#else
         res->u.data.data = 0;
-#endif
-       res->root = parent->root;
         
-       parent->last_child = res;
         if (spec->d1_stack[spec->d1_level])
             spec->d1_stack[spec->d1_level]->next = res;
-       else
-           parent->child = res;
         spec->d1_stack[spec->d1_level] = res;
      }
-    if (org_len + elen >= spec->concatBuf[spec->d1_level]->max)
+    if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
      {
         char *old_buf, *new_buf;
  
-       spec->concatBuf[spec->d1_level]->max = org_len + elen + 256;
-       new_buf = xmalloc (spec->concatBuf[spec->d1_level]->max);
-       if ((old_buf = spec->concatBuf[spec->d1_level]->buf))
+       spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
+       new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
+       if ((old_buf = spec->concatBuf[spec->d1_level].buf))
         {
             memcpy (new_buf, old_buf, org_len);
             xfree (old_buf);
         }
-       spec->concatBuf[spec->d1_level]->buf = new_buf;
+       spec->concatBuf[spec->d1_level].buf = new_buf;
      }
-    assert (spec->concatBuf[spec->d1_level]);
-    memcpy (spec->concatBuf[spec->d1_level]->buf + org_len, ebuf, elen);
+    memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
      res->u.data.len += elen;
  }
  
@@ -875,10 +733,10 @@ static void tagDataRelease (struct lexSpec *spec)
         assert (!res->u.data.data);
         assert (res->u.data.len > 0);
         if (res->u.data.len > DATA1_LOCALDATA)
-           res->u.data.data = nmem_malloc (spec->m, res->u.data.len);
+           res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
         else
             res->u.data.data = res->lbuf;
-       memcpy (res->u.data.data, spec->concatBuf[spec->d1_level]->buf,
+       memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
                 res->u.data.len);
      }
  }
@@ -910,7 +768,7 @@ static void variantBegin (struct lexSpec *spec,
      ttype[type_len] = '\0';
  
  #if REGX_DEBUG 
-    logf (LOG_DEBUG, "variant begin %s %s (%d)", tclass, ttype,
+    logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype,
           spec->d1_level);
  #endif
  
@@ -921,21 +779,9 @@ static void variantBegin (struct lexSpec *spec,
      
      if (parent->which != DATA1N_variant)
      {
-       res = data1_mk_node (spec->dh, spec->m);
-       res->parent = parent;
-       res->which = DATA1N_variant;
-       res->u.variant.type = 0;
-       res->u.variant.value = 0;
-       res->root = parent->root;
-
-       parent->last_child = res;
+       res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
         if (spec->d1_stack[spec->d1_level])
-       {
             tagDataRelease (spec);
-           spec->d1_stack[spec->d1_level]->next = res;
-       }
-       else
-           parent->child = res;
         spec->d1_stack[spec->d1_level] = res;
         spec->d1_stack[++(spec->d1_level)] = NULL;
      }
@@ -947,13 +793,10 @@ static void variantBegin (struct lexSpec *spec,
         }
  
  #if REGX_DEBUG 
-    logf (LOG_DEBUG, "variant node (%d)", spec->d1_level);
+    logf (LOG_LOG, "variant node (%d)", spec->d1_level);
  #endif
      parent = spec->d1_stack[spec->d1_level-1];
-    res = data1_mk_node (spec->dh, spec->m);
-    res->parent = parent;
-    res->which = DATA1N_variant;
-    res->root = parent->root;
+    res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
      res->u.variant.type = tp;
  
      if (value_len >= DATA1_LOCALDATA)
@@ -963,14 +806,8 @@ static void variantBegin (struct lexSpec *spec,
  
      res->u.variant.value = res->lbuf;
      
-    parent->last_child = res;
      if (spec->d1_stack[spec->d1_level])
-    {
         tagDataRelease (spec);
-        spec->d1_stack[spec->d1_level]->next = res;
-    }
-    else
-        parent->child = res;
      spec->d1_stack[spec->d1_level] = res;
      spec->d1_stack[++(spec->d1_level)] = NULL;
  }
@@ -991,60 +828,21 @@ static void tagStrip (const char **tag, int *len)
  static void tagBegin (struct lexSpec *spec, 
                        const char *tag, int len)
  {
-    struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
-    data1_element *elem = NULL;
-    data1_node *partag = get_parent_tag(spec->dh, parent);
-    data1_node *res;
-    data1_element *e = NULL;
-    int localtag = 0;
-
      if (spec->d1_level == 0)
      {
          logf (LOG_WARN, "in element begin. No record type defined");
          return ;
      }
      tagStrip (&tag, &len);
-   
-    res = data1_mk_node (spec->dh, spec->m);
-    res->parent = parent;
-    res->which = DATA1N_tag;
-    res->u.tag.get_bytes = -1;
-
-    if (len >= DATA1_LOCALDATA)
-       res->u.tag.tag = nmem_malloc (spec->m, len+1);
-    else
-       res->u.tag.tag = res->lbuf;
+    if (spec->d1_stack[spec->d1_level])
+       tagDataRelease (spec);
  
-    memcpy (res->u.tag.tag, tag, len);
-    res->u.tag.tag[len] = '\0';
-   
  #if REGX_DEBUG 
-    logf (LOG_DEBUG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level);
+    logf (LOG_LOG, "begin tag %s (%d)", tag, spec->d1_level);
  #endif
-    if (parent->which == DATA1N_variant)
-        return ;
-    if (partag)
-        if (!(e = partag->u.tag.element))
-            localtag = 1;
-    
-    elem = data1_getelementbytagname (spec->dh,
-                                     spec->d1_stack[0]->u.root.absyn,
-                                     e, res->u.tag.tag);
-    res->u.tag.element = elem;
-    res->u.tag.node_selected = 0;
-    res->u.tag.make_variantlist = 0;
-    res->u.tag.no_data_requested = 0;
-    res->root = parent->root;
-
-    parent->last_child = res;
-    if (spec->d1_stack[spec->d1_level])
-    {
-       tagDataRelease (spec);
-        spec->d1_stack[spec->d1_level]->next = res;
-    }
-    else
-        parent->child = res;
-    spec->d1_stack[spec->d1_level] = res;
+
+    spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
+        spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
      spec->d1_stack[++(spec->d1_level)] = NULL;
  }
  
@@ -1066,7 +864,7 @@ static void tagEnd (struct lexSpec *spec, int min_level,
              break;
      }
  #if REGX_DEBUG
-    logf (LOG_DEBUG, "end tag (%d)", spec->d1_level);
+    logf (LOG_LOG, "end tag (%d)", spec->d1_level);
  #endif
  }
  
@@ -1183,7 +981,8 @@ static int execTok (struct lexSpec *spec, const char **src,
      else if (*s == '-')
      {
          *tokBuf = s++;
-        while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
+        while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
+               *s != ';')
              s++;
          *tokLen = s - *tokBuf;
          *src = s;
@@ -1192,7 +991,8 @@ static int execTok (struct lexSpec *spec, const char **src,
      else
      {
          *tokBuf = s++;
-        while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
+        while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
+               *s != ';')
              s++;
          *tokLen = s - *tokBuf;
      }
@@ -1213,32 +1013,26 @@ static char *regxStrz (const char *src, int len, char *str)
  static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
                           int argc, char **argv)
  {
-    struct lexSpec *spec = clientData;
+    struct lexSpec *spec = (struct lexSpec *) clientData;
      if (argc < 2)
         return TCL_ERROR;
      if (!strcmp(argv[1], "record") && argc == 3)
      {
         char *absynName = argv[2];
-       data1_absyn *absyn;
+        data1_node *res;
  
  #if REGX_DEBUG
-       logf (LOG_DEBUG, "begin record %s", absynName);
+       logf (LOG_LOG, "begin record %s", absynName);
  #endif
-       if (!(absyn = data1_get_absyn (spec->dh, absynName)))
-           logf (LOG_WARN, "Unknown tagset: %s", absynName);
-       else
-       {
-           data1_node *res;
-           
-           res = data1_mk_node (spec->dh, spec->m);
-           res->which = DATA1N_root;
-           res->u.root.type = absynName;
-           res->u.root.absyn = absyn;
-           res->root = res;
-           
-           spec->d1_stack[spec->d1_level] = res;
-           spec->d1_stack[++(spec->d1_level)] = NULL;
-       }
+        res = data1_mk_root (spec->dh, spec->m, absynName);
+        
+        spec->d1_stack[spec->d1_level++] = res;
+
+        res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+        spec->d1_stack[spec->d1_level++] = res;
+
+        spec->d1_stack[spec->d1_level] = NULL;
      }
      else if (!strcmp(argv[1], "element") && argc == 3)
      {
@@ -1254,7 +1048,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
      {
         struct lexContext *lc = spec->context;
  #if REGX_DEBUG
-       logf (LOG_DEBUG, "begin context %s",argv[2]);
+       logf (LOG_LOG, "begin context %s",argv[2]);
  #endif
         while (lc && strcmp (argv[2], lc->name))
             lc = lc->next;
@@ -1273,7 +1067,7 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
  static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
                         int argc, char **argv)
  {
-    struct lexSpec *spec = clientData;
+    struct lexSpec *spec = (struct lexSpec *) clientData;
      if (argc < 2)
         return TCL_ERROR;
      
@@ -1285,7 +1079,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
             (spec->d1_level)--;
         }
  #if REGX_DEBUG
-       logf (LOG_DEBUG, "end record");
+       logf (LOG_LOG, "end record");
  #endif
         spec->stop_flag = 1;
      }
@@ -1306,7 +1100,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
         if (spec->d1_level == 0)
         {
  #if REGX_DEBUG
-           logf (LOG_DEBUG, "end element end records");
+           logf (LOG_LOG, "end element end records");
  #endif
             spec->stop_flag = 1;
         }
@@ -1314,7 +1108,7 @@ static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
      else if (!strcmp (argv[1], "context"))
      {
  #if REGX_DEBUG
-       logf (LOG_DEBUG, "end context");
+       logf (LOG_LOG, "end context");
  #endif
         if (spec->context_stack_top)
             (spec->context_stack_top)--;
@@ -1330,7 +1124,7 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
      int argi = 1;
      int textFlag = 0;
      const char *element = 0;
-    struct lexSpec *spec = clientData;
+    struct lexSpec *spec = (struct lexSpec *) clientData;
      
      while (argi < argc)
      {
@@ -1353,7 +1147,14 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
  
      while (argi < argc)
      {
+#if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
+       Tcl_DString ds;
+       char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
+       execData (spec, native, strlen(native), textFlag);
+       Tcl_DStringFree (&ds);
+#else
         execData (spec, argv[argi], strlen(argv[argi]), textFlag);
+#endif
         argi++;
      }
      if (element)
@@ -1364,7 +1165,7 @@ static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
  static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
                            int argc, char **argv)
  {
-    struct lexSpec *spec = clientData;
+    struct lexSpec *spec = (struct lexSpec *) clientData;
      int argi = 1;
      int offset = 0;
      int no;
@@ -1395,6 +1196,7 @@ static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
  static void execTcl (struct lexSpec *spec, struct regxCode *code)
  {   
      int i;
+    int ret;
      for (i = 0; i < spec->arg_no; i++)
      {
         char var_name[10], *var_buf;
@@ -1411,7 +1213,19 @@ static void execTcl (struct lexSpec *spec, struct regxCode *code)
             var_buf[var_len] = ch;
         }
      }
-    Tcl_Eval (spec->tcl_interp, code->str);
+#if HAVE_TCL_OBJECTS
+    ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
+#else
+    ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
+#endif
+    if (ret != TCL_OK)
+    {
+       const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
+       logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s", 
+           spec->tcl_interp->errorLine,
+           spec->tcl_interp->result,
+           err ? err : "[NO ERRORINFO]");
+    }
  }
  /* HAVE_TCL_H */
  #endif
@@ -1450,31 +1264,24 @@ static void execCode (struct lexSpec *spec, struct regxCode *code)
                  if (spec->d1_level == 0)
                  {
                      static char absynName[64];
-                    data1_absyn *absyn;
+                    data1_node *res;
  
                      if (cmd_len > 63)
                          cmd_len = 63;
                      memcpy (absynName, cmd_str, cmd_len);
                      absynName[cmd_len] = '\0';
-
  #if REGX_DEBUG
-                    logf (LOG_DEBUG, "begin record %s", absynName);
+                    logf (LOG_LOG, "begin record %s", absynName);
  #endif
-                    if (!(absyn = data1_get_absyn (spec->dh, absynName)))
-                        logf (LOG_WARN, "Unknown tagset: %s", absynName);
-                    else
-                    {
-                        data1_node *res;
-
-                        res = data1_mk_node (spec->dh, spec->m);
-                        res->which = DATA1N_root;
-                        res->u.root.type = absynName;
-                        res->u.root.absyn = absyn;
-                        res->root = res;
-                        
-                        spec->d1_stack[spec->d1_level] = res;
-                        spec->d1_stack[++(spec->d1_level)] = NULL;
-                    }
+                    res = data1_mk_root (spec->dh, spec->m, absynName);
+                    
+                    spec->d1_stack[spec->d1_level++] = res;
+
+                    res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+                    spec->d1_stack[spec->d1_level++] = res;
+
+                    spec->d1_stack[spec->d1_level] = NULL;
                  }
                  r = execTok (spec, &s, &cmd_str, &cmd_len);
              }
@@ -1525,7 +1332,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code)
                     r = execTok (spec, &s, &cmd_str, &cmd_len);
                     p = regxStrz (cmd_str, cmd_len, ptmp);
  #if REGX_DEBUG
-                   logf (LOG_DEBUG, "begin context %s", p);
+                   logf (LOG_LOG, "begin context %s", p);
  #endif
                     while (lc && strcmp (p, lc->name))
                         lc = lc->next;
@@ -1560,7 +1367,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code)
                 }
                 r = execTok (spec, &s, &cmd_str, &cmd_len);
  #if REGX_DEBUG
-               logf (LOG_DEBUG, "end record");
+               logf (LOG_LOG, "end record");
  #endif
                 spec->stop_flag = 1;
             }
@@ -1582,7 +1389,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code)
                  if (spec->d1_level == 0)
                  {
  #if REGX_DEBUG
-                   logf (LOG_DEBUG, "end element end records");
+                   logf (LOG_LOG, "end element end records");
  #endif
                     spec->stop_flag = 1;
                  }
@@ -1591,7 +1398,7 @@ static void execCode (struct lexSpec *spec, struct regxCode *code)
             else if (!strcmp (p, "context"))
             {
  #if REGX_DEBUG
-               logf (LOG_DEBUG, "end context");
+               logf (LOG_LOG, "end context");
  #endif
                 if (spec->context_stack_top)
                     (spec->context_stack_top)--;
@@ -1787,7 +1594,7 @@ static int execRule (struct lexSpec *spec, struct lexContext *context,
                       int ruleNo, int start_ptr, int *pptr)
  {
  #if REGX_DEBUG
-    logf (LOG_DEBUG, "exec rule %d", ruleNo);
+    logf (LOG_LOG, "exec rule %d", ruleNo);
  #endif
      return execAction (spec, context->fastRule[ruleNo]->actionList,
                         start_ptr, pptr);
@@ -1865,7 +1672,7 @@ data1_node *lexNode (struct lexSpec *spec, int *ptr)
                          if (spec->f_win_ef && *ptr != F_WIN_EOF)
                         {
  #if REGX_DEBUG
-                           logf (LOG_DEBUG, "regx: endf ptr=%d", *ptr);
+                           logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
  #endif
                              (*spec->f_win_ef)(spec->f_win_fh, *ptr);
                         }
@@ -1955,7 +1762,7 @@ static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
  
  void grs_destroy(void *clientData)
  {
-    struct lexSpecs *specs = clientData;
+    struct lexSpecs *specs = (struct lexSpecs *) clientData;
      if (specs->spec)
      {
         lexSpecDestroy(&specs->spec);
@@ -1965,7 +1772,7 @@ void grs_destroy(void *clientData)
  
  void *grs_init(void)
  {
-    struct lexSpecs *specs = xmalloc (sizeof(*specs));
+    struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
      specs->spec = 0;
      return specs;
  }
@@ -1973,11 +1780,11 @@ void *grs_init(void)
  data1_node *grs_read_regx (struct grs_read_info *p)
  {
      int res;
-    struct lexSpecs *specs = p->clientData;
+    struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
      struct lexSpec **curLexSpec = &specs->spec;
  
  #if REGX_DEBUG
-    logf (LOG_DEBUG, "grs_read_regx");
+    logf (LOG_LOG, "grs_read_regx");
  #endif
      if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
      {
@@ -2019,11 +1826,11 @@ RecTypeGrs recTypeGrs_regx = &regx_type;
  data1_node *grs_read_tcl (struct grs_read_info *p)
  {
      int res;
-    struct lexSpecs *specs = p->clientData;
+    struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
      struct lexSpec **curLexSpec = &specs->spec;
  
  #if REGX_DEBUG
-    logf (LOG_DEBUG, "grs_read_tcl");
+    logf (LOG_LOG, "grs_read_tcl");
  #endif
      if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
      {
@@ -2031,7 +1838,9 @@ data1_node *grs_read_tcl (struct grs_read_info *p)
          if (*curLexSpec)
              lexSpecDestroy (curLexSpec);
          *curLexSpec = lexSpecCreate (p->type, p->dh);
+       Tcl_FindExecutable("");
         tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
+       Tcl_Init(tcl_interp);
         Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
         Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
         Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);