X-Git-Url: http://git.indexdata.com/?a=blobdiff_plain;f=recctrl%2Fregxread.c;h=0bf186c1e6faf699e6d6dfd6af2a98dc7868adbe;hb=47ea1fc957c7b97bb30a26698f072109cae275e4;hp=5ec5624874407521ae4ee671d4d27ea4c1d552f6;hpb=1577e2d83cdd5e6c251d0c674d327252cf9b27be;p=idzebra-moved-to-github.git

diff --git a/recctrl/regxread.c b/recctrl/regxread.c
index 5ec5624..0bf186c 100644
--- a/recctrl/regxread.c
+++ b/recctrl/regxread.c
@@ -1,203 +1,8 @@
 /*
- * Copyright (C) 1994-2001, Index Data
+ * Copyright (C) 1994-2002, Index Data
  * All rights reserved.
  *
- * $Log: regxread.c,v $
- * Revision 1.36  2001-05-22 21:02:26  adam
- * Fixes for Tcl UTF8 character handling.
- *
- * Revision 1.35  2001/03/29 21:31:31  adam
- * Fixed "record begin" for Tcl filter.
- *
- * Revision 1.34  2000/11/29 14:24:01  adam
- * Script configure uses yaz pthreads options. Added locking for
- * zebra_register_{lock,unlock}.
- *
- * Revision 1.33  1999/11/30 13:48:04  adam
- * Improved installation. Updated for inclusion of YAZ header files.
- *
- * Revision 1.32  1999/09/07 07:19:21  adam
- * Work on character mapping. Implemented replace rules.
- *
- * Revision 1.31  1999/07/14 13:05:29  adam
- * Tcl filter works with objects when TCL is version 8 or later; filter
- * works with strings otherwise (slow).
- *
- * Revision 1.30  1999/07/14 10:55:28  adam
- * Fixed memory leak.
- *
- * Revision 1.29  1999/07/12 07:27:54  adam
- * Improved speed of Tcl processing. Fixed one memory leak.
- *
- * Revision 1.28  1999/07/06 12:26:04  adam
- * Fixed filters so that MS-DOS CR is ignored.
- *
- * Revision 1.27  1999/06/28 13:25:40  quinn
- * Improved diagnostics for Tcl
- *
- * Revision 1.26  1999/05/26 07:49:14  adam
- * C++ compilation.
- *
- * Revision 1.25  1999/05/25 12:33:32  adam
- * Fixed bug in Tcl filter.
- *
- * Revision 1.24  1999/05/21 11:08:46  adam
- * Tcl filter attempts to read <filt>.tflt. Improvements to configure
- * script so that it reads uninstalled Tcl source.
- *
- * Revision 1.23  1999/05/20 12:57:18  adam
- * Implemented TCL filter. Updated recctrl system.
- *
- * Revision 1.22  1998/11/03 16:07:13  adam
- * Yet another fix.
- *
- * Revision 1.21  1998/11/03 15:43:39  adam
- * Fixed bug introduced by previous commit.
- *
- * Revision 1.20  1998/11/03 14:51:28  adam
- * Changed code so that it creates as few data1 nodes as possible.
- *
- * Revision 1.19  1998/11/03 10:22:39  adam
- * Fixed memory leak that could occur for when large data1 node were
- * concatenated. Data-type data1_nodes may have multiple nodes.
- *
- * Revision 1.18  1998/10/15 13:11:47  adam
- * Added support for option -record for "end element". When specified
- * end element will mark end-of-record when at outer-level.
- *
- * Revision 1.17  1998/07/01 10:13:51  adam
- * Minor fix.
- *
- * Revision 1.16  1998/06/30 15:15:09  adam
- * Tags are trimmed: white space removed before- and after the tag.
- *
- * Revision 1.15  1998/06/30 12:55:45  adam
- * Bug fix.
- *
- * Revision 1.14  1998/03/05 08:41:00  adam
- * Implemented rule contexts.
- *
- * Revision 1.13  1997/12/12 06:33:58  adam
- * Fixed bug that showed up when multiple filter where used.
- * Made one routine thread-safe.
- *
- * Revision 1.12  1997/11/18 10:03:24  adam
- * Member num_children removed from data1_node.
- *
- * Revision 1.11  1997/11/06 11:41:01  adam
- * Implemented "begin variant" for the sgml.regx filter.
- *
- * Revision 1.10  1997/10/31 12:36:12  adam
- * Minor change that avoids compiler warning.
- *
- * Revision 1.9  1997/09/29 09:02:49  adam
- * Fixed small bug (introduced by previous commit).
- *
- * Revision 1.8  1997/09/17 12:19:22  adam
- * Zebra version corresponds to YAZ version 1.4.
- * Changed Zebra server so that it doesn't depend on global common_resource.
- *
- * Revision 1.7  1997/07/15 16:33:07  adam
- * Check for zero length in execData.
- *
- * Revision 1.6  1997/02/24 10:41:51  adam
- * Cleanup of code and commented out the "end element-end-record" code.
- *
- * Revision 1.5  1997/02/19 16:22:33  adam
- * Fixed "end element" to terminate record in outer-most level.
- *
- * Revision 1.4  1997/02/12 20:42:58  adam
- * Changed some log messages.
- *
- * Revision 1.3  1996/11/08 14:05:33  adam
- * Bug fix: data1 node member u.tag.get_bytes weren't initialized.
- *
- * Revision 1.2  1996/10/29  14:02:09  adam
- * Doesn't use the global data1_tabpath (from YAZ). Instead the function
- * data1_get_tabpath is used.
- *
- * Revision 1.1  1996/10/11 10:57:30  adam
- * New module recctrl. Used to manage records (extract/retrieval).
- *
- * Revision 1.24  1996/06/17 14:25:31  adam
- * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
- *
- * Revision 1.23  1996/06/04 10:19:00  adam
- * Minor changes - removed include of ctype.h.
- *
- * Revision 1.22  1996/06/03  15:23:13  adam
- * Bug fix: /../ BODY /../ - pattern didn't match EOF.
- *
- * Revision 1.21  1996/05/14  16:58:38  adam
- * Minor change.
- *
- * Revision 1.20  1996/05/01  13:46:36  adam
- * First work on multiple records in one file.
- * New option, -offset, to the "unread" command in the filter module.
- *
- * Revision 1.19  1996/02/12  16:18:20  adam
- * Yet another bug fix in implementation of unread command.
- *
- * Revision 1.18  1996/02/12  16:07:54  adam
- * Bug fix in new unread command.
- *
- * Revision 1.17  1996/02/12  15:56:11  adam
- * New code command: unread.
- *
- * Revision 1.16  1996/01/17  14:57:51  adam
- * Prototype changed for reader functions in extract/retrieve. File
- *  is identified by 'void *' instead of 'int.
- *
- * Revision 1.15  1996/01/08  19:15:47  adam
- * New input filter that works!
- *
- * Revision 1.14  1996/01/08  09:10:38  adam
- * Yet another complete rework on this module.
- *
- * Revision 1.13  1995/12/15  17:21:50  adam
- * This version is able to set data.formatted_text in data1-nodes.
- *
- * Revision 1.12  1995/12/15  16:20:10  adam
- * The filter files (*.flt) are read from the path given by data1_tabpath.
- *
- * Revision 1.11  1995/12/15  12:35:16  adam
- * Better logging.
- *
- * Revision 1.10  1995/12/15  10:35:36  adam
- * Misc. bug fixes.
- *
- * Revision 1.9  1995/12/14  16:38:48  adam
- * Completely new attempt to make regular expression parsing.
- *
- * Revision 1.8  1995/12/13  17:16:59  adam
- * Small changes.
- *
- * Revision 1.7  1995/12/13  16:51:58  adam
- * Modified to set last_child in data1_nodes.
- * Uses destroy handler to free up data text nodes.
- *
- * Revision 1.6  1995/12/13  13:45:37  quinn
- * Changed data1 to use nmem.
- *
- * Revision 1.5  1995/12/11  09:12:52  adam
- * The rec_get function returns NULL if record doesn't exist - will
- * happen in the server if the result set records have been deleted since
- * the creation of the set (i.e. the search).
- * The server saves a result temporarily if it is 'volatile', i.e. the
- * set is register dependent.
- *
- * Revision 1.4  1995/12/05  16:57:40  adam
- * More work on regular patterns.
- *
- * Revision 1.3  1995/12/05  09:37:09  adam
- * One malloc was renamed to xmalloc.
- *
- * Revision 1.2  1995/12/04  17:59:24  adam
- * More work on regular expression conversion.
- *
- * Revision 1.1  1995/12/04  14:25:30  adam
- * Started work on regular expression parsed input to structured records.
- *
+ * $Id: regxread.c,v 1.43 2002-07-05 12:43:30 adam Exp $
  */
 #include <stdio.h>
 #include <assert.h>
@@ -750,13 +555,13 @@ int readFileSpec (struct lexSpec *spec)
     if (spec->tcl_interp)
     {
 	sprintf (fname, "%s.tflt", spec->name);
-	spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), fname, "r");
+	spec_inf = data1_path_fopen (spec->dh, fname, "r");
     }
 #endif
     if (!spec_inf)
     {
 	sprintf (fname, "%s.flt", spec->name);
-	spec_inf = yaz_path_fopen (data1_get_tabpath(spec->dh), fname, "r");
+	spec_inf = data1_path_fopen (spec->dh, fname, "r");
     }
     if (!spec_inf)
     {
@@ -867,28 +672,14 @@ static void execData (struct lexSpec *spec,
     {
 	org_len = 0;
 
-	res = data1_mk_node (spec->dh, spec->m);
-	res->parent = parent;
-	res->which = DATA1N_data;
+	res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
 	res->u.data.what = DATA1I_text;
 	res->u.data.len = 0;
 	res->u.data.formatted_text = formatted_text;
-#if 0
-	if (elen > DATA1_LOCALDATA)
-	    res->u.data.data = nmem_malloc (spec->m, elen);
-	else
-	    res->u.data.data = res->lbuf;
-	memcpy (res->u.data.data, ebuf, elen);
-#else
 	res->u.data.data = 0;
-#endif
-	res->root = parent->root;
 	
-	parent->last_child = res;
 	if (spec->d1_stack[spec->d1_level])
 	    spec->d1_stack[spec->d1_level]->next = res;
-	else
-	    parent->child = res;
 	spec->d1_stack[spec->d1_level] = res;
     }
     if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
@@ -971,21 +762,9 @@ static void variantBegin (struct lexSpec *spec,
     
     if (parent->which != DATA1N_variant)
     {
-	res = data1_mk_node (spec->dh, spec->m);
-	res->parent = parent;
-	res->which = DATA1N_variant;
-	res->u.variant.type = 0;
-	res->u.variant.value = 0;
-	res->root = parent->root;
-
-	parent->last_child = res;
+	res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
 	if (spec->d1_stack[spec->d1_level])
-	{
 	    tagDataRelease (spec);
-	    spec->d1_stack[spec->d1_level]->next = res;
-	}
-	else
-	    parent->child = res;
 	spec->d1_stack[spec->d1_level] = res;
 	spec->d1_stack[++(spec->d1_level)] = NULL;
     }
@@ -1000,10 +779,7 @@ static void variantBegin (struct lexSpec *spec,
     logf (LOG_LOG, "variant node (%d)", spec->d1_level);
 #endif
     parent = spec->d1_stack[spec->d1_level-1];
-    res = data1_mk_node (spec->dh, spec->m);
-    res->parent = parent;
-    res->which = DATA1N_variant;
-    res->root = parent->root;
+    res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
     res->u.variant.type = tp;
 
     if (value_len >= DATA1_LOCALDATA)
@@ -1013,14 +789,8 @@ static void variantBegin (struct lexSpec *spec,
 
     res->u.variant.value = res->lbuf;
     
-    parent->last_child = res;
     if (spec->d1_stack[spec->d1_level])
-    {
 	tagDataRelease (spec);
-        spec->d1_stack[spec->d1_level]->next = res;
-    }
-    else
-        parent->child = res;
     spec->d1_stack[spec->d1_level] = res;
     spec->d1_stack[++(spec->d1_level)] = NULL;
 }
@@ -1058,8 +828,7 @@ static void tagBegin (struct lexSpec *spec,
     parent = spec->d1_stack[spec->d1_level -1];
     partag = get_parent_tag(spec->dh, parent);
    
-    res = data1_mk_node_type (spec->dh, spec->m, DATA1N_tag);
-    res->parent = parent;
+    res = data1_mk_node2 (spec->dh, spec->m, DATA1N_tag, parent);
 
     if (len >= DATA1_LOCALDATA)
 	res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1);
@@ -1082,16 +851,9 @@ static void tagBegin (struct lexSpec *spec,
 				      spec->d1_stack[0]->u.root.absyn,
 				      e, res->u.tag.tag);
     res->u.tag.element = elem;
-    res->root = parent->root;
 
-    parent->last_child = res;
     if (spec->d1_stack[spec->d1_level])
-    {
 	tagDataRelease (spec);
-        spec->d1_stack[spec->d1_level]->next = res;
-    }
-    else
-        parent->child = res;
     spec->d1_stack[spec->d1_level] = res;
     spec->d1_stack[++(spec->d1_level)] = NULL;
 }
@@ -1269,27 +1031,20 @@ static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
     if (!strcmp(argv[1], "record") && argc == 3)
     {
 	char *absynName = argv[2];
-	data1_absyn *absyn;
+        data1_node *res;
 
 #if REGX_DEBUG
 	logf (LOG_LOG, "begin record %s", absynName);
 #endif
-	if (!(absyn = data1_get_absyn (spec->dh, absynName)))
-	    logf (LOG_WARN, "Unknown tagset: %s", absynName);
-	else
-	{
-	    data1_node *res;
-	    
-	    res = data1_mk_node (spec->dh, spec->m);
-	    res->which = DATA1N_root;
-	    res->u.root.type =
-                data1_insert_string(spec->dh, res, spec->m, absynName);
-	    res->u.root.absyn = absyn;
-	    res->root = res;
-	    
-	    spec->d1_stack[spec->d1_level] = res;
-	    spec->d1_stack[++(spec->d1_level)] = NULL;
-	}
+        res = data1_mk_root (spec->dh, spec->m, absynName);
+        
+        spec->d1_stack[spec->d1_level++] = res;
+
+        res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+        spec->d1_stack[spec->d1_level++] = res;
+
+        spec->d1_stack[spec->d1_level] = NULL;
     }
     else if (!strcmp(argv[1], "element") && argc == 3)
     {
@@ -1521,31 +1276,24 @@ static void execCode (struct lexSpec *spec, struct regxCode *code)
                 if (spec->d1_level == 0)
                 {
                     static char absynName[64];
-                    data1_absyn *absyn;
+                    data1_node *res;
 
                     if (cmd_len > 63)
                         cmd_len = 63;
                     memcpy (absynName, cmd_str, cmd_len);
                     absynName[cmd_len] = '\0';
-
 #if REGX_DEBUG
                     logf (LOG_LOG, "begin record %s", absynName);
 #endif
-                    if (!(absyn = data1_get_absyn (spec->dh, absynName)))
-                        logf (LOG_WARN, "Unknown tagset: %s", absynName);
-                    else
-                    {
-                        data1_node *res;
-
-                        res = data1_mk_node (spec->dh, spec->m);
-                        res->which = DATA1N_root;
-                        res->u.root.type = absynName;
-                        res->u.root.absyn = absyn;
-                        res->root = res;
-                        
-                        spec->d1_stack[spec->d1_level] = res;
-                        spec->d1_stack[++(spec->d1_level)] = NULL;
-                    }
+                    res = data1_mk_root (spec->dh, spec->m, absynName);
+                    
+                    spec->d1_stack[spec->d1_level++] = res;
+
+                    res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
+
+                    spec->d1_stack[spec->d1_level++] = res;
+
+                    spec->d1_stack[spec->d1_level] = NULL;
                 }
                 r = execTok (spec, &s, &cmd_str, &cmd_len);
             }
@@ -2102,7 +1850,9 @@ data1_node *grs_read_tcl (struct grs_read_info *p)
         if (*curLexSpec)
             lexSpecDestroy (curLexSpec);
         *curLexSpec = lexSpecCreate (p->type, p->dh);
+	Tcl_FindExecutable("");
 	tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
+	Tcl_Init(tcl_interp);
 	Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
 	Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
 	Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);