2 * Copyright (C) 1994-2002, Index Data
5 * $Id: regxread.c,v 1.39 2002-04-15 09:07:10 adam Exp $
12 #include <yaz/tpath.h>
20 #if MAJOR_VERSION >= 8
21 #define HAVE_TCL_OBJECTS
27 #define F_WIN_EOF 2000000000
31 #define REGX_PATTERN 1
36 #define REGX_CONTEXT 6
/* One step of a rule's action list.  Only part of the declaration is
 * visible here: a DFA pointer used by REGX_PATTERN steps, a regxCode pointer
 * used by REGX_CODE steps, and the link to the next step in the list. */
46 struct lexRuleAction {
50 struct DFA *dfa; /* REGX_PATTERN */
53 struct regxCode *code; /* REGX_CODE */
55 struct lexRuleAction *next;
60 struct lexRuleAction *actionList;
64 struct lexRuleInfo info;
/* Members of the record accessed below through `struct lexContext *`
 * (the struct header is not part of this excerpt). */
/* linked list of rules; readOneSpec pushes new rules at the head */
71 struct lexRule *rules;
/* table indexed by rule number, filled in by readFileSpec */
72 struct lexRuleInfo **fastRule;
/* action lists replaced by readOneSpec and executed by lexRoot */
76 struct lexRuleAction *beginActionList;
77 struct lexRuleAction *endActionList;
78 struct lexRuleAction *initActionList;
/* next context in the list headed by the spec's `context` member */
79 struct lexContext *next;
/* Members of the record accessed below through `struct lexSpec *`
 * (the struct header is not part of this excerpt). */
/* head of the list of all contexts read from the spec file */
89 struct lexContext *context;
/* stack of active contexts; entry [context_stack_top] is the current one */
91 struct lexContext **context_stack;
92 int context_stack_size;
93 int context_stack_top;
/* Tcl interpreter; non-NULL only for specs set up by grs_read_tcl */
99 Tcl_Interp *tcl_interp;
/* optional callback; lexNode calls it with the file handle and an offset */
102 void (*f_win_ef)(void *, off_t);
104 int f_win_start; /* first byte of buffer is this file offset */
105 int f_win_end; /* last byte of buffer is this offset - 1 */
106 int f_win_size; /* size of buffer */
107 char *f_win_buf; /* buffer itself */
/* read callback: (handle, destination buffer, byte count) */
108 int (*f_win_rf)(void *, char *, size_t);
/* seek callback: (handle, file offset) */
109 off_t (*f_win_sf)(void *, off_t);
/* per-level accumulation buffers for text data (see execData) */
111 struct lexConcatBuf *concatBuf;
/* per-level pointer to the most recent node built at that level */
113 data1_node **d1_stack;
124 struct lexSpec *spec;
/* f_win_get: return a pointer to the input bytes between file offsets
 * start_pos and end_pos, going through the buffered window kept in spec.
 * The number of bytes available at the returned pointer is stored through
 * the `size` out-parameter (declared on a line not shown here).
 * The result points into spec->f_win_buf, so a later call that refills or
 * shifts the window overwrites it. */
127 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
130 int i, r, off = start_pos - spec->f_win_start;
/* case 1: the whole range is already inside the window */
132 if (off >= 0 && end_pos <= spec->f_win_end)
134 *size = end_pos - start_pos;
135 return spec->f_win_buf + off;
/* case 2: the start is outside the window - seek and refill from start_pos */
137 if (off < 0 || start_pos >= spec->f_win_end)
139 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
140 spec->f_win_start = start_pos;
/* the buffer is allocated lazily on first use */
142 if (!spec->f_win_buf)
143 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
144 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
146 spec->f_win_end = spec->f_win_start + *size;
/* never report more than the caller asked for */
148 if (*size > end_pos - start_pos)
149 *size = end_pos - start_pos;
150 return spec->f_win_buf;
/* case 3: the start is inside the window but the end is not - move the
 * bytes from start_pos onward to the front of the buffer, then read more
 * data behind them */
152 for (i = 0; i<spec->f_win_end - start_pos; i++)
153 spec->f_win_buf[i] = spec->f_win_buf[i + off];
154 r = (*spec->f_win_rf)(spec->f_win_fh,
156 spec->f_win_size - i);
157 spec->f_win_start = start_pos;
158 spec->f_win_end += r;
160 if (*size > end_pos - start_pos)
161 *size = end_pos - start_pos;
162 return spec->f_win_buf;
/* f_win_advance: fetch the byte at file offset *pos and step *pos forward.
 * When *pos lies inside the current window the byte is taken straight from
 * the buffer; otherwise a one-byte range is requested through f_win_get.
 * *pos == F_WIN_EOF is tested explicitly; what is returned in that case and
 * on a short read is on lines not shown in this excerpt. */
165 static int f_win_advance (struct lexSpec *spec, int *pos)
170 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
171 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
172 if (*pos == F_WIN_EOF)
174 buf = f_win_get (spec, *pos, *pos+1, &size);
/* regxCodeDel: dispose of the regxCode object referenced by *pp.
 * The visible part drops this module's reference on the attached Tcl
 * object; the remaining cleanup is on lines not shown in this excerpt. */
184 static void regxCodeDel (struct regxCode **pp)
186 struct regxCode *p = *pp;
191 Tcl_DecrRefCount (p->tcl_obj);
/* regxCodeMk: build a regxCode object from the len bytes at buf.
 * A private copy of the text is kept in p->str (len+1 bytes are allocated;
 * presumably the extra byte receives a terminator on a line not shown).
 * A Tcl string object holding the same text is also created and its
 * reference count incremented; regxCodeDel releases that reference. */
199 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
203 p = (struct regxCode *) xmalloc (sizeof(*p));
204 p->str = (char *) xmalloc (len+1);
205 memcpy (p->str, buf, len);
208 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
210 Tcl_IncrRefCount (p->tcl_obj);
/* lexSpecDFA: create a DFA handle configured for this module's pattern
 * syntax.  The parser's character map is adjusted: the entries for blank
 * and tab are removed and '/' is added with value 0.
 * NOTE(review): presumably this makes white space ordinary pattern text and
 * lets '/' end a pattern (readOneSpec expects '/' after a pattern) -
 * confirm against the dfa_parse_cmap_* documentation. */
215 static struct DFA *lexSpecDFA (void)
220 dfa_parse_cmap_del (dfa, ' ');
221 dfa_parse_cmap_del (dfa, '\t');
222 dfa_parse_cmap_add (dfa, '/', 0);
/* actionListDel: walk the action list starting at *rap and release what
 * each step owns - the DFA of a pattern step, the regxCode of a code step.
 * ra1 carries the successor so iteration can continue past the current
 * step (its assignment is on a line not shown in this excerpt). */
226 static void actionListDel (struct lexRuleAction **rap)
228 struct lexRuleAction *ra1, *ra;
230 for (ra = *rap; ra; ra = ra1)
236 dfa_delete (&ra->u.pattern.dfa);
239 regxCodeDel (&ra->u.code);
/* lexContextCreate: allocate a new lexical context called `name`.
 * The name is duplicated, a fresh DFA is obtained from lexSpecDFA and the
 * begin/end/init action lists start out empty.  Other members are
 * initialised on lines not shown in this excerpt. */
247 static struct lexContext *lexContextCreate (const char *name)
249 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
251 p->name = xstrdup (name);
254 p->dfa = lexSpecDFA ();
257 p->beginActionList = NULL;
258 p->endActionList = NULL;
259 p->initActionList = NULL;
/* lexContextDestroy: release what context p owns - its DFA, the action
 * list of every rule in p->rules, and the begin/end/init action lists.
 * rp1 carries the successor while the rule list is walked. */
264 static void lexContextDestroy (struct lexContext *p)
266 struct lexRule *rp, *rp1;
268 dfa_delete (&p->dfa);
270 for (rp = p->rules; rp; rp = rp1)
273 actionListDel (&rp->info.actionList);
276 actionListDel (&p->beginActionList);
277 actionListDel (&p->endActionList);
278 actionListDel (&p->initActionList);
/* lexSpecCreate: allocate a lexSpec named `name`.
 * Visible setup: a private copy of the name, a context stack with room for
 * 100 entries, and two arrays of p->maxLevel entries each - the per-level
 * concat buffers (initially empty) and the data1 node stack.
 * p->maxLevel and the use of dh are set on lines not shown here. */
283 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
288 p = (struct lexSpec *) xmalloc (sizeof(*p));
289 p->name = (char *) xmalloc (strlen(name)+1);
290 strcpy (p->name, name);
297 p->context_stack_size = 100;
298 p->context_stack = (struct lexContext **)
299 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
303 p->concatBuf = (struct lexConcatBuf *)
304 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
305 for (i = 0; i < p->maxLevel; i++)
307 p->concatBuf[i].max = 0;
308 p->concatBuf[i].buf = 0;
310 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
/* lexSpecDestroy: tear down the lexSpec referenced by *pp.
 * Visible steps: free every per-level concat buffer and the array holding
 * them, destroy each context in the context list, delete the Tcl
 * interpreter, and free the window buffer and the context stack.
 * Guards (e.g. for a NULL spec or a missing interpreter) are on lines not
 * shown in this excerpt. */
315 static void lexSpecDestroy (struct lexSpec **pp)
318 struct lexContext *lt;
326 for (i = 0; i < p->maxLevel; i++)
327 xfree (p->concatBuf[i].buf);
328 xfree (p->concatBuf);
/* remember the successor before the current context is destroyed */
333 struct lexContext *lt_next = lt->next;
334 lexContextDestroy (lt);
339 Tcl_DeleteInterp (p->tcl_interp);
342 xfree (p->f_win_buf);
343 xfree (p->context_stack);
/* readParseToken: scan one token of spec-file syntax starting at *cpp.
 * Leading blanks, tabs, newlines and carriage returns are skipped.  In the
 * keyword branch, letters are collected into `cmd` with upper case folded
 * to lower case, and the word is compared against "begin", "end", "body",
 * "context" and "init"; anything else is reported as a bad command.
 * The token codes returned, the use of *len and the other token kinds are
 * on lines not shown in this excerpt. */
349 static int readParseToken (const char **cpp, int *len)
351 const char *cp = *cpp;
355 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
384 if (*cp >= 'a' && *cp <= 'z')
386 else if (*cp >= 'A' && *cp <= 'Z')
/* fold upper case to lower case */
387 cmd[i] = *cp + 'a' - 'A';
/* stop storing once cmd is nearly full */
390 if (i < (int) sizeof(cmd)-2)
397 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
/* skip to the next white space character */
399 while (*cp && *cp != ' ' && *cp != '\t' &&
400 *cp != '\n' && *cp != '\r')
406 if (!strcmp (cmd, "begin"))
408 else if (!strcmp (cmd, "end"))
410 else if (!strcmp (cmd, "body"))
412 else if (!strcmp (cmd, "context"))
414 else if (!strcmp (cmd, "init"))
418 logf (LOG_WARN, "bad command %s", cmd);
424 static int actionListMk (struct lexSpec *spec, const char *s,
425 struct lexRuleAction **ap)
431 while ((tok = readParseToken (&s, &len)))
439 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
441 regxCodeMk (&(*ap)->u.code, s, len);
445 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
447 (*ap)->u.pattern.body = bodyMark;
449 (*ap)->u.pattern.dfa = lexSpecDFA ();
451 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
456 logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
459 dfa_mkstate ((*ap)->u.pattern.dfa);
463 logf (LOG_WARN, "cannot use BEGIN here");
466 logf (LOG_WARN, "cannot use INIT here");
469 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/* readOneSpec: process one logical line s of a filter specification.
 * The first token selects what happens:
 *  - CONTEXT: a name token must follow; a context of that name is created
 *    and linked in front of spec->context.
 *  - BEGIN / END / INIT sections (the selecting tests are not shown):
 *    the matching action list of the current context is discarded and
 *    rebuilt from the rest of the line.
 *  - a pattern: it is added to the current context's DFA, must be followed
 *    by '/', and a new rule carrying the next rule number is pushed on the
 *    context's rule list with its actions parsed from the rest of the line.
 * A context called "main" is created where L505 is reached; the condition
 * guarding it is on a line not shown.  Return values are not visible. */
479 int readOneSpec (struct lexSpec *spec, const char *s)
483 struct lexContext *lc;
485 tok = readParseToken (&s, &len);
486 if (tok == REGX_CONTEXT)
/* NOTE(review): 32-byte buffer; confirm that len is limited to 31 on the
 * lines between the warning and the memcpy, which are not shown here */
488 char context_name[32];
489 tok = readParseToken (&s, &len);
490 if (tok != REGX_CODE)
492 logf (LOG_WARN, "missing name after CONTEXT keyword");
497 memcpy (context_name, s, len);
498 context_name[len] = '\0';
499 lc = lexContextCreate (context_name);
500 lc->next = spec->context;
505 spec->context = lexContextCreate ("main");
/* section keywords replace any action list defined earlier */
510 actionListDel (&spec->context->beginActionList);
511 actionListMk (spec, s, &spec->context->beginActionList);
514 actionListDel (&spec->context->endActionList);
515 actionListMk (spec, s, &spec->context->endActionList);
518 actionListDel (&spec->context->initActionList);
519 actionListMk (spec, s, &spec->context->initActionList);
523 logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
525 r = dfa_parse (spec->context->dfa, &s);
528 logf (LOG_WARN, "regular expression error. r=%d", r);
533 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
/* new rule: numbered in order of appearance, pushed at the list head */
537 rp = (struct lexRule *) xmalloc (sizeof(*rp));
538 rp->info.no = spec->context->ruleNo++;
539 rp->next = spec->context->rules;
540 spec->context->rules = rp;
541 actionListMk (spec, s, &rp->info.actionList);
/* readFileSpec: load the filter specification belonging to spec.
 * With a Tcl interpreter present "<name>.tflt" is opened through
 * data1_path_fopen; "<name>.flt" is opened as well (whether only as a
 * fallback depends on conditions not shown).  The file is read character
 * by character into a WRBUF; each assembled line is NUL-terminated and
 * handed to readOneSpec.  Lines starting with '#', white space or a
 * newline are skipped up to the end of the line.  Afterwards every context
 * gets a fastRule table indexed by rule number and its DFA is compiled.
 * The return value is on lines not shown in this excerpt. */
546 int readFileSpec (struct lexSpec *spec)
548 struct lexContext *lc;
549 int c, i, errors = 0;
555 if (spec->tcl_interp)
/* NOTE(review): unbounded sprintf into fname, whose size is not visible
 * here - confirm it cannot overflow for long spec names */
557 sprintf (fname, "%s.tflt", spec->name);
558 spec_inf = data1_path_fopen (spec->dh, fname, "r");
563 sprintf (fname, "%s.flt", spec->name);
564 spec_inf = data1_path_fopen (spec->dh, fname, "r");
/* NOTE(review): the message prints spec->name, not the file name tried */
568 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
571 logf (LOG_LOG, "reading regx filter %s", fname);
573 if (spec->tcl_interp)
574 logf (LOG_LOG, "Tcl enabled");
576 lineBuf = wrbuf_alloc();
581 wrbuf_rewind (lineBuf);
/* comment, blank or indented line: skip to the end of the line */
582 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
584 while (c != '\n' && c != EOF)
597 wrbuf_putc(lineBuf, c);
605 if (c != ' ' && c != '\t')
/* terminate the collected text so it can be used as a C string */
610 wrbuf_putc(lineBuf, '\0');
611 readOneSpec (spec, wrbuf_buf(lineBuf));
612 spec->lineNo += addLine;
616 wrbuf_free(lineBuf, 1);
621 debug_dfa_followpos = 1;
/* build the rule-number lookup table and compile the DFA of each context */
624 for (lc = spec->context; lc; lc = lc->next)
627 lc->fastRule = (struct lexRuleInfo **)
628 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
629 for (i = 0; i < lc->ruleNo; i++)
630 lc->fastRule[i] = NULL;
631 for (rp = lc->rules; rp; rp = rp->next)
632 lc->fastRule[rp->info.no] = &rp->info;
633 dfa_mkstate (lc->dfa);
642 static struct lexSpec *curLexSpec = NULL;
/* execData: add elen bytes of text at ebuf to the data1 tree at the
 * current level.
 * If the node already at d1_stack[d1_level] is a data node, the new text
 * is appended to it (org_len holds its present length).  Otherwise a new
 * DATA1N_data / DATA1I_text node is created under the node one level up
 * and linked in as the latest sibling.
 * The text is gathered in concatBuf[d1_level], which is enlarged (with 256
 * bytes of slack) when org_len + elen would reach its capacity; the node's
 * length is then increased by elen.  tagDataRelease later copies the
 * gathered text into the node.
 * NOTE(review): several visible lines (682-688) look like alternative code
 * paths; the conditions that select between them are not shown here. */
645 static void execData (struct lexSpec *spec,
646 const char *ebuf, int elen, int formatted_text)
648 struct data1_node *res, *parent;
651 if (elen == 0) /* shouldn't happen, but it does! */
/* debug output: long data is abbreviated to its first and last 15 bytes */
655 logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen,
656 ebuf, 15, ebuf + elen-15);
658 logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf);
660 logf (LOG_LOG, "data (%d bytes)", elen);
663 if (spec->d1_level <= 1)
666 parent = spec->d1_stack[spec->d1_level -1];
/* an existing data node at this level is extended rather than replaced */
669 if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data)
670 org_len = res->u.data.len;
675 res = data1_mk_node (spec->dh, spec->m);
676 res->parent = parent;
677 res->which = DATA1N_data;
678 res->u.data.what = DATA1I_text;
680 res->u.data.formatted_text = formatted_text;
682 if (elen > DATA1_LOCALDATA)
683 res->u.data.data = nmem_malloc (spec->m, elen);
685 res->u.data.data = res->lbuf;
686 memcpy (res->u.data.data, ebuf, elen);
688 res->u.data.data = 0;
690 res->root = parent->root;
692 parent->last_child = res;
693 if (spec->d1_stack[spec->d1_level])
694 spec->d1_stack[spec->d1_level]->next = res;
697 spec->d1_stack[spec->d1_level] = res;
/* grow the accumulation buffer, carrying over the text gathered so far */
699 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
701 char *old_buf, *new_buf;
703 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
704 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
705 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
707 memcpy (new_buf, old_buf, org_len);
710 spec->concatBuf[spec->d1_level].buf = new_buf;
712 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
713 res->u.data.len += elen;
/* execDataP: entry point used by lexNode for unmatched input; the visible
 * part forwards its arguments unchanged to execData. */
716 static void execDataP (struct lexSpec *spec,
717 const char *ebuf, int elen, int formatted_text)
719 execData (spec, ebuf, elen, formatted_text);
/* tagDataRelease: finalise the text node (if any) at the current level.
 * When d1_stack[d1_level] is a DATA1N_data node holding DATA1I_text, its
 * storage is chosen - memory from nmem_malloc when the length exceeds
 * DATA1_LOCALDATA, otherwise the node's own lbuf - and the text gathered
 * in concatBuf[d1_level] is copied into it.
 * The asserts document that the node must not have storage yet and must
 * hold at least one byte. */
722 static void tagDataRelease (struct lexSpec *spec)
726 if ((res = spec->d1_stack[spec->d1_level]) &&
727 res->which == DATA1N_data &&
728 res->u.data.what == DATA1I_text)
730 assert (!res->u.data.data);
731 assert (res->u.data.len > 0);
732 if (res->u.data.len > DATA1_LOCALDATA)
733 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
735 res->u.data.data = res->lbuf;
736 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
/* variantBegin: open a variant in the data1 tree.
 * class_str/class_len and type_str/type_len name the variant class and
 * type; both are copied into local buffers, truncated to
 * DATA1_MAX_SYMBOL-1 characters, and looked up in the variant set of the
 * record's abstract syntax.  value_str/value_len is the variant value; it
 * is copied into the new node's lbuf, truncated to DATA1_LOCALDATA-1.
 * If the parent node is not itself a variant, an empty variant node (type
 * and value 0) is inserted first.  The new variant node is then linked in
 * as the latest child of the node one level up and a new level is opened.
 * A warning is logged when no record has been started (d1_level == 0). */
741 static void variantBegin (struct lexSpec *spec,
742 const char *class_str, int class_len,
743 const char *type_str, int type_len,
744 const char *value_str, int value_len)
/* only look at the stack when it is non-empty: with d1_level == 0 the
 * index would be -1, and the d1_level check comes after this declaration */
746 struct data1_node *parent = spec->d1_level > 0 ? spec->d1_stack[spec->d1_level -1] : NULL;
747 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
752 if (spec->d1_level == 0)
754 logf (LOG_WARN, "in variant begin. No record type defined");
/* bounded copies of class and type so they can be used as C strings */
757 if (class_len >= DATA1_MAX_SYMBOL)
758 class_len = DATA1_MAX_SYMBOL-1;
759 memcpy (tclass, class_str, class_len);
760 tclass[class_len] = '\0';
762 if (type_len >= DATA1_MAX_SYMBOL)
763 type_len = DATA1_MAX_SYMBOL-1;
764 memcpy (ttype, type_str, type_len);
765 ttype[type_len] = '\0';
768 logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype,
773 data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset,
/* parent is not a variant: insert an empty variant node first */
777 if (parent->which != DATA1N_variant)
779 res = data1_mk_node (spec->dh, spec->m);
780 res->parent = parent;
781 res->which = DATA1N_variant;
782 res->u.variant.type = 0;
783 res->u.variant.value = 0;
784 res->root = parent->root;
786 parent->last_child = res;
787 if (spec->d1_stack[spec->d1_level])
789 tagDataRelease (spec);
790 spec->d1_stack[spec->d1_level]->next = res;
794 spec->d1_stack[spec->d1_level] = res;
795 spec->d1_stack[++(spec->d1_level)] = NULL;
/* search the enclosing run of variant nodes for one of the same type */
797 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
798 if (spec->d1_stack[i]->u.variant.type == tp)
805 logf (LOG_LOG, "variant node (%d)", spec->d1_level);
807 parent = spec->d1_stack[spec->d1_level-1];
808 res = data1_mk_node (spec->dh, spec->m);
809 res->parent = parent;
810 res->which = DATA1N_variant;
811 res->root = parent->root;
812 res->u.variant.type = tp;
/* the value lives in the node's local buffer, truncated if necessary */
814 if (value_len >= DATA1_LOCALDATA)
815 value_len =DATA1_LOCALDATA-1;
816 memcpy (res->lbuf, value_str, value_len);
817 res->lbuf[value_len] = '\0';
819 res->u.variant.value = res->lbuf;
821 parent->last_child = res;
822 if (spec->d1_stack[spec->d1_level])
824 tagDataRelease (spec);
825 spec->d1_stack[spec->d1_level]->next = res;
829 spec->d1_stack[spec->d1_level] = res;
830 spec->d1_stack[++(spec->d1_level)] = NULL;
/* tagStrip: trim white space from both ends of the tag described by *tag
 * and *len.  The first loop scans backwards over trailing white space, the
 * second forwards over leading white space; the statements that store the
 * results back are on lines not shown in this excerpt.
 * Characters are converted to unsigned char before being passed to
 * isspace, because a negative char value is not a valid argument to the
 * <ctype.h> functions. */
833 static void tagStrip (const char **tag, int *len)
837 for (i = *len; i > 0 && isspace((unsigned char) (*tag)[i-1]); --i)
840 for (i = 0; i < *len && isspace((unsigned char) (*tag)[i]); i++)
/* tagBegin: open element `tag` (len bytes, not NUL-terminated) in the
 * data1 tree.
 * A warning is logged when no record has been started (d1_level == 0).
 * The tag is stripped of surrounding white space and stored in a new
 * DATA1N_tag node - in the node's lbuf when it fits, otherwise in memory
 * from nmem_malloc.  The matching element definition is looked up in the
 * abstract syntax of the root node and attached.  The node is linked in as
 * the latest child of the node one level up, any pending text at this
 * level is finalised first, and a new level is opened. */
846 static void tagBegin (struct lexSpec *spec,
847 const char *tag, int len)
849 struct data1_node *parent;
850 data1_element *elem = NULL;
853 data1_element *e = NULL;
856 if (spec->d1_level == 0)
858 logf (LOG_WARN, "in element begin. No record type defined");
861 tagStrip (&tag, &len);
863 parent = spec->d1_stack[spec->d1_level -1];
864 partag = get_parent_tag(spec->dh, parent);
866 res = data1_mk_node_type (spec->dh, spec->m, DATA1N_tag);
867 res->parent = parent;
/* long tags need their own storage; short ones use the node's buffer */
869 if (len >= DATA1_LOCALDATA)
870 res->u.tag.tag = (char *) nmem_malloc (spec->m, len+1);
872 res->u.tag.tag = res->lbuf;
874 memcpy (res->u.tag.tag, tag, len);
875 res->u.tag.tag[len] = '\0';
878 logf (LOG_LOG, "begin tag %s (%d)", res->u.tag.tag, spec->d1_level);
880 if (parent->which == DATA1N_variant)
883 if (!(e = partag->u.tag.element))
886 elem = data1_getelementbytagname (spec->dh,
887 spec->d1_stack[0]->u.root.absyn,
889 res->u.tag.element = elem;
890 res->root = parent->root;
892 parent->last_child = res;
893 if (spec->d1_stack[spec->d1_level])
895 tagDataRelease (spec);
896 spec->d1_stack[spec->d1_level]->next = res;
900 spec->d1_stack[spec->d1_level] = res;
/* NOTE(review): no check against the d1_stack capacity is visible here */
901 spec->d1_stack[++(spec->d1_level)] = NULL;
/* tagEnd: close open elements down to, but not below, min_level.
 * `tag`/`len` optionally name the element to close (callers pass NULL, 0
 * for "any"); the tag is stripped of surrounding white space first.
 * For each level above min_level the pending text is finalised; the node
 * at that level is then compared - same length and same bytes - with the
 * requested tag.  How the loop steps the level and when it stops early is
 * on lines not shown in this excerpt. */
904 static void tagEnd (struct lexSpec *spec, int min_level,
905 const char *tag, int len)
907 tagStrip (&tag, &len);
908 while (spec->d1_level > min_level)
910 tagDataRelease (spec);
912 if (spec->d1_level == 0)
914 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
916 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
918 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
922 logf (LOG_LOG, "end tag (%d)", spec->d1_level);
/* tryMatch: run a DFA (parameter declared on a line not shown) over the
 * input starting at offset *pptr.
 * Characters are fetched with f_win_advance and followed through the
 * DFA's transitions starting from state 0; whenever a state carries a rule
 * number it is remembered in last_rule.  On success *mptr receives the
 * offset where the match starts and *pptr the offset just past its end.
 * The exact return values and the restart logic are on lines not shown. */
927 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
930 struct DFA_state *state = dfa->states[0];
933 unsigned char c_prev = 0;
934 int ptr = *pptr; /* current pointer */
935 int start_ptr = *pptr; /* first char of match */
936 int last_ptr = 0; /* last char of match */
937 int last_rule = 0; /* rule number of current match */
942 c = f_win_advance (spec, &ptr);
943 if (ptr == F_WIN_EOF)
960 *mptr = start_ptr; /* match starts here */
961 *pptr = last_ptr; /* match end here (+1) */
/* back to the initial state to try again */
964 state = dfa->states[0];
/* transition ranges are inclusive: ch[0] <= c <= ch[1] */
969 else if (c >= t->ch[0] && c <= t->ch[1])
971 state = dfa->states[t->to];
976 last_rule = state->rule_no;
981 last_rule = state->rule_nno;
/* execTok: read the next token of an action program starting at *src and
 * report its text through *tokBuf / *tokLen.
 * Leading blanks and tabs are skipped.  Visible token forms:
 *  - "$<digits>": the text of match argument n, fetched with f_win_get
 *    from arg_start[n]..arg_end[n];
 *  - a double-quoted string: the text between the quotes;
 *  - newline or ';': a statement separator;
 *  - any other run of characters up to white space (further stop
 *    characters are on lines not shown).
 * The return codes are not visible here; execCode compares the result
 * with 3 when it scans option tokens. */
993 static int execTok (struct lexSpec *spec, const char **src,
994 const char **tokBuf, int *tokLen)
996 const char *s = *src;
998 while (*s == ' ' || *s == '\t')
1002 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
/* decimal argument number */
1006 while (*s >= '0' && *s <= '9')
1007 n = n*10 + (*s++ -'0');
1008 if (spec->arg_no == 0)
1015 if (n >= spec->arg_no)
1017 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
1021 else if (*s == '\"')
1024 while (*s && *s != '\"')
1026 *tokLen = s - *tokBuf;
1031 else if (*s == '\n' || *s == ';')
1039 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1042 *tokLen = s - *tokBuf;
1049 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1052 *tokLen = s - *tokBuf;
/* regxStrz: copy len bytes from src into the caller-supplied buffer str so
 * the token can be used as a C string; callers use the return value as
 * that string.  Any length limit and the terminator are on lines not shown
 * in this excerpt. */
1058 static char *regxStrz (const char *src, int len, char *str)
1062 memcpy (str, src, len);
/* cmd_tcl_begin: implementation of the Tcl command "begin"; clientData is
 * the lexSpec the command was registered with.  Forms handled:
 *   begin record <absyn>    create a root node for abstract syntax <absyn>
 *                           and open the first level
 *   begin element <tag>     open an element via tagBegin
 *   begin variant <class> <type> <value>
 *                           open a variant via variantBegin
 *   begin context <name>    push the named context; unknown names are
 *                           reported with a warning
 * The Tcl result code returned is on lines not shown in this excerpt. */
1068 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1069 int argc, char **argv)
1071 struct lexSpec *spec = (struct lexSpec *) clientData;
/* NOTE(review): argv[1] is read before argc is tested in this condition;
 * confirm an argc check precedes it on the lines not shown */
1074 if (!strcmp(argv[1], "record") && argc == 3)
1076 char *absynName = argv[2];
1081 logf (LOG_LOG, "begin record %s", absynName);
1083 absyn = data1_get_absyn (spec->dh, absynName);
1085 res = data1_mk_node (spec->dh, spec->m);
1086 res->which = DATA1N_root;
1088 data1_insert_string(spec->dh, res, spec->m, absynName);
1089 res->u.root.absyn = absyn;
1092 spec->d1_stack[spec->d1_level] = res;
1093 spec->d1_stack[++(spec->d1_level)] = NULL;
1095 else if (!strcmp(argv[1], "element") && argc == 3)
1097 tagBegin (spec, argv[2], strlen(argv[2]));
1099 else if (!strcmp (argv[1], "variant") && argc == 5)
1101 variantBegin (spec, argv[2], strlen(argv[2]),
1102 argv[3], strlen(argv[3]),
1103 argv[4], strlen(argv[4]));
1105 else if (!strcmp (argv[1], "context") && argc == 3)
1107 struct lexContext *lc = spec->context;
1109 logf (LOG_LOG, "begin context %s",argv[2]);
/* linear search of the context list by name */
1111 while (lc && strcmp (argv[2], lc->name))
/* NOTE(review): no check against context_stack_size is visible here */
1115 spec->context_stack[++(spec->context_stack_top)] = lc;
1118 logf (LOG_WARN, "unknown context %s", argv[2]);
/* cmd_tcl_end: implementation of the Tcl command "end"; clientData is the
 * lexSpec the command was registered with.  Forms handled:
 *   end record              finalise pending text at every open level and
 *                           set stop_flag
 *   end element [-record] [<tag>]
 *                           close elements via tagEnd; stop_flag is set
 *                           when the level reaches 0 (further conditions
 *                           may apply on lines not shown)
 *   end context             pop the context stack unless already at the
 *                           bottom entry
 * The Tcl result code returned is on lines not shown in this excerpt. */
1125 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1126 int argc, char **argv)
1128 struct lexSpec *spec = (struct lexSpec *) clientData;
1132 if (!strcmp (argv[1], "record"))
1134 while (spec->d1_level)
1136 tagDataRelease (spec);
1140 logf (LOG_LOG, "end record");
1142 spec->stop_flag = 1;
1144 else if (!strcmp (argv[1], "element"))
1148 if (argc >= 3 && !strcmp(argv[2], "-record"))
1157 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1158 if (spec->d1_level == 0)
1161 logf (LOG_LOG, "end element end records");
1163 spec->stop_flag = 1;
1166 else if (!strcmp (argv[1], "context"))
1169 logf (LOG_LOG, "end context");
/* entry 0 of the context stack is never popped */
1171 if (spec->context_stack_top)
1172 (spec->context_stack_top)--;
/* cmd_tcl_data: implementation of the Tcl command "data"; clientData is
 * the lexSpec the command was registered with.
 * Options "-text" and "-element <tag>" are recognised.  With an element
 * given, the data is wrapped in tagBegin / tagEnd for that element.
 * For Tcl versions above 8.0 the argument is converted from UTF-8 to the
 * external encoding before being passed to execData; otherwise it is
 * passed as is.
 * The Tcl result code returned is on lines not shown in this excerpt. */
1179 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1180 int argc, char **argv)
1184 const char *element = 0;
1185 struct lexSpec *spec = (struct lexSpec *) clientData;
1189 if (!strcmp("-text", argv[argi]))
1194 else if (!strcmp("-element", argv[argi]))
1198 element = argv[argi++];
1204 tagBegin (spec, element, strlen(element));
1208 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1210 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1211 execData (spec, native, strlen(native), textFlag);
1212 Tcl_DStringFree (&ds);
1214 execData (spec, argv[argi], strlen(argv[argi]), textFlag);
1219 tagEnd (spec, 1, NULL, 0);
/* cmd_tcl_unread: implementation of the Tcl command "unread"; clientData
 * is the lexSpec the command was registered with.
 * Usage as far as visible: unread [-offset <n>] <argno>.  The read
 * position spec->ptr is moved back to the start of match argument <argno>
 * plus the optional offset.  An <argno> at or beyond arg_no is replaced by
 * the last argument.
 * NOTE(review): atoi gives no error indication and a negative <argno> is
 * not rejected on the visible lines, which would index arg_start below 0.
 * The Tcl result code returned is on lines not shown in this excerpt. */
1223 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1224 int argc, char **argv)
1226 struct lexSpec *spec = (struct lexSpec *) clientData;
1233 if (!strcmp("-offset", argv[argi]))
1238 offset = atoi(argv[argi]);
1247 no = atoi(argv[argi]);
1248 if (no >= spec->arg_no)
1249 no = spec->arg_no - 1;
1250 spec->ptr = spec->arg_start[no] + offset;
/* execTcl: run a piece of Tcl code for the current match.
 * Each match argument i is first published to the interpreter as the Tcl
 * variable named by its decimal index ("0", "1", ...).  The code is then
 * evaluated - as a Tcl object when HAVE_TCL_OBJECTS is in effect,
 * otherwise from its string form - and a failure is logged together with
 * the interpreter's errorInfo. */
1254 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1258 for (i = 0; i < spec->arg_no; i++)
1260 char var_name[10], *var_buf;
1263 sprintf (var_name, "%d", i);
1264 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
/* temporarily NUL-terminate the argument inside the window buffer,
 * restoring the overwritten byte afterwards.
 * NOTE(review): this writes to var_buf[var_len]; confirm that index is
 * always within the window buffer */
1268 ch = var_buf[var_len];
1269 var_buf[var_len] = '\0';
1270 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1271 var_buf[var_len] = ch;
/* NOTE(review): HAVE_TCL_OBJECTS is defined without a value near the top
 * of this file, so "#if HAVE_TCL_OBJECTS" has no expression to evaluate
 * when it is defined - confirm whether "#ifdef" was intended */
1274 #if HAVE_TCL_OBJECTS
1275 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1277 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1281 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1282 logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1283 spec->tcl_interp->errorLine,
1284 spec->tcl_interp->result,
1285 err ? err : "[NO ERRORINFO]");
/* execCode: interpret the built-in (non-Tcl) action language in
 * code->str.  Tokens are read with execTok; keyword tokens are copied into
 * a scratch buffer with regxStrz before being compared.  Commands:
 *   begin record <absyn>      create the root node (only when no record is
 *                             open yet) and open the first level
 *   begin element <tag>       tagBegin
 *   begin variant <class> <type> <value>
 *                             variantBegin
 *   begin context <name>      push the named context
 *   end record                finalise pending text and set stop_flag
 *   end element [-record] [<tag>]
 *                             tagEnd; stop_flag is set when level 0 is
 *                             reached
 *   end context               pop the context stack (never below entry 0)
 *   data [-text] [-element <tag>] <item>...
 *                             execData for each item, optionally wrapped
 *                             in tagBegin / tagEnd
 *   unread [-offset <n>] <digit>
 *                             move spec->ptr back to a match argument
 *   context <name>            replace the current context in place
 * Unknown commands and stray tokens are logged and skipped.
 * The surrounding loop, the braces and several guards are on lines not
 * shown in this excerpt. */
1291 static void execCode (struct lexSpec *spec, struct regxCode *code)
1293 const char *s = code->str;
1295 const char *cmd_str;
1297 r = execTok (spec, &s, &cmd_str, &cmd_len);
1304 r = execTok (spec, &s, &cmd_str, &cmd_len);
1307 p = regxStrz (cmd_str, cmd_len, ptmp);
1308 if (!strcmp (p, "begin"))
1310 r = execTok (spec, &s, &cmd_str, &cmd_len);
1313 logf (LOG_WARN, "missing keyword after 'begin'");
1316 p = regxStrz (cmd_str, cmd_len, ptmp);
1317 if (!strcmp (p, "record"))
1319 r = execTok (spec, &s, &cmd_str, &cmd_len);
1322 if (spec->d1_level == 0)
/* static so the name outlives this call: the root node keeps a pointer to
 * it (res->u.root.type below).
 * NOTE(review): confirm cmd_len is limited to 63 on the lines not shown */
1324 static char absynName[64];
1330 memcpy (absynName, cmd_str, cmd_len);
1331 absynName[cmd_len] = '\0';
1334 logf (LOG_LOG, "begin record %s", absynName);
1336 absyn = data1_get_absyn (spec->dh, absynName);
1338 res = data1_mk_node (spec->dh, spec->m);
1339 res->which = DATA1N_root;
1340 res->u.root.type = absynName;
1341 res->u.root.absyn = absyn;
1344 spec->d1_stack[spec->d1_level] = res;
1345 spec->d1_stack[++(spec->d1_level)] = NULL;
1347 r = execTok (spec, &s, &cmd_str, &cmd_len);
1349 else if (!strcmp (p, "element"))
1351 r = execTok (spec, &s, &cmd_str, &cmd_len);
1354 tagBegin (spec, cmd_str, cmd_len);
1355 r = execTok (spec, &s, &cmd_str, &cmd_len);
1357 else if (!strcmp (p, "variant"))
1360 const char *class_str = NULL;
1362 const char *type_str = NULL;
1364 const char *value_str = NULL;
1365 r = execTok (spec, &s, &cmd_str, &cmd_len);
1368 class_str = cmd_str;
1369 class_len = cmd_len;
1370 r = execTok (spec, &s, &cmd_str, &cmd_len);
1376 r = execTok (spec, &s, &cmd_str, &cmd_len);
1379 value_str = cmd_str;
1380 value_len = cmd_len;
1382 variantBegin (spec, class_str, class_len,
1383 type_str, type_len, value_str, value_len);
1386 r = execTok (spec, &s, &cmd_str, &cmd_len);
1388 else if (!strcmp (p, "context"))
1392 struct lexContext *lc = spec->context;
1393 r = execTok (spec, &s, &cmd_str, &cmd_len);
1394 p = regxStrz (cmd_str, cmd_len, ptmp);
1396 logf (LOG_LOG, "begin context %s", p);
/* linear search of the context list by name */
1398 while (lc && strcmp (p, lc->name))
/* push: the new context becomes the current one */
1401 spec->context_stack[++(spec->context_stack_top)] = lc;
1403 logf (LOG_WARN, "unknown context %s", p);
1406 r = execTok (spec, &s, &cmd_str, &cmd_len);
1410 logf (LOG_WARN, "bad keyword '%s' after begin", p);
1413 else if (!strcmp (p, "end"))
1415 r = execTok (spec, &s, &cmd_str, &cmd_len);
1418 logf (LOG_WARN, "missing keyword after 'end'");
1421 p = regxStrz (cmd_str, cmd_len, ptmp);
1422 if (!strcmp (p, "record"))
1424 while (spec->d1_level)
1426 tagDataRelease (spec);
1429 r = execTok (spec, &s, &cmd_str, &cmd_len);
1431 logf (LOG_LOG, "end record");
1433 spec->stop_flag = 1;
1435 else if (!strcmp (p, "element"))
/* consume option tokens */
1438 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1440 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1445 tagEnd (spec, min_level, cmd_str, cmd_len);
1446 r = execTok (spec, &s, &cmd_str, &cmd_len);
1449 tagEnd (spec, min_level, NULL, 0);
1450 if (spec->d1_level == 0)
1453 logf (LOG_LOG, "end element end records");
1455 spec->stop_flag = 1;
1459 else if (!strcmp (p, "context"))
1462 logf (LOG_LOG, "end context");
/* entry 0 of the context stack is never popped */
1464 if (spec->context_stack_top)
1465 (spec->context_stack_top)--;
1466 r = execTok (spec, &s, &cmd_str, &cmd_len);
1469 logf (LOG_WARN, "bad keyword '%s' after end", p);
1471 else if (!strcmp (p, "data"))
1475 const char *element_str = NULL;
/* consume option tokens */
1477 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1479 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1481 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1483 r = execTok (spec, &s, &element_str, &element_len);
1488 logf (LOG_WARN, "bad data option: %.*s",
1493 logf (LOG_WARN, "missing data item after data");
1497 tagBegin (spec, element_str, element_len);
1500 execData (spec, cmd_str, cmd_len,textFlag);
1501 r = execTok (spec, &s, &cmd_str, &cmd_len);
1504 tagEnd (spec, 1, NULL, 0);
1506 else if (!strcmp (p, "unread"))
1509 r = execTok (spec, &s, &cmd_str, &cmd_len);
1510 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1512 r = execTok (spec, &s, &cmd_str, &cmd_len);
1515 logf (LOG_WARN, "missing number after -offset");
1518 p = regxStrz (cmd_str, cmd_len, ptmp);
1520 r = execTok (spec, &s, &cmd_str, &cmd_len);
1526 logf (LOG_WARN, "missing index after unread command");
/* the argument index must be a single decimal digit */
1529 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1531 logf (LOG_WARN, "bad index after unread command");
1536 no = *cmd_str - '0';
/* an index beyond the last argument falls back to the last argument */
1537 if (no >= spec->arg_no)
1538 no = spec->arg_no - 1;
1539 spec->ptr = spec->arg_start[no] + offset;
1541 r = execTok (spec, &s, &cmd_str, &cmd_len);
1543 else if (!strcmp (p, "context"))
1547 struct lexContext *lc = spec->context;
1548 r = execTok (spec, &s, &cmd_str, &cmd_len);
1549 p = regxStrz (cmd_str, cmd_len, ptmp);
1551 while (lc && strcmp (p, lc->name))
/* replace the current context without changing the stack depth */
1554 spec->context_stack[spec->context_stack_top] = lc;
1556 logf (LOG_WARN, "unknown context %s", p);
1559 r = execTok (spec, &s, &cmd_str, &cmd_len);
1563 logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1564 r = execTok (spec, &s, &cmd_str, &cmd_len);
1569 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1571 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* execAction: run the action list ap for a match that starts at start_ptr;
 * *pptr is the current read offset and is advanced by pattern steps.
 * Local arrays arg_start / arg_end record the offsets of each match
 * argument and are published through spec->arg_start / spec->arg_end;
 * argument 0 starts at start_ptr.
 * Pattern steps extend the match with tryMatch: a "body" pattern yields
 * two arguments (text before the match, then the match), a plain pattern
 * one.  Code steps are run by execTcl when a Tcl interpreter is present,
 * otherwise by execCode; execution is cut short when stop_flag is set.
 * NOTE(review): spec->arg_start / arg_end point at locals of this function
 * and must not be used after it returns.
 * Return values are on lines not shown in this excerpt; callers test the
 * result for zero. */
1578 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1579 int start_ptr, int *pptr)
1588 arg_start[0] = start_ptr;
1590 spec->arg_start = arg_start;
1591 spec->arg_end = arg_end;
1598 if (ap->u.pattern.body)
1600 arg_start[arg_no] = *pptr;
1601 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
/* no match: the body argument runs to end of input */
1603 arg_end[arg_no] = F_WIN_EOF;
1605 arg_start[arg_no] = F_WIN_EOF;
1606 arg_end[arg_no] = F_WIN_EOF;
/* match: the body ends where the match starts */
1611 arg_end[arg_no] = sptr;
1613 arg_start[arg_no] = sptr;
1614 arg_end[arg_no] = *pptr;
1619 arg_start[arg_no] = *pptr;
1620 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
/* a plain pattern is tested against the current position */
1622 if (sptr != arg_start[arg_no])
1624 arg_end[arg_no] = *pptr;
1629 spec->arg_no = arg_no;
1632 if (spec->tcl_interp)
1633 execTcl(spec, ap->u.code);
1635 execCode (spec, ap->u.code);
1637 execCode (spec, ap->u.code);
1640 if (spec->stop_flag)
1644 arg_start[arg_no] = *pptr;
1645 arg_end[arg_no] = F_WIN_EOF;
/* execRule: run the action list of rule number ruleNo of `context` via
 * execAction and return its result.  The rule is found through the
 * context's fastRule table, which readFileSpec fills in. */
1654 static int execRule (struct lexSpec *spec, struct lexContext *context,
1655 int ruleNo, int start_ptr, int *pptr)
1658 logf (LOG_LOG, "exec rule %d", ruleNo);
1660 return execAction (spec, context->fastRule[ruleNo]->actionList,
/* lexNode: main scanning loop.  Starting at offset *ptr, input characters
 * are run through the DFA of the current context (top of the context
 * stack).  Input that no rule matches is passed on as data via execDataP;
 * when a rule has matched, its actions are run with execRule.
 * After a rule has run, the current context is re-read from the stack,
 * since actions may have changed it.  Where f_win_ef is set it is called
 * with the current offset; the exact condition is on lines not shown.
 * Loop structure, termination and the return value are on lines not shown
 * in this excerpt. */
1664 data1_node *lexNode (struct lexSpec *spec, int *ptr)
1666 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1667 struct DFA_state *state = context->dfa->states[0];
/* scanning starts as if a newline had just been read */
1670 unsigned char c_prev = '\n';
1672 int last_rule = 0; /* rule number of current match */
1673 int last_ptr = *ptr; /* last char of match */
1674 int start_ptr = *ptr; /* first char of match */
1675 int skip_ptr = *ptr; /* first char of run */
1679 c = f_win_advance (spec, ptr);
1680 if (*ptr == F_WIN_EOF)
1682 /* end of file met */
1685 /* there was a match */
1686 if (skip_ptr < start_ptr)
1688 /* deal with chars that didn't match */
1691 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1692 execDataP (spec, buf, size, 0);
1694 /* restore pointer */
1697 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1699 /* restore skip pointer */
1703 else if (skip_ptr < *ptr)
1705 /* deal with chars that didn't match */
1708 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1709 execDataP (spec, buf, size, 0);
1711 if (*ptr == F_WIN_EOF)
1718 { /* no transition for character c ... */
1721 if (skip_ptr < start_ptr)
1723 /* deal with chars that didn't match */
1726 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1727 execDataP (spec, buf, size, 0);
1729 /* restore pointer */
1731 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1733 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1736 logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
1738 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* actions may have switched context: reload the current one */
1742 context = spec->context_stack[spec->context_stack_top];
1745 last_ptr = start_ptr = *ptr;
1749 c_prev = f_win_advance (spec, &start_ptr);
1754 c_prev = f_win_advance (spec, &start_ptr);
/* restart matching from the initial DFA state */
1757 state = context->dfa->states[0];
1760 else if (c >= t->ch[0] && c <= t->ch[1])
1761 { /* transition ... */
1762 state = context->dfa->states[t->to];
1767 last_rule = state->rule_no;
1770 else if (state->rule_nno)
1772 last_rule = state->rule_nno;
/* lexRoot: parse one record and return its root node.
 * The context called context_name is looked up (a warning is logged when
 * it does not exist) and installed as entry 0 of the context stack.  The
 * context's INIT and BEGIN actions are run, lexNode scans the input, any
 * text still pending at open levels is finalised, and the END actions are
 * run.  The result is d1_stack[0].
 * How `offset` seeds the read position and when the INIT actions are
 * skipped is on lines not shown in this excerpt. */
1784 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1785 const char *context_name)
1787 struct lexContext *lt = spec->context;
1790 spec->stop_flag = 0;
1792 spec->context_stack_top = 0;
1795 if (!strcmp (lt->name, context_name))
1801 logf (LOG_WARN, "cannot find context %s", context_name);
1804 spec->context_stack[spec->context_stack_top] = lt;
1805 spec->d1_stack[spec->d1_level] = NULL;
1810 execAction (spec, lt->initActionList, ptr, &ptr);
1813 execAction (spec, lt->beginActionList, ptr, &ptr);
1814 lexNode (spec, &ptr);
/* finalise text left pending at the levels that are still open */
1815 while (spec->d1_level)
1817 tagDataRelease (spec);
1820 execAction (spec, lt->endActionList, ptr, &ptr);
1821 return spec->d1_stack[0];
/* grs_destroy: release the filter state created by grs_init.  clientData
 * is the lexSpecs holder; the spec it refers to is destroyed.  Freeing the
 * holder itself is presumably done on a line not shown - verify. */
1824 void grs_destroy(void *clientData)
1826 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1829 lexSpecDestroy(&specs->spec);
/* grs_init: allocate the lexSpecs holder that the grs_read_* functions
 * receive as p->clientData.  Its initialisation and the return statement
 * are on lines not shown in this excerpt. */
1834 void *grs_init(void)
1836 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
/* grs_read_regx: read one record with the plain regx filter.
 * The spec cached in p->clientData is reused when its name equals
 * p->type; otherwise it is destroyed and a new one is created and loaded
 * with readFileSpec (and destroyed again if loading fails - the test is on
 * a line not shown).  The input window is then reset and bound to the
 * caller's read / seek / end callbacks and file handle, the window size is
 * set to 500000 bytes, and the record is parsed by lexRoot in context
 * "main" starting at p->offset.
 * Note: the local curLexSpec shadows the file-scope variable of the same
 * name. */
1841 data1_node *grs_read_regx (struct grs_read_info *p)
1844 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1845 struct lexSpec **curLexSpec = &specs->spec;
1848 logf (LOG_LOG, "grs_read_regx");
1850 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1853 lexSpecDestroy (curLexSpec);
1854 *curLexSpec = lexSpecCreate (p->type, p->dh);
1855 res = readFileSpec (*curLexSpec);
1858 lexSpecDestroy (curLexSpec);
1862 (*curLexSpec)->dh = p->dh;
/* start with an empty window so the first access reads from the file */
1865 (*curLexSpec)->f_win_start = 0;
1866 (*curLexSpec)->f_win_end = 0;
1867 (*curLexSpec)->f_win_rf = p->readf;
1868 (*curLexSpec)->f_win_sf = p->seekf;
1869 (*curLexSpec)->f_win_fh = p->fh;
1870 (*curLexSpec)->f_win_ef = p->endf;
1871 (*curLexSpec)->f_win_size = 500000;
1873 (*curLexSpec)->m = p->mem;
1874 return lexRoot (*curLexSpec, p->offset, "main");
1877 static struct recTypeGrs regx_type = {
/* public handle for the regx record type; points at regx_type above */
1884 RecTypeGrs recTypeGrs_regx = &regx_type;
/* grs_read_tcl: read one record with the Tcl-enabled regx filter.
 * Works like grs_read_regx, except that a newly created spec also gets a
 * Tcl interpreter with the commands "begin", "end", "data" and "unread"
 * registered (each bound to the spec as client data) before the
 * specification file is loaded.
 * Note: the local curLexSpec shadows the file-scope variable of the same
 * name. */
1887 data1_node *grs_read_tcl (struct grs_read_info *p)
1890 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1891 struct lexSpec **curLexSpec = &specs->spec;
1894 logf (LOG_LOG, "grs_read_tcl");
1896 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1898 Tcl_Interp *tcl_interp;
1900 lexSpecDestroy (curLexSpec);
1901 *curLexSpec = lexSpecCreate (p->type, p->dh);
1902 Tcl_FindExecutable("");
1903 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
/* NOTE(review): the result of Tcl_Init is not checked on this line */
1904 Tcl_Init(tcl_interp);
1905 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1906 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1907 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1908 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1910 res = readFileSpec (*curLexSpec);
1913 lexSpecDestroy (curLexSpec);
1917 (*curLexSpec)->dh = p->dh;
/* start with an empty window so the first access reads from the file */
1920 (*curLexSpec)->f_win_start = 0;
1921 (*curLexSpec)->f_win_end = 0;
1922 (*curLexSpec)->f_win_rf = p->readf;
1923 (*curLexSpec)->f_win_sf = p->seekf;
1924 (*curLexSpec)->f_win_fh = p->fh;
1925 (*curLexSpec)->f_win_ef = p->endf;
1926 (*curLexSpec)->f_win_size = 500000;
1928 (*curLexSpec)->m = p->mem;
1929 return lexRoot (*curLexSpec, p->offset, "main");
1932 static struct recTypeGrs tcl_type = {
1939 RecTypeGrs recTypeGrs_tcl = &tcl_type;