1 /* $Id: regxread.c,v 1.46 2002-09-24 19:41:00 adam Exp $
2 Copyright (C) 1995,1996,1997,1998,1999,2000,2001,2002
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with Zebra; see the file LICENSE.zebra. If not, write to the
19 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
29 #include <yaz/tpath.h>
/* Enables the Tcl object code paths when the Tcl major version is 8 or newer.
   NOTE(review): this guard tests MAJOR_VERSION, whereas the version test in
   cmd_tcl_data uses TCL_MAJOR_VERSION / TCL_MINOR_VERSION.  If MAJOR_VERSION
   is not defined elsewhere the condition is false and HAVE_TCL_OBJECTS is
   never defined -- confirm which macro was intended.
   NOTE(review): the macro is defined without a value here but is tested with
   "#if HAVE_TCL_OBJECTS" in execTcl, which needs a value -- TODO confirm. */
37 #if MAJOR_VERSION >= 8
38 #define HAVE_TCL_OBJECTS
44 #define F_WIN_EOF 2000000000
48 #define REGX_PATTERN 1
53 #define REGX_CONTEXT 6
/* Data model of the lexer.  Only some members of each structure are visible
   here; the notes below describe how the visible members are used by the
   functions later in this file. */
63 struct lexRuleAction {
/* Compiled pattern of a pattern action; actionListDel releases it with dfa_delete. */
67 struct DFA *dfa; /* REGX_PATTERN */
/* Code fragment of a code action; actionListDel releases it with regxCodeDel. */
70 struct regxCode *code; /* REGX_CODE */
72 struct lexRuleAction *next;
/* Action list executed for a rule (execRule passes it to execAction). */
77 struct lexRuleAction *actionList;
81 struct lexRuleInfo info;
/* Rule list of a context, plus a table indexed by rule number that
   readFileSpec builds from that list. */
88 struct lexRule *rules;
89 struct lexRuleInfo **fastRule;
/* Action lists that lexRoot hands to execAction before and after scanning. */
93 struct lexRuleAction *beginActionList;
94 struct lexRuleAction *endActionList;
95 struct lexRuleAction *initActionList;
96 struct lexContext *next;
/* Head of the list of contexts known to a spec. */
106 struct lexContext *context;
/* Stack of active contexts; the entry at context_stack_top is the one
   lexNode scans with.  lexSpecCreate allocates context_stack_size slots. */
108 struct lexContext **context_stack;
109 int context_stack_size;
110 int context_stack_top;
116 Tcl_Interp *tcl_interp;
/* Callback invoked by lexNode with the file handle and the current position. */
119 void (*f_win_ef)(void *, off_t);
/* NOTE(review): the window offsets are int while f_win_get receives off_t
   positions -- confirm that large offsets cannot be truncated. */
121 int f_win_start; /* first byte of buffer is this file offset */
122 int f_win_end; /* last byte of buffer is this offset - 1 */
123 int f_win_size; /* size of buffer */
124 char *f_win_buf; /* buffer itself */
/* Read and seek callbacks used by f_win_get to refill the window. */
125 int (*f_win_rf)(void *, char *, size_t);
126 off_t (*f_win_sf)(void *, off_t);
/* Per-level text accumulation buffers (see execData / tagDataRelease). */
128 struct lexConcatBuf *concatBuf;
/* Node stack indexed by d1_level. */
130 data1_node **d1_stack;
/* The spec cached between calls (see grs_read_regx / grs_read_tcl). */
141 struct lexSpec *spec;
/* Returns a pointer into the file window buffer for the byte range
   [start_pos, end_pos) and stores the number of available bytes in *size.
   Three cases are visible below: the range is already inside the window,
   the window must be reloaded from start_pos, or the tail of the window is
   moved to the front and the remainder is read from the file.
   NOTE(review): positions arrive as off_t but are stored in int fields and
   in the int local "off" -- confirm that truncation cannot occur. */
144 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
147 int i, r, off = start_pos - spec->f_win_start;
/* Case 1: the requested range lies completely inside the current window. */
149 if (off >= 0 && end_pos <= spec->f_win_end)
151 *size = end_pos - start_pos;
152 return spec->f_win_buf + off;
/* Case 2: the start is outside the window; seek and refill from start_pos. */
154 if (off < 0 || start_pos >= spec->f_win_end)
156 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
157 spec->f_win_start = start_pos;
/* The buffer is allocated on first use. */
159 if (!spec->f_win_buf)
160 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
161 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
163 spec->f_win_end = spec->f_win_start + *size;
/* Never report more bytes than were asked for. */
165 if (*size > end_pos - start_pos)
166 *size = end_pos - start_pos;
167 return spec->f_win_buf;
/* Case 3: the start is inside the window but the end is not.  Move the
   bytes from start_pos onwards to the front of the buffer, then read more
   data behind them. */
169 for (i = 0; i<spec->f_win_end - start_pos; i++)
170 spec->f_win_buf[i] = spec->f_win_buf[i + off];
171 r = (*spec->f_win_rf)(spec->f_win_fh,
173 spec->f_win_size - i);
174 spec->f_win_start = start_pos;
175 spec->f_win_end += r;
177 if (*size > end_pos - start_pos)
178 *size = end_pos - start_pos;
179 return spec->f_win_buf;
/* Returns the byte at file position *pos and advances *pos by one.
   When the position is inside the current window the byte is taken
   straight from the buffer; otherwise a one-byte range is requested
   through f_win_get.  The position F_WIN_EOF is tested separately.
   NOTE(review): the buffer holds plain char, so the value returned from the
   fast path may be negative on platforms where char is signed -- confirm
   callers convert it as needed. */
182 static int f_win_advance (struct lexSpec *spec, int *pos)
187 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
188 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
189 if (*pos == F_WIN_EOF)
191 buf = f_win_get (spec, *pos, *pos+1, &size);
/* Releases the code object referenced by *pp.  The visible part drops the
   reference held on the associated Tcl object. */
201 static void regxCodeDel (struct regxCode **pp)
203 struct regxCode *p = *pp;
208 Tcl_DecrRefCount (p->tcl_obj);
/* Builds a code object from the len bytes starting at buf.  The text is
   copied into a freshly allocated string of len+1 bytes, and a Tcl string
   object with its own reference is created from the same bytes. */
216 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
220 p = (struct regxCode *) xmalloc (sizeof(*p));
221 p->str = (char *) xmalloc (len+1);
222 memcpy (p->str, buf, len);
225 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
227 Tcl_IncrRefCount (p->tcl_obj);
/* Creates a DFA configured for spec files: the parser's character map
   entries for space and tab are removed and an entry for '/' mapping to 0
   is added.
   NOTE(review): presumably this makes '/' terminate a pattern -- confirm
   against the dfa_parse_cmap_* documentation. */
232 static struct DFA *lexSpecDFA (void)
237 dfa_parse_cmap_del (dfa, ' ');
238 dfa_parse_cmap_del (dfa, '\t');
239 dfa_parse_cmap_add (dfa, '/', 0);
/* Walks the action list starting at *rap and releases what each action
   owns: the DFA of a pattern action, the code object of a code action. */
243 static void actionListDel (struct lexRuleAction **rap)
245 struct lexRuleAction *ra1, *ra;
/* ra1 carries the successor so the loop can advance past the current node. */
247 for (ra = *rap; ra; ra = ra1)
253 dfa_delete (&ra->u.pattern.dfa);
256 regxCodeDel (&ra->u.code);
/* Allocates a context with a private copy of name, a fresh DFA from
   lexSpecDFA and empty begin/end/init action lists. */
264 static struct lexContext *lexContextCreate (const char *name)
266 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
268 p->name = xstrdup (name);
271 p->dfa = lexSpecDFA ();
274 p->beginActionList = NULL;
275 p->endActionList = NULL;
276 p->initActionList = NULL;
/* Releases a context: its DFA, the action list of every rule, and the
   begin/end/init action lists. */
281 static void lexContextDestroy (struct lexContext *p)
283 struct lexRule *rp, *rp1;
285 dfa_delete (&p->dfa);
/* rp1 carries the successor while the current rule is being released. */
287 for (rp = p->rules; rp; rp = rp1)
290 actionListDel (&rp->info.actionList);
293 actionListDel (&p->beginActionList);
294 actionListDel (&p->endActionList);
295 actionListDel (&p->initActionList);
/* Allocates a lexer spec named name.  The visible part copies the name,
   allocates a context stack of 100 entries, and allocates one concat
   buffer slot and one node stack slot per level (maxLevel of each). */
300 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
305 p = (struct lexSpec *) xmalloc (sizeof(*p));
306 p->name = (char *) xmalloc (strlen(name)+1);
307 strcpy (p->name, name);
314 p->context_stack_size = 100;
315 p->context_stack = (struct lexContext **)
316 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
/* NOTE(review): maxLevel must already be set at this point -- the
   assignment is not visible here; confirm. */
320 p->concatBuf = (struct lexConcatBuf *)
321 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
/* Concat buffers start out empty; execData allocates them on demand. */
322 for (i = 0; i < p->maxLevel; i++)
324 p->concatBuf[i].max = 0;
325 p->concatBuf[i].buf = 0;
327 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
/* Releases the spec referenced by *pp: the per-level concat buffers, every
   context, the Tcl interpreter, the file window buffer and the context
   stack. */
332 static void lexSpecDestroy (struct lexSpec **pp)
335 struct lexContext *lt;
343 for (i = 0; i < p->maxLevel; i++)
344 xfree (p->concatBuf[i].buf);
345 xfree (p->concatBuf);
/* The successor is saved before the context itself is destroyed. */
350 struct lexContext *lt_next = lt->next;
351 lexContextDestroy (lt);
356 Tcl_DeleteInterp (p->tcl_interp);
359 xfree (p->f_win_buf);
360 xfree (p->context_stack);
/* Reads the next token of a spec line starting at *cpp.  Leading blanks,
   tabs and line breaks are skipped.  Keywords are collected into cmd in
   lower case and compared with "begin", "end", "body", "context" and
   "init"; anything else is reported with a warning. */
366 static int readParseToken (const char **cpp, int *len)
368 const char *cp = *cpp;
372 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
/* Fold upper-case letters to lower case while copying the keyword. */
401 if (*cp >= 'a' && *cp <= 'z')
403 else if (*cp >= 'A' && *cp <= 'Z')
404 cmd[i] = *cp + 'a' - 'A';
/* Keep room in cmd for the terminator; longer keywords are truncated. */
407 if (i < (int) sizeof(cmd)-2)
414 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
/* Skip the rest of the offending word. */
416 while (*cp && *cp != ' ' && *cp != '\t' &&
417 *cp != '\n' && *cp != '\r')
423 if (!strcmp (cmd, "begin"))
425 else if (!strcmp (cmd, "end"))
427 else if (!strcmp (cmd, "body"))
429 else if (!strcmp (cmd, "context"))
431 else if (!strcmp (cmd, "init"))
435 logf (LOG_WARN, "bad command %s", cmd);
/* Parses the action part of a rule from s and appends one lexRuleAction
   per token to the list at *ap.  Code tokens become code actions, patterns
   are parsed into a fresh DFA, and BEGIN / INIT are rejected with a
   warning at this position. */
441 static int actionListMk (struct lexSpec *spec, const char *s,
442 struct lexRuleAction **ap)
448 while ((tok = readParseToken (&s, &len)))
456 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
458 regxCodeMk (&(*ap)->u.code, s, len);
462 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/* Remember whether a BODY keyword preceded this pattern. */
464 (*ap)->u.pattern.body = bodyMark;
466 (*ap)->u.pattern.dfa = lexSpecDFA ();
468 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
/* NOTE(review): "%.*s" expects an int precision; s-s0 is a pointer
   difference -- confirm a cast is not needed. */
473 logf (LOG_WARN, "regular expression error '%.*s'", s-s0, s0);
476 dfa_mkstate ((*ap)->u.pattern.dfa);
480 logf (LOG_WARN, "cannot use BEGIN here");
483 logf (LOG_WARN, "cannot use INIT here");
486 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
/* Processes one logical line of a spec file.  A CONTEXT line creates a new
   context and puts it at the head of the spec's context list.  Otherwise
   the line either replaces the begin / end / init action list of the
   current context or adds a pattern rule with its action list. */
496 int readOneSpec (struct lexSpec *spec, const char *s)
500 struct lexContext *lc;
502 tok = readParseToken (&s, &len);
503 if (tok == REGX_CONTEXT)
505 char context_name[32];
506 tok = readParseToken (&s, &len);
507 if (tok != REGX_CODE)
509 logf (LOG_WARN, "missing name after CONTEXT keyword");
/* NOTE(review): len bytes are copied into a 32-byte buffer; no bound
   check on len is visible here -- confirm one exists. */
514 memcpy (context_name, s, len);
515 context_name[len] = '\0';
516 lc = lexContextCreate (context_name);
517 lc->next = spec->context;
/* A context named "main" is created here. */
522 spec->context = lexContextCreate ("main");
/* Each of the three special lists is emptied before being rebuilt. */
527 actionListDel (&spec->context->beginActionList);
528 actionListMk (spec, s, &spec->context->beginActionList);
531 actionListDel (&spec->context->endActionList);
532 actionListMk (spec, s, &spec->context->endActionList);
535 actionListDel (&spec->context->initActionList);
536 actionListMk (spec, s, &spec->context->initActionList);
540 logf (LOG_LOG, "rule %d %s", spec->context->ruleNo, s);
/* The pattern is added to the DFA of the current context. */
542 r = dfa_parse (spec->context->dfa, &s);
545 logf (LOG_WARN, "regular expression error. r=%d", r);
550 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
/* The new rule takes the next rule number and is pushed on the front of
   the context's rule list. */
554 rp = (struct lexRule *) xmalloc (sizeof(*rp));
555 rp->info.no = spec->context->ruleNo++;
556 rp->next = spec->context->rules;
557 spec->context->rules = rp;
558 actionListMk (spec, s, &rp->info.actionList);
/* Loads the filter definition for spec.  The file "<name>.tflt" is opened
   when a Tcl interpreter is attached; "<name>.flt" is also tried (the
   condition selecting it is not visible here).  Lines are collected into
   a WRBUF, passed to readOneSpec one logical line at a time, and finally
   each context gets its rule table and its DFA states built. */
563 int readFileSpec (struct lexSpec *spec)
565 struct lexContext *lc;
566 int c, i, errors = 0;
572 if (spec->tcl_interp)
/* NOTE(review): sprintf into fname with no visible bound on the length
   of spec->name -- confirm fname is large enough. */
574 sprintf (fname, "%s.tflt", spec->name);
575 spec_inf = data1_path_fopen (spec->dh, fname, "r");
580 sprintf (fname, "%s.flt", spec->name);
581 spec_inf = data1_path_fopen (spec->dh, fname, "r");
/* NOTE(review): the message prints spec->name rather than the file name
   that was actually tried. */
585 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
588 logf (LOG_LOG, "reading regx filter %s", fname);
590 if (spec->tcl_interp)
591 logf (LOG_LOG, "Tcl enabled");
593 lineBuf = wrbuf_alloc();
598 wrbuf_rewind (lineBuf);
/* Lines starting with '#', white space or a line break are skipped up to
   the end of the line. */
599 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
601 while (c != '\n' && c != EOF)
614 wrbuf_putc(lineBuf, c);
/* NOTE(review): presumably a following line that starts with a blank or
   tab continues the current logical line -- confirm. */
622 if (c != ' ' && c != '\t')
/* Terminate the collected text so it can be used as a C string. */
627 wrbuf_putc(lineBuf, '\0');
628 readOneSpec (spec, wrbuf_buf(lineBuf));
629 spec->lineNo += addLine;
633 wrbuf_free(lineBuf, 1);
638 debug_dfa_followpos = 1;
/* Build the rule-number lookup table and the DFA states of every context. */
641 for (lc = spec->context; lc; lc = lc->next)
644 lc->fastRule = (struct lexRuleInfo **)
645 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
646 for (i = 0; i < lc->ruleNo; i++)
647 lc->fastRule[i] = NULL;
648 for (rp = lc->rules; rp; rp = rp->next)
649 lc->fastRule[rp->info.no] = &rp->info;
650 dfa_mkstate (lc->dfa);
/* File-level spec pointer.
   NOTE(review): grs_read_regx and grs_read_tcl each declare a local
   variable with the same name that hides this one -- confirm whether this
   definition is still used anywhere. */
659 static struct lexSpec *curLexSpec = NULL;
/* Adds elen bytes of text from ebuf at the current level of the node
   stack.  If the node at that level is already a data node its length is
   the starting offset; a new text data node is also created and linked in
   below (the condition selecting that path is not visible here).  The
   text itself is appended to the concat buffer of the level, which is
   enlarged when needed; tagDataRelease later copies it into the node. */
662 static void execData (struct lexSpec *spec,
663 const char *ebuf, int elen, int formatted_text)
665 struct data1_node *res, *parent;
668 if (elen == 0) /* shouldn't happen, but it does! */
/* Logging only: long data is shown as its first and last 15 bytes. */
672 logf (LOG_LOG, "data (%d bytes) %.15s ... %.*s", elen,
673 ebuf, 15, ebuf + elen-15);
675 logf (LOG_LOG, "data (%d bytes) %.*s", elen, elen, ebuf);
677 logf (LOG_LOG, "data (%d bytes)", elen);
680 if (spec->d1_level <= 1)
683 parent = spec->d1_stack[spec->d1_level -1];
686 if ((res = spec->d1_stack[spec->d1_level]) && res->which == DATA1N_data)
687 org_len = res->u.data.len;
692 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
693 res->u.data.what = DATA1I_text;
695 res->u.data.formatted_text = formatted_text;
/* The data pointer stays unset until tagDataRelease runs. */
696 res->u.data.data = 0;
/* Link the new node after the previous node at this level and make it
   the current one. */
698 if (spec->d1_stack[spec->d1_level])
699 spec->d1_stack[spec->d1_level]->next = res;
700 spec->d1_stack[spec->d1_level] = res;
/* Grow the concat buffer with 256 bytes of slack, keeping the org_len
   bytes collected so far. */
702 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
704 char *old_buf, *new_buf;
706 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
707 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
708 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
710 memcpy (new_buf, old_buf, org_len);
713 spec->concatBuf[spec->d1_level].buf = new_buf;
715 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
716 res->u.data.len += elen;
/* Wrapper used by lexNode for text that matched no rule; the visible part
   forwards its arguments unchanged to execData. */
719 static void execDataP (struct lexSpec *spec,
720 const char *ebuf, int elen, int formatted_text)
722 execData (spec, ebuf, elen, formatted_text);
/* Finalises the text data node at the current level, if there is one: the
   text collected in the concat buffer of that level is copied into the
   node, using the node's local buffer when it fits and memory from the
   spec's NMEM otherwise. */
725 static void tagDataRelease (struct lexSpec *spec)
729 if ((res = spec->d1_stack[spec->d1_level]) &&
730 res->which == DATA1N_data &&
731 res->u.data.what == DATA1I_text)
/* The node must not have been released before and must hold some text. */
733 assert (!res->u.data.data);
734 assert (res->u.data.len > 0);
735 if (res->u.data.len > DATA1_LOCALDATA)
736 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
738 res->u.data.data = res->lbuf;
739 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
/* Opens a variant node below the current parent.  The class and type names
   are copied into local buffers (truncated to DATA1_MAX_SYMBOL-1 bytes)
   and used to look up the variant type; the value is copied into the local
   buffer of the new node (truncated to DATA1_LOCALDATA-1 bytes).  The new
   node is placed on the node stack and the level is increased. */
744 static void variantBegin (struct lexSpec *spec,
745 const char *class_str, int class_len,
746 const char *type_str, int type_len,
747 const char *value_str, int value_len)
/* NOTE(review): this reads d1_stack[d1_level - 1] before the check for
   d1_level == 0 below, i.e. index -1 when no record has been started --
   confirm and consider moving the read after the check. */
749 struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
750 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
755 if (spec->d1_level == 0)
757 logf (LOG_WARN, "in variant begin. No record type defined");
760 if (class_len >= DATA1_MAX_SYMBOL)
761 class_len = DATA1_MAX_SYMBOL-1;
762 memcpy (tclass, class_str, class_len);
763 tclass[class_len] = '\0';
765 if (type_len >= DATA1_MAX_SYMBOL)
766 type_len = DATA1_MAX_SYMBOL-1;
767 memcpy (ttype, type_str, type_len);
768 ttype[type_len] = '\0';
771 logf (LOG_LOG, "variant begin %s %s (%d)", tclass, ttype,
776 data1_getvartypebyct(spec->dh, parent->root->u.root.absyn->varset,
/* When the parent is not itself a variant, an extra variant node is
   inserted first and becomes a level of its own. */
780 if (parent->which != DATA1N_variant)
782 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
783 if (spec->d1_stack[spec->d1_level])
784 tagDataRelease (spec);
785 spec->d1_stack[spec->d1_level] = res;
786 spec->d1_stack[++(spec->d1_level)] = NULL;
/* Search the enclosing variant nodes for one of the same type. */
788 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
789 if (spec->d1_stack[i]->u.variant.type == tp)
796 logf (LOG_LOG, "variant node (%d)", spec->d1_level);
798 parent = spec->d1_stack[spec->d1_level-1];
799 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
800 res->u.variant.type = tp;
802 if (value_len >= DATA1_LOCALDATA)
803 value_len =DATA1_LOCALDATA-1;
804 memcpy (res->lbuf, value_str, value_len);
805 res->lbuf[value_len] = '\0';
807 res->u.variant.value = res->lbuf;
/* Pending text at this level is finalised before the slot is reused. */
809 if (spec->d1_stack[spec->d1_level])
810 tagDataRelease (spec);
811 spec->d1_stack[spec->d1_level] = res;
812 spec->d1_stack[++(spec->d1_level)] = NULL;
/* Trims white space from a tag name given as pointer plus length: the
   first loop scans backwards over trailing white space, the second scans
   forwards over leading white space.
   NOTE(review): isspace receives a plain char; bytes above 127 may need a
   cast to unsigned char -- confirm. */
815 static void tagStrip (const char **tag, int *len)
819 for (i = *len; i > 0 && isspace((*tag)[i-1]); --i)
822 for (i = 0; i < *len && isspace((*tag)[i]); i++)
/* Opens an element: warns when no record has been started, trims the tag
   name, finalises pending text at the current level, creates a tag node
   below the node one level up and then moves one level down. */
828 static void tagBegin (struct lexSpec *spec,
829 const char *tag, int len)
831 if (spec->d1_level == 0)
833 logf (LOG_WARN, "in element begin. No record type defined");
836 tagStrip (&tag, &len);
837 if (spec->d1_stack[spec->d1_level])
838 tagDataRelease (spec);
/* NOTE(review): tag is printed with "%s" although it comes with an
   explicit length -- confirm it is always NUL-terminated at that length. */
841 logf (LOG_LOG, "begin tag %s (%d)", tag, spec->d1_level);
844 spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
845 spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
/* The new level starts without a current node. */
846 spec->d1_stack[++(spec->d1_level)] = NULL;
/* Closes elements while the current level is above min_level.  Pending
   text is finalised at each step.  The visible condition compares the tag
   of the node at the current level with the given name by length and
   content. */
849 static void tagEnd (struct lexSpec *spec, int min_level,
850 const char *tag, int len)
852 tagStrip (&tag, &len);
853 while (spec->d1_level > min_level)
855 tagDataRelease (spec);
857 if (spec->d1_level == 0)
859 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
861 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
863 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
867 logf (LOG_LOG, "end tag (%d)", spec->d1_level);
/* Runs dfa over the input starting at *pptr.  On a match *mptr receives
   the start of the match and *pptr the position just after it.  The scan
   starts in state 0, follows the transition whose character range
   contains the current byte, and records the rule number of accepting
   states as it goes. */
872 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
875 struct DFA_state *state = dfa->states[0];
878 unsigned char c_prev = 0;
879 int ptr = *pptr; /* current pointer */
880 int start_ptr = *pptr; /* first char of match */
881 int last_ptr = 0; /* last char of match */
882 int last_rule = 0; /* rule number of current match */
887 c = f_win_advance (spec, &ptr);
888 if (ptr == F_WIN_EOF)
905 *mptr = start_ptr; /* match starts here */
906 *pptr = last_ptr; /* match end here (+1) */
/* Restart from the initial state. */
909 state = dfa->states[0];
914 else if (c >= t->ch[0] && c <= t->ch[1])
916 state = dfa->states[t->to];
/* NOTE(review): rule_no and rule_nno are two separate rule fields of a
   state; when each one applies is not visible here -- confirm against the
   DFA module. */
921 last_rule = state->rule_no;
926 last_rule = state->rule_nno;
/* Reads the next token of a code fragment from *src.  Leading blanks and
   tabs are skipped.  "$" followed by digits selects a match argument whose
   text is fetched with f_win_get; a double-quoted string, a line break or
   ';', and plain words are further token forms.  The token text is
   returned through *tokBuf / *tokLen.
   NOTE(review): the return codes are not visible here; callers in this
   file loop while the result equals 3. */
938 static int execTok (struct lexSpec *spec, const char **src,
939 const char **tokBuf, int *tokLen)
941 const char *s = *src;
943 while (*s == ' ' || *s == '\t')
947 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
/* Decimal argument number. */
951 while (*s >= '0' && *s <= '9')
952 n = n*10 + (*s++ -'0');
953 if (spec->arg_no == 0)
960 if (n >= spec->arg_no)
962 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
/* Quoted string: scan to the closing quote or the end of the text. */
969 while (*s && *s != '\"')
971 *tokLen = s - *tokBuf;
976 else if (*s == '\n' || *s == ';')
984 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
987 *tokLen = s - *tokBuf;
994 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
997 *tokLen = s - *tokBuf;
/* Copies len bytes from src into the caller-supplied buffer str.
   NOTE(review): callers use the result with strcmp, so presumably the copy
   is length-limited and NUL-terminated on lines not visible here --
   confirm. */
1003 static char *regxStrz (const char *src, int len, char *str)
1007 memcpy (str, src, len);
/* Tcl command "begin".  clientData is the lexSpec.  Sub-commands handled:
   "record <absyn>", "element <tag>", "variant <class> <type> <value>" and
   "context <name>".
   NOTE(review): argv[1] is compared before argc is tested in each branch;
   confirm an earlier argc check exists on a line not visible here. */
1013 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1014 int argc, char **argv)
1016 struct lexSpec *spec = (struct lexSpec *) clientData;
1019 if (!strcmp(argv[1], "record") && argc == 3)
1021 char *absynName = argv[2];
1025 logf (LOG_LOG, "begin record %s", absynName);
/* A root node and a tag node named after the abstract syntax are pushed
   on the node stack; the level below starts empty. */
1027 res = data1_mk_root (spec->dh, spec->m, absynName);
1029 spec->d1_stack[spec->d1_level++] = res;
1031 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1033 spec->d1_stack[spec->d1_level++] = res;
1035 spec->d1_stack[spec->d1_level] = NULL;
1037 else if (!strcmp(argv[1], "element") && argc == 3)
1039 tagBegin (spec, argv[2], strlen(argv[2]));
1041 else if (!strcmp (argv[1], "variant") && argc == 5)
1043 variantBegin (spec, argv[2], strlen(argv[2]),
1044 argv[3], strlen(argv[3]),
1045 argv[4], strlen(argv[4]));
1047 else if (!strcmp (argv[1], "context") && argc == 3)
1049 struct lexContext *lc = spec->context;
1051 logf (LOG_LOG, "begin context %s",argv[2]);
/* Look the context up by name in the spec's context list. */
1053 while (lc && strcmp (argv[2], lc->name))
/* NOTE(review): the push has no visible check against
   context_stack_size -- confirm. */
1057 spec->context_stack[++(spec->context_stack_top)] = lc;
1060 logf (LOG_WARN, "unknown context %s", argv[2]);
/* Tcl command "end".  clientData is the lexSpec.  "record" unwinds the
   node stack and sets the stop flag; "element" closes elements through
   tagEnd and sets the stop flag when the stack becomes empty; "context"
   pops the context stack unless it is already at the bottom. */
1067 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1068 int argc, char **argv)
1070 struct lexSpec *spec = (struct lexSpec *) clientData;
1074 if (!strcmp (argv[1], "record"))
1076 while (spec->d1_level)
1078 tagDataRelease (spec);
1082 logf (LOG_LOG, "end record");
1084 spec->stop_flag = 1;
1086 else if (!strcmp (argv[1], "element"))
/* Option "-record" is recognised as the first argument. */
1090 if (argc >= 3 && !strcmp(argv[2], "-record"))
1099 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1100 if (spec->d1_level == 0)
1103 logf (LOG_LOG, "end element end records");
1105 spec->stop_flag = 1;
1108 else if (!strcmp (argv[1], "context"))
1111 logf (LOG_LOG, "end context");
1113 if (spec->context_stack_top)
1114 (spec->context_stack_top)--;
/* Tcl command "data".  clientData is the lexSpec.  Options "-text" and
   "-element <name>" are recognised.  When an element is given the data is
   wrapped in that element.  With Tcl newer than 8.0 the text is converted
   from UTF-8 to the external encoding before it is handed to execData. */
1121 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1122 int argc, char **argv)
1126 const char *element = 0;
1127 struct lexSpec *spec = (struct lexSpec *) clientData;
1131 if (!strcmp("-text", argv[argi]))
1136 else if (!strcmp("-element", argv[argi]))
1140 element = argv[argi++];
1146 tagBegin (spec, element, strlen(element));
1150 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1152 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1153 execData (spec, native, strlen(native), textFlag);
/* The converted string lives in ds and is released once it has been used. */
1154 Tcl_DStringFree (&ds);
1156 execData (spec, argv[argi], strlen(argv[argi]), textFlag);
1161 tagEnd (spec, 1, NULL, 0);
/* Tcl command "unread".  clientData is the lexSpec.  Moves the read
   position back to the start of match argument "no", plus an optional
   "-offset" value.  An argument number beyond the last one is replaced by
   the last one.
   NOTE(review): atoi gives no error indication and no check for negative
   values is visible here -- confirm. */
1165 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1166 int argc, char **argv)
1168 struct lexSpec *spec = (struct lexSpec *) clientData;
1175 if (!strcmp("-offset", argv[argi]))
1180 offset = atoi(argv[argi]);
1189 no = atoi(argv[argi]);
1190 if (no >= spec->arg_no)
1191 no = spec->arg_no - 1;
1192 spec->ptr = spec->arg_start[no] + offset;
/* Runs a code fragment in the spec's Tcl interpreter.  Each match argument
   is first exported as a Tcl variable named after its index ("0", "1",
   ...), then the fragment is evaluated; on failure the error line, result
   and errorInfo are logged. */
1196 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1200 for (i = 0; i < spec->arg_no; i++)
/* NOTE(review): var_name holds at most 9 characters plus terminator;
   sprintf of an int fits only while i stays small -- confirm the bound on
   arg_no. */
1202 char var_name[10], *var_buf;
1205 sprintf (var_name, "%d", i);
1206 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
/* The byte following the argument is overwritten with a terminator for
   the duration of the Tcl_SetVar call and restored afterwards.
   NOTE(review): this writes one byte past the returned range -- confirm
   that byte is always inside the window buffer. */
1210 ch = var_buf[var_len];
1211 var_buf[var_len] = '\0';
1212 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1213 var_buf[var_len] = ch;
1216 #if HAVE_TCL_OBJECTS
1217 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1219 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1223 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1224 logf(LOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1225 spec->tcl_interp->errorLine,
1226 spec->tcl_interp->result,
1227 err ? err : "[NO ERRORINFO]");
/* Interprets a code fragment written in the built-in command language.
   Tokens are read with execTok.  Commands visible below:
     begin record|element|variant|context ...
     end record|element|context ...
     data [-text] [-element <name>] <item> ...
     unread [-offset <n>] <index>
     context <name>
   Unknown commands and stray tokens are reported with a warning. */
1233 static void execCode (struct lexSpec *spec, struct regxCode *code)
1235 const char *s = code->str;
1237 const char *cmd_str;
1239 r = execTok (spec, &s, &cmd_str, &cmd_len);
1246 r = execTok (spec, &s, &cmd_str, &cmd_len);
1249 p = regxStrz (cmd_str, cmd_len, ptmp);
/* ---- begin ... ---- */
1250 if (!strcmp (p, "begin"))
1252 r = execTok (spec, &s, &cmd_str, &cmd_len);
1255 logf (LOG_WARN, "missing keyword after 'begin'");
1258 p = regxStrz (cmd_str, cmd_len, ptmp);
1259 if (!strcmp (p, "record"))
1261 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* A record is only started when the node stack is empty. */
1264 if (spec->d1_level == 0)
/* NOTE(review): cmd_len bytes are copied into a 64-byte static buffer;
   confirm the length is bounded on lines not visible here. */
1266 static char absynName[64];
1271 memcpy (absynName, cmd_str, cmd_len);
1272 absynName[cmd_len] = '\0';
1274 logf (LOG_LOG, "begin record %s", absynName);
1276 res = data1_mk_root (spec->dh, spec->m, absynName);
1278 spec->d1_stack[spec->d1_level++] = res;
1280 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1282 spec->d1_stack[spec->d1_level++] = res;
1284 spec->d1_stack[spec->d1_level] = NULL;
1286 r = execTok (spec, &s, &cmd_str, &cmd_len);
1288 else if (!strcmp (p, "element"))
1290 r = execTok (spec, &s, &cmd_str, &cmd_len);
1293 tagBegin (spec, cmd_str, cmd_len);
1294 r = execTok (spec, &s, &cmd_str, &cmd_len);
1296 else if (!strcmp (p, "variant"))
/* Three tokens are collected in order: class, type and value. */
1299 const char *class_str = NULL;
1301 const char *type_str = NULL;
1303 const char *value_str = NULL;
1304 r = execTok (spec, &s, &cmd_str, &cmd_len);
1307 class_str = cmd_str;
1308 class_len = cmd_len;
1309 r = execTok (spec, &s, &cmd_str, &cmd_len);
1315 r = execTok (spec, &s, &cmd_str, &cmd_len);
1318 value_str = cmd_str;
1319 value_len = cmd_len;
1321 variantBegin (spec, class_str, class_len,
1322 type_str, type_len, value_str, value_len);
1325 r = execTok (spec, &s, &cmd_str, &cmd_len);
1327 else if (!strcmp (p, "context"))
1331 struct lexContext *lc = spec->context;
1332 r = execTok (spec, &s, &cmd_str, &cmd_len);
1333 p = regxStrz (cmd_str, cmd_len, ptmp);
1335 logf (LOG_LOG, "begin context %s", p);
/* Find the named context and push it on the context stack.
   NOTE(review): no check against context_stack_size is visible. */
1337 while (lc && strcmp (p, lc->name))
1340 spec->context_stack[++(spec->context_stack_top)] = lc;
1342 logf (LOG_WARN, "unknown context %s", p);
1345 r = execTok (spec, &s, &cmd_str, &cmd_len);
1349 logf (LOG_WARN, "bad keyword '%s' after begin", p);
/* ---- end ... ---- */
1352 else if (!strcmp (p, "end"))
1354 r = execTok (spec, &s, &cmd_str, &cmd_len);
1357 logf (LOG_WARN, "missing keyword after 'end'");
1360 p = regxStrz (cmd_str, cmd_len, ptmp);
1361 if (!strcmp (p, "record"))
/* Unwind the whole node stack, finalising text on the way, and stop. */
1363 while (spec->d1_level)
1365 tagDataRelease (spec);
1368 r = execTok (spec, &s, &cmd_str, &cmd_len);
1370 logf (LOG_LOG, "end record");
1372 spec->stop_flag = 1;
1374 else if (!strcmp (p, "element"))
/* Options come first; "-record" is the one recognised here. */
1377 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1379 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1384 tagEnd (spec, min_level, cmd_str, cmd_len);
1385 r = execTok (spec, &s, &cmd_str, &cmd_len);
1388 tagEnd (spec, min_level, NULL, 0);
1389 if (spec->d1_level == 0)
1392 logf (LOG_LOG, "end element end records");
1394 spec->stop_flag = 1;
1398 else if (!strcmp (p, "context"))
1401 logf (LOG_LOG, "end context");
/* The bottom entry of the context stack is never popped. */
1403 if (spec->context_stack_top)
1404 (spec->context_stack_top)--;
1405 r = execTok (spec, &s, &cmd_str, &cmd_len);
1408 logf (LOG_WARN, "bad keyword '%s' after end", p);
/* ---- data ... ---- */
1410 else if (!strcmp (p, "data"))
1414 const char *element_str = NULL;
1416 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1418 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1420 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1422 r = execTok (spec, &s, &element_str, &element_len);
1427 logf (LOG_WARN, "bad data option: %.*s",
1432 logf (LOG_WARN, "missing data item after data");
/* With -element the data items are wrapped in that element. */
1436 tagBegin (spec, element_str, element_len);
1439 execData (spec, cmd_str, cmd_len,textFlag);
1440 r = execTok (spec, &s, &cmd_str, &cmd_len);
1443 tagEnd (spec, 1, NULL, 0);
/* ---- unread ... ---- */
1445 else if (!strcmp (p, "unread"))
1448 r = execTok (spec, &s, &cmd_str, &cmd_len);
1449 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1451 r = execTok (spec, &s, &cmd_str, &cmd_len);
1454 logf (LOG_WARN, "missing number after -offset");
1457 p = regxStrz (cmd_str, cmd_len, ptmp);
1459 r = execTok (spec, &s, &cmd_str, &cmd_len);
1465 logf (LOG_WARN, "missing index after unread command");
/* The index must be a single decimal digit. */
1468 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1470 logf (LOG_WARN, "bad index after unread command");
1475 no = *cmd_str - '0';
1476 if (no >= spec->arg_no)
1477 no = spec->arg_no - 1;
1478 spec->ptr = spec->arg_start[no] + offset;
1480 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* ---- context <name>: replaces the top of the context stack ---- */
1482 else if (!strcmp (p, "context"))
1486 struct lexContext *lc = spec->context;
1487 r = execTok (spec, &s, &cmd_str, &cmd_len);
1488 p = regxStrz (cmd_str, cmd_len, ptmp);
1490 while (lc && strcmp (p, lc->name))
1493 spec->context_stack[spec->context_stack_top] = lc;
1495 logf (LOG_WARN, "unknown context %s", p);
1498 r = execTok (spec, &s, &cmd_str, &cmd_len);
1502 logf (LOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1503 r = execTok (spec, &s, &cmd_str, &cmd_len);
1508 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1510 r = execTok (spec, &s, &cmd_str, &cmd_len);
/* Executes an action list.  start_ptr is where the triggering match began
   and *pptr the current read position.  Argument 0 starts at start_ptr;
   each pattern action is matched with tryMatch and records further
   argument ranges; each code action runs through execTcl when a Tcl
   interpreter is attached and through execCode otherwise.  The stop flag
   is tested after code has run. */
1517 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1518 int start_ptr, int *pptr)
1527 arg_start[0] = start_ptr;
/* The argument tables live in this function's locals and are published
   through the spec so that the code runners can read them. */
1529 spec->arg_start = arg_start;
1530 spec->arg_end = arg_end;
/* Pattern flagged as BODY: an argument covering the text before the
   match is recorded in addition to the match itself. */
1537 if (ap->u.pattern.body)
1539 arg_start[arg_no] = *pptr;
1540 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1542 arg_end[arg_no] = F_WIN_EOF;
1544 arg_start[arg_no] = F_WIN_EOF;
1545 arg_end[arg_no] = F_WIN_EOF;
1550 arg_end[arg_no] = sptr;
1552 arg_start[arg_no] = sptr;
1553 arg_end[arg_no] = *pptr;
/* Plain pattern: the match start is compared with the current position. */
1558 arg_start[arg_no] = *pptr;
1559 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1561 if (sptr != arg_start[arg_no])
1563 arg_end[arg_no] = *pptr;
1568 spec->arg_no = arg_no;
1571 if (spec->tcl_interp)
1572 execTcl(spec, ap->u.code);
1574 execCode (spec, ap->u.code);
1576 execCode (spec, ap->u.code);
1579 if (spec->stop_flag)
1583 arg_start[arg_no] = *pptr;
1584 arg_end[arg_no] = F_WIN_EOF;
/* Runs the action list of rule ruleNo of the given context, looked up
   through the context's fastRule table, and returns execAction's result. */
1593 static int execRule (struct lexSpec *spec, struct lexContext *context,
1594 int ruleNo, int start_ptr, int *pptr)
1597 logf (LOG_LOG, "exec rule %d", ruleNo);
1599 return execAction (spec, context->fastRule[ruleNo]->actionList,
/* Main scanning loop.  Input is read through f_win_advance and fed to the
   DFA of the context on top of the context stack.  Text that matches no
   rule is passed to execDataP; when a rule matches, its actions run
   through execRule.  After a rule has run the context is re-read from the
   stack and scanning restarts from the DFA's initial state. */
1603 data1_node *lexNode (struct lexSpec *spec, int *ptr)
1605 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1606 struct DFA_state *state = context->dfa->states[0];
/* The previous character starts out as a line break. */
1609 unsigned char c_prev = '\n';
1611 int last_rule = 0; /* rule number of current match */
1612 int last_ptr = *ptr; /* last char of match */
1613 int start_ptr = *ptr; /* first char of match */
1614 int skip_ptr = *ptr; /* first char of run */
1618 c = f_win_advance (spec, ptr);
1619 if (*ptr == F_WIN_EOF)
1621 /* end of file met */
1624 /* there was a match */
1625 if (skip_ptr < start_ptr)
1627 /* deal with chars that didn't match */
1630 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1631 execDataP (spec, buf, size, 0);
1633 /* restore pointer */
1636 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1638 /* restore skip pointer */
1642 else if (skip_ptr < *ptr)
1644 /* deal with chars that didn't match */
1647 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1648 execDataP (spec, buf, size, 0);
1650 if (*ptr == F_WIN_EOF)
1657 { /* no transition for character c ... */
1660 if (skip_ptr < start_ptr)
1662 /* deal with chars that didn't match */
1665 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1666 execDataP (spec, buf, size, 0);
1668 /* restore pointer */
1670 if (!execRule (spec, context, last_rule, start_ptr, ptr))
/* When the rule asks to stop, the end-of-record callback, if set, is
   given the current position unless the end of file was reached. */
1672 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1675 logf (LOG_LOG, "regx: endf ptr=%d", *ptr);
1677 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* The rule may have changed the context stack; pick up the current top. */
1681 context = spec->context_stack[spec->context_stack_top];
1684 last_ptr = start_ptr = *ptr;
1688 c_prev = f_win_advance (spec, &start_ptr);
1693 c_prev = f_win_advance (spec, &start_ptr);
1696 state = context->dfa->states[0];
1699 else if (c >= t->ch[0] && c <= t->ch[1])
1700 { /* transition ... */
1701 state = context->dfa->states[t->to];
1706 last_rule = state->rule_no;
1709 else if (state->rule_nno)
1711 last_rule = state->rule_nno;
/* Parses one record starting at offset, using the context named
   context_name as the initial context.  The stop flag and context stack
   are reset, the init and begin action lists are run, the input is
   scanned with lexNode, remaining levels are unwound, the end action list
   is run and the root of the node stack is returned. */
1723 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1724 const char *context_name)
1726 struct lexContext *lt = spec->context;
1729 spec->stop_flag = 0;
1731 spec->context_stack_top = 0;
/* Locate the starting context by name. */
1734 if (!strcmp (lt->name, context_name))
1740 logf (LOG_WARN, "cannot find context %s", context_name);
1743 spec->context_stack[spec->context_stack_top] = lt;
1744 spec->d1_stack[spec->d1_level] = NULL;
1749 execAction (spec, lt->initActionList, ptr, &ptr);
1752 execAction (spec, lt->beginActionList, ptr, &ptr);
1753 lexNode (spec, &ptr);
/* Finalise any text still pending on the way back to level 0. */
1754 while (spec->d1_level)
1756 tagDataRelease (spec);
1759 execAction (spec, lt->endActionList, ptr, &ptr);
1760 return spec->d1_stack[0];
/* Releases the filter state passed as clientData; the visible part
   destroys the cached lexer spec it holds. */
1763 void grs_destroy(void *clientData)
1765 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1768 lexSpecDestroy(&specs->spec);
/* Allocates the filter state (a struct lexSpecs) used as clientData by the
   grs_read_* functions. */
1773 void *grs_init(void)
1775 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
/* Record reader for the regx filter.  The lexer spec is cached in the
   clientData and rebuilt from the filter file only when there is none yet
   or the requested type name differs.  The file window is then reset and
   bound to the caller's read/seek/end callbacks, and the record is parsed
   with lexRoot starting in context "main".
   Note: the local curLexSpec below hides the file-level variable of the
   same name. */
1780 data1_node *grs_read_regx (struct grs_read_info *p)
1783 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1784 struct lexSpec **curLexSpec = &specs->spec;
1787 logf (LOG_LOG, "grs_read_regx");
1789 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1792 lexSpecDestroy (curLexSpec);
1793 *curLexSpec = lexSpecCreate (p->type, p->dh);
1794 res = readFileSpec (*curLexSpec);
/* A spec that failed to load is discarded again. */
1797 lexSpecDestroy (curLexSpec);
1801 (*curLexSpec)->dh = p->dh;
/* Start with an empty window of 500000 bytes. */
1804 (*curLexSpec)->f_win_start = 0;
1805 (*curLexSpec)->f_win_end = 0;
1806 (*curLexSpec)->f_win_rf = p->readf;
1807 (*curLexSpec)->f_win_sf = p->seekf;
1808 (*curLexSpec)->f_win_fh = p->fh;
1809 (*curLexSpec)->f_win_ef = p->endf;
1810 (*curLexSpec)->f_win_size = 500000;
1812 (*curLexSpec)->m = p->mem;
1813 return lexRoot (*curLexSpec, p->offset, "main");
1816 static struct recTypeGrs regx_type = {
/* Public handle for the regx record type; it points at the static
   regx_type table defined just before it.  The initializer had been
   corrupted: "&reg" of "&regx_type" was decoded as an HTML entity. */
1823 RecTypeGrs recTypeGrs_regx = &regx_type;
/* Record reader for the Tcl-enabled filter.  Works like grs_read_regx but,
   when the spec has to be (re)built, also creates a Tcl interpreter and
   registers the commands "begin", "end", "data" and "unread" with the spec
   as their client data before the filter file is read.
   Note: the local curLexSpec below hides the file-level variable of the
   same name. */
1826 data1_node *grs_read_tcl (struct grs_read_info *p)
1829 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1830 struct lexSpec **curLexSpec = &specs->spec;
1833 logf (LOG_LOG, "grs_read_tcl");
1835 if (!*curLexSpec || strcmp ((*curLexSpec)->name, p->type))
1837 Tcl_Interp *tcl_interp;
1839 lexSpecDestroy (curLexSpec);
1840 *curLexSpec = lexSpecCreate (p->type, p->dh);
1841 Tcl_FindExecutable("");
1842 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
/* NOTE(review): the result of Tcl_Init is not checked here -- confirm
   that a failed initialisation is acceptable. */
1843 Tcl_Init(tcl_interp);
1844 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1845 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1846 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1847 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1849 res = readFileSpec (*curLexSpec);
/* A spec that failed to load is discarded again. */
1852 lexSpecDestroy (curLexSpec);
1856 (*curLexSpec)->dh = p->dh;
/* Start with an empty window of 500000 bytes. */
1859 (*curLexSpec)->f_win_start = 0;
1860 (*curLexSpec)->f_win_end = 0;
1861 (*curLexSpec)->f_win_rf = p->readf;
1862 (*curLexSpec)->f_win_sf = p->seekf;
1863 (*curLexSpec)->f_win_fh = p->fh;
1864 (*curLexSpec)->f_win_ef = p->endf;
1865 (*curLexSpec)->f_win_size = 500000;
1867 (*curLexSpec)->m = p->mem;
1868 return lexRoot (*curLexSpec, p->offset, "main");
1871 static struct recTypeGrs tcl_type = {
/* Public handle for the Tcl record type; it points at the static tcl_type
   table defined just before it. */
1878 RecTypeGrs recTypeGrs_tcl = &tcl_type;