1 /* $Id: regxread.c,v 1.5 2006-10-30 14:05:30 adam Exp $
2 Copyright (C) 1995-2006
5 This file is part of the Zebra server.
7 Zebra is free software; you can redistribute it and/or modify it under
8 the terms of the GNU General Public License as published by the Free
9 Software Foundation; either version 2, or (at your option) any later
12 Zebra is distributed in the hope that it will be useful, but WITHOUT ANY
13 WARRANTY; without even the implied warranty of MERCHANTABILITY or
14 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 You should have received a copy of the GNU General Public License
18 along with this program; if not, write to the Free Software
19 Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
29 #include <yaz/tpath.h>
30 #include <idzebra/util.h>
32 #include <idzebra/recgrs.h>
37 #if MAJOR_VERSION >= 8
38 #define HAVE_TCL_OBJECTS
44 #define F_WIN_EOF 2000000000
48 #define REGX_PATTERN 1
53 #define REGX_CONTEXT 6
63 struct lexRuleAction {
67 struct DFA *dfa; /* REGX_PATTERN */
70 struct regxCode *code; /* REGX_CODE */
72 struct lexRuleAction *next;
77 struct lexRuleAction *actionList;
81 struct lexRuleInfo info;
88 struct lexRule *rules;
89 struct lexRuleInfo **fastRule;
93 struct lexRuleAction *beginActionList;
94 struct lexRuleAction *endActionList;
95 struct lexRuleAction *initActionList;
96 struct lexContext *next;
106 struct lexContext *context;
108 struct lexContext **context_stack;
109 int context_stack_size;
110 int context_stack_top;
116 Tcl_Interp *tcl_interp;
118 struct ZebraRecStream *stream;
119 off_t (*f_win_ef)(struct ZebraRecStream *s, off_t *);
121 int f_win_start; /* first byte of buffer is this file offset */
122 int f_win_end; /* last byte of buffer is this offset - 1 */
123 int f_win_size; /* size of buffer */
124 char *f_win_buf; /* buffer itself */
125 int (*f_win_rf)(struct ZebraRecStream *, char *, size_t);
126 off_t (*f_win_sf)(struct ZebraRecStream *, off_t);
128 struct lexConcatBuf *concatBuf;
130 data1_node **d1_stack;
141 struct lexSpec *spec;
145 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
148 int i, r, off = start_pos - spec->f_win_start;
150 if (off >= 0 && end_pos <= spec->f_win_end)
152 *size = end_pos - start_pos;
153 return spec->f_win_buf + off;
155 if (off < 0 || start_pos >= spec->f_win_end)
157 (*spec->f_win_sf)(spec->stream, start_pos);
158 spec->f_win_start = start_pos;
160 if (!spec->f_win_buf)
161 spec->f_win_buf = (char *) xmalloc (spec->f_win_size);
162 *size = (*spec->f_win_rf)(spec->stream, spec->f_win_buf,
164 spec->f_win_end = spec->f_win_start + *size;
166 if (*size > end_pos - start_pos)
167 *size = end_pos - start_pos;
168 return spec->f_win_buf;
170 for (i = 0; i<spec->f_win_end - start_pos; i++)
171 spec->f_win_buf[i] = spec->f_win_buf[i + off];
172 r = (*spec->f_win_rf)(spec->stream,
174 spec->f_win_size - i);
175 spec->f_win_start = start_pos;
176 spec->f_win_end += r;
178 if (*size > end_pos - start_pos)
179 *size = end_pos - start_pos;
180 return spec->f_win_buf;
183 static int f_win_advance (struct lexSpec *spec, int *pos)
188 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
189 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
190 if (*pos == F_WIN_EOF)
192 buf = f_win_get (spec, *pos, *pos+1, &size);
202 static void regxCodeDel (struct regxCode **pp)
204 struct regxCode *p = *pp;
209 Tcl_DecrRefCount (p->tcl_obj);
217 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
221 p = (struct regxCode *) xmalloc (sizeof(*p));
222 p->str = (char *) xmalloc (len+1);
223 memcpy (p->str, buf, len);
226 p->tcl_obj = Tcl_NewStringObj ((char *) buf, len);
228 Tcl_IncrRefCount (p->tcl_obj);
233 static struct DFA *lexSpecDFA (void)
238 dfa_parse_cmap_del (dfa, ' ');
239 dfa_parse_cmap_del (dfa, '\t');
240 dfa_parse_cmap_add (dfa, '/', 0);
244 static void actionListDel (struct lexRuleAction **rap)
246 struct lexRuleAction *ra1, *ra;
248 for (ra = *rap; ra; ra = ra1)
254 dfa_delete (&ra->u.pattern.dfa);
257 regxCodeDel (&ra->u.code);
265 static struct lexContext *lexContextCreate (const char *name)
267 struct lexContext *p = (struct lexContext *) xmalloc (sizeof(*p));
269 p->name = xstrdup (name);
272 p->dfa = lexSpecDFA ();
275 p->beginActionList = NULL;
276 p->endActionList = NULL;
277 p->initActionList = NULL;
282 static void lexContextDestroy (struct lexContext *p)
284 struct lexRule *rp, *rp1;
286 dfa_delete (&p->dfa);
288 for (rp = p->rules; rp; rp = rp1)
291 actionListDel (&rp->info.actionList);
294 actionListDel (&p->beginActionList);
295 actionListDel (&p->endActionList);
296 actionListDel (&p->initActionList);
301 static struct lexSpec *lexSpecCreate (const char *name, data1_handle dh)
306 p = (struct lexSpec *) xmalloc (sizeof(*p));
307 p->name = (char *) xmalloc (strlen(name)+1);
308 strcpy (p->name, name);
315 p->context_stack_size = 100;
316 p->context_stack = (struct lexContext **)
317 xmalloc (sizeof(*p->context_stack) * p->context_stack_size);
321 p->concatBuf = (struct lexConcatBuf *)
322 xmalloc (sizeof(*p->concatBuf) * p->maxLevel);
323 for (i = 0; i < p->maxLevel; i++)
325 p->concatBuf[i].max = 0;
326 p->concatBuf[i].buf = 0;
328 p->d1_stack = (data1_node **) xmalloc (sizeof(*p->d1_stack) * p->maxLevel);
333 static void lexSpecDestroy (struct lexSpec **pp)
336 struct lexContext *lt;
344 for (i = 0; i < p->maxLevel; i++)
345 xfree (p->concatBuf[i].buf);
346 xfree (p->concatBuf);
351 struct lexContext *lt_next = lt->next;
352 lexContextDestroy (lt);
357 Tcl_DeleteInterp (p->tcl_interp);
360 xfree (p->f_win_buf);
361 xfree (p->context_stack);
367 static int readParseToken (const char **cpp, int *len)
369 const char *cp = *cpp;
373 while (*cp == ' ' || *cp == '\t' || *cp == '\n' || *cp == '\r')
402 if (*cp >= 'a' && *cp <= 'z')
404 else if (*cp >= 'A' && *cp <= 'Z')
405 cmd[i] = *cp + 'a' - 'A';
408 if (i < (int) sizeof(cmd)-2)
415 yaz_log (YLOG_WARN, "bad character %d %c", *cp, *cp);
417 while (*cp && *cp != ' ' && *cp != '\t' &&
418 *cp != '\n' && *cp != '\r')
424 if (!strcmp (cmd, "begin"))
426 else if (!strcmp (cmd, "end"))
428 else if (!strcmp (cmd, "body"))
430 else if (!strcmp (cmd, "context"))
432 else if (!strcmp (cmd, "init"))
436 yaz_log (YLOG_WARN, "bad command %s", cmd);
442 static int actionListMk (struct lexSpec *spec, const char *s,
443 struct lexRuleAction **ap)
449 while ((tok = readParseToken (&s, &len)))
457 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
459 regxCodeMk (&(*ap)->u.code, s, len);
463 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
465 (*ap)->u.pattern.body = bodyMark;
467 (*ap)->u.pattern.dfa = lexSpecDFA ();
469 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
475 yaz_log(YLOG_WARN, "regular expression error '%.*s'", pos, s0);
482 printf("pattern: %.*s\n", pos, s0);
483 dfa_mkstate((*ap)->u.pattern.dfa);
488 yaz_log (YLOG_WARN, "cannot use BEGIN here");
491 yaz_log (YLOG_WARN, "cannot use INIT here");
494 *ap = (struct lexRuleAction *) xmalloc (sizeof(**ap));
504 int readOneSpec (struct lexSpec *spec, const char *s)
508 struct lexContext *lc;
510 tok = readParseToken (&s, &len);
511 if (tok == REGX_CONTEXT)
513 char context_name[32];
514 tok = readParseToken (&s, &len);
515 if (tok != REGX_CODE)
517 yaz_log (YLOG_WARN, "missing name after CONTEXT keyword");
522 memcpy (context_name, s, len);
523 context_name[len] = '\0';
524 lc = lexContextCreate (context_name);
525 lc->next = spec->context;
530 spec->context = lexContextCreate ("main");
535 actionListDel (&spec->context->beginActionList);
536 actionListMk (spec, s, &spec->context->beginActionList);
539 actionListDel (&spec->context->endActionList);
540 actionListMk (spec, s, &spec->context->endActionList);
543 actionListDel (&spec->context->initActionList);
544 actionListMk (spec, s, &spec->context->initActionList);
548 yaz_log (YLOG_LOG, "rule %d %s", spec->context->ruleNo, s);
550 r = dfa_parse (spec->context->dfa, &s);
553 yaz_log (YLOG_WARN, "regular expression error. r=%d", r);
558 yaz_log (YLOG_WARN, "expects / at end of pattern. got %c", *s);
562 rp = (struct lexRule *) xmalloc (sizeof(*rp));
563 rp->info.no = spec->context->ruleNo++;
564 rp->next = spec->context->rules;
565 spec->context->rules = rp;
566 actionListMk (spec, s, &rp->info.actionList);
571 int readFileSpec (struct lexSpec *spec)
573 struct lexContext *lc;
574 int c, i, errors = 0;
580 if (spec->tcl_interp)
582 sprintf (fname, "%s.tflt", spec->name);
583 spec_inf = data1_path_fopen (spec->dh, fname, "r");
588 sprintf (fname, "%s.flt", spec->name);
589 spec_inf = data1_path_fopen (spec->dh, fname, "r");
593 yaz_log (YLOG_ERRNO|YLOG_WARN, "cannot read spec file %s", spec->name);
596 yaz_log (YLOG_LOG, "reading regx filter %s", fname);
598 if (spec->tcl_interp)
599 yaz_log (YLOG_LOG, "Tcl enabled");
605 debug_dfa_followpos = 0;
609 lineBuf = wrbuf_alloc();
614 wrbuf_rewind (lineBuf);
615 if (c == '#' || c == '\n' || c == ' ' || c == '\t' || c == '\r')
617 while (c != '\n' && c != EOF)
630 wrbuf_putc(lineBuf, c);
638 if (c != ' ' && c != '\t')
643 wrbuf_putc(lineBuf, '\0');
644 readOneSpec (spec, wrbuf_buf(lineBuf));
645 spec->lineNo += addLine;
649 wrbuf_free(lineBuf, 1);
651 for (lc = spec->context; lc; lc = lc->next)
654 lc->fastRule = (struct lexRuleInfo **)
655 xmalloc (sizeof(*lc->fastRule) * lc->ruleNo);
656 for (i = 0; i < lc->ruleNo; i++)
657 lc->fastRule[i] = NULL;
658 for (rp = lc->rules; rp; rp = rp->next)
659 lc->fastRule[rp->info.no] = &rp->info;
660 dfa_mkstate (lc->dfa);
669 static struct lexSpec *curLexSpec = NULL;
672 static void execData (struct lexSpec *spec,
673 const char *ebuf, int elen, int formatted_text,
674 const char *attribute_str, int attribute_len)
676 struct data1_node *res, *parent;
679 if (elen == 0) /* shouldn't happen, but it does! */
683 yaz_log (YLOG_LOG, "data(%d bytes) %.40s ... %.*s", elen,
684 ebuf, 40, ebuf + elen-40);
685 else if (elen == 1 && ebuf[0] == '\n')
687 yaz_log (YLOG_LOG, "data(new line)");
690 yaz_log (YLOG_LOG, "data(%d bytes) %.*s", elen, elen, ebuf);
692 yaz_log (YLOG_LOG, "data(%d bytes)", elen);
695 if (spec->d1_level <= 1)
698 parent = spec->d1_stack[spec->d1_level -1];
705 if (res->which != DATA1N_tag)
707 /* sweep through existing attributes.. */
708 for (ap = &res->u.tag.attributes; *ap; ap = &(*ap)->next)
709 if (strlen((*ap)->name) == attribute_len &&
710 !memcmp((*ap)->name, attribute_str, attribute_len))
714 /* new attribute. Create it with name + value */
715 *ap = nmem_malloc(spec->m, sizeof(**ap));
717 (*ap)->name = nmem_malloc(spec->m, attribute_len+1);
718 memcpy((*ap)->name, attribute_str, attribute_len);
719 (*ap)->name[attribute_len] = '\0';
721 (*ap)->value = nmem_malloc(spec->m, elen+1);
722 memcpy((*ap)->value, ebuf, elen);
723 (*ap)->value[elen] = '\0';
728 /* append to value if attribute already exists */
729 char *nv = nmem_malloc(spec->m, elen + 1 + strlen((*ap)->value));
730 strcpy(nv, (*ap)->value);
731 memcpy (nv + strlen(nv), ebuf, elen);
732 nv[strlen(nv)+elen] = '\0';
738 if ((res = spec->d1_stack[spec->d1_level]) &&
739 res->which == DATA1N_data)
740 org_len = res->u.data.len;
745 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_data, parent);
746 res->u.data.what = DATA1I_text;
748 res->u.data.formatted_text = formatted_text;
749 res->u.data.data = 0;
751 if (spec->d1_stack[spec->d1_level])
752 spec->d1_stack[spec->d1_level]->next = res;
753 spec->d1_stack[spec->d1_level] = res;
755 if (org_len + elen >= spec->concatBuf[spec->d1_level].max)
757 char *old_buf, *new_buf;
759 spec->concatBuf[spec->d1_level].max = org_len + elen + 256;
760 new_buf = (char *) xmalloc (spec->concatBuf[spec->d1_level].max);
761 if ((old_buf = spec->concatBuf[spec->d1_level].buf))
763 memcpy (new_buf, old_buf, org_len);
766 spec->concatBuf[spec->d1_level].buf = new_buf;
768 memcpy (spec->concatBuf[spec->d1_level].buf + org_len, ebuf, elen);
769 res->u.data.len += elen;
773 static void execDataP (struct lexSpec *spec,
774 const char *ebuf, int elen, int formatted_text)
776 execData (spec, ebuf, elen, formatted_text, 0, 0);
779 static void tagDataRelease (struct lexSpec *spec)
783 if ((res = spec->d1_stack[spec->d1_level]) &&
784 res->which == DATA1N_data &&
785 res->u.data.what == DATA1I_text)
787 assert (!res->u.data.data);
788 assert (res->u.data.len > 0);
789 if (res->u.data.len > DATA1_LOCALDATA)
790 res->u.data.data = (char *) nmem_malloc (spec->m, res->u.data.len);
792 res->u.data.data = res->lbuf;
793 memcpy (res->u.data.data, spec->concatBuf[spec->d1_level].buf,
798 static void variantBegin (struct lexSpec *spec,
799 const char *class_str, int class_len,
800 const char *type_str, int type_len,
801 const char *value_str, int value_len)
803 struct data1_node *parent = spec->d1_stack[spec->d1_level -1];
804 char tclass[DATA1_MAX_SYMBOL], ttype[DATA1_MAX_SYMBOL];
809 if (spec->d1_level == 0)
811 yaz_log (YLOG_WARN, "in variant begin. No record type defined");
814 if (class_len >= DATA1_MAX_SYMBOL)
815 class_len = DATA1_MAX_SYMBOL-1;
816 memcpy (tclass, class_str, class_len);
817 tclass[class_len] = '\0';
819 if (type_len >= DATA1_MAX_SYMBOL)
820 type_len = DATA1_MAX_SYMBOL-1;
821 memcpy (ttype, type_str, type_len);
822 ttype[type_len] = '\0';
825 yaz_log (YLOG_LOG, "variant begin(%s,%s,%d)", tclass, ttype,
830 data1_getvartypeby_absyn(spec->dh, parent->root->u.root.absyn,
834 if (parent->which != DATA1N_variant)
836 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
837 if (spec->d1_stack[spec->d1_level])
838 tagDataRelease (spec);
839 spec->d1_stack[spec->d1_level] = res;
840 spec->d1_stack[++(spec->d1_level)] = NULL;
842 for (i = spec->d1_level-1; spec->d1_stack[i]->which == DATA1N_variant; i--)
843 if (spec->d1_stack[i]->u.variant.type == tp)
850 yaz_log (YLOG_LOG, "variant node(%d)", spec->d1_level);
852 parent = spec->d1_stack[spec->d1_level-1];
853 res = data1_mk_node2 (spec->dh, spec->m, DATA1N_variant, parent);
854 res->u.variant.type = tp;
856 if (value_len >= DATA1_LOCALDATA)
857 value_len =DATA1_LOCALDATA-1;
858 memcpy (res->lbuf, value_str, value_len);
859 res->lbuf[value_len] = '\0';
861 res->u.variant.value = res->lbuf;
863 if (spec->d1_stack[spec->d1_level])
864 tagDataRelease (spec);
865 spec->d1_stack[spec->d1_level] = res;
866 spec->d1_stack[++(spec->d1_level)] = NULL;
869 static void tagStrip (const char **tag, int *len)
873 for (i = *len; i > 0 && isspace((*tag)[i-1]); --i)
876 for (i = 0; i < *len && isspace((*tag)[i]); i++)
882 static void tagBegin (struct lexSpec *spec,
883 const char *tag, int len)
885 if (spec->d1_level == 0)
887 yaz_log (YLOG_WARN, "in element begin. No record type defined");
890 tagStrip (&tag, &len);
891 if (spec->d1_stack[spec->d1_level])
892 tagDataRelease (spec);
895 yaz_log (YLOG_LOG, "begin tag(%.*s, %d)", len, tag, spec->d1_level);
898 spec->d1_stack[spec->d1_level] = data1_mk_tag_n (
899 spec->dh, spec->m, tag, len, 0, spec->d1_stack[spec->d1_level -1]);
900 spec->d1_stack[++(spec->d1_level)] = NULL;
903 static void tagEnd (struct lexSpec *spec, int min_level,
904 const char *tag, int len)
906 tagStrip (&tag, &len);
907 while (spec->d1_level > min_level)
909 tagDataRelease (spec);
911 if (spec->d1_level == 0)
913 if ((spec->d1_stack[spec->d1_level]->which == DATA1N_tag) &&
915 (strlen(spec->d1_stack[spec->d1_level]->u.tag.tag) ==
917 !memcmp (spec->d1_stack[spec->d1_level]->u.tag.tag, tag, len))))
921 yaz_log (YLOG_LOG, "end tag(%d)", spec->d1_level);
926 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
927 struct DFA *dfa, int greedy)
929 struct DFA_state *state = dfa->states[0];
932 unsigned char c_prev = 0;
933 int ptr = *pptr; /* current pointer */
934 int start_ptr = *pptr; /* first char of match */
935 int last_ptr = 0; /* last char of match */
936 int last_rule = 0; /* rule number of current match */
943 c = f_win_advance (spec, &ptr);
947 if (dfa->states[0] == state)
952 c = f_win_advance (spec, &ptr);
954 if (ptr == F_WIN_EOF)
968 if (--i < 0) /* no transition for character c */
972 *mptr = start_ptr; /* match starts here */
973 *pptr = last_ptr; /* match end here (+1) */
976 state = dfa->states[0];
979 c = f_win_advance (spec, &ptr);
985 else if (c >= t->ch[0] && c <= t->ch[1])
987 state = dfa->states[t->to];
988 if (state->rule_no && c_prev == '\n')
990 last_rule = state->rule_no;
993 else if (state->rule_nno)
995 last_rule = state->rule_nno;
1006 static int execTok (struct lexSpec *spec, const char **src,
1007 const char **tokBuf, int *tokLen)
1009 const char *s = *src;
1011 while (*s == ' ' || *s == '\t')
1015 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
1019 while (*s >= '0' && *s <= '9')
1020 n = n*10 + (*s++ -'0');
1021 if (spec->arg_no == 0)
1028 if (n >= spec->arg_no)
1030 *tokBuf = f_win_get (spec, spec->arg_start[n], spec->arg_end[n],
1034 else if (*s == '\"')
1037 while (*s && *s != '\"')
1039 *tokLen = s - *tokBuf;
1044 else if (*s == '\n' || *s == ';')
1052 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1055 *tokLen = s - *tokBuf;
1062 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != '\r' &&
1065 *tokLen = s - *tokBuf;
1071 static char *regxStrz (const char *src, int len, char *str)
1075 memcpy (str, src, len);
1081 static int cmd_tcl_begin (ClientData clientData, Tcl_Interp *interp,
1082 int argc, const char **argv)
1084 struct lexSpec *spec = (struct lexSpec *) clientData;
1087 if (!strcmp(argv[1], "record") && argc == 3)
1089 const char *absynName = argv[2];
1093 yaz_log (YLOG_LOG, "begin record %s", absynName);
1095 res = data1_mk_root (spec->dh, spec->m, absynName);
1099 spec->d1_stack[spec->d1_level++] = res;
1101 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1103 spec->d1_stack[spec->d1_level++] = res;
1105 spec->d1_stack[spec->d1_level] = NULL;
1107 else if (!strcmp(argv[1], "element") && argc == 3)
1109 tagBegin (spec, argv[2], strlen(argv[2]));
1111 else if (!strcmp (argv[1], "variant") && argc == 5)
1113 variantBegin (spec, argv[2], strlen(argv[2]),
1114 argv[3], strlen(argv[3]),
1115 argv[4], strlen(argv[4]));
1117 else if (!strcmp (argv[1], "context") && argc == 3)
1119 struct lexContext *lc = spec->context;
1121 yaz_log (YLOG_LOG, "begin context %s",argv[2]);
1123 while (lc && strcmp (argv[2], lc->name))
1127 spec->context_stack[++(spec->context_stack_top)] = lc;
1130 yaz_log (YLOG_WARN, "unknown context %s", argv[2]);
1137 static int cmd_tcl_end (ClientData clientData, Tcl_Interp *interp,
1138 int argc, const char **argv)
1140 struct lexSpec *spec = (struct lexSpec *) clientData;
1144 if (!strcmp (argv[1], "record"))
1146 while (spec->d1_level)
1148 tagDataRelease (spec);
1152 yaz_log (YLOG_LOG, "end record");
1154 spec->stop_flag = 1;
1156 else if (!strcmp (argv[1], "element"))
1159 const char *element = 0;
1160 if (argc >= 3 && !strcmp(argv[2], "-record"))
1169 tagEnd (spec, min_level, element, (element ? strlen(element) : 0));
1170 if (spec->d1_level <= 1)
1173 yaz_log (YLOG_LOG, "end element end records");
1175 spec->stop_flag = 1;
1178 else if (!strcmp (argv[1], "context"))
1181 yaz_log (YLOG_LOG, "end context");
1183 if (spec->context_stack_top)
1184 (spec->context_stack_top)--;
1191 static int cmd_tcl_data (ClientData clientData, Tcl_Interp *interp,
1192 int argc, const char **argv)
1196 const char *element = 0;
1197 const char *attribute = 0;
1198 struct lexSpec *spec = (struct lexSpec *) clientData;
1202 if (!strcmp("-text", argv[argi]))
1207 else if (!strcmp("-element", argv[argi]))
1211 element = argv[argi++];
1213 else if (!strcmp("-attribute", argv[argi]))
1217 attribute = argv[argi++];
1223 tagBegin (spec, element, strlen(element));
1227 #if TCL_MAJOR_VERSION > 8 || (TCL_MAJOR_VERSION == 8 && TCL_MINOR_VERSION > 0)
1229 char *native = Tcl_UtfToExternalDString(0, argv[argi], -1, &ds);
1230 execData (spec, native, strlen(native), textFlag, attribute,
1231 attribute ? strlen(attribute) : 0);
1232 Tcl_DStringFree (&ds);
1234 execData (spec, argv[argi], strlen(argv[argi]), textFlag, attribute,
1235 attribute ? strlen(attribute) : 0);
1240 tagEnd (spec, 2, NULL, 0);
1244 static int cmd_tcl_unread (ClientData clientData, Tcl_Interp *interp,
1245 int argc, const char **argv)
1247 struct lexSpec *spec = (struct lexSpec *) clientData;
1254 if (!strcmp("-offset", argv[argi]))
1259 offset = atoi(argv[argi]);
1268 no = atoi(argv[argi]);
1269 if (no >= spec->arg_no)
1270 no = spec->arg_no - 1;
1271 spec->ptr = spec->arg_start[no] + offset;
1275 static void execTcl (struct lexSpec *spec, struct regxCode *code)
1279 for (i = 0; i < spec->arg_no; i++)
1281 char var_name[10], *var_buf;
1284 sprintf (var_name, "%d", i);
1285 var_buf = f_win_get (spec, spec->arg_start[i], spec->arg_end[i],
1289 ch = var_buf[var_len];
1290 var_buf[var_len] = '\0';
1291 Tcl_SetVar (spec->tcl_interp, var_name, var_buf, 0);
1292 var_buf[var_len] = ch;
1295 #if HAVE_TCL_OBJECTS
1296 ret = Tcl_GlobalEvalObj(spec->tcl_interp, code->tcl_obj);
1298 ret = Tcl_GlobalEval (spec->tcl_interp, code->str);
1302 const char *err = Tcl_GetVar(spec->tcl_interp, "errorInfo", 0);
1303 yaz_log(YLOG_FATAL, "Tcl error, line=%d, \"%s\"\n%s",
1304 spec->tcl_interp->errorLine,
1305 spec->tcl_interp->result,
1306 err ? err : "[NO ERRORINFO]");
1312 static void execCode (struct lexSpec *spec, struct regxCode *code)
1314 const char *s = code->str;
1316 const char *cmd_str;
1318 r = execTok (spec, &s, &cmd_str, &cmd_len);
1325 r = execTok (spec, &s, &cmd_str, &cmd_len);
1328 p = regxStrz (cmd_str, cmd_len, ptmp);
1329 if (!strcmp (p, "begin"))
1331 r = execTok (spec, &s, &cmd_str, &cmd_len);
1334 yaz_log (YLOG_WARN, "missing keyword after 'begin'");
1337 p = regxStrz (cmd_str, cmd_len, ptmp);
1338 if (!strcmp (p, "record"))
1340 r = execTok (spec, &s, &cmd_str, &cmd_len);
1343 if (spec->d1_level <= 1)
1345 static char absynName[64];
1350 memcpy (absynName, cmd_str, cmd_len);
1351 absynName[cmd_len] = '\0';
1353 yaz_log (YLOG_LOG, "begin record %s", absynName);
1355 res = data1_mk_root (spec->dh, spec->m, absynName);
1359 spec->d1_stack[spec->d1_level++] = res;
1361 res = data1_mk_tag (spec->dh, spec->m, absynName, 0, res);
1363 spec->d1_stack[spec->d1_level++] = res;
1365 spec->d1_stack[spec->d1_level] = NULL;
1367 r = execTok (spec, &s, &cmd_str, &cmd_len);
1369 else if (!strcmp (p, "element"))
1371 r = execTok (spec, &s, &cmd_str, &cmd_len);
1374 tagBegin (spec, cmd_str, cmd_len);
1375 r = execTok (spec, &s, &cmd_str, &cmd_len);
1377 else if (!strcmp (p, "variant"))
1380 const char *class_str = NULL;
1382 const char *type_str = NULL;
1384 const char *value_str = NULL;
1385 r = execTok (spec, &s, &cmd_str, &cmd_len);
1388 class_str = cmd_str;
1389 class_len = cmd_len;
1390 r = execTok (spec, &s, &cmd_str, &cmd_len);
1396 r = execTok (spec, &s, &cmd_str, &cmd_len);
1399 value_str = cmd_str;
1400 value_len = cmd_len;
1402 variantBegin (spec, class_str, class_len,
1403 type_str, type_len, value_str, value_len);
1406 r = execTok (spec, &s, &cmd_str, &cmd_len);
1408 else if (!strcmp (p, "context"))
1412 struct lexContext *lc = spec->context;
1413 r = execTok (spec, &s, &cmd_str, &cmd_len);
1414 p = regxStrz (cmd_str, cmd_len, ptmp);
1416 yaz_log (YLOG_LOG, "begin context %s", p);
1418 while (lc && strcmp (p, lc->name))
1421 spec->context_stack[++(spec->context_stack_top)] = lc;
1423 yaz_log (YLOG_WARN, "unknown context %s", p);
1426 r = execTok (spec, &s, &cmd_str, &cmd_len);
1430 yaz_log (YLOG_WARN, "bad keyword '%s' after begin", p);
1433 else if (!strcmp (p, "end"))
1435 r = execTok (spec, &s, &cmd_str, &cmd_len);
1438 yaz_log (YLOG_WARN, "missing keyword after 'end'");
1441 p = regxStrz (cmd_str, cmd_len, ptmp);
1442 if (!strcmp (p, "record"))
1444 while (spec->d1_level)
1446 tagDataRelease (spec);
1449 r = execTok (spec, &s, &cmd_str, &cmd_len);
1451 yaz_log (YLOG_LOG, "end record");
1453 spec->stop_flag = 1;
1455 else if (!strcmp (p, "element"))
1458 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1460 if (cmd_len==7 && !memcmp ("-record", cmd_str, cmd_len))
1465 tagEnd (spec, min_level, cmd_str, cmd_len);
1466 r = execTok (spec, &s, &cmd_str, &cmd_len);
1469 tagEnd (spec, min_level, NULL, 0);
1470 if (spec->d1_level <= 1)
1473 yaz_log (YLOG_LOG, "end element end records");
1475 spec->stop_flag = 1;
1479 else if (!strcmp (p, "context"))
1482 yaz_log (YLOG_LOG, "end context");
1484 if (spec->context_stack_top)
1485 (spec->context_stack_top)--;
1486 r = execTok (spec, &s, &cmd_str, &cmd_len);
1489 yaz_log (YLOG_WARN, "bad keyword '%s' after end", p);
1491 else if (!strcmp (p, "data"))
1495 const char *element_str = NULL;
1497 const char *attribute_str = NULL;
1499 while ((r = execTok (spec, &s, &cmd_str, &cmd_len)) == 3)
1501 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1503 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1505 r = execTok (spec, &s, &element_str, &element_len);
1509 else if (cmd_len==10 && !memcmp ("-attribute", cmd_str,
1512 r = execTok (spec, &s, &attribute_str, &attribute_len);
1517 yaz_log (YLOG_WARN, "bad data option: %.*s",
1522 yaz_log (YLOG_WARN, "missing data item after data");
1526 tagBegin (spec, element_str, element_len);
1529 execData (spec, cmd_str, cmd_len, textFlag,
1530 attribute_str, attribute_len);
1531 r = execTok (spec, &s, &cmd_str, &cmd_len);
1534 tagEnd (spec, 2, NULL, 0);
1536 else if (!strcmp (p, "unread"))
1539 r = execTok (spec, &s, &cmd_str, &cmd_len);
1540 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1542 r = execTok (spec, &s, &cmd_str, &cmd_len);
1545 yaz_log (YLOG_WARN, "missing number after -offset");
1548 p = regxStrz (cmd_str, cmd_len, ptmp);
1550 r = execTok (spec, &s, &cmd_str, &cmd_len);
1556 yaz_log (YLOG_WARN, "missing index after unread command");
1559 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1561 yaz_log (YLOG_WARN, "bad index after unread command");
1566 no = *cmd_str - '0';
1567 if (no >= spec->arg_no)
1568 no = spec->arg_no - 1;
1569 spec->ptr = spec->arg_start[no] + offset;
1571 r = execTok (spec, &s, &cmd_str, &cmd_len);
1573 else if (!strcmp (p, "context"))
1577 struct lexContext *lc = spec->context;
1578 r = execTok (spec, &s, &cmd_str, &cmd_len);
1579 p = regxStrz (cmd_str, cmd_len, ptmp);
1581 while (lc && strcmp (p, lc->name))
1584 spec->context_stack[spec->context_stack_top] = lc;
1586 yaz_log (YLOG_WARN, "unknown context %s", p);
1589 r = execTok (spec, &s, &cmd_str, &cmd_len);
1593 yaz_log (YLOG_WARN, "unknown code command '%.*s'", cmd_len, cmd_str);
1594 r = execTok (spec, &s, &cmd_str, &cmd_len);
1599 yaz_log (YLOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1601 r = execTok (spec, &s, &cmd_str, &cmd_len);
1608 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1609 int start_ptr, int *pptr)
1618 arg_start[0] = start_ptr;
1620 spec->arg_start = arg_start;
1621 spec->arg_end = arg_end;
1628 if (ap->u.pattern.body)
1630 arg_start[arg_no] = *pptr;
1631 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 0))
1633 arg_end[arg_no] = F_WIN_EOF;
1635 arg_start[arg_no] = F_WIN_EOF;
1636 arg_end[arg_no] = F_WIN_EOF;
1637 yaz_log(YLOG_DEBUG, "Pattern match rest of record");
1642 arg_end[arg_no] = sptr;
1644 arg_start[arg_no] = sptr;
1645 arg_end[arg_no] = *pptr;
1650 arg_start[arg_no] = *pptr;
1651 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa, 1))
1653 if (sptr != arg_start[arg_no])
1655 arg_end[arg_no] = *pptr;
1660 spec->arg_no = arg_no;
1663 if (spec->tcl_interp)
1664 execTcl(spec, ap->u.code);
1666 execCode (spec, ap->u.code);
1668 execCode (spec, ap->u.code);
1671 if (spec->stop_flag)
1675 arg_start[arg_no] = *pptr;
1676 arg_end[arg_no] = F_WIN_EOF;
1685 static int execRule (struct lexSpec *spec, struct lexContext *context,
1686 int ruleNo, int start_ptr, int *pptr)
1689 yaz_log (YLOG_LOG, "exec rule %d", ruleNo);
1691 return execAction (spec, context->fastRule[ruleNo]->actionList,
1695 int lexNode (struct lexSpec *spec, int *ptr)
1697 struct lexContext *context = spec->context_stack[spec->context_stack_top];
1698 struct DFA_state *state = context->dfa->states[0];
1701 unsigned char c_prev = '\n';
1703 int last_rule = 0; /* rule number of current match */
1704 int last_ptr = *ptr; /* last char of match */
1705 int start_ptr = *ptr; /* first char of match */
1706 int skip_ptr = *ptr; /* first char of run */
1711 c = f_win_advance (spec, ptr);
1712 if (*ptr == F_WIN_EOF)
1714 /* end of file met */
1717 /* there was a match */
1718 if (skip_ptr < start_ptr)
1720 /* deal with chars that didn't match */
1723 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1724 execDataP (spec, buf, size, 0);
1726 /* restore pointer */
1729 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1731 /* restore skip pointer */
1735 else if (skip_ptr < *ptr)
1737 /* deal with chars that didn't match */
1740 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1741 execDataP (spec, buf, size, 0);
1743 state = context->dfa->states[0];
1744 if (*ptr == F_WIN_EOF)
1751 { /* no transition for character c ... */
1754 if (skip_ptr < start_ptr)
1756 /* deal with chars that didn't match */
1759 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1760 execDataP (spec, buf, size, 0);
1762 /* restore pointer */
1764 if (!execRule (spec, context, last_rule, start_ptr, ptr))
1766 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1768 off_t end_offset = *ptr;
1770 yaz_log (YLOG_LOG, "regx: endf ptr=%d", *ptr);
1772 (*spec->f_win_ef)(spec->stream, &end_offset);
1776 context = spec->context_stack[spec->context_stack_top];
1779 last_ptr = start_ptr = *ptr;
1783 c_prev = f_win_advance (spec, &start_ptr);
1788 c_prev = f_win_advance (spec, &start_ptr);
1791 state = context->dfa->states[0];
1794 else if (c >= t->ch[0] && c <= t->ch[1])
1795 { /* transition ... */
1796 state = context->dfa->states[t->to];
1801 last_rule = state->rule_no;
1804 else if (state->rule_nno)
1806 last_rule = state->rule_nno;
1819 static data1_node *lexRoot (struct lexSpec *spec, off_t offset,
1820 const char *context_name)
1822 struct lexContext *lt = spec->context;
1826 spec->stop_flag = 0;
1828 spec->context_stack_top = 0;
1831 if (!strcmp (lt->name, context_name))
1837 yaz_log (YLOG_WARN, "cannot find context %s", context_name);
1840 spec->context_stack[spec->context_stack_top] = lt;
1841 spec->d1_stack[spec->d1_level] = NULL;
1846 execAction (spec, lt->initActionList, ptr, &ptr);
1849 execAction (spec, lt->beginActionList, ptr, &ptr);
1851 ret = lexNode (spec, &ptr);
1852 while (spec->d1_level)
1854 tagDataRelease (spec);
1859 execAction (spec, lt->endActionList, ptr, &ptr);
1860 return spec->d1_stack[0];
1863 void grs_destroy(void *clientData)
1865 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1868 lexSpecDestroy(&specs->spec);
1873 void *grs_init(Res res, RecType recType)
1875 struct lexSpecs *specs = (struct lexSpecs *) xmalloc (sizeof(*specs));
1877 strcpy(specs->type, "");
1882 ZEBRA_RES grs_config(void *clientData, Res res, const char *args)
1884 struct lexSpecs *specs = (struct lexSpecs *) clientData;
1885 if (strlen(args) < sizeof(specs->type))
1886 strcpy(specs->type, args);
1890 data1_node *grs_read_regx (struct grs_read_info *p)
1893 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1894 struct lexSpec **curLexSpec = &specs->spec;
1898 yaz_log (YLOG_LOG, "grs_read_regx");
1900 if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type))
1903 lexSpecDestroy (curLexSpec);
1904 *curLexSpec = lexSpecCreate (specs->type, p->dh);
1905 res = readFileSpec (*curLexSpec);
1908 lexSpecDestroy (curLexSpec);
1912 (*curLexSpec)->dh = p->dh;
1913 start_offset = p->stream->tellf(p->stream);
1914 if (start_offset == 0)
1916 (*curLexSpec)->f_win_start = 0;
1917 (*curLexSpec)->f_win_end = 0;
1918 (*curLexSpec)->f_win_rf = p->stream->readf;
1919 (*curLexSpec)->f_win_sf = p->stream->seekf;
1920 (*curLexSpec)->stream = p->stream;
1921 (*curLexSpec)->f_win_ef = p->stream->endf;
1922 (*curLexSpec)->f_win_size = 500000;
1924 (*curLexSpec)->m = p->mem;
1925 return lexRoot (*curLexSpec, start_offset, "main");
1928 static int extract_regx(void *clientData, struct recExtractCtrl *ctrl)
1930 return zebra_grs_extract(clientData, ctrl, grs_read_regx);
1933 static int retrieve_regx(void *clientData, struct recRetrieveCtrl *ctrl)
1935 return zebra_grs_retrieve(clientData, ctrl, grs_read_regx);
1938 static struct recType regx_type = {
1950 data1_node *grs_read_tcl (struct grs_read_info *p)
1953 struct lexSpecs *specs = (struct lexSpecs *) p->clientData;
1954 struct lexSpec **curLexSpec = &specs->spec;
1958 yaz_log (YLOG_LOG, "grs_read_tcl");
1960 if (!*curLexSpec || strcmp ((*curLexSpec)->name, specs->type))
1962 Tcl_Interp *tcl_interp;
1964 lexSpecDestroy (curLexSpec);
1965 *curLexSpec = lexSpecCreate (specs->type, p->dh);
1966 Tcl_FindExecutable("");
1967 tcl_interp = (*curLexSpec)->tcl_interp = Tcl_CreateInterp();
1968 Tcl_Init(tcl_interp);
1969 Tcl_CreateCommand (tcl_interp, "begin", cmd_tcl_begin, *curLexSpec, 0);
1970 Tcl_CreateCommand (tcl_interp, "end", cmd_tcl_end, *curLexSpec, 0);
1971 Tcl_CreateCommand (tcl_interp, "data", cmd_tcl_data, *curLexSpec, 0);
1972 Tcl_CreateCommand (tcl_interp, "unread", cmd_tcl_unread,
1974 res = readFileSpec (*curLexSpec);
1977 lexSpecDestroy (curLexSpec);
1981 (*curLexSpec)->dh = p->dh;
1982 start_offset = p->stream->tellf(p->stream);
1983 if (start_offset == 0)
1985 (*curLexSpec)->f_win_start = 0;
1986 (*curLexSpec)->f_win_end = 0;
1987 (*curLexSpec)->f_win_rf = p->stream->readf;
1988 (*curLexSpec)->f_win_sf = p->stream->seekf;
1989 (*curLexSpec)->stream = p->stream;
1990 (*curLexSpec)->f_win_ef = p->stream->endf;
1991 (*curLexSpec)->f_win_size = 500000;
1993 (*curLexSpec)->m = p->mem;
1994 return lexRoot (*curLexSpec, start_offset, "main");
1997 static int extract_tcl(void *clientData, struct recExtractCtrl *ctrl)
1999 return zebra_grs_extract(clientData, ctrl, grs_read_tcl);
2002 static int retrieve_tcl(void *clientData, struct recRetrieveCtrl *ctrl)
2004 return zebra_grs_retrieve(clientData, ctrl, grs_read_tcl);
2007 static struct recType tcl_type = {
2020 #ifdef IDZEBRA_STATIC_GRS_REGX
2021 idzebra_filter_grs_regx
2036 * indent-tabs-mode: nil
2038 * vim: shiftwidth=4 tabstop=8 expandtab