2 * Copyright (C) 1994-1996, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.5 1997-02-19 16:22:33 adam
8 * Fixed "end element" to terminate record in outer-most level.
10 * Revision 1.4 1997/02/12 20:42:58 adam
11 * Changed some log messages.
13 * Revision 1.3 1996/11/08 14:05:33 adam
14 * Bug fix: data1 node member u.tag.get_bytes wasn't initialized.
16 * Revision 1.2 1996/10/29 14:02:09 adam
17 * Doesn't use the global data1_tabpath (from YAZ). Instead the function
18 * data1_get_tabpath is used.
20 * Revision 1.1 1996/10/11 10:57:30 adam
21 * New module recctrl. Used to manage records (extract/retrieval).
23 * Revision 1.24 1996/06/17 14:25:31 adam
24 * Removed LOG_DEBUG logs; can still be enabled by setting REGX_DEBUG.
26 * Revision 1.23 1996/06/04 10:19:00 adam
27 * Minor changes - removed include of ctype.h.
29 * Revision 1.22 1996/06/03 15:23:13 adam
30 * Bug fix: /../ BODY /../ - pattern didn't match EOF.
32 * Revision 1.21 1996/05/14 16:58:38 adam
35 * Revision 1.20 1996/05/01 13:46:36 adam
36 * First work on multiple records in one file.
37 * New option, -offset, to the "unread" command in the filter module.
39 * Revision 1.19 1996/02/12 16:18:20 adam
40 * Yet another bug fix in implementation of unread command.
42 * Revision 1.18 1996/02/12 16:07:54 adam
43 * Bug fix in new unread command.
45 * Revision 1.17 1996/02/12 15:56:11 adam
46 * New code command: unread.
48 * Revision 1.16 1996/01/17 14:57:51 adam
49 * Prototype changed for reader functions in extract/retrieve. File
50 * is identified by 'void *' instead of 'int'.
52 * Revision 1.15 1996/01/08 19:15:47 adam
53 * New input filter that works!
55 * Revision 1.14 1996/01/08 09:10:38 adam
56 * Yet another complete rework on this module.
58 * Revision 1.13 1995/12/15 17:21:50 adam
59 * This version is able to set data.formatted_text in data1-nodes.
61 * Revision 1.12 1995/12/15 16:20:10 adam
62 * The filter files (*.flt) are read from the path given by data1_tabpath.
64 * Revision 1.11 1995/12/15 12:35:16 adam
67 * Revision 1.10 1995/12/15 10:35:36 adam
70 * Revision 1.9 1995/12/14 16:38:48 adam
71 * Completely new attempt to make regular expression parsing.
73 * Revision 1.8 1995/12/13 17:16:59 adam
76 * Revision 1.7 1995/12/13 16:51:58 adam
77 * Modified to set last_child in data1_nodes.
78 * Uses destroy handler to free up data text nodes.
80 * Revision 1.6 1995/12/13 13:45:37 quinn
81 * Changed data1 to use nmem.
83 * Revision 1.5 1995/12/11 09:12:52 adam
84 * The rec_get function returns NULL if record doesn't exist - will
85 * happen in the server if the result set records have been deleted since
86 * the creation of the set (i.e. the search).
87 * The server saves a result temporarily if it is 'volatile', i.e. the
88 * set is register dependent.
90 * Revision 1.4 1995/12/05 16:57:40 adam
91 * More work on regular patterns.
93 * Revision 1.3 1995/12/05 09:37:09 adam
94 * One malloc was renamed to xmalloc.
96 * Revision 1.2 1995/12/04 17:59:24 adam
97 * More work on regular expression conversion.
99 * Revision 1.1 1995/12/04 14:25:30 adam
100 * Started work on regular expression parsed input to structured records.
108 #include <zebrautl.h>
/* Sentinel position: the scanner compares read positions against this value
 * to recognise that the end of the input has been reached. */
114 #define F_WIN_EOF 2000000000
/* Token kind for a regular-expression pattern; readOneSpec compares the
 * result of readParseToken against it. */
118 #define REGX_PATTERN 1
/* One step in an action chain. Per the field comments a step carries either
 * a pattern (with its own DFA) or a code fragment; steps are chained through
 * next. */
128 struct lexRuleAction {
132 struct DFA *dfa; /* REGX_PATTERN */
135 struct regxCode *code; /* REGX_CODE */
137 struct lexRuleAction *next;
/* Fields of the rule / translation / spec structures. The struct headers are
 * not visible in this excerpt, so the grouping below is inferred from how the
 * fields are accessed later in the file. */
142 struct lexRuleAction *actionList;
146 struct lexRuleInfo info;
147 struct lexRule *next;
152 struct lexRule *rules;
/* Table indexed by rule number; readFileSpec fills it from the rules list. */
153 struct lexRuleInfo **fastRule;
159 struct lexTrans trans;
/* I/O callbacks for the file window. grs_read_regx assigns them from the
 * grs_read_info members endf, readf and seekf respectively. */
163 void (*f_win_ef)(void *, off_t);
169 int (*f_win_rf)(void *, char *, size_t);
170 off_t (*f_win_sf)(void *, off_t);
/* Action chains that lexRoot runs before and after the main scan. */
175 struct lexRuleAction *beginActionList;
176 struct lexRuleAction *endActionList;
/* Return a pointer to the input bytes in [start_pos, end_pos) and store the
 * number of bytes actually available in *size (never more than
 * end_pos - start_pos). The pointer refers to the shared window buffer
 * spec->f_win_buf, whose contents are overwritten by later calls. */
180 static char *f_win_get (struct lexSpec *spec, off_t start_pos, off_t end_pos,
/* Case 1: start_pos lies outside the current window - seek and refill. */
185 if (start_pos < spec->f_win_start || start_pos >= spec->f_win_end)
187 (*spec->f_win_sf)(spec->f_win_fh, start_pos);
188 spec->f_win_start = start_pos;
/* The window buffer is allocated lazily on first use. */
190 if (!spec->f_win_buf)
191 spec->f_win_buf = xmalloc (spec->f_win_size);
192 *size = (*spec->f_win_rf)(spec->f_win_fh, spec->f_win_buf,
194 spec->f_win_end = spec->f_win_start + *size;
196 if (*size > end_pos - start_pos)
197 *size = end_pos - start_pos;
198 return spec->f_win_buf;
/* Case 2: the whole range is already inside the window. */
200 if (end_pos <= spec->f_win_end)
202 *size = end_pos - start_pos;
203 return spec->f_win_buf + (start_pos - spec->f_win_start);
/* Case 3: the range starts inside the window but runs past its end. Move the
 * bytes from start_pos onward to the front of the buffer, then read more
 * data into the space after them. */
205 off = start_pos - spec->f_win_start;
206 for (i = 0; i<spec->f_win_end - start_pos; i++)
207 spec->f_win_buf[i] = spec->f_win_buf[i + off];
208 r = (*spec->f_win_rf)(spec->f_win_fh,
210 spec->f_win_size - i);
211 spec->f_win_start = start_pos;
212 spec->f_win_end += r;
214 if (*size > end_pos - start_pos)
215 *size = end_pos - start_pos;
216 return spec->f_win_buf;
/* Fetch the input byte at *pos and advance *pos.
 * NOTE(review): *pos is an int while the window bounds are passed around as
 * off_t elsewhere - confirm that positions always fit in an int. */
219 static int f_win_advance (struct lexSpec *spec, int *pos)
/* Fast path: the byte is already in the window; return it and post-increment. */
224 if (*pos >= spec->f_win_start && *pos < spec->f_win_end)
225 return spec->f_win_buf[(*pos)++ - spec->f_win_start];
/* Already at the end-of-input sentinel (the handling is not visible here). */
226 if (*pos == F_WIN_EOF)
/* Slow path: ask f_win_get for one byte, which may seek and refill. */
228 buf = f_win_get (spec, *pos, *pos+1, &size);
/* Dispose of the regxCode referenced through pp. Only the initial load of
 * *pp is visible in this excerpt; presumably the object and its string are
 * freed - TODO confirm against the full source. */
239 static void regxCodeDel (struct regxCode **pp)
241 struct regxCode *p = *pp;
/* Create a regxCode holding a private copy of the len bytes at buf.
 * The string buffer is allocated with one extra byte (len+1); presumably the
 * NUL terminator and the store into *pp happen on lines not shown here. */
250 static void regxCodeMk (struct regxCode **pp, const char *buf, int len)
254 p = xmalloc (sizeof(*p));
255 p->str = xmalloc (len+1);
256 memcpy (p->str, buf, len);
/* Build a DFA object with the character-map adjustments used for filter
 * patterns: the entries for space and tab are deleted and '/' is added with
 * value 0. NOTE(review): presumably this makes whitespace literal and '/' a
 * pattern terminator - confirm against the dfa module documentation. */
261 static struct DFA *lexSpecDFA (void)
266 dfa_parse_cmap_del (dfa, ' ');
267 dfa_parse_cmap_del (dfa, '\t');
268 dfa_parse_cmap_add (dfa, '/', 0);
/* Allocate a new lexSpec for the filter called name. The name is copied, a
 * fresh DFA is created for the rule set, and the rule list, fast-rule table
 * and begin/end action lists all start out empty (NULL). */
272 static struct lexSpec *lexSpecMk (const char *name)
276 p = xmalloc (sizeof(*p));
277 p->name = xmalloc (strlen(name)+1);
278 strcpy (p->name, name);
279 p->trans.dfa = lexSpecDFA ();
280 p->trans.rules = NULL;
281 p->trans.fastRule = NULL;
282 p->beginActionList = NULL;
283 p->endActionList = NULL;
/* Destroy the action chain starting at *rap. Pattern steps have their DFA
 * deleted; code steps have their regxCode deleted. */
290 static void actionListDel (struct lexRuleAction **rap)
292 struct lexRuleAction *ra1, *ra;
/* ra1 carries the next element forward; presumably it is saved before the
 * current element is released (those lines are not shown). */
294 for (ra = *rap; ra; ra = ra1)
300 dfa_delete (&ra->u.pattern.dfa);
303 regxCodeDel (&ra->u.code);
/* Tear down a lexSpec: delete the rule DFA, free the fast-rule table, delete
 * the action list of every rule, delete the begin/end action lists and free
 * the file window buffer. */
311 static void lexSpecDel (struct lexSpec **pp)
314 struct lexRule *rp, *rp1;
320 dfa_delete (&p->trans.dfa);
322 xfree (p->trans.fastRule);
323 for (rp = p->trans.rules; rp; rp = rp1)
325 actionListDel (&rp->info.actionList);
328 actionListDel (&p->beginActionList);
329 actionListDel (&p->endActionList);
331 xfree (p->f_win_buf);
/* Read the next token of a filter specification starting at *cpp.
 * Callers compare the result with REGX_BEGIN, REGX_END and REGX_PATTERN, and
 * actionListMk stops when it returns 0. The exact return statements are not
 * visible in this excerpt. */
337 static int readParseToken (const char **cpp, int *len)
339 const char *cp = *cpp;
/* Skip leading blanks, tabs and newlines. */
343 while (*cp == ' ' || *cp == '\t' || *cp == '\n')
/* Collect a command word into cmd, folding upper case to lower case. */
372 if (*cp >= 'a' && *cp <= 'z')
374 else if (*cp >= 'A' && *cp <= 'Z')
375 cmd[i] = *cp + 'a' - 'A';
/* Guard against running past the end of the cmd buffer. */
378 if (i > sizeof(cmd)-2)
386 logf (LOG_WARN, "bad character %d %c", *cp, *cp);
/* Skip forward to the next whitespace character or the end of the string. */
388 while (*cp && *cp != ' ' && *cp != '\t' && *cp != '\n')
/* Recognised command words. */
394 if (!strcmp (cmd, "begin"))
396 else if (!strcmp (cmd, "end"))
398 else if (!strcmp (cmd, "body"))
402 logf (LOG_WARN, "bad command %s", cmd);
/* Parse the remainder of a rule in s into a chain of actions stored through
 * ap. One lexRuleAction is allocated per token that yields an action. */
408 static int actionListMk (struct lexSpec *spec, const char *s,
409 struct lexRuleAction **ap)
/* Consume tokens until readParseToken returns 0. */
414 while ((tok = readParseToken (&s, &len)))
/* Code token: keep a copy of the len bytes at s. */
422 *ap = xmalloc (sizeof(**ap));
424 regxCodeMk (&(*ap)->u.code, s, len);
/* Pattern token: record the body flag, then parse the pattern into its own
 * DFA and build that DFA's states. */
428 *ap = xmalloc (sizeof(**ap));
430 (*ap)->u.pattern.body = bodyMark;
432 (*ap)->u.pattern.dfa = lexSpecDFA ();
433 r = dfa_parse ((*ap)->u.pattern.dfa, &s);
438 logf (LOG_WARN, "regular expression error. r=%d", r);
441 dfa_mkstate ((*ap)->u.pattern.dfa);
445 logf (LOG_WARN, "cannot use begin here");
/* A further token kind that also allocates an action (the kind is not
 * visible in this excerpt). */
448 *ap = xmalloc (sizeof(**ap));
/* Parse one logical line s of a filter file into spec. The first token
 * decides whether the line replaces the begin action list, replaces the end
 * action list, or adds a new pattern rule. */
458 int readOneSpec (struct lexSpec *spec, const char *s)
462 tok = readParseToken (&s, &len);
463 if (tok == REGX_BEGIN)
/* A later "begin" line replaces any earlier one. */
465 actionListDel (&spec->beginActionList);
466 actionListMk (spec, s, &spec->beginActionList);
468 else if (tok == REGX_END)
/* Likewise for "end". */
470 actionListDel (&spec->endActionList);
471 actionListMk (spec, s, &spec->endActionList);
473 else if (tok == REGX_PATTERN)
/* The leading pattern of a rule is parsed into the shared rule DFA. */
477 r = dfa_parse (spec->trans.dfa, &s);
480 logf (LOG_WARN, "regular expression error. r=%d", r);
485 logf (LOG_WARN, "expects / at end of pattern. got %c", *s);
/* Give the rule the next rule number and push it on the front of the list. */
489 rp = xmalloc (sizeof(*rp));
490 rp->info.no = spec->trans.ruleNo++;
491 rp->next = spec->trans.rules;
492 spec->trans.rules = rp;
493 actionListMk (spec, s, &rp->info.actionList);
/* Load the filter file "<spec->name>.flt", located through the data1 table
 * path, feed each logical line to readOneSpec, then build the fast-rule
 * table and the states of the rule DFA. */
498 int readFileSpec (struct lexSpec *spec)
503 int c, i, errors = 0;
506 lineBuf = xmalloc (1+lineSize);
507 logf (LOG_LOG, "reading regx filter %s.flt", spec->name);
/* lineBuf doubles as scratch space for the file name.
 * NOTE(review): this sprintf is unbounded - confirm that lineSize always
 * exceeds strlen(spec->name) + 4. */
508 sprintf (lineBuf, "%s.flt", spec->name);
509 if (!(spec_inf = yaz_path_fopen (data1_get_tabpath(), lineBuf, "r")))
511 logf (LOG_ERRNO|LOG_WARN, "cannot read spec file %s", spec->name);
/* Rule numbering starts at 1. */
516 spec->trans.ruleNo = 1;
/* Lines that start with '#', newline, blank or tab are skipped up to the
 * end of the line. */
521 if (c == '#' || c == '\n' || c == ' ' || c == '\t')
523 while (c != '\n' && c != EOF)
/* NOTE(review): presumably a following line that starts with blank or tab
 * continues the current logical line - confirm against the full source. */
542 if (c != ' ' && c != '\t')
551 readOneSpec (spec, lineBuf);
552 spec->lineNo += addLine;
/* Build the table that maps a rule number to its rule info. */
557 spec->trans.fastRule = xmalloc (sizeof(*spec->trans.fastRule) *
559 for (i = 0; i<spec->trans.ruleNo; i++)
560 spec->trans.fastRule[i] = NULL;
561 for (rp = spec->trans.rules; rp; rp = rp->next)
562 spec->trans.fastRule[rp->info.no] = &rp->info;
568 debug_dfa_followpos = 1;
571 dfa_mkstate (spec->trans.dfa);
/* The most recently loaded filter spec. grs_read_regx reuses it while the
 * requested type equals its name and rebuilds it otherwise. */
575 static struct lexSpec *curLexSpec = NULL;
/* Destroy handler for data nodes whose text was allocated with xmalloc:
 * checks that n is a data node and frees its data buffer. */
577 static void destroy_data (struct data1_node *n)
579 assert (n->which == DATA1N_data);
580 xfree (n->u.data.data);
/* Add elen bytes of text at ebuf to the record tree at the current level.
 * If the node currently at this level is a data node the text is appended to
 * it; otherwise a new data node is created under the parent one level up. */
583 static void execData (struct lexSpec *spec,
584 data1_node **d1_stack, int *d1_level,
585 const char *ebuf, int elen, int formatted_text)
587 struct data1_node *res, *parent;
/* Debug logging; long buffers are logged as their first and last 15 bytes. */
591 logf (LOG_DEBUG, "execData %.15s ... %.*s", ebuf, 15, ebuf + elen-15);
593 logf (LOG_DEBUG, "execData %.*s", elen, ebuf);
595 logf (LOG_DEBUG, "execData len=%d", elen);
601 parent = d1_stack[*d1_level -1];
/* Append to an existing data node at this level. */
603 if ((res=d1_stack[*d1_level]) && res->which == DATA1N_data)
/* The combined text still fits within DATA1_LOCALDATA: copy in place. */
605 if (elen + res->u.data.len <= DATA1_LOCALDATA)
606 memcpy (res->u.data.data + res->u.data.len, ebuf, elen);
/* Otherwise build a new heap buffer holding the old text plus the new text. */
609 char *nb = xmalloc (elen + res->u.data.len);
610 memcpy (nb, res->u.data.data, res->u.data.len);
611 memcpy (nb + res->u.data.len, ebuf, elen);
/* The old buffer is only freed if it was itself a heap buffer, i.e. the old
 * length already exceeded DATA1_LOCALDATA. */
612 if (res->u.data.len > DATA1_LOCALDATA)
613 xfree (res->u.data.data);
614 res->u.data.data = nb;
615 res->destroy = destroy_data;
617 res->u.data.len += elen;
/* No data node to extend: create a new text node. */
621 res = data1_mk_node (spec->m);
622 res->parent = parent;
623 res->which = DATA1N_data;
624 res->u.data.what = DATA1I_text;
625 res->u.data.len = elen;
626 res->u.data.formatted_text = formatted_text;
/* Large text goes on the heap and gets a destroy handler; small text uses
 * the node's own lbuf. */
627 if (elen > DATA1_LOCALDATA)
629 res->u.data.data = xmalloc (elen);
630 res->destroy = destroy_data;
633 res->u.data.data = res->lbuf;
634 memcpy (res->u.data.data, ebuf, elen);
635 res->root = parent->root;
/* Link the node in as the parent's last child and as the next sibling of
 * the previous node at this level, then make it the current node. */
637 parent->num_children++;
638 parent->last_child = res;
639 if (d1_stack[*d1_level])
640 d1_stack[*d1_level]->next = res;
643 d1_stack[*d1_level] = res;
/* Wrapper around execData, used by lexNode for text that no rule matched.
 * In this excerpt it only forwards its arguments unchanged; any extra
 * processing is not visible. */
647 static void execDataP (struct lexSpec *spec,
648 data1_node **d1_stack, int *d1_level,
649 const char *ebuf, int elen, int formatted_text)
651 execData (spec, d1_stack, d1_level, ebuf, elen, formatted_text);
/* Open a new element named by the len bytes at tag: create a tag node under
 * the current parent, link it into the tree and push a new (empty) level on
 * the node stack. */
655 static void tagBegin (struct lexSpec *spec,
656 data1_node **d1_stack, int *d1_level,
657 const char *tag, int len)
659 struct data1_node *parent = d1_stack[*d1_level -1];
660 data1_element *elem = NULL;
661 data1_node *partag = get_parent_tag(parent);
663 data1_element *e = NULL;
668 logf (LOG_WARN, "in element begin. No record type defined");
672 res = data1_mk_node (spec->m);
673 res->parent = parent;
674 res->which = DATA1N_tag;
/* The tag name is stored in the node's local buffer. */
675 res->u.tag.tag = res->lbuf;
676 res->u.tag.get_bytes = -1;
/* Truncate over-long names so the name plus its NUL fits in the local buffer. */
678 if (len >= DATA1_LOCALDATA)
679 len = DATA1_LOCALDATA-1;
681 memcpy (res->u.tag.tag, tag, len);
682 res->u.tag.tag[len] = '\0';
685 logf (LOG_DEBUG, "tag begin %s (%d)", res->u.tag.tag, *d1_level);
/* Look the tag name up in the abstract syntax of the root node, starting
 * from the element of the enclosing tag when there is one. */
687 if (parent->which == DATA1N_variant)
690 if (!(e = partag->u.tag.element))
693 elem = data1_getelementbytagname (d1_stack[0]->u.root.absyn, e,
696 res->u.tag.element = elem;
697 res->u.tag.node_selected = 0;
698 res->u.tag.make_variantlist = 0;
699 res->u.tag.no_data_requested = 0;
700 res->root = parent->root;
/* Link the node in as last child / next sibling, then descend one level. */
701 parent->num_children++;
702 parent->last_child = res;
703 if (d1_stack[*d1_level])
704 d1_stack[*d1_level]->next = res;
707 d1_stack[*d1_level] = res;
708 d1_stack[++(*d1_level)] = NULL;
/* Close an element. execCode passes either an explicit tag name or
 * (NULL, 0). Presumably levels are popped until the named tag (or, with no
 * name, the innermost one) has been closed - the pop itself is not visible
 * in this excerpt. */
711 static void tagEnd (struct lexSpec *spec,
712 data1_node **d1_stack, int *d1_level,
713 const char *tag, int len)
/* The loop only runs while the level is above 1. */
715 while (*d1_level > 1)
/* A name matches when it has the same length and the same bytes. */
719 (strlen(d1_stack[*d1_level]->u.tag.tag) == len &&
720 !memcmp (d1_stack[*d1_level]->u.tag.tag, tag, len)))
724 logf (LOG_DEBUG, "tag end (%d)", *d1_level);
/* Search the input from *pptr for a match of dfa. On a match *mptr receives
 * the position where the match starts and *pptr the position just past its
 * end. execAction treats a zero return as "no match". */
729 static int tryMatch (struct lexSpec *spec, int *pptr, int *mptr,
732 struct DFA_state *state = dfa->states[0];
736 unsigned char c_prev = 0;
739 int start_ptr = *pptr;
/* Two input paths are visible: the file window (f_win_advance) and the
 * in-memory scan_buf. Presumably they are selected by conditional
 * compilation - the directives are not shown. */
747 c = f_win_advance (spec, &ptr);
748 if (ptr == F_WIN_EOF)
759 if (ptr == spec->scan_size)
769 c = spec->scan_buf[ptr++];
778 *mptr = start_ptr; /* match starts here */
779 *pptr = last_ptr; /* match end here (+1) */
/* Restart the automaton from its initial state. */
782 state = dfa->states[0];
/* Character c falls in this transition's range: follow it. */
789 else if (c >= t->ch[0] && c <= t->ch[1])
791 state = dfa->states[t->to];
/* Record the rule of the state just entered, taken from rule_no or rule_nno
 * (the conditions that choose between them are not visible). */
797 last_rule = state->rule_no;
802 last_rule = state->rule_nno;
806 last_rule = state->rule_no;
/* Extract the next token from the action code at *src, storing its text in
 * tokBuf and its length in tokLen. The return value classifies the token:
 * execCode treats 3 as an option token; the other codes are not visible in
 * this excerpt. */
818 static int execTok (struct lexSpec *spec, const char **src,
819 int arg_no, int *arg_start, int *arg_end,
820 const char **tokBuf, int *tokLen)
822 const char *s = *src;
/* Skip blanks and tabs (newline is not skipped - it is handled below). */
824 while (*s == ' ' || *s == '\t')
/* "$N": the token is the input text captured as argument N. */
828 if (*s == '$' && s[1] >= '0' && s[1] <= '9')
832 while (*s >= '0' && *s <= '9')
833 n = n*10 + (*s++ -'0');
/* The text comes from the file window or from scan_buf, depending on which
 * input path is in use. */
844 *tokBuf = f_win_get (spec, arg_start[n], arg_end[n], tokLen);
846 *tokBuf = spec->scan_buf + arg_start[n];
847 *tokLen = arg_end[n] - arg_start[n];
/* Quoted string: runs up to the closing double quote or the end of string. */
854 while (*s && *s != '\"')
856 *tokLen = s - *tokBuf;
/* A newline or ';' is handled as its own case. */
861 else if (*s == '\n' || *s == ';')
/* Other words run up to whitespace, ';' or the end of the string. */
869 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
871 *tokLen = s - *tokBuf;
878 while (*s && *s != ' ' && *s != '\t' && *s != '\n' && *s != ';')
880 *tokLen = s - *tokBuf;
/* Copy len bytes of src into str and return it as a C string.
 * NOTE(review): the declaration of str, any length clamp and the NUL
 * terminator are not visible here - confirm the buffer bound. */
886 static char *regxStrz (const char *src, int len)
892 memcpy (str, src, len);
/* Interpret the action code in code->str. The commands visible here are
 * "begin record|element", "end record|element", "data" and "unread";
 * anything else is logged as unknown. execAction treats a zero return as a
 * request to stop. */
897 static int execCode (struct lexSpec *spec,
898 int arg_no, int *arg_start, int *arg_end, int *pptr,
899 struct regxCode *code,
900 data1_node **d1_stack, int *d1_level)
902 const char *s = code->str;
907 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str, &cmd_len);
914 r = execTok (spec, &s, arg_no, arg_start, arg_end,
918 p = regxStrz (cmd_str, cmd_len);
/* ---- begin ... ---- */
919 if (!strcmp (p, "begin"))
921 r = execTok (spec, &s, arg_no, arg_start, arg_end,
925 p = regxStrz (cmd_str, cmd_len);
/* begin record <absyn>: create the root node of a new record. */
926 if (!strcmp (p, "record"))
928 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* The name buffer is static because the root node keeps a pointer to it
 * (res->u.root.type below).
 * NOTE(review): confirm that cmd_len is checked against the 64-byte buffer
 * before the memcpy; no such check is visible here. */
934 static char absynName[64];
939 memcpy (absynName, cmd_str, cmd_len);
940 absynName[cmd_len] = '\0';
943 logf (LOG_DEBUG, "begin record %s", absynName);
945 if (!(absyn = data1_get_absyn (absynName)))
946 logf (LOG_WARN, "Unknown tagset: %s", absynName);
951 res = data1_mk_node (spec->m);
952 res->which = DATA1N_root;
953 res->u.root.type = absynName;
954 res->u.root.absyn = absyn;
/* Store the root at the current level and open a new, empty level. */
957 d1_stack[*d1_level] = res;
958 d1_stack[++(*d1_level)] = NULL;
961 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* begin element <tag> */
964 else if (!strcmp (p, "element"))
966 r = execTok (spec, &s, arg_no, arg_start, arg_end,
970 tagBegin (spec, d1_stack, d1_level, cmd_str, cmd_len);
971 r = execTok (spec, &s, arg_no, arg_start, arg_end,
/* ---- end ... ---- */
975 else if (!strcmp (p, "end"))
977 r = execTok (spec, &s, arg_no, arg_start, arg_end,
981 p = regxStrz (cmd_str, cmd_len);
982 if (!strcmp (p, "record"))
985 r = execTok (spec, &s, arg_no, arg_start, arg_end,
988 logf (LOG_DEBUG, "end record");
/* end element [<tag>]: close the named element, or call tagEnd with
 * (NULL, 0) when no name is given. */
992 else if (!strcmp (p, "element"))
994 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1003 tagEnd (spec, d1_stack, d1_level, cmd_str, cmd_len);
1004 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1005 &cmd_str, &cmd_len);
1008 tagEnd (spec, d1_stack, d1_level, NULL, 0);
1011 logf (LOG_WARN, "missing record/element/variant");
1014 logf (LOG_WARN, "missing record/element/variant");
/* ---- data [-text] [-element <tag>] <item> ---- */
1016 else if (!strcmp (p, "data"))
1020 const char *element_str = NULL;
/* Consume option tokens (execTok result 3) ahead of the data item. */
1022 while ((r = execTok (spec, &s, arg_no, arg_start, arg_end,
1023 &cmd_str, &cmd_len)) == 3)
1025 if (cmd_len==5 && !memcmp ("-text", cmd_str, cmd_len))
1027 else if (cmd_len==8 && !memcmp ("-element", cmd_str, cmd_len))
1029 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1030 &element_str, &element_len);
1035 logf (LOG_WARN, "bad data option: %.*s",
1040 logf (LOG_WARN, "missing data item after data");
/* With -element the data is wrapped in a tag of that name: the tag is
 * opened here and closed again after the data has been added. */
1044 tagBegin (spec, d1_stack, d1_level, element_str, element_len);
1047 execData (spec, d1_stack, d1_level, cmd_str, cmd_len,
1049 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1050 &cmd_str, &cmd_len);
1053 tagEnd (spec, d1_stack, d1_level, NULL, 0);
/* ---- unread [-offset <n>] <index> ---- */
1055 else if (!strcmp (p, "unread"))
1058 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1059 &cmd_str, &cmd_len);
1060 if (r==3 && cmd_len == 7 && !memcmp ("-offset", cmd_str, cmd_len))
1062 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1063 &cmd_str, &cmd_len);
1066 logf (LOG_WARN, "missing number after -offset");
1069 p = regxStrz (cmd_str, cmd_len);
1071 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1072 &cmd_str, &cmd_len);
1078 logf (LOG_WARN, "missing index after unread command");
/* The index must be a single decimal digit. */
1081 if (cmd_len != 1 || *cmd_str < '0' || *cmd_str > '9')
1083 logf (LOG_WARN, "bad index after unread command");
1088 no = *cmd_str - '0';
/* Move the read position back to the start of that argument, plus offset. */
1091 *pptr = arg_start[no] + offset;
1093 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1094 &cmd_str, &cmd_len);
/* ---- unknown command ---- */
1098 logf (LOG_WARN, "unknown code command: %.*s", cmd_len, cmd_str);
1099 r = execTok (spec, &s, arg_no, arg_start, arg_end,
1100 &cmd_str, &cmd_len);
/* Leftover tokens are reported and skipped. */
1105 logf (LOG_WARN, "ignoring token %.*s", cmd_len, cmd_str);
1107 r = execTok (spec, &s, arg_no, arg_start, arg_end, &cmd_str,
/* Run the action chain ap for a match that began at start_ptr, with *pptr
 * as the current read position. Pattern steps record their matched range in
 * arg_start[] / arg_end[]; code steps are run through execCode. */
1116 static int execAction (struct lexSpec *spec, struct lexRuleAction *ap,
1117 data1_node **d1_stack, int *d1_level,
1118 int start_ptr, int *pptr)
/* Argument 0 starts where the rule's own match started. */
1125 arg_start[0] = start_ptr;
/* Pattern flagged as "body": one argument covers the text from the current
 * position up to where the pattern matches, the next covers the match
 * itself. F_WIN_EOF is stored as the bound when the pattern never matches. */
1133 if (ap->u.pattern.body)
1135 arg_start[arg_no] = *pptr;
1136 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1138 arg_end[arg_no] = F_WIN_EOF;
1140 arg_start[arg_no] = F_WIN_EOF;
1141 arg_end[arg_no] = F_WIN_EOF;
1146 arg_end[arg_no] = sptr;
1148 arg_start[arg_no] = sptr;
1149 arg_end[arg_no] = *pptr;
/* Plain pattern: the match start (sptr) is compared with the current
 * position; the branch taken when they differ is not visible here. */
1154 arg_start[arg_no] = *pptr;
1155 if (!tryMatch (spec, pptr, &sptr, ap->u.pattern.dfa))
1157 if (sptr != arg_start[arg_no])
1159 arg_end[arg_no] = *pptr;
/* Code step: run it with the arguments collected so far. */
1164 if (!execCode (spec, arg_no, arg_start, arg_end, pptr,
1165 ap->u.code, d1_stack, d1_level))
/* A further step kind whose argument runs from the current position to the
 * end of the input (F_WIN_EOF or scan_size, by input path). */
1169 arg_start[arg_no] = *pptr;
1171 arg_end[arg_no] = F_WIN_EOF;
1173 arg_end[arg_no] = spec->scan_size;
1179 *pptr = spec->scan_size;
/* Run the action list of rule number ruleNo, looked up through the fastRule
 * table, and return execAction's result. */
1187 static int execRule (struct lexSpec *spec, struct lexTrans *trans,
1188 data1_node **d1_stack, int *d1_level,
1189 int ruleNo, int start_ptr, int *pptr)
1192 logf (LOG_DEBUG, "execRule %d", ruleNo);
1194 return execAction (spec, trans->fastRule[ruleNo]->actionList,
1195 d1_stack, d1_level, start_ptr, pptr);
/* Main scanning loop: run the rule DFA over the input from *ptr. Input not
 * claimed by any rule is passed to execDataP as plain text; when a rule
 * matches, its actions are run through execRule. Both the file-window and
 * the scan_buf input paths appear below. */
1198 data1_node *lexNode (struct lexSpec *spec, struct lexTrans *trans,
1199 data1_node **d1_stack, int *d1_level,
1202 struct DFA_state *state = trans->dfa->states[0];
/* NOTE(review): c_prev starts as '\n', presumably so the very first position
 * counts as the start of a line - confirm. */
1206 unsigned char c_prev = '\n';
/* last_ptr, start_ptr and skip_ptr all begin at the current position.
 * skip_ptr marks the start of text not yet emitted: [skip_ptr, start_ptr)
 * is flushed as data before a rule's actions run. */
1210 int last_ptr = *ptr;
1211 int start_ptr = *ptr;
1212 int skip_ptr = *ptr;
/* ---- file-window path: end of input reached ---- */
1217 c = f_win_advance (spec, ptr);
1218 if (*ptr == F_WIN_EOF)
/* Flush the unmatched text that precedes the pending match, then run it. */
1222 if (skip_ptr < start_ptr)
1226 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1227 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1230 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* No pending match: flush all remaining text as data. */
1236 else if (skip_ptr < *ptr)
1240 buf = f_win_get (spec, skip_ptr, *ptr, &size);
1241 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1243 if (*ptr == F_WIN_EOF)
/* ---- scan_buf path: end of buffer reached ---- */
1247 if (*ptr == spec->scan_size)
1251 if (skip_ptr < start_ptr)
1253 execDataP (spec, d1_stack, d1_level,
1254 spec->scan_buf + skip_ptr, start_ptr - skip_ptr,
1258 execRule (spec, trans, d1_stack, d1_level, last_rule,
1263 else if (skip_ptr < *ptr)
1265 execDataP (spec, d1_stack, d1_level,
1266 spec->scan_buf + skip_ptr, *ptr - skip_ptr, 0);
1268 if (*ptr == spec->scan_size)
1271 c = spec->scan_buf[(*ptr)++];
1277 { /* no transition for character c ... */
/* Flush skipped text, then run the last rule that matched. */
1280 if (skip_ptr < start_ptr)
1285 buf = f_win_get (spec, skip_ptr, start_ptr, &size);
1286 execDataP (spec, d1_stack, d1_level, buf, size, 0);
1288 execDataP (spec, d1_stack, d1_level,
1289 spec->scan_buf + skip_ptr,
1290 start_ptr - skip_ptr, 0);
1294 if (!execRule (spec, trans, d1_stack, d1_level, last_rule,
/* If an "end" callback was supplied and input remains, call it with the
 * current position. */
1297 if (spec->f_win_ef && *ptr != F_WIN_EOF)
1298 (*spec->f_win_ef)(spec->f_win_fh, *ptr);
/* Advance start_ptr by one character, remembering that character in c_prev. */
1308 c_prev = f_win_advance (spec, &start_ptr);
1315 c_prev = f_win_advance (spec, &start_ptr);
/* Restart the automaton from its initial state. */
1321 state = trans->dfa->states[0];
1324 else if (c >= t->ch[0] && c <= t->ch[1])
1325 { /* transition ... */
1326 state = trans->dfa->states[t->to];
/* Remember the rule of the new state: rule_no in some cases, else rule_nno
 * when it is set (the deciding condition is not visible for the first pair). */
1332 last_rule = state->rule_no;
1335 else if (state->rule_nno)
1337 last_rule = state->rule_nno;
/* scan_buf variant: rule_no applies when the match starts at offset 0 or
 * directly after a newline. */
1341 if (!start_ptr || spec->scan_buf[start_ptr-1] == '\n')
1343 last_rule = state->rule_no;
1346 else if (state->rule_nno)
1348 last_rule = state->rule_nno;
/* Parse one record: run the spec's begin actions (if any), scan the input
 * with lexNode, then run the end actions (if any). The node stack lives on
 * this function's stack frame.
 * NOTE(review): the stack holds 512 entries and tagBegin pushes without a
 * visible depth check - confirm that nesting is bounded elsewhere. */
1361 static data1_node *lexRoot (struct lexSpec *spec, off_t offset)
1363 data1_node *d1_stack[512];
1367 d1_stack[d1_level] = NULL;
1368 if (spec->beginActionList)
1369 execAction (spec, spec->beginActionList,
1370 d1_stack, &d1_level, 0, &ptr);
1371 lexNode (spec, &spec->trans, d1_stack, &d1_level, &ptr);
1372 if (spec->endActionList)
1373 execAction (spec, spec->endActionList,
1374 d1_stack, &d1_level, ptr, &ptr);
/* Entry point of the regx reader: make sure the filter spec named p->type is
 * loaded (reusing the cached one when the name matches), connect the
 * caller's I/O callbacks to the file window, and parse one record starting
 * at p->offset. */
1378 data1_node *grs_read_regx (struct grs_read_info *p)
/* NOTE(review): the four parameter lines below look like an older signature,
 * presumably commented out or compiled out in the full source - confirm. */
1380 int (*rf)(void *, char *, size_t),
1381 off_t (*sf)(void *, off_t),
1382 void (*ef)(void *, off_t),
1385 const char *name, NMEM m
1396 logf (LOG_DEBUG, "data1_read_regx, offset=%ld type=%s",(long) offset,
/* (Re)load the spec when none is cached or a different type is requested. */
1399 if (!curLexSpec || strcmp (curLexSpec->name, p->type))
1402 lexSpecDel (&curLexSpec);
1403 curLexSpec = lexSpecMk (p->type);
1404 res = readFileSpec (curLexSpec);
/* Discard the spec again (on a failed load, presumably - the condition is
 * not visible). */
1407 lexSpecDel (&curLexSpec);
/* Reset the window to empty and attach the caller's callbacks and handle. */
1414 curLexSpec->f_win_start = 0;
1415 curLexSpec->f_win_end = 0;
1416 curLexSpec->f_win_rf = p->readf;
1417 curLexSpec->f_win_sf = p->seekf;
1418 curLexSpec->f_win_fh = p->fh;
1419 curLexSpec->f_win_ef = p->endf;
1420 curLexSpec->f_win_size = 500000;
/* Alternative in-memory path: read the input into scan_buf in 4096-byte
 * steps, doubling the buffer when needed. It uses rf / fh from the older
 * signature, so presumably it is compiled out together with it. */
1423 if (!(curLexSpec->scan_buf = xmalloc (size = 4096)))
1427 if (rd+4096 > size && !(curLexSpec->scan_buf
1428 = xrealloc (curLexSpec->scan_buf, size *= 2)))
1430 if ((res = (*rf)(fh, curLexSpec->scan_buf + rd, 4096)) < 0)
1434 curLexSpec->scan_size = rd;
/* Nodes are created from the caller's memory handle (data1_mk_node (spec->m)). */
1436 curLexSpec->m = p->mem;
1437 n = lexRoot (curLexSpec, p->offset);
1439 xfree (curLexSpec->scan_buf);