2 * Copyright (C) 1994, Index Data I/S
4 * Sebastian Hammer, Adam Dickmeiss
7 * Revision 1.6 1996-01-08 09:09:20 adam
8 * Function dfa_parse got 'const' string argument.
9 * New functions to define char mappings made public.
11 * Revision 1.5 1995/09/04 12:33:26 adam
12 * Various cleanup. YAZ util used instead.
14 * Revision 1.4 1995/01/24 16:00:21 adam
15 * Added -ansi to CFLAGS.
16 * Some changes to the dfa module.
18 * Revision 1.3 1994/10/04 17:46:43 adam
19 * Function options now returns arg with error option.
21 * Revision 1.2 1994/10/03 17:22:18 adam
22 * Optimization of grepper.
24 * Revision 1.1 1994/09/27 16:31:18 adam
25 * First version of grepper: grep with error correction.
39 static int show_line = 0;
41 typedef unsigned MatchWord;
45 int n; /* no of MatchWord needed */
46 int range; /* max no. of errors */
47 MatchWord *Sc; /* Mask Sc */
50 #define INFBUF_SIZE 16384
54 static INLINE void set_bit (MatchContext *mc, MatchWord *m, int ch, int state)
56 int off = state & (WORD_BITS-1);
57 int wno = state / WORD_BITS;
59 m[mc->n * ch + wno] |= 1<<off;
62 static INLINE void reset_bit (MatchContext *mc, MatchWord *m, int ch,
65 int off = state & (WORD_BITS-1);
66 int wno = state / WORD_BITS;
68 m[mc->n * ch + wno] &= ~(1<<off);
71 static INLINE MatchWord get_bit (MatchContext *mc, MatchWord *m, int ch,
74 int off = state & (WORD_BITS-1);
75 int wno = state / WORD_BITS;
77 return m[mc->n * ch + wno] & (1<<off);
80 static MatchContext *mk_MatchContext (struct DFA *dfa, int range)
82 MatchContext *mc = imalloc (sizeof(*mc));
85 mc->n = (dfa->no_states+WORD_BITS) / WORD_BITS;
87 mc->Sc = icalloc (sizeof(*mc->Sc) * 256 * mc->n);
89 for (i=0; i<dfa->no_states; i++)
92 struct DFA_state *state = dfa->states[i];
94 for (j=0; j<state->tran_no; j++)
97 int ch0 = state->trans[j].ch[0];
98 int ch1 = state->trans[j].ch[1];
99 assert (ch0 >= 0 && ch1 >= 0);
101 for (ch = ch0; ch <= ch1; ch++)
102 set_bit (mc, mc->Sc, ch, i);
109 static void mask_shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
110 struct DFA *dfa, int ch)
113 MatchWord *Rsrc_p = Rsrc, mask;
116 for (j = 1; j<mc->n; j++)
121 for (j = 0; j<WORD_BITS/4; j++)
127 struct DFA_state *state = dfa->states[s];
128 int i = state->tran_no;
130 if (ch >= state->trans[i].ch[0] &&
131 ch <= state->trans[i].ch[1])
132 set_bit (mc, Rdst, 0, state->trans[i].to);
136 struct DFA_state *state = dfa->states[s+1];
137 int i = state->tran_no;
139 if (ch >= state->trans[i].ch[0] &&
140 ch <= state->trans[i].ch[1])
141 set_bit (mc, Rdst, 0, state->trans[i].to);
145 struct DFA_state *state = dfa->states[s+2];
146 int i = state->tran_no;
148 if (ch >= state->trans[i].ch[0] &&
149 ch <= state->trans[i].ch[1])
150 set_bit (mc, Rdst, 0, state->trans[i].to);
154 struct DFA_state *state = dfa->states[s+3];
155 int i = state->tran_no;
157 if (ch >= state->trans[i].ch[0] &&
158 ch <= state->trans[i].ch[1])
159 set_bit (mc, Rdst, 0, state->trans[i].to);
163 if (s >= dfa->no_states)
170 static void shift (MatchContext *mc, MatchWord *Rdst, MatchWord *Rsrc,
174 MatchWord *Rsrc_p = Rsrc, mask;
175 for (j = 0; j<mc->n; j++)
180 for (j = 0; j<WORD_BITS/4; j++)
186 struct DFA_state *state = dfa->states[s];
187 int i = state->tran_no;
189 set_bit (mc, Rdst, 0, state->trans[i].to);
193 struct DFA_state *state = dfa->states[s+1];
194 int i = state->tran_no;
196 set_bit (mc, Rdst, 0, state->trans[i].to);
200 struct DFA_state *state = dfa->states[s+2];
201 int i = state->tran_no;
203 set_bit (mc, Rdst, 0, state->trans[i].to);
207 struct DFA_state *state = dfa->states[s+3];
208 int i = state->tran_no;
210 set_bit (mc, Rdst, 0, state->trans[i].to);
214 if (s >= dfa->no_states)
221 static void or (MatchContext *mc, MatchWord *Rdst,
222 MatchWord *Rsrc1, MatchWord *Rsrc2)
225 for (i = 0; i<mc->n; i++)
226 Rdst[i] = Rsrc1[i] | Rsrc2[i];
230 static int go (MatchContext *mc, struct DFA *dfa, FILE *inf)
232 MatchWord *Rj, *Rj1, *Rj_a, *Rj_b, *Rj_c;
239 infbuf = imalloc (INFBUF_SIZE);
241 Rj = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
242 Rj1 = icalloc (mc->n * (mc->range+1) * sizeof(*Rj));
243 Rj_a = icalloc (mc->n * sizeof(*Rj));
244 Rj_b = icalloc (mc->n * sizeof(*Rj));
245 Rj_c = icalloc (mc->n * sizeof(*Rj));
247 set_bit (mc, Rj, 0, 0);
248 for (d = 1; d<=mc->range; d++)
251 memcpy (Rj + mc->n * d, Rj + mc->n * (d-1), mc->n * sizeof(*Rj));
252 for (s = 0; s<dfa->no_states; s++)
254 if (get_bit (mc, Rj, d-1, s))
256 struct DFA_state *state = dfa->states[s];
257 int i = state->tran_no;
259 set_bit (mc, Rj, d, state->trans[i].to);
263 while ((ch = getc (inf)) != EOF)
267 infbuf[inf_ptr] = ch;
274 printf ("%5d:", lineno);
279 } while (infbuf[i] != '\n');
282 if (++i == INFBUF_SIZE)
285 } while (infbuf[i] != '\n');
290 if (++inf_ptr == INFBUF_SIZE)
292 mask_shift (mc, Rj1, Rj, dfa, ch);
293 for (d = 1; d <= mc->range; d++)
295 mask_shift (mc, Rj_b, Rj+d*mc->n, dfa, ch); /* 1 */
297 or (mc, Rj_a, Rj+(d-1)*mc->n, Rj1+(d-1)*mc->n); /* 2,3 */
299 shift (mc, Rj_c, Rj_a, dfa);
301 or (mc, Rj_a, Rj_b, Rj_c); /* 1,2,3*/
303 or (mc, Rj1+d*mc->n, Rj_a, Rj+(d-1)*mc->n); /* 1,2,3,4 */
305 for (s = 0; s<dfa->no_states; s++)
307 if (dfa->states[s]->rule_no)
308 if (get_bit (mc, Rj1+mc->range*mc->n, 0, s))
311 for (d = 0; d <= mc->range; d++)
312 reset_bit (mc, Rj1+d*mc->n, 0, dfa->no_states);
326 static int grep_file (struct DFA *dfa, const char *fname, int range)
333 inf = fopen (fname, "r");
336 logf (LOG_FATAL|LOG_ERRNO, "cannot open `%s'", fname);
343 mc = mk_MatchContext (dfa, range);
352 int main (int argc, char **argv)
357 const char *pattern = NULL;
359 struct DFA *dfa = dfa_init();
362 while ((ret = options ("nr:dsv:", argv, argc, &arg)) != -2)
370 i = dfa_parse (dfa, &pattern);
373 fprintf (stderr, "%s: illegal pattern\n", prog);
381 grep_file (dfa, arg, range);
386 log_init (log_mask_str(arg), prog, NULL);
395 debug_dfa_followpos = 1;
408 logf (LOG_FATAL, "Unknown option '-%s'", arg);
414 fprintf (stderr, "usage:\n "
415 " %s [-d] [-n] [-r n] [-s] [-v n] pattern file ..\n", prog);
418 else if (no_files == 0)
420 grep_file (dfa, NULL, range);