/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfa.c,v 1.5 2006-05-04 18:58:54 adam Exp $
+ * $Id: nfa.c,v 1.7 2006-05-05 14:02:27 heikki Exp $
*/
/**
#define LOOP_LIMIT 100
+typedef enum {
+ conv_none,
+ conv_string,
+ conv_backref,
+ conv_range
+} yaz_nfa_converter_type;
+
+struct yaz_nfa_converter {
+ yaz_nfa_converter *next;
+ yaz_nfa_converter_type type;
+ yaz_nfa_char *string;
+ size_t strlen;
+ int backref_no;
+ int char_diff;
+};
/* * * * * * *
yaz_nfa_char range_start,
yaz_nfa_char range_end) {
yaz_nfa_transition *t = nmem_malloc(n->nmem, sizeof(yaz_nfa_transition));
+ if (!from_state)
+ from_state = n->firststate;
t->range_start = range_start;
t->range_end = range_end;
t->to_state = to_state;
struct matcher {
yaz_nfa *n;
yaz_nfa_char *longest;
- int bestnode;
+ int bestnode;
void *result;
int errorcode;
int empties; /* count how many consecutive empty transitions */
static void match_state(
yaz_nfa_state *s,
- yaz_nfa_char *inchar,
- size_t incharsleft,
- struct matcher *m ) {
+ yaz_nfa_char *prevmatch,
+ yaz_nfa_char *inchar,
+ size_t incharsleft,
+ struct matcher *m ) {
yaz_nfa_transition *t = s->lasttrans;
if (s->backref_start)
m->n->curr_backrefs[s->backref_start].start = inchar;
if (s->backref_end)
- m->n->curr_backrefs[s->backref_end].end = inchar;
+ m->n->curr_backrefs[s->backref_end].end = prevmatch;
if (t) {
if (incharsleft) {
do {
m->n->curr_backrefs[0].start = inchar;
m->n->curr_backrefs[0].end = inchar;
}
- match_state(t->to_state, inchar+1, incharsleft-1, m);
- /* yes, descent to all matching nodes, even if overrun, */
- /* since we can find a better one later */
- } else if (( t->range_start==EMPTY_START) && (t->range_end==EMPTY_END)) {
+ match_state(t->to_state, inchar,
+ inchar+1, incharsleft-1, m);
+ } else if (( t->range_start==EMPTY_START) &&
+ (t->range_end==EMPTY_END)) {
if ( m->empties++ > LOOP_LIMIT )
m->errorcode= YAZ_NFA_LOOP;
else
- match_state(t->to_state, inchar, incharsleft, m);
+ match_state(t->to_state, prevmatch,
+ inchar, incharsleft, m);
}
} while (t != s->lasttrans );
} else {
int yaz_nfa_match(yaz_nfa *n,
yaz_nfa_char **inbuff,
- size_t incharsleft,
+ size_t *incharsleft,
void **result ){
struct matcher m;
int sz;
m.bestnode = n->nstates;
m.result = 0;
m.errorcode = 0;
+ m.empties = 0;
sz = sizeof( struct yaz_nfa_backref_info) * n->nbackrefs;
if (!n->curr_backrefs) {
n->curr_backrefs = nmem_malloc( n->nmem, sz);
n->best_backrefs[i].end = 0;
}
- match_state(n->firststate, *inbuff, incharsleft, &m);
+ match_state(n->firststate, *inbuff, *inbuff, *incharsleft, &m);
if (m.result) {
+ *incharsleft -= (m.longest-*inbuff);
*result = m.result;
*inbuff = m.longest;
if (m.errorcode)
return 0;
}
+/* * * * * * * * * * * * * *
+ * Converters
+ * * * * * * * * * * * * * */
+
+static yaz_nfa_converter *create_null_converter ( yaz_nfa *n) {
+ yaz_nfa_converter *c;
+ c=nmem_malloc(n->nmem, sizeof(yaz_nfa_converter));
+ c->next=0;
+ c->type=conv_none;
+ c->string=0;
+ c->strlen=0;
+ c->backref_no=0;
+ c->char_diff=0;
+ return c;
+}
+
+yaz_nfa_converter *yaz_nfa_create_string_converter (
+ yaz_nfa *n,
+ yaz_nfa_char *string,
+ size_t length){
+ yaz_nfa_converter *c;
+ int i;
+ c=create_null_converter(n);
+ c->type=conv_string;
+ c->string=nmem_malloc(n->nmem, length*sizeof(yaz_nfa_char));
+ for (i=0;i<length;i++)
+ c->string[i]=string[i];
+ c->strlen=length;
+ return c;
+}
+
+yaz_nfa_converter *yaz_nfa_create_backref_converter (
+ yaz_nfa *n, int backref_no ) {
+ yaz_nfa_converter *c;
+ c=create_null_converter(n);
+ c->type=conv_backref;
+ c->backref_no=backref_no;
+ return c;
+}
+
+yaz_nfa_converter *yaz_nfa_create_range_converter (
+ yaz_nfa *n, int backref_no,
+ yaz_nfa_char from_char,
+ yaz_nfa_char to_char){
+ yaz_nfa_converter *c;
+ c=create_null_converter(n);
+ c->type=conv_range;
+ c->backref_no=backref_no;
+ c->char_diff=to_char - from_char;
+ return c;
+
+}
+
+
+void yaz_nfa_append_converter (
+ yaz_nfa *n,
+ yaz_nfa_converter *startpoint,
+ yaz_nfa_converter *newconverter) {
+ while (startpoint->next)
+ startpoint=startpoint->next;
+ startpoint->next=newconverter;
+}
+
+static int string_convert (
+ yaz_nfa *n,
+ yaz_nfa_converter *c,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft){
+ size_t sz=c->strlen;
+ yaz_nfa_char *p=c->string;
+ while (sz--) {
+ if ((*outcharsleft)-- <= 0)
+ return 2;
+ **outbuff=*p++;
+ (*outbuff)++;
+ }
+ return 0;
+}
+static int backref_convert (
+ yaz_nfa *n,
+ yaz_nfa_converter *c,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft){
+ yaz_nfa_char *cp1,*cp2;
+ int i;
+ i=yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
+ if (i==2) /* no backref, produce no output, that's ok */
+ return 0;
+ if (i==1) /* no match in dfa */
+ return 1; /* should not happen */
+ while (cp2>=cp1) {
+ if ((*outcharsleft)-- <= 0)
+ return 2;
+ **outbuff=*cp1++;
+ (*outbuff)++;
+ }
+ return 0;
+}
+
+static int range_convert (
+ yaz_nfa *n,
+ yaz_nfa_converter *c,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft){
+ yaz_nfa_char *cp1,*cp2;
+ int i;
+ i = yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2);
+ printf ("range_convert: i=%d d=%d, cp1=%p cp2=%p \n",i,c->char_diff,cp1,cp2);
+ if (i == 2) /* no backref, produce no output, not ok */
+ return 1; /* should not happen */
+ if (i == 1) /* no match in dfa */
+ return 1; /* should not happen */
+ while (cp2 >= cp1) {
+ if ((*outcharsleft)-- <= 0)
+ return 2;
+ printf(" range_convert: %d '%c' -> ",*cp1,*cp1);
+ **outbuff=(*cp1++) + c->char_diff ;
+ printf("%d '%c'\n",**outbuff, **outbuff);
+ (*outbuff)++;
+ }
+ return 0;
+}
+
+
+int yaz_nfa_run_converters(
+ yaz_nfa *n,
+ yaz_nfa_converter *c,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft){
+ int rc=0;
+ while (c && !rc) {
+ switch(c->type) {
+ case conv_string:
+ rc=string_convert(n,c,outbuff,outcharsleft);
+ break;
+ case conv_backref:
+ rc=backref_convert(n,c,outbuff,outcharsleft);
+ break;
+ case conv_range:
+ rc=range_convert(n,c,outbuff,outcharsleft);
+ break;
+ default:
+ rc=3; /* internal error */
+ }
+ c=c->next;
+ }
+ return rc;
+}
/* * * * * * * * *
* Debug routines