/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
*
- * $Id: nfa.c,v 1.1 2006-05-03 09:04:33 heikki Exp $
+ * $Id: nfa.c,v 1.3 2006-05-03 11:36:14 mike Exp $
*
* This is a simple NFA-based system for character set normalizing
* in yaz and zebra. Unlike standard NFA's, this operates on ranges of
n->nmem=my_nmem;
n->nstates=0;
n->laststate=0;
+ n->firststate=0;
+ n->nbackrefs=0;
+ n->curr_backrefs=0;
+ n->best_backrefs=0;
return n;
}
if (s->backref_start)
return 1;
s->backref_start=backref_number;
- if (n->nbackrefs<backref_number)
+ if (n->nbackrefs<backref_number) {
n->nbackrefs=backref_number;
+ n->curr_backrefs=0;
+ n->best_backrefs=0;
+ /* clear them just in case we have already matched on */
+ /* with this nfa, and created a too small backref table */
+ /* we will reallocate when matching. */
+ }
} else {
if (s->backref_end)
return 1;
+ if (n->nbackrefs<backref_number)
+ return 2; /* can't end a backref that has not been started */
s->backref_end=backref_number;
}
return 0; /* ok */
* * * * * * */
struct matcher {
+ yaz_nfa *n;
yaz_nfa_char *longest;
int bestnode;
void *result;
};
/* Finds the longest match. In case of ties, chooses the one with the
- * lowest numbered state */
-static void match_state(yaz_nfa_state *s,
+ * lowest numbered state. Keep track of the back references. Recursively
+ * traverses the NFA. Keeps track of the best hit it has found. */
+
+static void match_state(
+ yaz_nfa_state *s,
yaz_nfa_char *inchar,
size_t incharsleft,
struct matcher *m ) {
yaz_nfa_transition *t=s->lasttrans;
-
+ if (s->backref_start)
+ m->n->curr_backrefs[s->backref_start].start=inchar;
+ if (s->backref_end)
+ m->n->curr_backrefs[s->backref_end].end=inchar;
if (t) {
if (incharsleft) {
do {
if ( (m->longest < inchar) || /* longer result */
((m->longest == inchar)&&(m->bestnode<s->num)) ){
/* or as long, but with lower node number. Still better */
+ int i;
m->longest=inchar;
m->bestnode=s->num;
m->result=s->result;
+ if (m->n->curr_backrefs)
+ for (i=0; i<m->n->nbackrefs;i++)
+ m->n->best_backrefs[i]=m->n->curr_backrefs[i];
}
}
+ if (s->backref_start)
+ m->n->curr_backrefs[s->backref_start].start=0;
+ if (s->backref_end)
+ m->n->curr_backrefs[s->backref_end].end=0;
} /* match_state */
int yaz_nfa_match(yaz_nfa *n,
struct matcher m;
if (!n->firststate)
return YAZ_NFA_NOMATCH;
+ m.n=n;
m.longest=*inbuff;
m.bestnode=n->nstates;
m.result=0;
m.errorcode=0;
- if (!n->curr_backrefs) {
- int sz=sizeof( struct yaz_nfa_backref_info);
- n->curr_backrefs=nmem_malloc( n->nmem, n->nbackrefs* sz);
- n->best_backrefs=nmem_malloc( n->nmem, n->nbackrefs* sz);
+ if ((!n->curr_backrefs) && (n->nbackrefs)){
+ int sz=sizeof( struct yaz_nfa_backref_info) * (n->nbackrefs+1);
+ n->curr_backrefs=nmem_malloc( n->nmem, sz);
+ n->best_backrefs=nmem_malloc( n->nmem, sz);
}
static void dump_trans(FILE *F, yaz_nfa_transition *t ) {
char c1;
char c2;
+ char *e;
c1=t->range_start;
c2=t->range_end;
- char *e="";
+ e="";
if ( (t->range_start <= ' ') || (t->range_start>'z'))
c1='?';
if ( (t->range_end <= ' ') || (t->range_end>'z'))