From 4d994a119a1949522fe229270983ba1b1b399c6a Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Wed, 3 May 2006 09:04:33 +0000 Subject: [PATCH] Added my new NFA character normalizer. Not yet ready, but want to have it in the cvs already now. --- include/yaz/nfa.h | 238 ++++++++++++++++++++++++++++++ src/Makefile.am | 7 +- src/nfa.c | 421 +++++++++++++++++++++++++++++++++++++++++++++++++++++ test/Makefile.am | 6 +- test/nfatest1.c | 146 +++++++++++++++++++ 5 files changed, 812 insertions(+), 6 deletions(-) create mode 100644 include/yaz/nfa.h create mode 100644 src/nfa.c create mode 100644 test/nfatest1.c diff --git a/include/yaz/nfa.h b/include/yaz/nfa.h new file mode 100644 index 0000000..e89a405 --- /dev/null +++ b/include/yaz/nfa.h @@ -0,0 +1,238 @@ +/* Copyright (C) 2006, Index Data ApS + * See the file LICENSE for details. + * $Id: nfa.h,v 1.1 2006-05-03 09:04:33 heikki Exp $ + */ + +/** + * \file nfa.h + * \brief NFA for character set normalizing + * + * The NFA is a character mathcing device. It consists of states + * and transitions between them. Each transition has a condition, which + * is a range of values. + * + * When matching, we always start at the first state, and find the longest + * possible sequence of input characters that match the ranges in the + * conditions, and that leads into a terminal state. + * + */ + +#ifndef NFA_H +#define NFA_H + +#include + +YAZ_BEGIN_CDECL + +/** \brief Internal character type */ +typedef unsigned int yaz_nfa_char; + + +/** \brief The NFA itself + * The internals are hidden in nfa.c */ +typedef struct yaz_nfa yaz_nfa; + +/** \brief A state of the NFA */ +typedef struct yaz_nfa_state yaz_nfa_state; + +/** \brief Transition from one state to another */ +typedef struct yaz_nfa_transition yaz_nfa_transition; + + +/** \brief Initialize the NFA without any states in it + * + * \return a pointer to the newly created NFA + * + * */ +yaz_nfa *yaz_nfa_init(); + +/** \brief Destroy the whole thing */ +void yaz_nfa_destroy( + yaz_nfa *n /** the nfa to destroy */ + ); + +/** \brief Add a normal state to the NFA. + * + * The first state added will become the starting point. + * Returns a pointer to it, which you can safely ignore, or use in building + * transitions. + */ +yaz_nfa_state *yaz_nfa_add_state( + yaz_nfa *n /** The NFA to add the state to */ + ); + + +/** \brief Sets the result pointer to a state + * + * Call with null to clear the pointer. + * + * \retval 0 ok + * \retval 1 The state already has a result! + */ +int yaz_nfa_set_result( + /** The NFA itsef */ + yaz_nfa *n, + /** The state to which the result is added */ + yaz_nfa_state *s, + /** The result. The NFA does not care what it is, just stores it */ + void *result + ); + +/** \brief Gets the result pointer from a state + * + * \retval NULL if no result set + */ +void *yaz_nfa_get_result( + yaz_nfa *n /** The NFA itself */, + yaz_nfa_state *s /** The state whose result you want */); + +/** \brief Set the backref number to a state. + * + * Each state can be the beginning and/or ending of a backref + * sequence. This call sets those flags in the states. After matching, + * we can get hold of the backrefs that matched, and use them in our + * translations. + * + * \param n the nfa + * \param s the state to add to + * \param backref_number is the number of the back reference. 0 for clearing + * \param is_start is 1 for start of the backref, 0 for end + * \return + * 0 for OK + * 1 if the backref is already set + * + * + */ + +int yaz_nfa_set_backref(yaz_nfa *n, yaz_nfa_state *s, + int backref_number, + int is_start ); + +/** \brief Get the backref number of a state. + * + * \param n the nfa + * \param s the state to add to + * \param is_start is 1 for start of the backref, 0 for end + * \return the backref number associated with the state, or 0 if none. + */ + +int yaz_nfa_get_backref(yaz_nfa *n, yaz_nfa_state *s, + int is_start ); + +/** \brief Add a transition to the NFA. + * + * Add a transition between two existing states. The condition + * is (as always) a range of yaz_nfa_chars. + * \param n the nfa + * \param from_state which state the transition is from + * \param to_state where the transition goes to + * \param range_start is the beginning of the range of values + * \param range_end is the end of the range of values + **/ +void yaz_nfa_add_transition(yaz_nfa *n, + yaz_nfa_state *from_state, + yaz_nfa_state *to_state, + yaz_nfa_char range_start, + yaz_nfa_char range_end); + +/** \brief Add an empty (epsilon) transition. + * + * \param n the nfa + * \param from_state which state the transition is from + * \param to_state where the transition goes to + **/ +void yaz_nfa_add_empty_transition( yaz_nfa *n, + yaz_nfa_state *from_state, + yaz_nfa_state *to_state); + +/** \brief Add a translation from a range to the NFA. + * + * \param n the nfa + * \param st the state to add this to. If null, adds to the initial state + * \param range_start is the beginning of the range of values + * \param range_end is the end of the range of values + * + * Checks if we already have a transition like this. If so, does not add + * a new one, but returns the target state. Otherwise creates a new state, + * and a transition to it. + */ +yaz_nfa_state *yaz_nfa_add_range( yaz_nfa *n, + yaz_nfa_state *st, + yaz_nfa_char range_start, + yaz_nfa_char range_end ); + +/** \brief Add a sequence of transitions and states. + * + * Starting from state s (or from the initial state, if s is + * null), finds as much of seq as possible and inserts the rest. + * \Return the final state + */ +yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n, + yaz_nfa_state *s, + yaz_nfa_char *seq ); + + +/** \brief Find the longest possible match. + * + * \param n the nfa itself + * \param inbuff buffer of input data. Will be incremented when match + * \param incharsleft max number of inchars to use from inbuff + * \param result the result pointer from the nfa (what ever that is) + * + * In case of errors, returns the best match so far, + * which the caller is free to ignore. + * + * \retval 0 success + * \retval 1 no match found + * \retval 2 overrun'of input buffer + * \retval 3 looping too far + * + */ + +int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t incharsleft, + void **result ); + +#define YAZ_NFA_SUCCESS 0 +#define YAZ_NFA_NOMATCH 1 +#define YAZ_NFA_OVERRUN 2 +#define YAZ_NFA_LOOP 3 + + +/** \brief Get the first state of the NFA. + * + * \param n the nfa + * + * Useful for iterating through all states, probably calling get_result + * for each, and doing something to the results (freeing memory?) + * + * \returns a pointer to the first state, or NULL if none. + */ +yaz_nfa_state *yaz_nfa_get_first(yaz_nfa *n); + +/** \brief Get the next state of the NFA. + * + * \param n the nfa + * \param s the state to add to + * \return the next state, or NULL if no more. + */ +yaz_nfa_state *yaz_nfa_get_next(yaz_nfa *n, yaz_nfa_state *s); + +/** \brief Dump the NFA into a file . + * + * \param F The file handle to dump into (null => stdout) + * \param n the nfa + * \param strfunc can be used for converting the resultinfo a string. + * + * strfunc is a function like + * char *f( void *result); + * it takes the result, and converts into a printable string (which + * must be allocated somewhere by the caller). If the results are + * already printable, passing a null pointer here prints them with a %s + * + */ +void yaz_nfa_dump(FILE *F, yaz_nfa *n, char *(*strfunc)(void *) ); + + +YAZ_END_CDECL + +#endif diff --git a/src/Makefile.am b/src/Makefile.am index 0a8ce04..3ccaf39 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -1,6 +1,6 @@ ## This file is part of the YAZ toolkit. ## Copyright (C) 1994-2006, Index Data, All rights reserved. -## $Id: Makefile.am,v 1.33 2006-05-02 20:47:45 adam Exp $ +## $Id: Makefile.am,v 1.34 2006-05-03 09:04:33 heikki Exp $ YAZ_VERSION_INFO=2:1:0 @@ -71,8 +71,9 @@ libyaz_la_SOURCES=version.c options.c log.c marcdisp.c oid.c wrbuf.c \ eventl.c seshigh.c statserv.c requestq.c tcpdchk.c \ eventl.h service.c service.h session.h test.c \ xmlquery.c \ - record_conv.c \ - mime.c mime.h + mime.c mime.h \ + nfa.c \ + record_conv.c libyaz_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO) diff --git a/src/nfa.c b/src/nfa.c new file mode 100644 index 0000000..5e8d56e --- /dev/null +++ b/src/nfa.c @@ -0,0 +1,421 @@ +/* Copyright (C) 2006, Index Data ApS + * See the file LICENSE for details. + * + * $Id: nfa.c,v 1.1 2006-05-03 09:04:33 heikki Exp $ + * + * This is a simple NFA-based system for character set normalizing + * in yaz and zebra. Unlike standard NFA's, this operates on ranges of + * characters at a time, so as to keep the size small. + * + * All characters are internally handled as unsigned 32-bit ints, so + * this works well with unicode. Translating to and from utf-8 is trivial, if + * need be. + * + * The NFA stores a translation-thing in the terminal nodes. It does not + * concern itself with what that thing is, for us it is juts a void* + * + * The module consists of two parts: Building the NFA, and translating + * strings with it. + */ + +#include +#include +#include + +/* * * * * * * * * * + * Data structures + * * * * * * * * * */ + +typedef struct yaz_nfa_backref_info yaz_backref_info; + +struct yaz_nfa_backref_info { + yaz_nfa_char *start; + yaz_nfa_char *end; +}; + +struct yaz_nfa { + NMEM nmem; + int nstates; /* how many states do we have */ + int nbackrefs; /* how many backrefs do we have */ + yaz_nfa_state *laststate; /* points to the last in the circular list */ + yaz_nfa_state *firststate; /* the initial state */ + yaz_backref_info *curr_backrefs; /* the backrefs we are matching */ + yaz_backref_info *best_backrefs; /* backrefs of the best match so far*/ +}; + +struct yaz_nfa_state { + int num; /* state number. used for resoving ambiguities, and for dumping */ + void *result; /* signals a terminal state, and gives the result */ + yaz_nfa_transition *lasttrans; /* circular list of transitions */ + yaz_nfa_state *next; /* Circular linked list */ + int backref_start; /* which backreference starts here. 0 if none */ + int backref_end; /* which backreference ends here. 0 if none */ +} ; + +struct yaz_nfa_transition { + yaz_nfa_state *to_state; /* where to */ + yaz_nfa_char range_start; + yaz_nfa_char range_end; + yaz_nfa_transition *next; /* a linked list of them */ +} ; + +/* a range from 1 down to 0 is impossible, and is used to denote an */ +/* empty (epsilon) transition */ +#define EMPTY_START 1 +#define EMPTY_END 0 + +/* a limit for the number of empty transitions we can have in a row before */ +/* we declare this a loop */ +#define LOOP_LIMIT 100 + + + + +/* * * * * * * + * Initializing and destroying whole nfa's + * * * * * * */ + +yaz_nfa *yaz_nfa_init() { + NMEM my_nmem = nmem_create(); + yaz_nfa *n=nmem_malloc(my_nmem, sizeof(yaz_nfa)); + n->nmem=my_nmem; + n->nstates=0; + n->laststate=0; + return n; +} + +void yaz_nfa_destroy(yaz_nfa *n) { + nmem_destroy(n->nmem); +} + + +/* * * * * * + * Low-level interface to building the NFA + * * * * * */ + +yaz_nfa_state *yaz_nfa_add_state(yaz_nfa *n) { + yaz_nfa_state *s = nmem_malloc(n->nmem,sizeof(yaz_nfa_state)); + s->num = (n->nstates)++; + s->result=0; + s->lasttrans=0; + s->backref_start=0; + s->backref_end=0; + if (n->laststate) { + s->next=n->laststate->next; + n->laststate->next=s; + n->laststate=s; + } else { /* first state */ + n->laststate=s; + n->firststate=s; + s->next=s; + } + return s; +} + +int yaz_nfa_set_result(yaz_nfa *n, yaz_nfa_state *s,void *result) { + if ((s->result)&&result) + return 1; + s->result=result; + return 0; +} + +void *yaz_nfa_get_result(yaz_nfa *n, yaz_nfa_state *s) { + if (!s) + return 0; + return s->result; +} + +int yaz_nfa_set_backref(yaz_nfa *n, yaz_nfa_state *s, + int backref_number, + int is_start ){ + if (is_start) { + if (s->backref_start) + return 1; + s->backref_start=backref_number; + if (n->nbackrefsnbackrefs=backref_number; + } else { + if (s->backref_end) + return 1; + s->backref_end=backref_number; + } + return 0; /* ok */ +} + +int yaz_nfa_get_backref(yaz_nfa *n, yaz_nfa_state *s, + int is_start ) { + if (!s) + return 0; + if (is_start) + return s->backref_start; + else + return s->backref_end; +} + +void yaz_nfa_add_transition(yaz_nfa *n, + yaz_nfa_state *from_state, + yaz_nfa_state *to_state, + yaz_nfa_char range_start, + yaz_nfa_char range_end) { + yaz_nfa_transition *t=nmem_malloc(n->nmem,sizeof(yaz_nfa_transition)); + t->range_start=range_start; + t->range_end=range_end; + t->to_state=to_state; + if (from_state->lasttrans) { + t->next= from_state->lasttrans->next; + from_state->lasttrans->next=t; + from_state->lasttrans=t; + } else { /* first trans */ + from_state->lasttrans=t; + t->next=t; + } +} + +void yaz_nfa_add_empty_transition( yaz_nfa *n, + yaz_nfa_state *from_state, + yaz_nfa_state *to_state) { + yaz_nfa_add_transition(n,from_state,to_state, + EMPTY_START,EMPTY_END); +} + +/* * * * * * * * + * Medium-level interface + * * * * * * * */ + +/* Finds a transition from s where the range is exactly c..c */ +/* There should only be one such transition */ +static yaz_nfa_state *find_single_trans( + yaz_nfa_state *s, + yaz_nfa_char range_start, + yaz_nfa_char range_end) { + yaz_nfa_transition *t=s->lasttrans; + if (!t) + return 0; + do { + t=t->next; + if ( ( t->range_start == range_start ) && ( t->range_end == range_end) ) + return t->to_state; + } while (t != s->lasttrans ); + return 0; +} + + +yaz_nfa_state *yaz_nfa_add_range(yaz_nfa *n, + yaz_nfa_state *s, + yaz_nfa_char range_start, + yaz_nfa_char range_end) { + yaz_nfa_state *nextstate; + if (!s) /* default to top-level of the nfa */ + s=n->firststate; + nextstate=find_single_trans(s,range_start,range_end); + if (!nextstate) { + nextstate = yaz_nfa_add_state(n); + yaz_nfa_add_transition(n, s, nextstate, range_start, range_end); + } + return nextstate; +} + +yaz_nfa_state *yaz_nfa_add_sequence(yaz_nfa *n, + yaz_nfa_state *s, + yaz_nfa_char *seq ){ + yaz_nfa_state *nextstate; + if (!s) /* default to top-level of the nfa */ + s=n->firststate; + nextstate=find_single_trans(s,*seq,*seq); + if (nextstate) { + seq++; + if (!*seq) /* whole sequence matched */ + return nextstate; + else + return yaz_nfa_add_sequence(n, nextstate, seq); + } else { /* no next state, build the rest */ + while (*seq) { + s=yaz_nfa_add_range(n,s, *seq, *seq); + seq++; + } + return s; + } + return 0; /* compiler shut up, we return somewhere above */ +} + + + +/* * * * * * * + * Searching the NFA + * * * * * * */ + +struct matcher { + yaz_nfa_char *longest; + int bestnode; + void *result; + int errorcode; + int empties; /* count how many consecutive empty transitions */ +}; + +/* Finds the longest match. In case of ties, chooses the one with the + * lowest numbered state */ +static void match_state(yaz_nfa_state *s, + yaz_nfa_char *inchar, + size_t incharsleft, + struct matcher *m ) { + yaz_nfa_transition *t=s->lasttrans; + + if (t) { + if (incharsleft) { + do { + t=t->next; + if ( (( t->range_start <= *inchar ) && ( t->range_end >= *inchar )) ){ + m->empties=0; + match_state(t->to_state, inchar+1,incharsleft-1,m); + /* yes, descent to all matching nodes, even if overrun, */ + /* since we can find a better one later */ + } else if (( t->range_start==EMPTY_START) && (t->range_end==EMPTY_END)) { + if ( m->empties++ > LOOP_LIMIT ) + m->errorcode= YAZ_NFA_LOOP; + else + match_state(t->to_state, inchar,incharsleft,m); + } + } while (t != s->lasttrans ); + } else { + m->errorcode=YAZ_NFA_OVERRUN; + } + } + if (s->result) { /* terminal node */ + if ( (m->longest < inchar) || /* longer result */ + ((m->longest == inchar)&&(m->bestnodenum)) ){ + /* or as long, but with lower node number. Still better */ + m->longest=inchar; + m->bestnode=s->num; + m->result=s->result; + } + } +} /* match_state */ + +int yaz_nfa_match(yaz_nfa *n, + yaz_nfa_char **inbuff, + size_t incharsleft, + void **result ){ + struct matcher m; + if (!n->firststate) + return YAZ_NFA_NOMATCH; + m.longest=*inbuff; + m.bestnode=n->nstates; + m.result=0; + m.errorcode=0; + if (!n->curr_backrefs) { + int sz=sizeof( struct yaz_nfa_backref_info); + n->curr_backrefs=nmem_malloc( n->nmem, n->nbackrefs* sz); + n->best_backrefs=nmem_malloc( n->nmem, n->nbackrefs* sz); + } + + + match_state(n->firststate, *inbuff, incharsleft, &m); + if (m.result) { + *result=m.result; + *inbuff=m.longest; + if (m.errorcode) + return m.errorcode; + else + return YAZ_NFA_SUCCESS; + } + return YAZ_NFA_NOMATCH; +} + + + + +/* * * * * * * * * + * Debug routines + * * * * * * */ + +yaz_nfa_state *yaz_nfa_get_first(yaz_nfa *n){ + if (!n) + return 0; + return n->firststate; +} + +yaz_nfa_state *yaz_nfa_get_next(yaz_nfa *n, yaz_nfa_state *s){ + if (n && s) { + if (s==n->laststate) + return 0; + return s->next; + } + return 0; +} + + +static void dump_trans(FILE *F, yaz_nfa_transition *t ) { + char c1; + char c2; + c1=t->range_start; + c2=t->range_end; + char *e=""; + if ( (t->range_start <= ' ') || (t->range_start>'z')) + c1='?'; + if ( (t->range_end <= ' ') || (t->range_end>'z')) + c2='?'; + if ((t->range_start==EMPTY_START) && (t->range_end==EMPTY_END)) { + e="(empty)"; + } + fprintf(F," -> [%d] %s '%c' %x - '%c' %x \n", + t->to_state->num, e, + c1, t->range_start, + c2, t->range_end ); +} + +static void dump_state(FILE *F, yaz_nfa_state *s, + char *(*strfunc)(void *) ) { + yaz_nfa_transition *t; + char *resultstring=""; + if (s->result) { + if (strfunc) { + resultstring=(*strfunc)(s->result); + } + else + resultstring=s->result; + } + fprintf(F," state [%d] %s %s", + s->num, s->result?"(FINAL)":"",resultstring ); + if (s->backref_start) { + fprintf(F," start-%d",s->backref_start); + } + if (s->backref_end) { + fprintf(F," end-%d",s->backref_end); + } + fprintf(F,"\n"); + t=s->lasttrans; + if (!t) { + fprintf(F," (no transitions)\n"); + } else { + do { + t=t->next; + dump_trans(F,t); + } while (t != s->lasttrans); + } + +} + +void yaz_nfa_dump(FILE *F, yaz_nfa *n, + char *(*strfunc)(void *) ) { + yaz_nfa_state *s; + if (!F) /* lazy programmers can just pass 0 for F */ + F=stdout; + fprintf(F,"The NFA has %d states \n",n->nstates); + s=n->laststate; + if (s) { + do { + s=s->next; + dump_state(F,s, strfunc); + } while (s != n->laststate); + } +} + + + +/* + * Local variables: + * c-basic-offset: 4 + * indent-tabs-mode: nil + * End: + * vim: shiftwidth=4 tabstop=8 expandtab + */ diff --git a/test/Makefile.am b/test/Makefile.am index 7ddfe2d..5936e3a 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -1,9 +1,9 @@ ## Copyright (C) 1994-2006, Index Data ApS ## All rights reserved. -## $Id: Makefile.am,v 1.15 2006-05-02 20:47:46 adam Exp $ +## $Id: Makefile.am,v 1.16 2006-05-03 09:04:33 heikki Exp $ check_PROGRAMS = tsticonv tstnmem tstmatchstr tstwrbuf tstodr tstccl tstlog \ - tstsoap1 tstsoap2 tstodrstack tstlogthread tstxmlquery tstpquery \ + tstsoap1 tstsoap2 tstodrstack tstlogthread tstxmlquery tstpquery nfatest1 \ tst_filepath tst_record_conv check_SCRIPTS = tstcql.sh tstmarciso.sh tstmarcxml.sh @@ -49,6 +49,6 @@ tstsoap2_SOURCES = tstsoap2.c tstlogthread_SOURCES = tstlogthread.c tstxmlquery_SOURCES = tstxmlquery.c tstpquery_SOURCES = tstpquery.c +nfatest1_SOURCES = nfatest1.c tst_filepath_SOURCES = tst_filepath.c tst_record_conv_SOURCES = tst_record_conv.c - diff --git a/test/nfatest1.c b/test/nfatest1.c new file mode 100644 index 0000000..efafb8d --- /dev/null +++ b/test/nfatest1.c @@ -0,0 +1,146 @@ +/* Copyright (C) 2006, Index Data ApS + * See the file LICENSE for details. + * + * $Id: nfatest1.c,v 1.1 2006-05-03 09:04:33 heikki Exp $ + * + */ + + +#include +#include +#include +#include +#include + +#define VERBOSE 1 + +char *printfunc(void *result) { + static char buf[200]; + sprintf(buf,"\"%s\"", (char*) result); + return buf; +} + +void test_match(yaz_nfa *n, + yaz_nfa_char *buf, int buflen, + int expcode, char *expstr) { + yaz_nfa_char *c=buf; + void *resptr=0; + int i; + i=yaz_nfa_match(n,&c,buflen,&resptr); +#if VERBOSE + printf("\n'%s' returned %d. Moved c by %d, and resulted in '%s'\n", + expstr, i, (c-buf),(char*)resptr); +#endif + YAZ_CHECK_EQ(i,expcode); + YAZ_CHECK_EQ(strcmp(expstr,(char*)resptr),0); +} + +void construction_test() { + yaz_nfa* n= yaz_nfa_init(); + yaz_nfa_state *s,*s0,*s1,*s2,*s3,*s4,*s5; + int i; + yaz_nfa_char seq1[]={'p','r','e','f','i','x',0}; + yaz_nfa_char seq2[]={'p','r','e','l','i','m',0}; + yaz_nfa_char tst1[]={'c','0'}; + yaz_nfa_char tst2[]={'c','k','0'}; + yaz_nfa_char tst3[]={'c','x','0'}; + yaz_nfa_char tst4[]={'z','k','0'}; + yaz_nfa_char tst5[]={'y','k','k','k','k','k','k','y','0'}; + void *p; + + YAZ_CHECK(n); + + s=yaz_nfa_get_first(n); + YAZ_CHECK(!s); + + s0=yaz_nfa_add_state(n); + + s=yaz_nfa_get_first(n); + YAZ_CHECK(s); + s=yaz_nfa_get_next(n,s); + YAZ_CHECK(!s); + + s1=yaz_nfa_add_state(n); + i=yaz_nfa_set_result(n,s1,"first"); + YAZ_CHECK_EQ(i,0); + + i=yaz_nfa_set_result(n,s1,"DUPLICATE"); + YAZ_CHECK_EQ(i,1); + + p=yaz_nfa_get_result(n,s1); + YAZ_CHECK(p); + YAZ_CHECK( strcmp((char*)p,"first")==0 ); + + i=yaz_nfa_set_result(n,s1,0); + YAZ_CHECK_EQ(i,0); + p=yaz_nfa_get_result(n,s1); + YAZ_CHECK(!p); + i=yaz_nfa_set_result(n,s1,"first"); + YAZ_CHECK_EQ(i,0); + + s2=yaz_nfa_add_state(n); + s3=yaz_nfa_add_state(n); + yaz_nfa_set_result(n,s3,"a-k,x-z"); + + s=yaz_nfa_get_first(n); + YAZ_CHECK(s); + s=yaz_nfa_get_next(n,s); + YAZ_CHECK(s); + + + yaz_nfa_add_transition(n,s0,s1,'a','k'); + yaz_nfa_add_transition(n,s1,s1,'k','k'); + yaz_nfa_add_transition(n,s0,s2,'p','p'); + yaz_nfa_add_transition(n,s1,s3,'x','z'); + + s=yaz_nfa_add_range(n, 0, 'k','s' ); + yaz_nfa_set_result(n,s,"K-S"); + + s4=yaz_nfa_add_range(n, s2, 'l','r' ); + s5=yaz_nfa_add_range(n, s2, 'l','r' ); + YAZ_CHECK((s4==s5)); + s=yaz_nfa_add_range(n, 0, 'c','c' ); + + s=yaz_nfa_add_range(n, 0, 'z','z' ); + yaz_nfa_add_empty_transition(n,s,s); + yaz_nfa_set_result(n,s,"loop"); + + s=yaz_nfa_add_range(n, 0, 'y','y' ); + s1=yaz_nfa_add_state(n); + yaz_nfa_set_backref(n,s1,1,1); + yaz_nfa_add_empty_transition(n,s,s1); + s=s1; + yaz_nfa_add_transition(n,s,s,'k','k'); + s=yaz_nfa_add_range(n, s, 'y','y' ); + yaz_nfa_set_result(n,s,"y k+ y"); + yaz_nfa_set_backref(n,s,1,0); + + s=yaz_nfa_add_sequence(n, 0, seq1 ); + yaz_nfa_set_result(n,s,"PREFIX"); + s=yaz_nfa_add_sequence(n, 0, seq2 ); + yaz_nfa_set_result(n,s,"PRELIM"); + +#if VERBOSE + yaz_nfa_dump(0,n, printfunc); +#endif + + test_match(n,seq2,3,YAZ_NFA_OVERRUN,"K-S"); + test_match(n,seq2,6,YAZ_NFA_SUCCESS,"PRELIM"); + test_match(n,tst1,3,YAZ_NFA_SUCCESS,"first"); + test_match(n,tst2,3,YAZ_NFA_SUCCESS,"first"); + test_match(n,tst3,3,YAZ_NFA_SUCCESS,"a-k,x-z"); + test_match(n,tst4,9,YAZ_NFA_LOOP,"loop"); + test_match(n,tst5,9,YAZ_NFA_SUCCESS,"y k+ y"); + + yaz_nfa_destroy(n); +} + +int main(int argc, char **argv) +{ + YAZ_CHECK_INIT(argc, argv); + nmem_init (); + construction_test(); + nmem_exit (); + YAZ_CHECK_TERM; +} + -- 1.7.10.4