From 548d17fb92d2efaa1ef637b56fcec09a7261e842 Mon Sep 17 00:00:00 2001 From: Heikki Levanto Date: Wed, 10 May 2006 13:58:46 +0000 Subject: [PATCH] Higher-level interfaces, fixing the bugs these uncovered. Better defined return codes, doxygen comments, etc. --- include/yaz/nfa.h | 251 ++++++++++++++++++++++++++++++++++++++++++++++------- src/nfa.c | 210 ++++++++++++++++++++++++++++++++------------ test/nfatest1.c | 167 ++++++++++++++++++++++++++++------- 3 files changed, 508 insertions(+), 120 deletions(-) diff --git a/include/yaz/nfa.h b/include/yaz/nfa.h index 6880c70..ddce4af 100644 --- a/include/yaz/nfa.h +++ b/include/yaz/nfa.h @@ -1,6 +1,6 @@ /* Copyright (C) 2006, Index Data ApS * See the file LICENSE for details. - * $Id: nfa.h,v 1.6 2006-05-05 14:02:27 heikki Exp $ + * $Id: nfa.h,v 1.7 2006-05-10 13:58:46 heikki Exp $ */ /** @@ -31,7 +31,37 @@ YAZ_BEGIN_CDECL -/** \brief Internal character type */ +/** \name return codes and data types*/ +/* \{ */ +/** \brief Success */ +#define YAZ_NFA_SUCCESS 0 + +/** \brief no match found */ +#define YAZ_NFA_NOMATCH 1 + +/** \brief Need more input */ +#define YAZ_NFA_OVERRUN 2 + +/** \brief The NFA is looping */ +#define YAZ_NFA_LOOP 3 + +/** \brief no room in output buffer */ +#define YAZ_NFA_NOSPACE 4 + +/** \brief trying to set a result when one already exists*/ +#define YAZ_NFA_ALREADY 5 + +/** \brief Attempting to set an end to a backref that has not been started */ +#define YAZ_NFA_NOSTART 6 + +/** \brief Asking for a non-existing backref */ +#define YAZ_NFA_NOSUCHBACKREF 7 + +/** \brief Internal error, should never happen */ +#define YAZ_NFA_INTERNAL 8 + + +/** \brief Internal character type. 32-bit unicode! 
*/ typedef unsigned int yaz_nfa_char; @@ -45,11 +75,13 @@ typedef struct yaz_nfa_state yaz_nfa_state; /** \brief Transition from one state to another */ typedef struct yaz_nfa_transition yaz_nfa_transition; - -/** brief Simple character range converter */ +/** \brief A converter produces some output to a buffer */ typedef struct yaz_nfa_converter yaz_nfa_converter; +/* \} */ +/** \name Low-level interface to building the NFA */ +/* \{ */ /** \brief Initialize the NFA without any states in it * @@ -76,17 +108,19 @@ yaz_nfa_state *yaz_nfa_add_state( /** \brief Sets the result pointer to a state * - * Call with null to clear the pointer. + * \param n the NFA itself + * \param s the state to which the result will be added + * \param result the result pointer + * + * Sets the result pointer of a state. If already set, returns an error. Call + * with a NULL pointer to clear the result, before setting a new one. * - * \retval 0 ok - * \retval 1 The state already has a result! + * \retval YAZ_NFA_SUCCESS ok + * \retval YAZ_NFA_ALREADY The state already has a result! */ int yaz_nfa_set_result( - /** The NFA itsef */ yaz_nfa *n, - /** The state to which the result is added */ yaz_nfa_state *s, - /** The result. The NFA does not care what it is, just stores it */ void *result ); @@ -108,11 +142,12 @@ void *yaz_nfa_get_result( * * \param n the nfa * \param s the state to add to - * \param backref_number is the number of the back reference. 0 for clearing + * \param backref_number is the number of the back reference. 
* \param is_start is 1 for start of the backref, 0 for end - * \retval 0 for OK - * \retval 1 if the backref is already set - * \retval 2 for ending a backref that has not been started + * + * \retval YAZ_NFA_SUCCESS for OK + * \retval YAZ_NFA_ALREADY if the backref is already set + * \retval YAZ_NFA_NOSTART for ending a backref that has not been started * */ @@ -175,14 +210,28 @@ yaz_nfa_state *yaz_nfa_add_range( yaz_nfa *n, /** \brief Add a sequence of transitions and states. * + * \param n the nfa + * \param s the state to add this to. If null, adds to the initial state + * \param seq is a sequence of yaz_nfa_chars. + * \param seq_len is the length of the sequence + * \return the final state + * * Starting from state s (or from the initial state, if s is * null), finds as much of seq as possible and inserts the rest. - * \Return the final state */ yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n, yaz_nfa_state *s, - yaz_nfa_char *seq ); + yaz_nfa_char *seq, + size_t seq_len ); +/** \} */ + +/** \name Low-level interface for matching the NFA. */ +/* + * These do the actual matching. They know nothing of + * the type of the result pointers + */ +/** \{ */ /** \brief Find the longest possible match. * @@ -194,22 +243,16 @@ yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n, * In case of errors, returns the best match so far, * which the caller is free to ignore. * - * \retval 0 success - * \retval 1 no match found - * \retval 2 overrun'of input buffer - * \retval 3 looping too far + * \retval YAZ_NFA_SUCCESS success + * \retval YAZ_NFA_NOMATCH no match found + * \retval YAZ_NFA_OVERRUN overrun of input buffer + * \retval YAZ_NFA_LOOP looping too far * */ int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t *incharsleft, void **result ); -/** yaz_nfa_match return codes */ -#define YAZ_NFA_SUCCESS 0 -#define YAZ_NFA_NOMATCH 1 -#define YAZ_NFA_OVERRUN 2 -#define YAZ_NFA_LOOP 3 - /** \brief Get a back reference after a successfull match. 
* * \param n the nfa @@ -226,9 +269,9 @@ int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t *incharsleft, * not the end of a backref. It is up to the caller to decide how * to handle such a situation. * - * \retval 0 OK - * \retval 1 no match - * \retval 2 no such backref + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_NOMATCH The NFA hasn't matched anything, no backref + * \retval YAZ_NFA_NOSUCHBACKREF no such backref */ int yaz_nfa_get_backref( yaz_nfa *n, @@ -236,6 +279,14 @@ int yaz_nfa_get_backref( yaz_nfa *n, yaz_nfa_char **start, yaz_nfa_char **end ); +/* \} */ + +/** \name Low-level interface to the converters */ +/* These produce some output text into a buffer. There are a few + * kinds of converters, each producing different type of output. + */ +/* \{ */ + /** \brief Create a string converter. * \param n the nfa * \param string the string to output @@ -300,9 +351,10 @@ void yaz_nfa_append_converter ( * Runs the converters in the chain, placing output into outbuff * (and incrementing the pointer). * - * \retval 0 OK - * \retval 1 no match to get backrefs from - * \retval 2 no room in outbuf + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_NOMATCH no match to get backrefs from + * \retval YAZ_NFA_NOSPACE no room in outbuf + * \retval YAZ_NFA_INTERNAL Should never happen * */ int yaz_nfa_run_converters( @@ -311,6 +363,139 @@ int yaz_nfa_run_converters( yaz_nfa_char **outbuff, size_t *outcharsleft); +/** \} */ + +/** \name High-level interface to the NFA */ +/* This interface combines the NFA and converters, for ease of + * access. There are a few calls to build a complete system, and a call + * to do the actual conversion. 
+ */ +/* \{ */ + +/** \brief Add a rule that converts one string to another ('IX' -> '9') + * + * \param n The nfa itself + * \param from_string the string to match + * \param from_length length of the from_string + * \param to_string the string to write in the output + * \param to_length length of the to_string + * + * Adds a matching rule and a string converter to the NFA. + * Can be used for converting strings into nothing, for example, + * to remove markup. + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_ALREADY Conflict with some other rule + * + */ +int yaz_nfa_add_string_rule( yaz_nfa *n, + yaz_nfa_char *from_string, + size_t from_length, + yaz_nfa_char *to_string, + size_t to_length); + +/** \brief Just like yaz_nfa_add_string_rule, but takes the strings in ascii + * + * \param n The nfa itself + * \param from_string the string to match + * \param to_string the string to write in the output + * + * Like yaz_nfa_add_string_rule, this adds a rule to translate a string + * into another. The only difference is that this one takes the strings as + * normal char *, which means that no high-valued unicodes can be handled, + * and that this one uses null-terminated strings. In short, this is a + * simplified version mostly intended for tests and other small uses. + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_ALREADY Conflict with some other rule + */ +int yaz_nfa_add_ascii_string_rule( yaz_nfa *n, + char *from_string, + char *to_string); + + +/** \brief Add a rule that converts a character range + * + * \param n The nfa itself + * \param range_start Where the matching range starts + * \param range_end Where the matching range ends + * \param output_range_start Where the resulting range starts + * + * + * Adds a character range rule to the NFA. The range to be converted + * is defined by the range_start and range_end parameters. The output + * range starts at output_range_start, and is automatically as long + * as the input range. 
+ * + * Useful for alphabet normalizing [a-z] -> [A-Z] + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_ALREADY Conflict with some other rule + */ +int yaz_nfa_add_char_range_rule (yaz_nfa *n, + yaz_nfa_char range_start, + yaz_nfa_char range_end, + yaz_nfa_char output_range_start); + +/** \brief Add a rule that converts a character range to a string + * + * \param n The nfa itself + * \param range_start Where the matching range starts + * \param range_end Where the matching range ends + * \param to_string the string to write in the output + * \param to_length length of the to_string + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_ALREADY Conflict with some other rule + * + * Adds a character range match rule, and a string converter. + * + * Useful in converting a range of special characters into (short?) + * strings of whitespace, or even to nothing at all. + */ +int yaz_nfa_add_char_string_rule (yaz_nfa *n, + yaz_nfa_char range_start, + yaz_nfa_char range_end, + yaz_nfa_char* to_string, + size_t to_length); + +/** \brief Converts one 'slice' that is, the best matching rule. + * + * \param n the nfa itself + * \param inbuff buffer of input data. Will be incremented when match + * \param incharsleft max number of inchars to use from inbuff. decrements. + * \param outbuff buffer for output data. Will be incremented when match + * \param outcharsleft max number of chars to write to outbuff. + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_OVERRUN No more input data, some pattern could match + * \retval YAZ_NFA_NOSPACE No room in the output buffer + * \retval YAZ_NFA_NOSUCHBACKREF NFA refers to a non-existing backref + * + * Finds the best match at the beginning of inbuff, and fires its converter(s) + * to produce output in outbuff. Increments both inbuff and outbuff pointers and + * decrements the *charsleft values, so all is ready for calling again, until + * the buffer is exhausted. 
That loop is left to the caller, so he can load + * more data in the buffer in good time. + * + * If no match is found, converts one character into itself. If the matcher + * returns any sort of error, leaves the pointers where they were. + */ +int yaz_nfa_convert_slice (yaz_nfa *n, + yaz_nfa_char **inbuff, + size_t *incharsleft, + yaz_nfa_char **outbuff, + size_t *outcharsleft); + + +/* \} */ + +/** \name Debug routines */ +/* These provide a method for traversing all the states defined + * in the NFA, for example to release memory allocated in the results, + * and a simple debug routine to dump the NFA */ +/* \{ */ + /** \brief Get the first state of the NFA. * @@ -346,7 +531,7 @@ yaz_nfa_state *yaz_nfa_get_next(yaz_nfa *n, yaz_nfa_state *s); */ void yaz_nfa_dump(FILE *F, yaz_nfa *n, char *(*strfunc)(void *) ); - +/* \} */ diff --git a/src/nfa.c b/src/nfa.c index 90bb040..dca14c6 100644 --- a/src/nfa.c +++ b/src/nfa.c @@ -1,7 +1,7 @@ /* Copyright (C) 2006, Index Data ApS * See the file LICENSE for details. * - * $Id: nfa.c,v 1.8 2006-05-05 14:04:03 heikki Exp $ + * $Id: nfa.c,v 1.9 2006-05-10 13:58:46 heikki Exp $ */ /** @@ -23,7 +23,9 @@ * strings with it. 
*/ + #include +#include #include #include @@ -100,13 +102,13 @@ yaz_nfa *yaz_nfa_init() { NMEM my_nmem = nmem_create(); yaz_nfa *n = nmem_malloc(my_nmem, sizeof(yaz_nfa)); n->nmem = my_nmem; - n->nstates = 0; - n->laststate = 0; - n->firststate = 0; - n->nbackrefs = 0; + n->nbackrefs = 1; /* we always have #0, last range match */ n->curr_backrefs = 0; n->best_backrefs = 0; n->lastmatch = YAZ_NFA_NOMATCH; + n->nstates = 0; + n->laststate = 0; + n->firststate = n->laststate ; return n; } @@ -140,7 +142,7 @@ yaz_nfa_state *yaz_nfa_add_state(yaz_nfa *n) { int yaz_nfa_set_result(yaz_nfa *n, yaz_nfa_state *s, void *result) { if ((s->result)&&result) - return 1; + return YAZ_NFA_ALREADY; s->result = result; return 0; } @@ -156,7 +158,7 @@ int yaz_nfa_set_backref_point(yaz_nfa *n, yaz_nfa_state *s, int is_start ){ if (is_start) { if (s->backref_start) - return 1; + return YAZ_NFA_ALREADY; s->backref_start = backref_number; if (n->nbackrefs<=backref_number) { n->nbackrefs = backref_number+1; @@ -168,9 +170,9 @@ int yaz_nfa_set_backref_point(yaz_nfa *n, yaz_nfa_state *s, } } else { if (s->backref_end) - return 1; - if (n->nbackrefsnbackrefs<=backref_number) + return YAZ_NFA_NOSTART; s->backref_end = backref_number; } return 0; /* ok */ @@ -240,10 +242,13 @@ yaz_nfa_state *yaz_nfa_add_range(yaz_nfa *n, yaz_nfa_state *s, yaz_nfa_char range_start, yaz_nfa_char range_end) { - yaz_nfa_state *nextstate; + yaz_nfa_state *nextstate=0; if (!s) /* default to top-level of the nfa */ s = n->firststate; - nextstate = find_single_trans(s, range_start, range_end); + if (s) + nextstate = find_single_trans(s, range_start, range_end); + else + s = yaz_nfa_add_state(n); /* create initial state */ if (!nextstate) { nextstate = yaz_nfa_add_state(n); yaz_nfa_add_transition(n, s, nextstate, range_start, range_end); @@ -253,21 +258,25 @@ yaz_nfa_state *yaz_nfa_add_range(yaz_nfa *n, yaz_nfa_state *yaz_nfa_add_sequence(yaz_nfa *n, yaz_nfa_state *s, - yaz_nfa_char *seq ){ - yaz_nfa_state *nextstate; + 
yaz_nfa_char *seq, + size_t seq_len){ + yaz_nfa_state *nextstate=0; if (!s) /* default to top-level of the nfa */ s = n->firststate; - nextstate = find_single_trans(s, *seq, *seq); + if (s) + nextstate = find_single_trans(s, *seq, *seq); if (nextstate) { seq++; - if (!*seq) /* whole sequence matched */ + seq_len--; + if (!seq_len) /* whole sequence matched */ return nextstate; else - return yaz_nfa_add_sequence(n, nextstate, seq); + return yaz_nfa_add_sequence(n, nextstate, seq,seq_len); } else { /* no next state, build the rest */ - while (*seq) { + while (seq_len) { s = yaz_nfa_add_range(n, s, *seq, *seq); seq++; + seq_len--; } return s; } @@ -308,7 +317,8 @@ static void match_state( if (incharsleft) { do { t = t->next; - if ( (( t->range_start <= *inchar ) && ( t->range_end >= *inchar )) ){ + if ( (( t->range_start <= *inchar ) && + ( t->range_end >= *inchar )) ){ m->empties = 0; if (t->range_start!=t->range_end){ /* backref 0 is special: the last range operation */ @@ -367,7 +377,7 @@ int yaz_nfa_match(yaz_nfa *n, m.longest=*inbuff; m.bestnode = n->nstates; m.result = 0; - m.errorcode = 0; + m.errorcode = YAZ_NFA_SUCCESS; m.empties = 0; sz = sizeof( struct yaz_nfa_backref_info) * n->nbackrefs; if (!n->curr_backrefs) { @@ -382,18 +392,17 @@ int yaz_nfa_match(yaz_nfa *n, } match_state(n->firststate, *inbuff, *inbuff, *incharsleft, &m); - if (m.result) { - *incharsleft -= (m.longest-*inbuff); - *result = m.result; - *inbuff = m.longest; - if (m.errorcode) - n->lastmatch = m.errorcode; - else - n->lastmatch= YAZ_NFA_SUCCESS; - return n->lastmatch; + if (m.errorcode==YAZ_NFA_SUCCESS) { + if (!m.result) + m.errorcode=YAZ_NFA_NOMATCH; + else { + *incharsleft -= (m.longest-*inbuff); + *result = m.result; + *inbuff = m.longest; + } } - n->lastmatch = YAZ_NFA_NOMATCH; - return n->lastmatch; + n->lastmatch=m.errorcode; + return m.errorcode; } @@ -401,12 +410,12 @@ int yaz_nfa_get_backref( yaz_nfa *n, int backref_no, yaz_nfa_char **start, yaz_nfa_char **end) { - if 
(backref_no>=n->nbackrefs) - return 2; - if (backref_no<0) - return 2; - if (n->lastmatch== YAZ_NFA_NOMATCH) - return 1; /* accept other errors, they return partial matches*/ + if (backref_no >= n->nbackrefs) + return YAZ_NFA_NOSUCHBACKREF; + if (backref_no < 0) + return YAZ_NFA_NOSUCHBACKREF; + if (n->lastmatch != YAZ_NFA_SUCCESS) + return YAZ_NFA_NOMATCH; *start = n->best_backrefs[backref_no].start; *end = n->best_backrefs[backref_no].end; @@ -485,11 +494,11 @@ static int string_convert ( yaz_nfa_char *p=c->string; while (sz--) { if ((*outcharsleft)-- <= 0) - return 2; + return YAZ_NFA_NOSPACE; **outbuff=*p++; (*outbuff)++; } - return 0; + return YAZ_NFA_SUCCESS; } static int backref_convert ( yaz_nfa *n, @@ -498,18 +507,18 @@ static int backref_convert ( size_t *outcharsleft){ yaz_nfa_char *cp1,*cp2; int i; - i=yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2); - if (i==2) /* no backref, produce no output, that's ok */ - return 0; - if (i==1) /* no match in dfa */ + i = yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2); + if ( i == YAZ_NFA_NOSUCHBACKREF) /* no backref, produce no output */ + return YAZ_NFA_SUCCESS; + if ( i == YAZ_NFA_NOMATCH ) /* no match in dfa */ return 1; /* should not happen */ - while (cp2>=cp1) { + while (cp2 >= cp1) { if ((*outcharsleft)-- <= 0) - return 2; + return YAZ_NFA_NOSPACE; **outbuff=*cp1++; (*outbuff)++; } - return 0; + return YAZ_NFA_SUCCESS; } static int range_convert ( @@ -517,20 +526,20 @@ static int range_convert ( yaz_nfa_converter *c, yaz_nfa_char **outbuff, size_t *outcharsleft){ - yaz_nfa_char *cp1,*cp2; + yaz_nfa_char *cp1=0, *cp2=0; int i; i = yaz_nfa_get_backref(n,c->backref_no, &cp1, &cp2); - if (i == 2) /* no backref, produce no output, not ok */ - return 1; /* should not happen */ - if (i == 1) /* no match in dfa */ - return 1; /* should not happen */ + if (i == YAZ_NFA_NOSUCHBACKREF) /* no backref, produce no output, not ok */ + return YAZ_NFA_NOSUCHBACKREF; /* should not happen */ + if (i == YAZ_NFA_NOMATCH) /* 
no match in dfa */ + return YAZ_NFA_NOMATCH; /* should not happen */ while (cp2 >= cp1) { if ((*outcharsleft)-- <= 0) - return 2; + return YAZ_NFA_NOSPACE; **outbuff=(*cp1++) + c->char_diff ; (*outbuff)++; } - return 0; + return YAZ_NFA_SUCCESS; } @@ -552,13 +561,104 @@ int yaz_nfa_run_converters( rc=range_convert(n,c,outbuff,outcharsleft); break; default: - rc=3; /* internal error */ + rc=YAZ_NFA_INTERNAL; /* should never happen */ } c=c->next; } return rc; } +/* * * * * * * * + * High-level interface + * These routines build the nfa and add converters, all + * in one go. + * * * * * * * */ + +int yaz_nfa_add_string_rule( yaz_nfa *n, + yaz_nfa_char *from_string, + size_t from_length, + yaz_nfa_char *to_string, + size_t to_length ) { + yaz_nfa_state *s= + yaz_nfa_add_sequence(n, 0, from_string,from_length); + yaz_nfa_converter *c= + yaz_nfa_create_string_converter(n,to_string,to_length); + return yaz_nfa_set_result(n,s,c); +} + +int yaz_nfa_add_ascii_string_rule( yaz_nfa *n, + char *from_string, + char *to_string) { + size_t from_len = strlen(from_string); + size_t to_len = strlen(to_string); + yaz_nfa_char *from_buf= + nmem_malloc(n->nmem, from_len*sizeof(yaz_nfa_char)); + yaz_nfa_char *to_buf= + nmem_malloc(n->nmem, to_len*sizeof(yaz_nfa_char)); + int i; + for (i=0;iresult; } fprintf(F, " state [%d] %s %s", - s->num, s->result?"(FINAL)":"", resultstring ); + s->num, s->result?"(final)":"", resultstring ); if (s->backref_start) { fprintf(F, " start-%d", s->backref_start); } diff --git a/test/nfatest1.c b/test/nfatest1.c index 3d14b30..2d4c8ab 100644 --- a/test/nfatest1.c +++ b/test/nfatest1.c @@ -1,7 +1,7 @@ /* Copyright (C) 2006, Index Data ApS * See the file LICENSE for details. 
* - * $Id: nfatest1.c,v 1.5 2006-05-05 14:02:27 heikki Exp $ + * $Id: nfatest1.c,v 1.6 2006-05-10 13:58:47 heikki Exp $ * */ @@ -12,7 +12,6 @@ #include #include -#define VERBOSE 0 char *printfunc(void *result) { static char buf[200]; @@ -35,26 +34,23 @@ void test_match(yaz_nfa *n, int i, bi; size_t buflen2 = buflen; i = yaz_nfa_match(n,&c, &buflen2,&resptr); -#if VERBOSE - printf("\n'%s' returned %d. Moved c by %d, and resulted in '%s'\n", + if (yaz_test_get_verbosity()>3) + printf("\n'%s' returned %d. Moved c by %d, and resulted in '%s'\n", expstr, i, (c-buf),(char*)resptr); -#endif YAZ_CHECK_EQ(buflen-buflen2, c-buf); YAZ_CHECK_EQ(i, expcode); - if (i!=1) + if (i==0) YAZ_CHECK_EQ(strcmp(expstr,(char*)resptr), 0); i = 0; bi = 0; - while(bi!=2){ + while((bi!=2) && (yaz_test_get_verbosity()>3)){ bi = yaz_nfa_get_backref(n, i,&cp1,&cp2); if (bi==0 && ( cp1 || cp2 ) ) { -#if VERBOSE printf(" got backref %d of %d chars (%p to %p): '", i, cp2-cp1+1, cp1, cp2); while (cp2-cp1 >= 0 ) printf("%c", *cp1++); printf("'\n"); -#endif } i++; } @@ -93,7 +89,7 @@ void construction_test() { YAZ_CHECK_EQ(i, 0); i = yaz_nfa_set_result(n, s1, "DUPLICATE"); - YAZ_CHECK_EQ(i, 1); + YAZ_CHECK_EQ(i, YAZ_NFA_ALREADY); p = yaz_nfa_get_result(n, s1); YAZ_CHECK(p); @@ -143,29 +139,33 @@ void construction_test() { yaz_nfa_set_result(n, s, "y k+ d"); yaz_nfa_set_backref_point(n, s, 1, 0); - s = yaz_nfa_add_sequence(n, 0, seq1 ); + s = yaz_nfa_add_sequence(n, 0, seq1,6 ); yaz_nfa_set_result(n, s, "PREFIX"); - s = yaz_nfa_add_sequence(n, 0, seq2 ); + s = yaz_nfa_add_sequence(n, 0, seq2,6 ); yaz_nfa_set_result(n, s, "PRELIM"); s = yaz_nfa_add_range(n, 0, 'x', 'x' ); - yaz_nfa_set_backref_point(n, s, 2, 1); - s1 = yaz_nfa_add_sequence(n, s, tst4); + i=yaz_nfa_set_backref_point(n, s, 2, 0); + YAZ_CHECK_EQ(i,YAZ_NFA_NOSTART); + i=yaz_nfa_set_backref_point(n, s, 2, 1); + YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS); + i=yaz_nfa_set_backref_point(n, s, 2, 1); + YAZ_CHECK_EQ(i,YAZ_NFA_ALREADY); + s1 = 
yaz_nfa_add_sequence(n, s, tst4,2); yaz_nfa_set_backref_point(n, s1, 2, 0); yaz_nfa_set_result(n, s1, "xzk"); /* check return codes before doing any matches */ i = yaz_nfa_get_backref(n, 0, &cp1, &cp2 ); - YAZ_CHECK_EQ(i, 1); + YAZ_CHECK_EQ(i, YAZ_NFA_NOMATCH); i = yaz_nfa_get_backref(n, 3, &cp1, &cp2 ); - YAZ_CHECK_EQ(i, 2); + YAZ_CHECK_EQ(i, YAZ_NFA_NOSUCHBACKREF ); i = yaz_nfa_get_backref(n, 1, &cp1, &cp2 ); - YAZ_CHECK_EQ(i, 1); + YAZ_CHECK_EQ(i, YAZ_NFA_NOMATCH ); -#if VERBOSE - yaz_nfa_dump(0, n, printfunc); -#endif + if (yaz_test_get_verbosity()>3) + yaz_nfa_dump(0, n, printfunc); test_match(n, seq2, 3, YAZ_NFA_OVERRUN, "K-S"); test_match(n, seq2, 6, YAZ_NFA_SUCCESS, "PRELIM"); @@ -184,10 +184,9 @@ void construction_test() { YAZ_CHECK_EQ(cp2-cp1+1,2); YAZ_CHECK_EQ(*cp1, 'z' ); YAZ_CHECK_EQ(*cp2, 'k' ); -#if VERBOSE - printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n", + if (yaz_test_get_verbosity()>3) + printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n", cp1, *cp1, cp2, *cp2, cp2-cp1+1, sz ); -#endif yaz_nfa_destroy(n); } @@ -196,7 +195,7 @@ void converter_test() { yaz_nfa* n= yaz_nfa_init(); yaz_nfa_converter *c1, *c2, *c3; yaz_nfa_char str1[]={'a','b','c'}; - yaz_nfa_char seq1[]={'A','B','C',0}; + yaz_nfa_char seq1[]={'A','B','C'}; yaz_nfa_char seq2[]={'k','m','n','m','x','P','Q','X',0}; yaz_nfa_char outbuf[1024]; yaz_nfa_char *outp, *cp, *cp1, *cp2; @@ -212,7 +211,7 @@ void converter_test() { outp=outbuf; sz=1; i=yaz_nfa_run_converters(n, c1, &outp, &sz); - YAZ_CHECK_EQ(i,2); /* overrun */ + YAZ_CHECK_EQ(i,4); /* overrun */ YAZ_CHECK_EQ(outbuf[0],'a'); YAZ_CHECK_EQ(outbuf[1],10000+1); @@ -250,7 +249,7 @@ void converter_test() { s=yaz_nfa_add_state(n); yaz_nfa_add_empty_transition(n,0,s); yaz_nfa_set_backref_point(n,s,1,1); - s=yaz_nfa_add_sequence(n, s, seq1 ); + s=yaz_nfa_add_sequence(n, s, seq1,3 ); yaz_nfa_set_result(n,s,c1); yaz_nfa_set_backref_point(n,s,1,0); @@ -268,9 +267,8 @@ void converter_test() { 
c1=yaz_nfa_create_backref_converter(n,2); yaz_nfa_set_result(n,s,c1); -#if VERBOSE - yaz_nfa_dump(0,n, printfunc2); -#endif + if (yaz_test_get_verbosity()>3) + yaz_nfa_dump(0,n, printfunc2); cp=seq2; sz=18; @@ -278,10 +276,9 @@ void converter_test() { c2=vp; YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS); i=yaz_nfa_get_backref(n, 2, &cp1, &cp2 ); -#if VERBOSE - printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n", + if (yaz_test_get_verbosity()>3) + printf("backref from %p '%c' to %p '%c' is %d long. sz is now %d\n", cp1, *cp1, cp2, *cp2, cp2-cp1+1, sz ); -#endif YAZ_CHECK_EQ(i,0); YAZ_CHECK_EQ((int)c1,(int)c2); /* got our pointer back from nfa */ for(i=0;i<1024;i++) @@ -316,6 +313,111 @@ void converter_test() { yaz_nfa_destroy(n); } +yaz_nfa_char *makebuff(NMEM nmem, char *in) { + yaz_nfa_char *buff = nmem_malloc(nmem, strlen(in)*sizeof(yaz_nfa_char)); + yaz_nfa_char *op=buff; + while ( (*op++ = *in++) ) + ; + return buff; +} + +void dumpbuff(char *msg, yaz_nfa_char *start, yaz_nfa_char *end) { + if (yaz_test_get_verbosity()>3) { + printf("%s\"",msg); + while (start!=end) + printf("%c",*start++); + printf("\"\n"); + } +} + +void chkbuff( yaz_nfa_char *start, yaz_nfa_char *end, char *exp) { + char *orig_exp=exp; + while (start!=end) + if ( *start++ != *exp++ ) { + if (yaz_test_get_verbosity()>3) { + start--; + exp--; + printf ("chkbuff: unexpected conversion '%c' != '%c' \n" + "\"%s\"\n", *start, *exp, orig_exp ); + } + YAZ_CHECK(!"conversion differs! "); + return; + } + +} + +void high_level_test() { + NMEM nmem=nmem_create(); + yaz_nfa_char from1[] = {'f','o','o','b','a','r'}; + yaz_nfa_char to1[] = {'f','u','b','a','r'}; + yaz_nfa_char tospace[] = {' '}; + yaz_nfa_char todot[] = {'.'}; + char *fromtext = + "It was a Dark and Rainy Night, when alpha and beta " + "fixme - FIND better names ?? !! ## - " + "went out to fix the foobar " + "that was all foo."; + char *expected = + "IT WAS A DARK AND RAINY NIGHT. WHEN ALPHA AND b " + "to-be-fixed-later . 
FIND BETTER NAMES .. .. .. . " + "WENT OUT TO (fix) THE fubar " + "THAT WAS ALL FOO."; + yaz_nfa_char *from3 = makebuff(nmem,fromtext); + yaz_nfa_char *to3 = nmem_malloc(nmem, 1024*sizeof(yaz_nfa_char)); + yaz_nfa_char *fromp=from3; + yaz_nfa_char *top=to3; + size_t insize=strlen(fromtext); + size_t outsize=1024; + size_t prev_insize=0; + + yaz_nfa *n = yaz_nfa_init(); + int i; + i = yaz_nfa_add_string_rule(n, from1, 6, to1, 5); + YAZ_CHECK_EQ(i,0); + i = yaz_nfa_add_string_rule(n, from1, 6, to1, 5); + YAZ_CHECK_EQ(i,YAZ_NFA_ALREADY); + i = yaz_nfa_add_ascii_string_rule(n,"beta","b"); + YAZ_CHECK_EQ(i,0); + i = yaz_nfa_add_ascii_string_rule(n,"fixme","to-be-fixed-later"); + YAZ_CHECK_EQ(i,0); + i = yaz_nfa_add_ascii_string_rule(n,"fix","(fix)"); + YAZ_CHECK_EQ(i,0); + i = yaz_nfa_add_char_range_rule(n, 'a','z','A'); + YAZ_CHECK_EQ(i,0); + i = yaz_nfa_add_char_string_rule(n, 0,' ', tospace,1); + YAZ_CHECK_EQ(i,0); + i = yaz_nfa_add_char_string_rule(n, '!','/', todot,1); + YAZ_CHECK_EQ(i,0); + i = yaz_nfa_add_char_string_rule(n, ':','?', todot,1); + YAZ_CHECK_EQ(i,0); + if (yaz_test_get_verbosity()>3) + yaz_nfa_dump(0,n, printfunc2); + + YAZ_CHECK_EQ( *from3, 'I' ); /* just to be sure my copy func works */ + for (i=0;i<100;i++) + to3[i]=10000+i; + i=yaz_nfa_convert_slice(n, &fromp, &insize, &top, &outsize); + YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS); + YAZ_CHECK_EQ(*to3,'I'); + YAZ_CHECK_EQ(insize, strlen(fromtext)-1); + YAZ_CHECK_EQ(outsize, 1024-1); + + while ( (i==YAZ_NFA_SUCCESS) && (insize > 0) && (prev_insize!=insize) ) { + prev_insize=insize; /* detect dead loops if something goes wrong */ + i=yaz_nfa_convert_slice(n, &fromp, &insize, &top, &outsize); + } + YAZ_CHECK_EQ(i,YAZ_NFA_SUCCESS); + YAZ_CHECK_EQ(insize,0); + YAZ_CHECK(prev_insize != insize); /* the loop would have been endless */ + + dumpbuff("Original text: ",from3, fromp); + dumpbuff("Converted text: ",to3, top); + + chkbuff(to3, top, expected); + + yaz_nfa_destroy(n); + nmem_destroy(nmem); +} int main(int 
argc, char **argv) { @@ -323,6 +425,7 @@ int main(int argc, char **argv) nmem_init (); construction_test(); converter_test(); + high_level_test(); nmem_exit (); YAZ_CHECK_TERM; } -- 1.7.10.4