X-Git-Url: http://git.indexdata.com/?p=yaz-moved-to-github.git;a=blobdiff_plain;f=include%2Fyaz%2Fnfa.h;h=ddce4af69578c537a80633e8ded19b45703d5035;hp=6880c701dce9d35561615f06336703333ef63e6a;hb=548d17fb92d2efaa1ef637b56fcec09a7261e842;hpb=821e247a6d064e29703c74b21f755baaf4cc1e6e diff --git a/include/yaz/nfa.h b/include/yaz/nfa.h index 6880c70..ddce4af 100644 --- a/include/yaz/nfa.h +++ b/include/yaz/nfa.h @@ -1,6 +1,6 @@ /* Copyright (C) 2006, Index Data ApS * See the file LICENSE for details. - * $Id: nfa.h,v 1.6 2006-05-05 14:02:27 heikki Exp $ + * $Id: nfa.h,v 1.7 2006-05-10 13:58:46 heikki Exp $ */ /** @@ -31,7 +31,37 @@ YAZ_BEGIN_CDECL -/** \brief Internal character type */ +/** \name return codes and data types*/ +/* \{ */ +/** \brief Success */ +#define YAZ_NFA_SUCCESS 0 + +/** \brief no match found */ +#define YAZ_NFA_NOMATCH 1 + +/** \brief Need more input */ +#define YAZ_NFA_OVERRUN 2 + +/** \brief The NFA is looping */ +#define YAZ_NFA_LOOP 3 + +/** \brief no room in output buffer */ +#define YAZ_NFA_NOSPACE 4 + +/** \brief tryig to set a result when one already exists*/ +#define YAZ_NFA_ALREADY 5 + +/** \brief Attempting to set an end to a backref that has not been started */ +#define YAZ_NFA_NOSTART 6 + +/** \brief Asking for a non-existing backref */ +#define YAZ_NFA_NOSUCHBACKREF 7 + +/** \brief Internal error, should never happen */ +#define YAZ_NFA_INTERNAL 8 + + +/** \brief Internal character type. 32-bit unicode! */ typedef unsigned int yaz_nfa_char; @@ -45,11 +75,13 @@ typedef struct yaz_nfa_state yaz_nfa_state; /** \brief Transition from one state to another */ typedef struct yaz_nfa_transition yaz_nfa_transition; - -/** brief Simple character range converter */ +/** \brief A converter produces some output to a buffer */ typedef struct yaz_nfa_converter yaz_nfa_converter; +/* \} */ +/** \name Low-level interface to building the NFA */ +/* \{ */ /** \brief Initialize the NFA without any states in it * @@ -76,17 +108,19 @@ yaz_nfa_state *yaz_nfa_add_state( /** \brief Sets the result pointer to a state * - * Call with null to clear the pointer. + * \param n the NFA itself + * \param s the state to which the result will be added + * \param result the result pointer + * + * Sets the result pointer of a state. If already set, returns an error. Call + * with a NULL pointer to clear the result, before setting a new one. * - * \retval 0 ok - * \retval 1 The state already has a result! + * \retval YAZ_NFA_SUCCESS ok + * \retval YAZ_NFA_ALREADY The state already has a result! */ int yaz_nfa_set_result( - /** The NFA itsef */ yaz_nfa *n, - /** The state to which the result is added */ yaz_nfa_state *s, - /** The result. The NFA does not care what it is, just stores it */ void *result ); @@ -108,11 +142,12 @@ void *yaz_nfa_get_result( * * \param n the nfa * \param s the state to add to - * \param backref_number is the number of the back reference. 0 for clearing + * \param backref_number is the number of the back reference. * \param is_start is 1 for start of the backref, 0 for end - * \retval 0 for OK - * \retval 1 if the backref is already set - * \retval 2 for ending a backref that has not been started + * + * \retval YAZ_NFA_SUCCESS for OK + * \retval YAZ_NFA_ALREADY if the backref is already set + * \retval YAZ_NFA_NOSTART for ending a backref that has not been started * */ @@ -175,14 +210,28 @@ yaz_nfa_state *yaz_nfa_add_range( yaz_nfa *n, /** \brief Add a sequence of transitions and states. * + * \param n the nfa + * \param s the state to add this to. If null, adds to the initial state + * \param seq is a sequence of yaz_fna_chars. + * \param seq_len is the length of the sequence + * \Return the final state + * * Starting from state s (or from the initial state, if s is * null), finds as much of seq as possible and inserts the rest. - * \Return the final state */ yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n, yaz_nfa_state *s, - yaz_nfa_char *seq ); + yaz_nfa_char *seq, + size_t seq_len ); +/** \} */ + +/** \name Low-level interface for mathcing the NFA. */ +/* + * These do the actual matching. They know nothing of + * the type of the result pointers + */ +/** \{ */ /** \brief Find the longest possible match. * @@ -194,22 +243,16 @@ yaz_nfa_state *yaz_nfa_add_sequence( yaz_nfa *n, * In case of errors, returns the best match so far, * which the caller is free to ignore. * - * \retval 0 success - * \retval 1 no match found - * \retval 2 overrun'of input buffer - * \retval 3 looping too far + * \retval YAZ_NFA_SUCCESS success + * \retval YAZ_NFA_NOMATCH no match found + * \retval YAZ_NFA_OVERRUN overrun of input buffer + * \retval YAZ_NFA_LOOP looping too far * */ int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t *incharsleft, void **result ); -/** yaz_nfa_match return codes */ -#define YAZ_NFA_SUCCESS 0 -#define YAZ_NFA_NOMATCH 1 -#define YAZ_NFA_OVERRUN 2 -#define YAZ_NFA_LOOP 3 - /** \brief Get a back reference after a successfull match. * * \param n the nfa @@ -226,9 +269,9 @@ int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t *incharsleft, * not the end of a backref. It is up to the caller to decide how * to handle such a situation. * - * \retval 0 OK - * \retval 1 no match - * \retval 2 no such backref + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_NOMATCH The NFA hasn't matched anything, no backref + * \retval YAZ_NFA_NOSUCHBACKREF no such backref */ int yaz_nfa_get_backref( yaz_nfa *n, @@ -236,6 +279,14 @@ int yaz_nfa_get_backref( yaz_nfa *n, yaz_nfa_char **start, yaz_nfa_char **end ); +/* \} */ + +/** \name Low-level interface to the converters */ +/* These produce some output text into a buffer. There are a few + * kinds of converters, each producing different type of output. + */ +/* \{ */ + /** \brief Create a string converter. * \param n the nfa * \param string the string to output @@ -300,9 +351,10 @@ void yaz_nfa_append_converter ( * Runs the converters in the chain, placing output into outbuff * (and incrementing the pointer). * - * \retval 0 OK - * \retval 1 no match to get backrefs from - * \retval 2 no room in outbuf + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_NOMATCH no match to get backrefs from + * \retval YAZ_NFA_NOSPACE no room in outbuf + * \retval YAZ_NFA_INTERNAL Should never happen * */ int yaz_nfa_run_converters( @@ -311,6 +363,139 @@ int yaz_nfa_run_converters( yaz_nfa_char **outbuff, size_t *outcharsleft); +/** \} */ + +/** \name High-level interface to the NFA */ +/* This interface combines the NFA and converters, for ease of + * access. There are a few calls to build a complete system, and a call + * to do the actual conversion. + */ +/* \{ */ + +/** \brief Add a rule that converts one string to another ('IX' -> '9') + * + * \param n The nfa itself + * \param from_string the string to match + * \param from_length length of the from_string + * \param to_string the string to write in the output + * \param to_length length of the to_string + * + * Adds a matching rule and a string converter to the NFA. + * Can be used for converting strings into nothing, for example, + * to remove markup. + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_ALREADY Conflict with some other rule + * + */ +int yaz_nfa_add_string_rule( yaz_nfa *n, + yaz_nfa_char *from_string, + size_t from_length, + yaz_nfa_char *to_string, + size_t to_length); + +/** brief Just like yaz_nfa_add_string_rule, but takes the strings in ascii + * + * \param n The nfa itself + * \param from_string the string to match + * \param to_string the string to write in the output + * + * Like yaz_nfa_add_string_rule, this adds a rule to translate a string + * into another. The only difference is that this one takes the strings as + * normal char *, which means that no high-valued unicodes can be handled, + * and that this one uses null-terminated strings. In short, this is a + * simplified version mostly intended for tests and other small uses. + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_ALREADY Conflict with some other rule + */ +int yaz_nfa_add_ascii_string_rule( yaz_nfa *n, + char *from_string, + char *to_string); + + +/** \brief Add a rule that converts a character range + * + * \param n The nfa itself + * \param range_start Where the matching range starts + * \param range_end Where the matching range ends + * \param output_range_start Where the resulting range starts + * + * + * Adds a character range rule to the NFA. The range to be converted + * is defined by the range_start and range_end parameters. The output + * range starts at output_range_start, and is automatically as long + * as the input range. + * + * Useful for alphabet normalizing [a-z] -> [A-Z] + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_ALREADY Conflict with some other rule + */ +int yaz_nfa_add_char_range_rule (yaz_nfa *n, + yaz_nfa_char range_start, + yaz_nfa_char range_end, + yaz_nfa_char output_range_start); + +/** \brief Add a rule that converts a character range to a string + * + * \param n The nfa itself + * \param range_start Where the matching range starts + * \param range_end Where the matching range ends + * \param to_string the string to write in the output + * \param to_length length of the to_string + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_ALREADY Conflict with some other rule + * + * Adds a character range match rule, and a string converter. + * + * Useful in converting a range of special characters into (short?) + * strings of whitespace, or even to nothing at all. + */ +int yaz_nfa_add_char_string_rule (yaz_nfa *n, + yaz_nfa_char range_start, + yaz_nfa_char range_end, + yaz_nfa_char* to_string, + size_t to_length); + +/** \brief Converts one 'slice' that is, the best matching rule. + * + * \param n the nfa itself + * \param inbuff buffer of input data. Will be incremented when match + * \param incharsleft max number of inchars to use from inbuff. decrements. + * \param outbuff buffer for output data. Will be incremented when match + * \param outcharsleft max number of chars to write to outbuff. + * + * \retval YAZ_NFA_SUCCESS OK + * \retval YAZ_NFA_OVERRUN No more input data, some pattern could match + * \retval YAZ_NFA_NOSPACE No room in the putput buffer + * \retval YAZ_NFA_NOSUCHBACKREF NFA refers to a non-existing backref + * + * Finds the best match at the beginning of inbuf, and fires its converter(s) + * to produce output in outbuff. Increments both inbuf and outbuf pointers and + * decrements the *charsleft values, so all is ready for calling again, until + * the buffer is exhausted. That loop is left to the caller, so he can load + * more data in the buffer in good time. + * + * If no match is found, converts one character into itself. If the matcher + * returns any sort of error, leaves the pointers where they were. + */ +int yaz_nfa_convert_slice (yaz_nfa *n, + yaz_nfa_char **inbuff, + size_t *incharsleft, + yaz_nfa_char **outbuff, + size_t *outcharsleft); + + +/* \} */ + +/** \name Debug routines */ +/* These provide a method for traversing all the states defined + * in the NFA, for example to release memory allocated in the results, + * and a simple debug routine to dump the NFA */ +/* \{ */ + /** \brief Get the first state of the NFA. * @@ -346,7 +531,7 @@ yaz_nfa_state *yaz_nfa_get_next(yaz_nfa *n, yaz_nfa_state *s); */ void yaz_nfa_dump(FILE *F, yaz_nfa *n, char *(*strfunc)(void *) ); - +/* \} */