/* Copyright (C) 2006, Index Data ApS
* See the file LICENSE for details.
- * $Id: nfa.h,v 1.2 2006-05-03 11:09:59 heikki Exp $
+ * $Id: nfa.h,v 1.6 2006-05-05 14:02:27 heikki Exp $
*/
/**
* possible sequence of input characters that match the ranges in the
* conditions, and that leads into a terminal state.
*
+ * Separate from this we have converters. Those can often be used
+ * together with a NFA (think match-pattern and replace-pattern).
+ *
+ * A converter is a routine that produces some output. It can translate a
+ * range of characters into another range, emit a constant string, or
+ * something like that.
+ *
*/
-#ifndef NFA_H
-#define NFA_H
+#ifndef YAZ_NFA_H
+#define YAZ_NFA_H
#include <yaz/yconfig.h>
typedef struct yaz_nfa_transition yaz_nfa_transition;
+/** \brief Simple character range converter */
+typedef struct yaz_nfa_converter yaz_nfa_converter;
+
+
+
/** \brief Initialize the NFA without any states in it
*
* \return a pointer to the newly created NFA
yaz_nfa *n /** The NFA itself */,
yaz_nfa_state *s /** The state whose result you want */);
-/** \brief Set the backref number to a state.
+/** \brief Set a backref point to a state.
*
- * Each state can be the beginning and/or ending of a backref
- * sequence. This call sets those flags in the states. After matching,
- * we can get hold of the backrefs that matched, and use them in our
- * translations. The backrefs start at 1, not zero!
+ * Each state can be the beginning and/or ending point of a backref
+ * sequence. This call sets one of those flags in the state. After
+ * matching, we can get hold of the backrefs that matched, and use
+ * them in our translations. The numbering of backrefs starts at 1,
+ * not zero!
*
* \param n the nfa
* \param s the state to add to
*
*/
-int yaz_nfa_set_backref(yaz_nfa *n, yaz_nfa_state *s,
+int yaz_nfa_set_backref_point(yaz_nfa *n, yaz_nfa_state *s,
int backref_number,
int is_start );
-/** \brief Get the backref number of a state.
+/** \brief Get the backref point of a state
*
* \param n the nfa
* \param s the state to add to
* \return the backref number associated with the state, or 0 if none.
*/
-int yaz_nfa_get_backref(yaz_nfa *n, yaz_nfa_state *s,
+int yaz_nfa_get_backref_point(yaz_nfa *n, yaz_nfa_state *s,
int is_start );
/** \brief Add a transition to the NFA.
* Add a transition between two existing states. The condition
* is (as always) a range of yaz_nfa_chars.
* \param n the nfa
- * \param from_state which state the transition is from
+ * \param from_state which state the transition is from (null means the initial state)
* \param to_state where the transition goes to
* \param range_start is the beginning of the range of values
* \param range_end is the end of the range of values
*
* \param n the nfa itself
* \param inbuff buffer of input data. Will be incremented when match
- * \param incharsleft max number of inchars to use from inbuff
+ * \param incharsleft max number of inchars to use from inbuff; the count is decremented
* \param result the result pointer from the nfa (what ever that is)
*
* In case of errors, returns the best match so far,
*
*/
-int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t incharsleft,
+int yaz_nfa_match(yaz_nfa *n, yaz_nfa_char **inbuff, size_t *incharsleft,
void **result );
+/** yaz_nfa_match return codes */
#define YAZ_NFA_SUCCESS 0
#define YAZ_NFA_NOMATCH 1
#define YAZ_NFA_OVERRUN 2
#define YAZ_NFA_LOOP 3
+/** \brief Get a back reference after a successful match.
+ *
+ * \param n the nfa
+ * \param backref_no the number of the backref to get
+ * \param start beginning of the matching substring
+ * \param end end of the matching substring
+ *
+ * Returns pointers to the beginning and end of a backref, or null
+ * pointers if an endpoint was not met. Those pointers point to the
+ * original buffer that was matched, so the caller will not have to
+ * worry about freeing anything special.
+ *
+ * It is technically possible to create NFAs that meet the start but
+ * not the end of a backref. It is up to the caller to decide how
+ * to handle such a situation.
+ *
+ * \retval 0 OK
+ * \retval 1 no match
+ * \retval 2 no such backref
+ */
+
+int yaz_nfa_get_backref( yaz_nfa *n,
+ int backref_no,
+ yaz_nfa_char **start,
+ yaz_nfa_char **end );
+
+/** \brief Create a string converter.
+ * \param n the nfa
+ * \param string the string to output
+ * \param length how many chars in the string
+ *
+ * This converter produces a constant string in the output
+ */
+yaz_nfa_converter *yaz_nfa_create_string_converter (
+ yaz_nfa *n,
+ yaz_nfa_char *string,
+ size_t length );
+
+/** \brief Create a backref converter
+ * \param n the nfa
+ * \param backref_no The backreference to reproduce
+ *
+ * This converter copies a backref into the output buffer
+ */
+yaz_nfa_converter *yaz_nfa_create_backref_converter (
+ yaz_nfa *n, int backref_no );
+
+
+/** \brief Create a character range converter
+ * \param n the nfa
+ * \param backref_no The backreference to reproduce
+ * \param from_char the first character of the original range
+ * \param to_char the first character of the target range
+ *
+ * This converter takes a backreference, and shifts the characters
+ * by a constant value. For example, translating a-z to A-Z.
+ * Note that backref 0 is always the last character that matched a
+ * range, even if no backrefs were defined in the nfa. This makes
+ * it pretty useful with this converter.
+ *
+ */
+yaz_nfa_converter *yaz_nfa_create_range_converter (
+ yaz_nfa *n, int backref_no,
+ yaz_nfa_char from_char,
+ yaz_nfa_char to_char);
+
+
+/** \brief Connects converters in a chain.
+ * \param n the nfa (mostly for nmem access)
+ * \param startpoint the first converter in the chain
+ * \param newconverter the converter to place at the end of the chain
+ *
+ * Places the new converter at the end of the chain that starts from
+ * startpoint.
+ *
+ */
+void yaz_nfa_append_converter (
+ yaz_nfa *n,
+ yaz_nfa_converter *startpoint,
+ yaz_nfa_converter *newconverter);
+
+/** \brief Runs the chain of converters.
+ * \param n the nfa (mostly for nmem access)
+ * \param c the first converter in a chain
+ * \param outbuff buffer to write the output in. Increments the ptr.
+ * \param outcharsleft how many may we write
+ *
+ * Runs the converters in the chain, placing output into outbuff
+ * (and incrementing the pointer).
+ *
+ * \retval 0 OK
+ * \retval 1 no match to get backrefs from
+ * \retval 2 no room in outbuff
+ *
+ */
+int yaz_nfa_run_converters(
+ yaz_nfa *n,
+ yaz_nfa_converter *c,
+ yaz_nfa_char **outbuff,
+ size_t *outcharsleft);
+
/** \brief Get the first state of the NFA.
*
void yaz_nfa_dump(FILE *F, yaz_nfa *n, char *(*strfunc)(void *) );
+
+
+
YAZ_END_CDECL
#endif