Loading of the nfa now possible from an xml file.
authorHeikki Levanto <heikki@indexdata.dk>
Fri, 14 Jul 2006 13:06:37 +0000 (13:06 +0000)
committerHeikki Levanto <heikki@indexdata.dk>
Fri, 14 Jul 2006 13:06:37 +0000 (13:06 +0000)
Had to disable two tests, because make distcheck could not
find the files I wanted to load. After that distcheck passes
all right.

include/yaz/nfa.h
include/yaz/nfaxml.h
src/nfa.c
src/nfaxml.c
test/Makefile.am
test/nfaxmltest1.c

index addea16..7ff29bc 100644 (file)
@@ -1,6 +1,6 @@
 /*  Copyright (C) 2006, Index Data ApS
  *  See the file LICENSE for details.
- *  $Id: nfa.h,v 1.8 2006-07-04 12:59:56 heikki Exp $
+ *  $Id: nfa.h,v 1.9 2006-07-14 13:06:37 heikki Exp $
  */
 
 /**
@@ -534,6 +534,11 @@ void yaz_nfa_dump(FILE *F,
                   yaz_nfa *n, 
                   char *(*strfunc)(void *) ); 
 
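+/** \brief Helper to dump converters 
+ *
+ * Formats a chain of converters as text. Suitable as the strfunc
+ * argument of yaz_nfa_dump. The returned string lives in a static
+ * buffer that is overwritten by the next call.
+ */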
+char *yaz_nfa_dump_converter(void *conv);
+
 /* \} */
 
 
index 6c7436d..98fd8a6 100644 (file)
@@ -1,6 +1,6 @@
 /*  Copyright (C) 2006, Index Data ApS
  *  See the file LICENSE for details.
- *  $Id: nfaxml.h,v 1.4 2006-07-06 13:10:29 heikki Exp $
+ *  $Id: nfaxml.h,v 1.5 2006-07-14 13:06:37 heikki Exp $
  */
 
 /**
@@ -10,7 +10,7 @@
  * The xml file is something like this (using round brackets 
  * on tags, not to confuse our documentation tools)
  *   (?xml ...)
- *   (charmap)
+ *   (ruleset)
  *      (rule)
  *         (fromstring) FOO (/fromstring)
  *         (tostring)   BAR (/tostring)
@@ -48,7 +48,7 @@ YAZ_BEGIN_CDECL
 /** \brief Parse the NFA from a XML document 
  * 
  * \param doc the xml tree to parse
- * \param error_info will be filled in case of errors
+ * \param filename used for info in error messages
  * 
  * \returns either the NFA, or null in case of errors 
  *
@@ -62,7 +62,7 @@ YAZ_BEGIN_CDECL
  * logged in yazlog.
  *
  */
-yaz_nfa *yaz_nfa_parse_xml_doc(xmlDocPtr doc);
+yaz_nfa *yaz_nfa_parse_xml_doc(xmlDocPtr doc, const char *filename);
 
 
 /** \brief Parse the NFA from a file 
@@ -102,7 +102,7 @@ yaz_nfa *yaz_nfa_parse_xml_file(const char *filepath);
  * logged in yazlog.
  *
  */
-yaz_nfa *yaz_nfa_parse_xml_memory(const char *xmlbuff);
+yaz_nfa *yaz_nfa_parse_xml_memory(const char *xmlbuff, const char *filename);
 
 
 YAZ_END_CDECL
index 6046b3d..fe6362e 100644 (file)
--- a/src/nfa.c
+++ b/src/nfa.c
@@ -1,7 +1,7 @@
 /*  Copyright (C) 2006, Index Data ApS
  *  See the file LICENSE for details.
  * 
- *  $Id: nfa.c,v 1.11 2006-07-07 08:36:36 adam Exp $ 
+ *  $Id: nfa.c,v 1.12 2006-07-14 13:06:38 heikki Exp $ 
  */
 
 /**
@@ -748,6 +748,45 @@ void yaz_nfa_dump(FILE *F, yaz_nfa *n,
     }
 }
 
+/** \brief Result buffer of yaz_nfa_dump_converter. Static, so the 
+ * returned string is only valid until the next call. */
+static char buf[5000]="";
+
+/** \brief Append s to buf at offset *used, truncating instead of 
+ * overflowing when the buffer is full */
+static void dump_converter_append(size_t *used, const char *s)
+{
+    size_t room = sizeof(buf) - 1 - *used;
+    size_t n = strlen(s);
+    if (n > room)
+        n = room;
+    memcpy(buf + *used, s, n);
+    *used += n;
+    buf[*used] = 0;
+}
+
+/** \brief Helper to dump converters
+ *
+ * Walks the converter chain starting at conv and builds a textual
+ * description of every converter in it.
+ *
+ * \param conv first yaz_nfa_converter of the chain (may be null)
+ * \returns pointer to a static buffer holding the description. Output
+ * that does not fit in the buffer is silently truncated.
+ */
+char *yaz_nfa_dump_converter(void *conv)
+{
+    char onebuf[500];
+    yaz_nfa_converter *c=conv;
+    yaz_nfa_char *cp;
+    size_t len;
+    size_t used=0;
+    *buf=0;
+    while (c) {
+        switch(c->type) {
+            case conv_none:
+                dump_converter_append(&used,"(none)");
+                break;
+            case conv_string:
+                dump_converter_append(&used,"(string '");
+                cp=c->string;
+                len=c->strlen;
+                while (len--) {
+                    /* one character at a time, narrowed to char */
+                    onebuf[0]=*cp++;
+                    onebuf[1]=0;
+                    dump_converter_append(&used,onebuf);
+                }
+                dump_converter_append(&used,"')");
+                break;
+            case conv_backref:
+                sprintf(onebuf,"(backref %d) ",c->backref_no);
+                dump_converter_append(&used,onebuf);
+                break;
+            case conv_range:
+                sprintf(onebuf,"(range %d) ",c->char_diff);
+                dump_converter_append(&used,onebuf);
+                break;
+            default:
+                /* unknown type: add nothing, rather than stale text */
+                break;
+        }
+        c=c->next;
+    } /* while */
+    return buf;
+}
+
+
 
 /* 
  * Local variables:
index 84a8b0b..94ef5e6 100644 (file)
@@ -1,7 +1,7 @@
 /*  Copyright (C) 2006, Index Data ApS
  *  See the file LICENSE for details.
  * 
- *  $Id: nfaxml.c,v 1.8 2006-07-06 14:06:17 heikki Exp $ 
+ *  $Id: nfaxml.c,v 1.9 2006-07-14 13:06:38 heikki Exp $ 
  */
 
 /**
 #include <yaz/nfaxml.h>
 #include <yaz/libxml2_error.h>
 
+/** \brief How long strings we are willing to handle here */
+#define MAXDATALEN 200 
+
+/** \brief Get content of a node, in utf16, for yaz_nfa 
+ *
+ * Decodes the utf-8 text content of node into buf, one yaz_nfa_char
+ * per character, and terminates it with a zero.
+ *
+ * \param node the xml node to read
+ * \param buf output buffer
+ * \param maxlen size of buf in characters, including the terminator
+ * \param filename used for info in error messages
+ * \param rulenumber used for info in error messages
+ *
+ * \returns the number of characters stored (not counting the 
+ * terminator), or -1 in case of errors. Content that does not fit
+ * is truncated to maxlen-1 characters.
+ */
+static int utf16_content(xmlNodePtr node, yaz_nfa_char *buf, int maxlen,
+        const char *filename, int rulenumber)
+{
+    int bufidx=0;
+    xmlChar *content;
+    xmlChar *cp;
+    int conlen;
+    int len;
+    int res;
+    if (maxlen<1)
+        return -1; /* no room even for the terminator */
+    content = xmlNodeGetContent(node);
+    if (!content) {
+        yaz_log(YLOG_FATAL,"Could not get node content in %s, rule %d ",
+                filename, rulenumber);
+        return -1;
+    }
+    cp=content;
+    conlen=strlen((char *)content);
+    /* stop at maxlen-1, the last slot is reserved for the terminator */
+    while (*cp && (bufidx<maxlen-1) ) {
+        len=conlen;
+        res=xmlGetUTF8Char(cp,&len);
+        if (res==-1) {
+            /* should be caught earlier */
+            yaz_log(YLOG_FATAL,"Illegal utf-8 sequence "
+                    "%d bytes into '%s' in %s, rule %d ",
+                    (int)(cp-content), content, filename, rulenumber);
+            xmlFree(content);
+            return -1;
+        }
+        buf[bufidx++]=res;
+        cp +=len;
+        conlen -=len;
+    }
+    buf[bufidx]=0;
+    xmlFree(content);
+    return bufidx;
+}
+
+
+/** \brief Parse a range clause like 'a-z'
+ *
+ * Reads the content of node as three utf-8 characters: the start of
+ * the range, a literal '-', and the end of the range.
+ *
+ * \param node the xml node holding the range
+ * \param range_start receives the first character of the range
+ * \param range_end receives the last character of the range
+ * \param filename used for info in error messages
+ * \param rulenumber used for info in error messages
+ *
+ * \returns 1 if the range was parsed, 0 in case of errors (logged)
+ */
+static int parse_range(xmlNodePtr node, 
+        yaz_nfa_char *range_start,
+        yaz_nfa_char *range_end,
+        const char *filename, int rulenumber )
+{
+    xmlChar *content = xmlNodeGetContent(node);
+    xmlChar *cp=content;
+    int conlen;
+    int len;
+    int res=-1;
+    if (content) {
+        conlen=strlen((char *)content);
+        len=conlen;
+        res=xmlGetUTF8Char(cp,&len);
+        if ( res != -1 ) {
+            *range_start=res;
+            cp +=len;
+            conlen -=len;
+            len=conlen;
+            res=xmlGetUTF8Char(cp,&len);
+            if (res != '-' )
+                res = -1;
+        }
+        if ( res != -1 ) {
+            cp +=len;
+            conlen -=len;
+            len=conlen;
+            res=xmlGetUTF8Char(cp,&len);
+        }
+        if ( res != -1 ) {
+            *range_end=res;
+        }
+    }
+    if (res==-1) {
+        /* log before freeing, the message quotes the content */
+        yaz_log(YLOG_FATAL,"Illegal range. '%s'. Must be like 'a-z' "
+                "in %s, rule %d ",
+                content ? (const char *) content : "",
+                filename, rulenumber);
+        if (content)
+            xmlFree(content);
+        return 0;
+    }
+    xmlFree(content);
+    return 1;
+} /* parserange */
+
+
+
+/** \brief Parse a fromstring clause 
+ *
+ * Converts the node content with utf16_content and adds it to the
+ * nfa as a sequence.
+ *
+ * \returns the value of yaz_nfa_add_sequence, or null if the content
+ * could not be converted
+ */
+static yaz_nfa_state *parse_fromstring(yaz_nfa *nfa, 
+        xmlNodePtr node, const char *filename, int rulenumber )
+{
+    yaz_nfa_char chars[MAXDATALEN];
+    int nchars=utf16_content(node, chars, MAXDATALEN, filename, rulenumber);
+    if (nchars>=0) 
+        return yaz_nfa_add_sequence(nfa, 0, chars, nchars);
+    return 0;
+} /* parse_fromstring */
+
+/** \brief Parse a tostring clause 
+ *
+ * Converts the node content with utf16_content and wraps it in a
+ * string converter.
+ *
+ * \returns the value of yaz_nfa_create_string_converter, or null if
+ * the content could not be converted
+ */
+static yaz_nfa_converter *parse_tostring(yaz_nfa *nfa,
+                xmlNodePtr node, const char *filename, int rulenumber )
+{
+    yaz_nfa_char chars[MAXDATALEN];
+    int nchars=utf16_content(node, chars, MAXDATALEN, filename, rulenumber);
+    if (nchars>=0) 
+        return yaz_nfa_create_string_converter(nfa, chars, nchars);
+    return 0;
+} /* parse_tostring */
+
+/** \brief Parse a fromrange clause
+ *
+ * Parses the range in node and adds it to the nfa. The range limits
+ * are also stored in from_begin and from_end, so that the matching
+ * torange clause can be checked against them. Nothing is stored 
+ * if the range can not be parsed.
+ *
+ * \returns the value of yaz_nfa_add_range, or null on a bad range
+ */
+static yaz_nfa_state * parse_fromrange(yaz_nfa *nfa,
+                xmlNodePtr node, 
+                yaz_nfa_char *from_begin,
+                yaz_nfa_char *from_end,
+                const char *filename, int rulenumber )
+{
+    yaz_nfa_char first;
+    yaz_nfa_char last;
+    if (!parse_range(node, &first, &last, filename, rulenumber))
+        return 0;
+    /* save for calculating the to-range */
+    *from_begin=first;
+    *from_end=last;
+    return yaz_nfa_add_range(nfa, 0, first, last);
+} /* parse_fromrange */
+
+/** \brief Parse a torange clause
+ *
+ * Parses the range in node and checks that it spans as many 
+ * characters as the from-range given in from_begin and from_end.
+ *
+ * \returns the value of yaz_nfa_create_range_converter, or null if
+ * the range is bad or the lengths differ (logged)
+ */
+static yaz_nfa_converter *parse_torange(yaz_nfa *nfa,
+             xmlNodePtr node, yaz_nfa_char from_begin, yaz_nfa_char from_end,
+             const char *filename, int rulenumber )
+{
+    yaz_nfa_char to_first;
+    yaz_nfa_char to_last;
+    if (!parse_range(node, &to_first, &to_last, filename, rulenumber))
+        return 0;
+    if ( from_end - from_begin == to_last - to_first ) 
+        return yaz_nfa_create_range_converter(nfa, 0, from_begin, to_first);
+    yaz_log(YLOG_FATAL,"From-range not as long as to-range: "
+            "from=%x-%x to=%x-%x in rule %d in %s",
+            from_begin, from_end,  to_first, to_last, rulenumber, filename);
+    return 0;
+} /* parse_torange */
+
+/** \brief Parse one rule from an XML node 
+ *
+ * Goes through the element children of rulenode. Each must be one of
+ * fromstring, tostring, fromrange or torange. A rule must end up with
+ * exactly two clauses, a 'from' and a 'to', which are then tied 
+ * together with yaz_nfa_set_result.
+ *
+ * \returns 1 if the rule was added, 0 in case of errors (logged)
+ */
+static int parse_rule(yaz_nfa *nfa, xmlNodePtr rulenode, 
+        const char *filename, int rulenumber ) 
+{
+    yaz_nfa_state *from_state=0;
+    yaz_nfa_converter *to_conv=0;
+    yaz_nfa_char first=0, last=0;
+    xmlNodePtr child=rulenode->children;
+    int nclauses=0;
+    while (child)
+    {
+        if (child->type == XML_ELEMENT_NODE)
+        {
+            const char *tag=(const char *) child->name;
+            nclauses++;
+            if (0==strcmp(tag, "fromstring")) 
+            {
+                from_state = parse_fromstring(nfa, child, 
+                        filename, rulenumber );
+                if (!from_state)
+                    return 0;
+            } 
+            else if (0==strcmp(tag, "tostring")) 
+            {
+                to_conv = parse_tostring(nfa, child, filename, rulenumber );
+                if (!to_conv)
+                    return 0;
+            } 
+            else if (0==strcmp(tag, "fromrange")) 
+            {
+                from_state = parse_fromrange(nfa, child, 
+                        &first, &last, filename, rulenumber );
+                if (!from_state)
+                    return 0;
+            } 
+            else if (0==strcmp(tag, "torange")) 
+            {
+                /* uses the limits saved by an earlier fromrange */
+                to_conv = parse_torange(nfa, child, 
+                        first, last, filename, rulenumber );
+                if (!to_conv)
+                    return 0;
+            } 
+            else 
+            {
+                yaz_log(YLOG_FATAL,"Unknown clause '%s' in %s rule %d",
+                        child->name, filename,rulenumber);
+                return 0;
+            }
+        }
+        child = child->next;
+    } /* while child */
+    if (!from_state) {
+        yaz_log(YLOG_FATAL,"No 'from' clause in a rule %d in %s", 
+                rulenumber,filename);
+        return 0;
+    }
+    if (!to_conv) {
+        yaz_log(YLOG_FATAL,"No 'to' clause in a rule %d in %s",
+                rulenumber,filename);
+        return 0;
+    }
+    if (nclauses != 2) {
+        yaz_log(YLOG_FATAL,"Must have exactly one 'from' and one 'to' clause "
+                "in rule %d in %s", rulenumber,filename);
+        return 0;
+    }
+    if ( YAZ_NFA_SUCCESS != yaz_nfa_set_result(nfa,from_state,to_conv)) {
+        yaz_log(YLOG_FATAL,"Conflicting rules in %s rule %d",
+                filename, rulenumber);
+        return 0;
+    }
+    return 1; 
+} /* parse_rule */
+
+
+
+
 /** \brief Parse the NFA from a XML document 
  */
-yaz_nfa *yaz_nfa_parse_xml_doc(xmlDocPtr doc)
+yaz_nfa *yaz_nfa_parse_xml_doc(xmlDocPtr doc, const char *filename)
 {
-    libxml2_error_to_yazlog(YLOG_FATAL, "yaz_nfa_parse_doc");
+    xmlNodePtr node;
+    yaz_nfa *nfa;
+    int rulenumber=0;

     if (!doc)
         return 0;
-
-    return 0;
-}
+    /* filename is only used in log messages, never pass null to %s */
+    if (!filename)
+        filename = "(unknown)";
+    libxml2_error_to_yazlog(YLOG_FATAL, "yaz_nfa_parse_doc");
+    node = xmlDocGetRootElement(doc);
+    if (!node || node->type != XML_ELEMENT_NODE ||
+        strcmp((const char *) node->name, "ruleset")) 
+    {
+        yaz_log(YLOG_FATAL,"nfa_parse_xml: Could not find root element 'ruleset' "
+                "in %s", filename);
+        return 0;
+    }
+    nfa= yaz_nfa_init();
+    if (!nfa) 
+    {
+        yaz_log(YLOG_FATAL,"nfa_parse_xml: Creating nfa failed, can't parse %s",
+                filename);
+        return 0;
+    }
+        
+    /* Every element under the root must be a rule. On any failure the
+     * half-built nfa is destroyed, the caller only gets null */
+    for (node = node->children; node; node = node->next)
+    {
+        if (node->type != XML_ELEMENT_NODE)
+            continue;
+        if (strcmp((const char *) node->name, "rule")) {
+            yaz_log(YLOG_FATAL,"nfa_parse_xml: "
+                    "expected 'rule', found '%s' in %s", 
+                    (const char *) node->name,filename);
+            yaz_nfa_destroy(nfa);
+            return 0;
+        }
+        if (!parse_rule(nfa,node,filename,rulenumber++)) {
+            yaz_nfa_destroy(nfa);
+            return 0;
+        }
+    } /* for */
+    return nfa;
+} /* yaz_nfa_parse_xml_doc */
 
 
 /** \brief Parse the NFA from a file 
  */
-yaz_nfa *yaz_nfa_parse_xml_file(const char *filepath) {
+yaz_nfa *yaz_nfa_parse_xml_file(const char *filepath) 
+{
     int nSubst;
-
     xmlDocPtr doc;
+    if (!filepath) 
+    {
+        yaz_log(YLOG_FATAL,"yaz_nfa_parse_xml_file called with NULL");
+        return 0;
+    }
     libxml2_error_to_yazlog(YLOG_FATAL, "yaz_nfa_parse_xml_file");
 
     doc = xmlParseFile(filepath);
@@ -54,16 +296,21 @@ yaz_nfa *yaz_nfa_parse_xml_file(const char *filepath) {
     if (nSubst==-1) {
         return 0;
     }
-    return yaz_nfa_parse_xml_doc(doc);
+    return yaz_nfa_parse_xml_doc(doc, filepath);
 }
 
 /** \brief Parse the NFA from a memory buffer
  */
-yaz_nfa *yaz_nfa_parse_xml_memory(const char *xmlbuff) {
+yaz_nfa *yaz_nfa_parse_xml_memory(const char *xmlbuff, const char *filename) {
     xmlDocPtr doc;
+    yaz_nfa *nfa;
+    if (!xmlbuff) 
+    {
+        yaz_log(YLOG_FATAL,"yaz_nfa_parse_xml_memory called with NULL");
+        return 0;
+    }
     libxml2_error_to_yazlog(YLOG_FATAL, "yaz_nfa_parse_xml_memory");
     doc = xmlParseMemory(xmlbuff, strlen(xmlbuff));
-    return yaz_nfa_parse_xml_doc(doc);
+    nfa = yaz_nfa_parse_xml_doc(doc,filename);
+    /* the document was created here and is not needed after parsing,
+     * so it is released here too. A null doc is not freed. */
+    if (doc)
+        xmlFreeDoc(doc);
+    return nfa;
 }
 
 
index bbce667..6605a2a 100644 (file)
@@ -1,6 +1,6 @@
 ## Copyright (C) 1994-2006, Index Data ApS
 ## All rights reserved.
-## $Id: Makefile.am,v 1.23 2006-07-07 13:39:05 heikki Exp $
+## $Id: Makefile.am,v 1.24 2006-07-14 13:06:38 heikki Exp $
 
 check_PROGRAMS = tsticonv tstnmem tstmatchstr tstwrbuf tstodr tstccl tstlog \
  tstsoap1 tstsoap2 tstodrstack tstlogthread tstxmlquery tstpquery \
@@ -19,7 +19,9 @@ EXTRA_DIST = tstodr.asn tstodrcodec.c tstodrcodec.h cqlsample \
  marc4 marc4.xml marc4.chr marc4.xml.marc \
  marc5 marc5.xml marc5.chr marc5.xml.marc \
  marc6 marc6.xml marc6.chr marc6.xml.marc \
- tst_record_conv.xsl
+ tst_record_conv.xsl \
+ nfaxml-simple.xml nfaxml-main.xml \
+ nfaxml-include.xml nfaxml-badinclude.xml  
 
 YAZCOMP = ../util/yaz-asncomp
 YAZCOMPLINE = $(YAZCOMP) -d z.tcl -i yaz -I../include $(YCFLAGS)
index b6cf6d5..754be3b 100644 (file)
@@ -1,7 +1,7 @@
 /*  Copyright (C) 2006, Index Data ApS
  *  See the file LICENSE for details.
  *
- *  $Id: nfaxmltest1.c,v 1.6 2006-07-07 13:39:05 heikki Exp $
+ *  $Id: nfaxmltest1.c,v 1.7 2006-07-14 13:06:38 heikki Exp $
  *
  */
 
@@ -25,8 +25,9 @@ void test1() {
                    "  <tostring>bar</tostring> "
                    "</rule>"
                    "</ruleset>";
-    yaz_nfa *nfa = yaz_nfa_parse_xml_memory(xmlstr);
-    YAZ_CHECK_TODO(nfa);
+    yaz_nfa *nfa = yaz_nfa_parse_xml_memory(xmlstr,"test1");
+    YAZ_CHECK(nfa);
+    yaz_nfa_destroy(nfa);
 }
 
 
@@ -40,7 +41,9 @@ void test2() {
                    "</rule>";
                  /* missing "</ruleset>" */
     yaz_log(YLOG_LOG,"Parsing bad xml, expecting errors:");
-    nfa = yaz_nfa_parse_xml_memory(xmlstr);
+    nfa = yaz_nfa_parse_xml_memory(xmlstr,"test2");
+    YAZ_CHECK(!nfa);
+    nfa = yaz_nfa_parse_xml_memory(0,"test2-null");
     YAZ_CHECK(!nfa);
 }
 
@@ -59,7 +62,7 @@ void test3() {
     do {
         yaz_log(YLOG_LOG,"Parsing (good) xml file '%s'", *f);
         nfa=yaz_nfa_parse_xml_file(*f);
-        YAZ_CHECK_TODO(nfa);
+        YAZ_CHECK_TODO(nfa);  /* fails on make distcheck, can't find the files*/
     } while (*++f);
 
     f = badfilenames;
@@ -70,6 +73,232 @@ void test3() {
     } while (*++f);
 }
 
+/** \brief  Test parsing of a few minimal xml strings, with logical errors 
+ *
+ * Every string in the table must be rejected by the parser. The 
+ * cases are labeled 'a', 'b', ... in the log, in table order.
+ */
+void test4() {
+    char *xmls[] = { 
+      /*a*/"<missingruleset>   <foo/>   </missingruleset>",
+      /*b*/"<ruleset> <missingrule/> </ruleset>",
+      /*c*/"<ruleset> <rule> <garbagerule/> </rule> </ruleset>",
+      /*d*/"<ruleset><rule>"
+              "<fromstring>MissingTo</fromstring>"
+           "</rule></ruleset>",
+      /*e*/"<ruleset><rule>"
+              "<fromstring>DuplicateFrom</fromstring>"
+              "<fromstring>Another Fromstring</fromstring>"
+           "</rule></ruleset>",
+      /*f*/"<ruleset><rule>"
+              "<tostring>MissingFrom</tostring>"
+           "</rule></ruleset>",
+      /*g*/"<ruleset><rule>"
+              "<tostring>DuplicateTo</tostring>"
+              "<tostring>AnotherTo</tostring>"
+           "</rule></ruleset>",
+      /*h*/"<ruleset><rule>"
+              "<fromstring>GoodUTF:æøå</fromstring>"
+           "</rule></ruleset>",
+      /*i*/"<ruleset><rule>"
+              "<tostring>BadUtf8:Ø</tostring>"
+           "</rule></ruleset>",
+      /*j*/"<ruleset>"
+             "<rule>"
+               "<fromstring>ConflictingRules</fromstring>"
+               "<tostring>IdenticalStrings</tostring>"
+             "</rule>"
+             "<rule>"
+               "<fromstring>ConflictingRules</fromstring>"
+               "<tostring>IdenticalStrings</tostring>"
+             "</rule>"
+           "</ruleset>",
+      /*k*/"", /* empty string! */
+      /*l*/"<ruleset>"
+             "<rule>"
+               "<fromrange>A-Z</fromrange>"
+               "<torange>a-x</torange>"
+             "</rule>"
+           "</ruleset>",
+              0 };
+    char label[2];
+    int i;
+    label[0]='a';
+    label[1]=0;
+    /* the table ends with a null pointer */
+    for (i=0; xmls[i]; i++, label[0]++) {
+        yaz_nfa *nfa;
+        yaz_log(YLOG_LOG,"test4-%s: Parsing bad xml, expecting errors:",
+                label);
+        nfa = yaz_nfa_parse_xml_memory(xmls[i],label);
+        YAZ_CHECK(!nfa);
+    }
+} /* test4 */
+
+/** \brief Test conversions with nfas parsed from xml
+ *
+ * Each test parses a ruleset, runs yaz_nfa_convert_slice over the 
+ * 'from' string until the input is used up (or no progress is made),
+ * and compares the return code and the output against the expected
+ * values. A test without xml reuses the xml of the previous test.
+ */
+static void test5() {
+    struct conv_test {
+        const char *name;  /* label for logs, null ends the table */
+        int expresult;     /* expected rc of the last convert_slice */
+        const char *xml;   /* the ruleset, null to reuse the previous */
+        const char *from;  /* input, one yaz_nfa_char per byte */
+        const char *to;    /* expected output */
+    };
+    struct conv_test tests[]= {
+        { "test5-1",  YAZ_NFA_SUCCESS,
+             "<ruleset>"
+               "<rule>"
+                 "<fromstring>foo</fromstring>"
+                 "<tostring>bar</tostring>"
+               "</rule>"
+             "</ruleset>",
+             "this is a foo test ofoofo fo foofoo fofoofooofoooo ",
+             "this is a bar test obarfo fo barbar fobarbarobaroo "
+        },
+        { "test5-2",  YAZ_NFA_SUCCESS,
+             "<ruleset>"
+               "<rule>"
+                 "<fromstring>ooooo</fromstring>"
+                 "<tostring>five </tostring>"
+               "</rule>"
+               "<rule>"
+                 "<fromstring>oooo</fromstring>"
+                 "<tostring>four </tostring>"
+               "</rule>"
+               "<rule>"
+                 "<fromstring>ooo</fromstring>"
+                 "<tostring>three </tostring>"
+               "</rule>"
+               "<rule>"
+                 "<fromstring>oo</fromstring>"
+                 "<tostring>two </tostring>"
+               "</rule>"
+             "</ruleset>",
+             "oo-oooo-",
+             "two -four -"
+        },
+        { "test5-3",  YAZ_NFA_SUCCESS, 0, /* same xml */
+             "oo-oooo-ooooooo-",
+             "two -four -five two -"
+        },
+        { "test5-4",  YAZ_NFA_OVERRUN, 0, /* could match further oo's */
+             "oo-oooo-ooooooo",
+             "two -four -five "  
+        },
+        { "test5-5 (lowercase)",  YAZ_NFA_SUCCESS,
+             "<ruleset>"
+               "<rule>"
+                 "<fromrange>A-Z</fromrange>"
+                 "<torange>a-z</torange>"
+               "</rule>"
+             "</ruleset>",
+             "LowerCase TEST with A-Z and a-z",
+             "lowercase test with a-z and a-z"
+        },
+        { "test5-6 (lowercase entities)",  YAZ_NFA_SUCCESS,
+             "<ruleset>"
+               "<rule>"
+                 "<fromrange>&#x41;-Z</fromrange>"
+                 "<torange>&#97;-&#x7A;</torange>"
+               "</rule>"
+             "</ruleset>",
+             "LowerCase TEST with A-Z and a-z (and &#41; &#5A; )",
+             "lowercase test with a-z and a-z (and &#41; &#5a; )"
+        },
+        { "test5-7 (danish lowercase)",  YAZ_NFA_SUCCESS,
+             "<ruleset>"
+               "<rule>"
+                 "<fromrange>A-Z</fromrange>"
+                 "<torange>a-z</torange>"
+               "</rule>"
+               "<rule>"
+                 "<fromrange>&#xC0;-&#xD6;</fromrange>"
+                 "<torange>&#xE0;-&#xF6;</torange>"
+               "</rule>"
+               "<rule>"
+                 "<fromrange>&#xD8;-&#xDF;</fromrange>"
+                 "<torange>&#xF8;-&#xFF;</torange>"
+               "</rule>"
+               "<rule>"
+                 "<fromstring>&#xC5;</fromstring>"
+                 "<tostring>&#xE5;</tostring>"
+               "</rule>"
+               "<rule>"
+                 "<fromstring>D&#xe4;nish</fromstring>"
+                 "<tostring>D&#xc4;NISH</tostring>"
+               "</rule>"
+             "</ruleset>",
+             "LowerCase TEST with Dänish Å !? åæø ÅÆØ XYZ",
+             "lowercase test with DÄNISH å !? åæø åæø xyz"
+        },
+        {0,0,0,0,0}
+    };
+    const char *xml=0;
+#define MAXBUF 2048    
+    yaz_nfa *nfa;
+    yaz_nfa_char frombuf[MAXBUF];
+    yaz_nfa_char tobuf[MAXBUF];
+    unsigned char charbuf[MAXBUF];
+    struct conv_test *thistest=tests;
+    const unsigned char *cp;
+    unsigned char *outcp;
+    yaz_nfa_char *ycp;
+    size_t incharsleft;
+    size_t outcharsleft;
+    size_t prev_incharsleft;
+    int rc;
+    yaz_nfa_char *fromp;
+    yaz_nfa_char *top;
+    for ( ; thistest->name; thistest++) {
+        yaz_log(YLOG_DEBUG,"Starting test %s",thistest->name);
+        if (thistest->xml)
+            xml=thistest->xml;
+        nfa = yaz_nfa_parse_xml_memory(xml, thistest->name);
+        YAZ_CHECK(nfa);
+        if (!nfa) 
+            continue;
+        if ( yaz_test_get_verbosity() > 3) {
+            yaz_nfa_dump(0,nfa,yaz_nfa_dump_converter);
+        }
+        incharsleft = strlen(thistest->from);
+        /* the input and its terminator must fit in frombuf */
+        YAZ_CHECK( incharsleft < MAXBUF );
+        if ( incharsleft >= MAXBUF ) {
+            yaz_nfa_destroy(nfa);
+            continue;
+        }
+        ycp=frombuf;
+        /* go through unsigned char, so that bytes above 0x7f do not
+         * get sign-extended when widened */
+        cp=(const unsigned char *) thistest->from;
+        while ( (*ycp++ = *cp++) )
+            ; /* strcpy, but expand to yaz_nfa_chars */
+        prev_incharsleft = 0;
+        outcharsleft = MAXBUF-1;
+        fromp = frombuf;
+        top = tobuf;
+        rc = YAZ_NFA_SUCCESS;
+        while ( (rc == YAZ_NFA_SUCCESS) && (incharsleft>0) && 
+                (prev_incharsleft != incharsleft ) )  /* prevent loops */
+        {
+            prev_incharsleft=incharsleft;
+            rc=yaz_nfa_convert_slice(nfa, &fromp, &incharsleft,
+                    &top, &outcharsleft);
+        }
+        YAZ_CHECK_EQ(rc, thistest->expresult);
+        if ( (rc == thistest->expresult) &&
+             (rc == YAZ_NFA_SUCCESS)) {
+            YAZ_CHECK_EQ(incharsleft, 0);
+            YAZ_CHECK( prev_incharsleft != incharsleft ); 
+        }
+        /* narrow the result back to bytes for comparing */
+        ycp=tobuf;
+        outcp=charbuf;
+        while (ycp != top )
+            *outcp++ = *ycp++;
+        *outcp=0;
+        if ( yaz_test_get_verbosity() > 2) {
+            printf("%s from:   '%s' \n",thistest->name, thistest->from);
+            printf("%s result: '%s' \n",thistest->name, charbuf);
+            printf("%s expect: '%s' \n",thistest->name, thistest->to);
+        }
+        YAZ_CHECK( 0==strcmp(thistest->to,(const char *) charbuf) );
+        yaz_nfa_destroy(nfa);
+    }
+    
+} /* test5 */
+
+
+/* More things to test:
+ *
+ *   - Empty strings in to/from
+ *   - ranges, length mismatches, etc
+ */
 
 int main(int argc, char **argv)
 {
@@ -80,6 +309,8 @@ int main(int argc, char **argv)
     test1();
     test2();
     test3();
+    test4();
+    test5();
 
     nmem_exit ();
     YAZ_CHECK_TERM;