New libstemmer test
author Dennis Schafroth <dennis@indexdata.com>
Thu, 27 Jan 2011 13:32:20 +0000 (14:32 +0100)
committer Dennis Schafroth <dennis@indexdata.com>
Thu, 27 Jan 2011 13:32:20 +0000 (14:32 +0100)
test/.gitignore
test/Makefile.am
test/test_libstemmer.c [new file with mode: 0644]

index 90d4151..cc7ce27 100644 (file)
@@ -34,6 +34,7 @@ test_oid
 test_file_glob
 test_log_thread
 test_mutex
+test_libstemmer
 *.log
 *.o
 *~
index 1974a30..5e92f72 100644 (file)
@@ -7,7 +7,7 @@ check_PROGRAMS = test_xmalloc test_iconv test_nmem test_matchstr test_wrbuf \
  test_comstack test_filepath test_record_conv test_retrieval test_tpath \
  test_timing test_query_charset test_oid test_icu test_match_glob \
  test_rpn2cql test_rpn2solr test_json test_xml_include test_file_glob \
- test_shared_ptr
+ test_shared_ptr test_libstemmer
 
 check_SCRIPTS = tstmarc.sh tstmarccol.sh tstcql2xcql.sh tstcql2pqf.sh tsticu.sh
 
@@ -48,8 +48,9 @@ dist-hook:
 test_odrcodec.c test_odrcodec.h: tstodr.asn $(YAZCOMP)
        cd $(srcdir); $(YAZCOMP) tstodr.asn
 
-LDADD = ../src/libyaz.la
+LDADD = ../src/libyaz.la 
 test_icu_LDADD = ../src/libyaz_icu.la ../src/libyaz.la $(ICU_LIBS)
+test_libstemmer_LDADD = ../src/libyaz_icu.la ../src/libyaz.la $(ICU_LIBS)
 
 CONFIG_CLEAN_FILES=*.log
 
@@ -84,3 +85,4 @@ test_json_SOURCES = test_json.c
 test_xml_include_SOURCES = test_xml_include.c
 test_file_glob_SOURCES = test_file_glob.c
 test_shared_ptr_SOURCES = test_shared_ptr.c
+test_libstemmer_SOURCES = test_libstemmer.c
diff --git a/test/test_libstemmer.c b/test/test_libstemmer.c
new file mode 100644 (file)
index 0000000..e657050
--- /dev/null
@@ -0,0 +1,79 @@
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2011 Index Data
+ * See the file LICENSE for details.
+ */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+#include <stdlib.h>
+#include <stdio.h>
+
+#include <yaz/stemmer.h>
+#include <yaz/test.h>
+
+/* Stem to_stem with stemmer; return 1 if the UTF-8 result equals expected,
+ * 0 on a mismatch or when any conversion/stemming step leaves an error. */
+int test_stemmer_stem(yaz_stemmer_p stemmer, const char* to_stem, const char *expected)
+{
+    struct icu_buf_utf16 *src  = icu_buf_utf16_create(0);
+    struct icu_buf_utf16 *dst  = icu_buf_utf16_create(0);
+    struct icu_buf_utf8  *dst8 = icu_buf_utf8_create(0);
+    UErrorCode status = U_ZERO_ERROR; /* in/out status: must be cleared before use */
+    int rc = 0; /* assume failure until the comparison succeeds */
+
+    icu_utf16_from_utf8_cstr(src, to_stem, &status);
+    if (status == U_ZERO_ERROR)
+        yaz_stemmer_stem(stemmer, dst, src, &status);
+    if (status == U_ZERO_ERROR)
+        icu_utf16_to_utf8(dst8, dst, &status);
+    if (status == U_ZERO_ERROR) /* compare only a fully converted result */
+        rc = strcmp(icu_buf_utf8_to_cstr(dst8), expected) == 0;
+    icu_buf_utf8_destroy(dst8);
+    icu_buf_utf16_destroy(src);
+    icu_buf_utf16_destroy(dst);
+    return rc;
+}
+
+
+
+/* Exercise the "en"/"porter" stemmer on a few words with known stems. */
+static void tst(void)
+{
+    UErrorCode status = U_ZERO_ERROR; /* never pass an indeterminate status */
+    yaz_stemmer_p stemmer = yaz_stemmer_create("en", "porter", &status);
+
+    YAZ_CHECK(stemmer);
+    if (!stemmer)
+        return; /* checked above; a null handle must not be stemmed with */
+
+    /* A wrong expectation must be reported as 0 */
+    YAZ_CHECK(test_stemmer_stem(stemmer, "beer", "water") == 0);
+    /* This word is expected to come back unchanged */
+    YAZ_CHECK(test_stemmer_stem(stemmer, "adadwwr", "adadwwr"));
+    /* Plural "s" is removed */
+    YAZ_CHECK(test_stemmer_stem(stemmer, "beers", "beer"));
+    YAZ_CHECK(test_stemmer_stem(stemmer, "persons", "person"));
+    /* Both "s" and "ing" are removed */
+    YAZ_CHECK(test_stemmer_stem(stemmer, "runs", "run"));
+    YAZ_CHECK(test_stemmer_stem(stemmer, "running", "run"));
+
+    yaz_stemmer_destroy(stemmer);
+}
+
+int main (int argc, char **argv) /* test program entry point */
+{
+    YAZ_CHECK_INIT(argc, argv); /* presumably harness setup from <yaz/test.h> */
+    tst(); /* the only test group in this file */
+    YAZ_CHECK_TERM; /* NOTE(review): no explicit return; confirm the macro supplies one */
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+