From 33054ee1b9de573a3de49e09267b1bfb912c5a0c Mon Sep 17 00:00:00 2001 From: Dennis Schafroth Date: Thu, 27 Jan 2011 14:32:20 +0100 Subject: [PATCH] New libstemmer test --- test/.gitignore | 1 + test/Makefile.am | 6 ++-- test/test_libstemmer.c | 79 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 test/test_libstemmer.c diff --git a/test/.gitignore b/test/.gitignore index 90d4151..cc7ce27 100644 --- a/test/.gitignore +++ b/test/.gitignore @@ -34,6 +34,7 @@ test_oid test_file_glob test_log_thread test_mutex +test_libstemmer *.log *.o *~ diff --git a/test/Makefile.am b/test/Makefile.am index 1974a30..5e92f72 100644 --- a/test/Makefile.am +++ b/test/Makefile.am @@ -7,7 +7,7 @@ check_PROGRAMS = test_xmalloc test_iconv test_nmem test_matchstr test_wrbuf \ test_comstack test_filepath test_record_conv test_retrieval test_tpath \ test_timing test_query_charset test_oid test_icu test_match_glob \ test_rpn2cql test_rpn2solr test_json test_xml_include test_file_glob \ - test_shared_ptr + test_shared_ptr test_libstemmer check_SCRIPTS = tstmarc.sh tstmarccol.sh tstcql2xcql.sh tstcql2pqf.sh tsticu.sh @@ -48,8 +48,9 @@ dist-hook: test_odrcodec.c test_odrcodec.h: tstodr.asn $(YAZCOMP) cd $(srcdir); $(YAZCOMP) tstodr.asn -LDADD = ../src/libyaz.la +LDADD = ../src/libyaz.la test_icu_LDADD = ../src/libyaz_icu.la ../src/libyaz.la $(ICU_LIBS) +test_libstemmer_LDADD = ../src/libyaz_icu.la ../src/libyaz.la $(ICU_LIBS) CONFIG_CLEAN_FILES=*.log @@ -84,3 +85,4 @@ test_json_SOURCES = test_json.c test_xml_include_SOURCES = test_xml_include.c test_file_glob_SOURCES = test_file_glob.c test_shared_ptr_SOURCES = test_shared_ptr.c +test_libstemmer_SOURCES = test_libstemmer.c diff --git a/test/test_libstemmer.c b/test/test_libstemmer.c new file mode 100644 index 0000000..e657050 --- /dev/null +++ b/test/test_libstemmer.c @@ -0,0 +1,79 @@ +/* This file is part of the YAZ toolkit. 
+ * Copyright (C) 1995-2011 Index Data
+ * See the file LICENSE for details.
+ */
+#if HAVE_CONFIG_H
+#include <config.h>
+#endif
+
+/* NOTE(review): the header names were missing from the patch text as
+ * received; the ones below are inferred from the symbols this file uses
+ * (strcmp, yaz_stemmer_*, icu_buf_*, YAZ_CHECK*) -- confirm against the
+ * source tree before applying. */
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+
+#include <yaz/stemmer.h>
+#include <yaz/test.h>
+
+/* Stem one word and compare the outcome with the expected stem.
+ *
+ * to_stem is passed through icu_utf16_from_utf8_cstr, yaz_stemmer_stem and
+ * icu_utf16_to_utf8 in turn; the resulting C string is compared with
+ * expected using strcmp.
+ *
+ * stemmer   stemmer handle to exercise
+ * to_stem   input word
+ * expected  stem the caller expects for to_stem
+ *
+ * Returns 1 when every step left status at U_ZERO_ERROR and the stemmed
+ * string equals expected; returns 0 otherwise.  The three scratch buffers
+ * are released on every path.
+ */
+int test_stemmer_stem(yaz_stemmer_p stemmer, const char* to_stem, const char *expected)
+{
+    struct icu_buf_utf16 *src = icu_buf_utf16_create(0);
+    struct icu_buf_utf16 *dst = icu_buf_utf16_create(0);
+    struct icu_buf_utf8 *dst8 = icu_buf_utf8_create(0);
+    /* ICU-style calls inspect the status on entry, so it has to start out
+     * as U_ZERO_ERROR; an uninitialized value makes the outcome random. */
+    UErrorCode status = U_ZERO_ERROR;
+    int rc = 0; /* assume failure until the comparison succeeds */
+
+    icu_utf16_from_utf8_cstr(src, to_stem, &status);
+    if (status == U_ZERO_ERROR)
+        yaz_stemmer_stem(stemmer, dst, src, &status);
+    if (status == U_ZERO_ERROR)
+        icu_utf16_to_utf8(dst8, dst, &status);
+    /* Only trust dst8 when the conversion back to UTF-8 succeeded too. */
+    if (status == U_ZERO_ERROR) {
+        const char *result = icu_buf_utf8_to_cstr(dst8);
+        rc = result != NULL && strcmp(result, expected) == 0;
+    }
+
+    icu_buf_utf8_destroy(dst8);
+    icu_buf_utf16_destroy(src);
+    icu_buf_utf16_destroy(dst);
+    return rc;
+}
+
+/* Create an "en"/"porter" stemmer and check a handful of known stems.
+ * Each expectation is reported through YAZ_CHECK. */
+static void tst(void)
+{
+    UErrorCode status = U_ZERO_ERROR;
+    yaz_stemmer_p stemmer = yaz_stemmer_create("en", "porter", &status);
+
+    YAZ_CHECK(stemmer);
+    if (!stemmer)
+        return; /* nothing to exercise; the failed check is already recorded */
+
+    /* Mismatch must be reported as 0 */
+    YAZ_CHECK(test_stemmer_stem(stemmer, "beer", "water") == 0);
+
+    /* Word that is expected to come back unchanged */
+    YAZ_CHECK(test_stemmer_stem(stemmer, "adadwwr", "adadwwr"));
+
+    /* Remove trailing s */
+    YAZ_CHECK(test_stemmer_stem(stemmer, "beers", "beer"));
+    YAZ_CHECK(test_stemmer_stem(stemmer, "persons", "person"));
+
+    /* Remove s and ing */
+    YAZ_CHECK(test_stemmer_stem(stemmer, "runs", "run"));
+    YAZ_CHECK(test_stemmer_stem(stemmer, "running", "run"));
+
+    yaz_stemmer_destroy(stemmer);
+}
+
+/* Test driver: set up the YAZ check harness, run the stemmer checks and
+ * tear the harness down again.
+ * NOTE(review): there is no explicit return; YAZ_CHECK_TERM is presumably
+ * what supplies main's exit status -- confirm against yaz/test.h. */
+int main (int argc, char **argv)
+{
+    YAZ_CHECK_INIT(argc, argv);
+    tst();
+    YAZ_CHECK_TERM;
+}
+
+/*
+ * Local variables:
+ * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
+ * indent-tabs-mode: nil
+ * End:
+ * vim: shiftwidth=4 tabstop=8 expandtab
+ */
+
-- 
1.7.10.4