/*
 * src/stemmer.c (from line 10 of the file onwards)
 *
 * yaz_stemmer API implemented on top of the Snowball stemming library.
 *
 * Provenance: reconstructed from the patch
 *   "First implementation of yaz_stemmer API. Can compile."
 *   commit b5b083d6c36e61c77b8b67980c155dde0a539ca5
 *   Dennis Schafroth, Wed, 10 Nov 2010 14:41:16 +0100
 * with review fixes applied (see the per-function comments). The stub
 * bodies removed by that patch are not reproduced.
 */

/* NOTE(review): the opening guard lives in the part of the file that the
 * patch did not show; it is inferred from the closing
 * "#endif YAZ_HAVE_ICU" comment -- confirm against the YAZ tree. */
#if YAZ_HAVE_ICU

/* NOTE(review): the header names were stripped from the patch during
 * extraction. The names below are inferred from the identifiers used in
 * this file and must be confirmed against the YAZ tree. */
#include <string.h>            /* strlen */

#include <yaz/stemmer.h>       /* presumably yaz_stemmer_p and the icu_buf_* helpers */
#include <yaz/xmalloc.h>       /* xmalloc, xstrdup, xfree */

#include <libstemmer.h>        /* sb_stemmer_*, sb_symbol */

#include <unicode/ustring.h>   /* some more string fcns*/
#include <unicode/uchar.h>     /* char names */

/* Back ends that can sit behind a yaz_stemmer handle. */
enum stemmer_implementation {
    yaz_snowball
};

/* Stemmer handle: records which back end is active plus that back end's
 * state. */
struct yaz_stemmer_t
{
    int implementation;   /* one of enum stemmer_implementation */
    /* Owned copies of the creation arguments; required for cloning.
     * Either may be NULL if the caller passed NULL. */
    char *locale;
    char *rule;
    union {
        struct sb_stemmer *sb_stemmer;   /* valid when implementation == yaz_snowball */
    };
};

/* Duplicate a string with xstrdup, passing NULL through unchanged. */
static char *stemmer_strdup(const char *s)
{
    return s ? xstrdup(s) : 0;
}

/*
 * Return the character-encoding name to hand to sb_stemmer_new().
 *
 * yaz_stemmer_stem() always converts its input to UTF-8 before calling
 * Snowball, so the Snowball stemmer has to be created for UTF-8 no matter
 * what the caller passes in. (The first version returned its argument,
 * and the only caller passed the locale, which is not an encoding name.)
 */
const char *yaz_stemmer_lookup_charenc(const char *charenc)
{
    (void) charenc;
    return "UTF_8";
}

/*
 * Return the Snowball algorithm name for a locale/rule pair.
 * Currently the rule is used verbatim as the algorithm name and the
 * locale is ignored.
 */
const char *yaz_stemmer_lookup_algorithm(const char *locale, const char *rule)
{
    (void) locale;
    return rule;
}

/*
 * Create a Snowball-backed stemmer.
 *
 * locale, rule: creation arguments; copies are stored in the handle so
 *               that it can be cloned later.
 * status:       set to U_ARGUMENT_TYPE_MISMATCH when no Snowball stemmer
 *               can be created for the arguments; otherwise left as is.
 *
 * Returns the new handle (release with yaz_stemmer_destroy), or 0 on
 * failure.
 */
yaz_stemmer_p yaz_stemmer_snowball_create(const char *locale, const char *rule, UErrorCode *status)
{
    const char *charenc = yaz_stemmer_lookup_charenc(locale);
    const char *algorithm = yaz_stemmer_lookup_algorithm(locale, rule);
    struct sb_stemmer *sb = 0;
    yaz_stemmer_p yaz_stemmer;

    /* A NULL algorithm name can never match; do not pass it to Snowball. */
    if (algorithm != 0) {
        sb = sb_stemmer_new(algorithm, charenc);
    }
    if (sb == 0) {
        *status = U_ARGUMENT_TYPE_MISMATCH;
        return 0;
    }
    yaz_stemmer = xmalloc(sizeof(*yaz_stemmer));
    yaz_stemmer->implementation = yaz_snowball;
    yaz_stemmer->locale = stemmer_strdup(locale);
    yaz_stemmer->rule = stemmer_strdup(rule);
    yaz_stemmer->sb_stemmer = sb;
    return yaz_stemmer;
}

/*
 * Create a stemmer for the given locale and rule.
 *
 * *status is reset to U_ZERO_ERROR first and then set by the back end's
 * constructor on failure. Returns the handle, or 0 on failure.
 */
yaz_stemmer_p yaz_stemmer_create(const char *locale, const char *rule, UErrorCode *status)
{
    *status = U_ZERO_ERROR;
    /* Dispatch logic is required here once more algorithms are implemented. */
    return yaz_stemmer_snowball_create(locale, rule, status);
}

/*
 * Create an independent stemmer from the locale and rule stored in an
 * existing one. Returns 0 if stemmer is 0 or if creation fails; the ICU
 * error code of a failed creation is not reported to the caller.
 */
yaz_stemmer_p yaz_stemmer_clone(yaz_stemmer_p stemmer)
{
    UErrorCode error = U_ZERO_ERROR;

    if (stemmer == 0) {
        return 0;
    }
    return yaz_stemmer_create(stemmer->locale, stemmer->rule, &error);
}

/*
 * Stem the word in src and store the result in dst.
 *
 * src is converted to UTF-8 and stemmed; the stem is converted back to
 * UTF-16 into dst. If the stemmer returns no result, src is copied to
 * dst unchanged. If the UTF-16 to UTF-8 conversion does not leave
 * *status at U_ZERO_ERROR, dst is not written.
 */
void yaz_stemmer_stem(yaz_stemmer_p stemmer, struct icu_buf_utf16 *dst, struct icu_buf_utf16* src, UErrorCode *status)
{
    switch (stemmer->implementation) {
    case yaz_snowball: {
        struct icu_buf_utf8 *utf8_buf = icu_buf_utf8_create(0);

        icu_utf16_to_utf8(utf8_buf, src, status);
        if (*status == U_ZERO_ERROR) {
            const char *word = icu_buf_utf8_to_cstr(utf8_buf);
            /* Snowball wants the byte length of the word; the first
             * version passed an uninitialized int here. */
            const sb_symbol *stem =
                sb_stemmer_stem(stemmer->sb_stemmer,
                                (const sb_symbol *) word, (int) strlen(word));

            if (stem == 0) {
                icu_buf_utf16_copy(dst, src);
            }
            else {
                /* The returned stem is used as a C string here, as in
                 * the original code. */
                icu_utf16_from_utf8_cstr(dst, (const char *) stem, status);
            }
        }
        /* Release the scratch buffer on every path (it used to leak). */
        icu_buf_utf8_destroy(utf8_buf);
        break;
    }
    default:
        break;
    }
}

/*
 * Release a stemmer and everything it owns. A 0 handle is ignored.
 */
void yaz_stemmer_destroy(yaz_stemmer_p stemmer)
{
    if (stemmer == 0) {
        return;
    }
    switch (stemmer->implementation) {
    case yaz_snowball:
        sb_stemmer_delete(stemmer->sb_stemmer);
        break;
    default:
        break;
    }
    /* The handle and its strings come from xmalloc/xstrdup, so they are
     * released with the matching xfree rather than plain free. */
    xfree(stemmer->locale);
    xfree(stemmer->rule);
    xfree(stemmer);
}

#endif /* YAZ_HAVE_ICU */