From 640ef4d07f0b09f70578dccc4cda6d282ed6b281 Mon Sep 17 00:00:00 2001 From: Dennis Schafroth Date: Thu, 11 Nov 2010 13:06:44 +0100 Subject: [PATCH] Stemmer implementation working. - Snowball stemmer now works. - Implementing "No Operation" for invalid settings. But returning error. --- src/stemmer.c | 58 +++++++++++++++++++++++++++++++++------------------------ 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/src/stemmer.c b/src/stemmer.c index dd596f7..12c9119 100644 --- a/src/stemmer.c +++ b/src/stemmer.c @@ -18,34 +18,38 @@ #include /* char names */ enum stemmer_implementation { + yaz_no_operation, yaz_snowball }; struct yaz_stemmer_t { int implementation; // Required for cloning. - const char *locale; - const char *rule; + char *locale; + char *rule; union { struct sb_stemmer *sb_stemmer; }; }; -const char* yaz_stemmer_lookup_charenc(const char *charenc) { - return charenc; +const char* yaz_stemmer_lookup_charenc(const char *charenc, const char *rule) { + return "UTF_8"; } const char* yaz_stemmer_lookup_algorithm(const char *locale, const char *rule) { - return rule; + return locale; } yaz_stemmer_p yaz_stemmer_snowball_create(const char *locale, const char *rule, UErrorCode *status) { - const char *charenc = yaz_stemmer_lookup_charenc(locale); + const char *charenc = yaz_stemmer_lookup_charenc(locale, rule); const char *algorithm = yaz_stemmer_lookup_algorithm(locale,rule); struct sb_stemmer *stemmer = sb_stemmer_new(algorithm, charenc); yaz_stemmer_p yaz_stemmer; + yaz_log(YLOG_DEBUG, "create snowball stemmer: algoritm %s charenc %s ", algorithm, charenc); if (stemmer == 0) { *status = U_ARGUMENT_TYPE_MISMATCH; + yaz_log(YLOG_DEBUG, "failed to create stemmer. Creating NOP stemmer"); + return 0; } yaz_stemmer = xmalloc(sizeof(*yaz_stemmer)); @@ -53,13 +57,14 @@ yaz_stemmer_p yaz_stemmer_snowball_create(const char *locale, const char *rule, yaz_stemmer->locale = xstrdup(locale); yaz_stemmer->rule = xstrdup(rule); yaz_stemmer->sb_stemmer = stemmer; - + yaz_log(YLOG_DEBUG, "created snowball stemmer: algoritm %s charenc %s ", algorithm, charenc); return yaz_stemmer; } yaz_stemmer_p yaz_stemmer_create(const char *locale, const char *rule, UErrorCode *status) { *status = U_ZERO_ERROR; // dispatch logic required if more algorithms is implemented. + yaz_log(YLOG_DEBUG, "create stemmer: locale %s rule %s ", locale, rule); return yaz_stemmer_snowball_create(locale, rule, status); } @@ -71,22 +76,27 @@ yaz_stemmer_p yaz_stemmer_clone(yaz_stemmer_p stemmer) { void yaz_stemmer_stem(yaz_stemmer_p stemmer, struct icu_buf_utf16 *dst, struct icu_buf_utf16* src, UErrorCode *status) { switch(stemmer->implementation) { - case yaz_snowball: { - int length; - struct icu_buf_utf8 *utf8_buf = icu_buf_utf8_create(0); - icu_utf16_to_utf8(utf8_buf, src, status); - if (*status == U_ZERO_ERROR) { - const char *sb_symbol = sb_stemmer_stem(stemmer->sb_stemmer, icu_buf_utf8_to_cstr(utf8_buf), length); - if (sb_symbol == 0) { - icu_buf_utf16_copy(dst, src); - } - else { - icu_utf16_from_utf8_cstr(dst, sb_symbol, status); + case yaz_snowball: { + struct icu_buf_utf8 *utf8_buf = icu_buf_utf8_create(0); + icu_utf16_to_utf8(utf8_buf, src, status); + if (*status == U_ZERO_ERROR) { + const sb_symbol *cstr = (const sb_symbol*) icu_buf_utf8_to_cstr(utf8_buf); + const sb_symbol *sb_symbol = sb_stemmer_stem(stemmer->sb_stemmer, cstr, utf8_buf->utf8_len); + if (sb_symbol == 0) { + icu_buf_utf16_copy(dst, src); + } + else { + const char *cstr = (const char *) sb_symbol; + icu_utf16_from_utf8_cstr(dst, cstr , status); + } } + return ; + break; + } + default: { + // Default return the same as given. + icu_buf_utf16_copy(dst, src); } - return ; - break; - } } } @@ -96,9 +106,9 @@ void yaz_stemmer_destroy(yaz_stemmer_p stemmer) { sb_stemmer_delete(stemmer->sb_stemmer); break; } - free(stemmer->locale); - free(stemmer->rule); - free(stemmer); + xfree(stemmer->locale); + xfree(stemmer->rule); + xfree(stemmer); } #endif /* YAZ_HAVE_ICU */ -- 1.7.10.4