--- /dev/null
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2010 Index Data.
+ * All rights reserved.
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are met:
+ *
+ * * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * * Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ * * Neither the name of Index Data nor the names of its contributors
+ * may be used to endorse or promote products derived from this
+ * software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+ * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+ * DISCLAIMED. IN NO EVENT SHALL THE REGENTS AND CONTRIBUTORS BE LIABLE FOR ANY
+ * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+ * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ * ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
+ * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+/**
+ * \file stemmer.h
+ * \brief Header for the stemming API. Simple wrapper to hide underlying implementation.
+ */
+
+#ifndef YAZ_STEMMER_H
+#define YAZ_STEMMER_H
+
+#include <yaz/yconfig.h>
+#include <yaz/xmltypes.h>
+#include <yaz/icu_I18N.h>
+#include <yaz/log.h>
+
+#include <unicode/ustring.h> /* some more string fcns*/
+#include <unicode/uchar.h> /* char names */
+
+
+YAZ_BEGIN_CDECL
+
+/** \brief opaque handle for a stemmer; the struct is defined in stemmer.c */
+typedef struct yaz_stemmer_t *yaz_stemmer_p;
+
+/** \brief creates a stemmer
+    \param locale locale string (the ICU chain passes its own locale)
+    \param rule rule string (the ICU chain passes the step's rule)
+    \param status ICU error code; presumably set on failure - TODO confirm
+    \returns stemmer handle; may be a null handle
+*/
+YAZ_EXPORT
+yaz_stemmer_p yaz_stemmer_create(const char *locale, const char *rule,
+                                 UErrorCode *status);
+
+/** \brief clones a stemmer (used when an ICU chain step is cloned)
+    \param stemmer handle to clone
+    \returns handle for the clone; may be a null handle
+*/
+YAZ_EXPORT
+yaz_stemmer_p yaz_stemmer_clone(yaz_stemmer_p stemmer);
+
+/** \brief stems the text in src and stores the result in dst
+    \param stemmer stemmer handle
+    \param dst result buffer; the ICU chain passes a newly created buffer
+    \param src input buffer; still owned by the caller
+    \param status ICU error code; presumably set on failure - TODO confirm
+*/
+YAZ_EXPORT
+void yaz_stemmer_stem(yaz_stemmer_p stemmer, struct icu_buf_utf16 *dst,
+                      struct icu_buf_utf16 *src, UErrorCode *status);
+
+/** \brief destroys a stemmer (called when an ICU chain step is destroyed)
+    \param stemmer handle to release
+*/
+YAZ_EXPORT
+void yaz_stemmer_destroy(yaz_stemmer_p stemmer);
+
+YAZ_END_CDECL
+
+#endif /* YAZ_STEMMER_H */
libyaz_server_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
-libyaz_icu_la_SOURCES = icu_chain.c icu_utf16.c icu_utf8.c \
+libyaz_icu_la_SOURCES = icu_chain.c icu_utf16.c icu_utf8.c stemmer.c \
icu_transform.c icu_casemap.c icu_tokenizer.c icu_sortkey.c
libyaz_icu_la_LDFLAGS=-version-info $(YAZ_VERSION_INFO)
#include <yaz/icu_I18N.h>
+#include <yaz/stemmer.h>
+
#include <yaz/log.h>
#include <yaz/nmem.h>
#include <yaz/nmem_xml.h>
enum icu_chain_step_type {
ICU_chain_step_type_none,
- ICU_chain_step_type_display, /* convert to utf8 display format */
- ICU_chain_step_type_casemap, /* apply utf16 charmap */
- ICU_chain_step_type_transform, /* apply utf16 transform */
- ICU_chain_step_type_tokenize, /* apply utf16 tokenization */
- ICU_chain_step_type_transliterate /* apply utf16 tokenization */
+ ICU_chain_step_type_display, /* convert to utf8 display format */
+ ICU_chain_step_type_casemap, /* apply utf16 charmap */
+ ICU_chain_step_type_transform, /* apply utf16 transform */
+ ICU_chain_step_type_tokenize, /* apply utf16 tokenization */
+ ICU_chain_step_type_transliterate, /* apply utf16 transliteration */
+ YAZ_chain_step_type_stemming /* apply utf16 stemming (YAZ) */
};
struct icu_chain_step
/* type and action object */
enum icu_chain_step_type type;
union {
- struct icu_casemap *casemap;
+ struct icu_casemap *casemap;
struct icu_transform *transform;
struct icu_tokenizer *tokenizer;
+ yaz_stemmer_p stemmer;
} u;
struct icu_chain_step *previous;
};
step->u.transform = icu_transform_create("custom", 'f',
(const char *) rule, status);
break;
+ case YAZ_chain_step_type_stemming:
+ step->u.stemmer = yaz_stemmer_create((char *) chain->locale, (const char *) rule, status);
+ break;
default:
break;
}
case ICU_chain_step_type_tokenize:
icu_tokenizer_destroy(step->u.tokenizer);
break;
+ case YAZ_chain_step_type_stemming:
+ yaz_stemmer_destroy(step->u.stemmer);
+ break;
default:
break;
}
case ICU_chain_step_type_tokenize:
(*sp)->u.tokenizer = icu_tokenizer_clone(old->u.tokenizer);
break;
+        case YAZ_chain_step_type_stemming:
+            /* store the clone in the new step, taken from the step being
+               copied, exactly as the tokenizer case does */
+            (*sp)->u.stemmer = yaz_stemmer_clone(old->u.stemmer);
+            break;
case ICU_chain_step_type_none:
break;
}
else if (!strcmp((const char *) node->name, "display"))
step = icu_chain_insert_step(chain, ICU_chain_step_type_display,
(const uint8_t *) "", status);
+ else if (!strcmp((const char *) node->name, "stemming"))
+ step = icu_chain_insert_step(chain, YAZ_chain_step_type_stemming,
+ (const uint8_t *) rule, status);
else if (!strcmp((const char *) node->name, "normalize"))
{
yaz_log(YLOG_WARN, "Element %s is deprecated. "
if (dst)
icu_utf16_to_utf8(iter->display, dst, &iter->status);
break;
+ case YAZ_chain_step_type_stemming:
+ if (dst)
+ {
+ struct icu_buf_utf16 *src = dst;
+ dst = icu_buf_utf16_create(0);
+ yaz_stemmer_stem(step->u.stemmer, dst, src, &iter->status);
+ icu_buf_utf16_destroy(src);
+ }
+ break;
default:
assert(0);
}
--- /dev/null
+
+
+#if HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#if YAZ_HAVE_ICU
+
+#include <yaz/yconfig.h>
+
+#include <yaz/stemmer.h>
+
+#include <unicode/ustring.h> /* some more string fcns*/
+#include <unicode/uchar.h> /* char names */
+
+/*
+ * Private state behind the opaque yaz_stemmer_p handle.
+ *
+ * implementation: presumably a tag telling which member of u is in use
+ *                 (no code reads it yet - TODO confirm once a backend lands).
+ * u.snowballer:   pointer to a struct sb_stemmer; the only backend slot so far.
+ *
+ * The union is named (u), like the one in struct icu_chain_step, because
+ * anonymous unions are only standard from C11 on.
+ */
+struct yaz_stemmer_t
+{
+    int implementation;
+    union {
+        struct sb_stemmer *snowballer;
+    } u;
+};
+
+/*
+ * Placeholder constructor: no stemmer is built yet.  All three arguments
+ * are ignored, *status is left untouched and a null handle is returned.
+ */
+yaz_stemmer_p yaz_stemmer_create(const char *locale, const char *rule,
+                                 UErrorCode *status)
+{
+    (void) locale;
+    (void) rule;
+    (void) status;
+    return NULL;
+}
+
+/*
+ * Placeholder clone: the argument is ignored and a null handle is returned.
+ */
+yaz_stemmer_p yaz_stemmer_clone(yaz_stemmer_p stemmer)
+{
+    (void) stemmer;
+    return NULL;
+}
+
+/*
+ * Stem the text in src into dst.
+ *
+ * No stemming backend is wired in yet, so the text is passed through
+ * unchanged.  The ICU chain hands in a newly created dst and destroys src
+ * right after this call, so leaving dst untouched would throw the token
+ * away.
+ *
+ * stemmer and status are not used by the pass-through; *status is left
+ * untouched.  A null dst or src makes the call a no-op.
+ */
+void yaz_stemmer_stem(yaz_stemmer_p stemmer, struct icu_buf_utf16 *dst,
+                      struct icu_buf_utf16 *src, UErrorCode *status)
+{
+    (void) stemmer;
+    (void) status;
+
+    if (dst && src)
+        icu_buf_utf16_copy(dst, src);
+}
+
+/*
+ * Placeholder destructor: the handle is ignored and nothing is released.
+ */
+void yaz_stemmer_destroy(yaz_stemmer_p stemmer)
+{
+    (void) stemmer;
+}
+
+#endif /* YAZ_HAVE_ICU */