/* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2009 Index Data.
+ * Copyright (C) 1995-2012 Index Data.
* All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
#include <unicode/utypes.h> /* Basic ICU data types */
#include <unicode/uchar.h> /* char names */
-#include <unicode/ucol.h>
+#include <unicode/ucol.h>
#include <unicode/ubrk.h>
-#include <unicode/utrans.h>
#include <yaz/icu.h>
struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
size_t capacity);
-struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
- struct icu_buf_utf16 * src16);
+struct icu_buf_utf16 *icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
+ const struct icu_buf_utf16 * src16);
void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16);
-
+struct icu_buf_utf8;
struct icu_buf_utf8
{
const char * src8cstr,
UErrorCode * status);
+const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8);
-UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
- struct icu_buf_utf16 * src16,
+
+UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
+ const struct icu_buf_utf16 *src16,
UErrorCode * status);
-struct icu_casemap
-{
- char action;
-};
+struct icu_casemap;
struct icu_casemap * icu_casemap_create(char action, UErrorCode *status);
+struct icu_casemap *icu_casemap_clone(struct icu_casemap *old);
+
void icu_casemap_destroy(struct icu_casemap * casemap);
int icu_casemap_casemap(struct icu_casemap * casemap,
struct icu_buf_utf16 * src16,
UErrorCode * status);
-struct icu_tokenizer
-{
- char action;
- UBreakIterator* bi;
- struct icu_buf_utf16 * buf16;
- int32_t token_count;
- int32_t token_id;
- int32_t token_start;
- int32_t token_end;
-/*
- keep always invariant
- 0 <= token_start
- <= token_end
- <= buf16->utf16_len
- and invariant
- 0 <= token_id <= token_count
-*/
-};
-
+struct icu_tokenizer;
struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
UErrorCode *status);
+struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old);
void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
struct icu_buf_utf16 * tkn16,
UErrorCode *status);
-int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer);
-int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer);
-int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer);
-int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer);
int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
-
-
-struct icu_transform
-{
- char action;
- UParseError parse_error;
- UTransliterator * trans;
-};
+struct icu_transform;
struct icu_transform * icu_transform_create(const char *id, char action,
const char *rules,
UErrorCode *status);
-
+struct icu_transform *icu_transform_clone(struct icu_transform *old);
void icu_transform_destroy(struct icu_transform * transform);
int icu_transform_trans(struct icu_transform * transform,
struct icu_buf_utf16 * dest16,
- struct icu_buf_utf16 * src16,
+ const struct icu_buf_utf16 * src16,
UErrorCode *status);
-enum icu_chain_step_type {
- ICU_chain_step_type_none,
- ICU_chain_step_type_display, /* convert to utf8 display format */
- ICU_chain_step_type_casemap, /* apply utf16 charmap */
- ICU_chain_step_type_transform, /* apply utf16 transform */
- ICU_chain_step_type_tokenize, /* apply utf16 tokenization */
- ICU_chain_step_type_transliterate /* apply utf16 tokenization */
-};
-
-
-
-struct icu_chain_step
-{
- /* type and action object */
- enum icu_chain_step_type type;
- union {
- struct icu_casemap * casemap;
- struct icu_transform * transform;
- struct icu_tokenizer * tokenizer;
- } u;
- /* temprary post-action utf16 buffer */
- struct icu_buf_utf16 * buf16;
- struct icu_chain_step * previous;
- int more_tokens;
- int need_new_token;
-};
-
-
-struct icu_chain;
-
-struct icu_chain_step * icu_chain_step_create(yaz_icu_chain_t chain,
- enum icu_chain_step_type type,
- const uint8_t * rule,
- struct icu_buf_utf16 * buf16,
- UErrorCode *status);
-
-
-void icu_chain_step_destroy(struct icu_chain_step * step);
-
-
-struct icu_chain
-{
- char *locale;
- int sort;
-
- const char * src8cstr;
-
- UCollator * coll;
-
- /* number of tokens returned so far */
- int32_t token_count;
-
- /* utf8 output buffers */
- struct icu_buf_utf8 * display8;
- struct icu_buf_utf8 * norm8;
- struct icu_buf_utf8 * sort8;
-
- /* utf16 source buffer */
- struct icu_buf_utf16 * src16;
-
- /* linked list of chain steps */
- struct icu_chain_step * steps;
-};
-
-struct icu_chain_step * icu_chain_insert_step(yaz_icu_chain_t chain,
- enum icu_chain_step_type type,
- const uint8_t * rule,
- UErrorCode *status);
-
-int icu_chain_step_next_token(yaz_icu_chain_t chain,
- struct icu_chain_step * step,
- UErrorCode *status);
+struct icu_chain_step;
int icu_chain_token_number(yaz_icu_chain_t chain);
-const UCollator * icu_chain_get_coll(yaz_icu_chain_t chain);
-
yaz_icu_chain_t icu_chain_create(const char * locale,
int sort,
UErrorCode * status);
-
#endif /* ICU_I18NL_H */
/*