-/*
- * Copyright (c) 1995-2007, Index Data
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2010 Index Data.
* All rights reserved.
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
/** \file
\brief Internal header for ICU utilities
+
+ These functions, while non-static, are considered unstable and internal
+ and may be renamed for each YAZ release.
*/
#ifndef ICU_I18NL_H
#include <yaz/yconfig.h>
-#include <libxml/tree.h>
-
#include <unicode/utypes.h> /* Basic ICU data types */
#include <unicode/uchar.h> /* char names */
-#include <unicode/ucol.h>
+#include <unicode/ucol.h>
#include <unicode/ubrk.h>
-#include <unicode/utrans.h>
#include <yaz/icu.h>
struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
size_t capacity);
-struct icu_buf_utf16 * icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
- struct icu_buf_utf16 * src16);
+struct icu_buf_utf16 *icu_buf_utf16_copy(struct icu_buf_utf16 * dest16,
+ const struct icu_buf_utf16 * src16); /* src16 is taken as pointer-to-const; dest16 is the writable side */
void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16);
-
+struct icu_buf_utf8; /* forward declaration so the prototypes here can take struct icu_buf_utf8 pointers */
struct icu_buf_utf8
{
void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8);
-UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
- struct icu_buf_utf8 * src8,
- UErrorCode * status);
-
UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
const char * src8cstr,
UErrorCode * status);
+const char *icu_buf_utf8_to_cstr(struct icu_buf_utf8 *src8); /* NOTE(review): presumably a NUL-terminated view into src8 that the caller must not free - confirm in the implementation */
-UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
- struct icu_buf_utf16 * src16,
+
+UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 *dest8,
+ const struct icu_buf_utf16 *src16, /* input buffer is taken as pointer-to-const */
UErrorCode * status);
-struct icu_casemap
-{
- char action;
-};
+struct icu_casemap; /* incomplete type: this header declares the tag only, not the struct body */
struct icu_casemap * icu_casemap_create(char action, UErrorCode *status);
+struct icu_casemap *icu_casemap_clone(struct icu_casemap *old); /* NOTE(review): presumably returns a new object to be released with icu_casemap_destroy - confirm */
+
void icu_casemap_destroy(struct icu_casemap * casemap);
int icu_casemap_casemap(struct icu_casemap * casemap,
struct icu_buf_utf16 * src16,
UErrorCode * status);
-struct icu_tokenizer
-{
- char action;
- UBreakIterator* bi;
- struct icu_buf_utf16 * buf16;
- int32_t token_count;
- int32_t token_id;
- int32_t token_start;
- int32_t token_end;
-/*
- keep always invariant
- 0 <= token_start
- <= token_end
- <= buf16->utf16_len
- and invariant
- 0 <= token_id <= token_count
-*/
-};
-
+struct icu_tokenizer; /* incomplete type: this header declares the tag only, not the struct body */
struct icu_tokenizer * icu_tokenizer_create(const char *locale, char action,
UErrorCode *status);
+struct icu_tokenizer *icu_tokenizer_clone(struct icu_tokenizer *old); /* NOTE(review): presumably returns a new object to be released with icu_tokenizer_destroy - confirm */
void icu_tokenizer_destroy(struct icu_tokenizer * tokenizer);
int icu_tokenizer_attach(struct icu_tokenizer * tokenizer,
struct icu_buf_utf16 * tkn16,
UErrorCode *status);
-int32_t icu_tokenizer_token_id(struct icu_tokenizer * tokenizer);
-int32_t icu_tokenizer_token_start(struct icu_tokenizer * tokenizer);
-int32_t icu_tokenizer_token_end(struct icu_tokenizer * tokenizer);
-int32_t icu_tokenizer_token_length(struct icu_tokenizer * tokenizer);
int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
+struct icu_transform; /* incomplete type: only pointers to it appear in this header */
+struct icu_transform * icu_transform_create(const char *id, char action,
+ const char *rules,
+ UErrorCode *status); /* NOTE(review): the meaning of id/action/rules is not visible here - see the implementation */
+struct icu_transform *icu_transform_clone(struct icu_transform *old); /* NOTE(review): presumably returns a new object to be released with icu_transform_destroy - confirm */
+void icu_transform_destroy(struct icu_transform * transform); /* NOTE(review): presumably releases an object obtained from create/clone - confirm */
-struct icu_normalizer
-{
- char action;
- struct icu_buf_utf16 * rules16;
- UParseError parse_error[256];
- UTransliterator * trans;
-};
-
-struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
- UErrorCode *status);
-
-
-void icu_normalizer_destroy(struct icu_normalizer * normalizer);
-
-int icu_normalizer_normalize(struct icu_normalizer * normalizer,
- struct icu_buf_utf16 * dest16,
- struct icu_buf_utf16 * src16,
- UErrorCode *status);
-
-enum icu_chain_step_type {
- ICU_chain_step_type_none,
- ICU_chain_step_type_display, /* convert to utf8 display format */
- ICU_chain_step_type_casemap, /* apply utf16 charmap */
- ICU_chain_step_type_normalize, /* apply utf16 normalization */
- ICU_chain_step_type_tokenize /* apply utf16 tokenization */
-};
-
-
-
-struct icu_chain_step
-{
- /* type and action object */
- enum icu_chain_step_type type;
- union {
- struct icu_casemap * casemap;
- struct icu_normalizer * normalizer;
- struct icu_tokenizer * tokenizer;
- } u;
- /* temprary post-action utf16 buffer */
- struct icu_buf_utf16 * buf16;
- struct icu_chain_step * previous;
- int more_tokens;
- int need_new_token;
-};
-
-
-struct icu_chain;
-
-struct icu_chain_step * icu_chain_step_create(yaz_icu_chain_t chain,
- enum icu_chain_step_type type,
- const uint8_t * rule,
- struct icu_buf_utf16 * buf16,
- UErrorCode *status);
-
-
-void icu_chain_step_destroy(struct icu_chain_step * step);
-
-
-struct icu_chain
-{
- char *locale;
- int sort;
-
- const char * src8cstr;
-
- UCollator * coll;
-
- /* number of tokens returned so far */
- int32_t token_count;
-
- /* utf8 output buffers */
- struct icu_buf_utf8 * display8;
- struct icu_buf_utf8 * norm8;
- struct icu_buf_utf8 * sort8;
-
- /* utf16 source buffer */
- struct icu_buf_utf16 * src16;
-
- /* linked list of chain steps */
- struct icu_chain_step * steps;
-};
-
-struct icu_chain_step * icu_chain_insert_step(yaz_icu_chain_t chain,
- enum icu_chain_step_type type,
- const uint8_t * rule,
- UErrorCode *status);
+int icu_transform_trans(struct icu_transform * transform,
+ struct icu_buf_utf16 * dest16,
+ const struct icu_buf_utf16 * src16, /* input is pointer-to-const; NOTE(review): result presumably lands in dest16 - confirm */
+ UErrorCode *status);
-int icu_chain_step_next_token(yaz_icu_chain_t chain,
- struct icu_chain_step * step,
- UErrorCode *status);
+struct icu_chain_step; /* incomplete type: this header declares the tag only, not the struct body */
int icu_chain_token_number(yaz_icu_chain_t chain);
-const UCollator * icu_chain_get_coll(yaz_icu_chain_t chain);
+yaz_icu_chain_t icu_chain_create(const char * locale,
+ int sort,
+ UErrorCode * status); /* NOTE(review): yaz_icu_chain_t is not declared in this header; presumably it comes from <yaz/icu.h> - confirm */
#endif /* ICU_I18NL_H */
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab
*/
+