Note about icu_I18N.h being unstable
[yaz-moved-to-github.git] / include / yaz / icu_I18N.h
index c2c5042..457f767 100644 (file)
@@ -1,5 +1,5 @@
-/*
- * Copyright (c) 1995-2007, Index Data
+/* This file is part of the YAZ toolkit.
+ * Copyright (C) 1995-2009 Index Data.
  * All rights reserved.
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions are met:
  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  */
 
+/** \file
+    \brief Internal header for ICU utilities
+
+    These functions, while non-static, are considered unstable and internal
+    and may be renamed for each YAZ release.
+*/
+
 #ifndef ICU_I18NL_H
 #define ICU_I18NL_H
 
-#include <yaz/nmem.h>
-
-#include <libxml/parser.h>
-#include <libxml/tree.h>
+#include <yaz/yconfig.h>
 
 #include <unicode/utypes.h>   /* Basic ICU data types */
 #include <unicode/uchar.h>    /* char names           */
@@ -40,6 +44,7 @@
 #include <unicode/ubrk.h>
 #include <unicode/utrans.h>
 
+#include <yaz/icu.h>
 
 /* declared structs and functions */
 
@@ -47,9 +52,9 @@ int icu_check_status (UErrorCode status);
 
 struct icu_buf_utf16
 {
-  UChar * utf16;
-  int32_t utf16_len;
-  int32_t utf16_cap;
+    UChar * utf16;
+    int32_t utf16_len;
+    int32_t utf16_cap;
 };
 
 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity);
@@ -98,33 +103,31 @@ UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
 
 struct icu_casemap
 {
-    char locale[16];
     char action;
 };
 
-struct icu_casemap * icu_casemap_create(const char *locale, char action,
-                                            UErrorCode *status);
+struct icu_casemap * icu_casemap_create(char action, UErrorCode *status);
 
 void icu_casemap_destroy(struct icu_casemap * casemap);
 
 int icu_casemap_casemap(struct icu_casemap * casemap,
                         struct icu_buf_utf16 * dest16,
                         struct icu_buf_utf16 * src16,
-                        UErrorCode *status);
+                        UErrorCode *status,
+                        const char *locale);
 
 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
                       struct icu_buf_utf16 * src16,
                       const char *locale, char action,
                       UErrorCode *status);
 
-UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
-                                   struct icu_buf_utf8 * dest8, 
-                                   struct icu_buf_utf16 * src16,
-                                   UErrorCode * status);
+void icu_sortkey8_from_utf16(UCollator *coll,
+                             struct icu_buf_utf8 * dest8, 
+                             struct icu_buf_utf16 * src16,
+                             UErrorCode * status);
 
 struct icu_tokenizer
 {
-    char locale[16];
     char action;
     UBreakIterator* bi;
     struct icu_buf_utf16 * buf16;
@@ -162,31 +165,31 @@ int32_t icu_tokenizer_token_count(struct icu_tokenizer * tokenizer);
 
 
 
-struct icu_normalizer
+struct icu_transform
 {
     char action;
-    struct icu_buf_utf16 * rules16;
-    UParseError parse_error[256];
+    UParseError parse_error;
     UTransliterator * trans;
 };
 
-struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
-                                              UErrorCode *status);
-
+struct icu_transform * icu_transform_create(const char *id, char action,
+                                            const char *rules,
+                                            UErrorCode *status);
 
-void icu_normalizer_destroy(struct icu_normalizer * normalizer);
+void icu_transform_destroy(struct icu_transform * transform);
 
-int icu_normalizer_normalize(struct icu_normalizer * normalizer,
-                             struct icu_buf_utf16 * dest16,
-                             struct icu_buf_utf16 * src16,
-                             UErrorCode *status);
+int icu_transform_trans(struct icu_transform * transform,
+                        struct icu_buf_utf16 * dest16,
+                        struct icu_buf_utf16 * src16,
+                        UErrorCode *status);
 
 enum icu_chain_step_type {
     ICU_chain_step_type_none,
     ICU_chain_step_type_display,   /* convert to utf8 display format */
     ICU_chain_step_type_casemap,   /* apply utf16 charmap */
-    ICU_chain_step_type_normalize, /* apply utf16 normalization */
-    ICU_chain_step_type_tokenize   /* apply utf16 tokenization */
+    ICU_chain_step_type_transform, /* apply utf16 transform */
+    ICU_chain_step_type_tokenize,  /* apply utf16 tokenization */
+    ICU_chain_step_type_transliterate  /* apply utf16 transliteration */
 };
 
 
@@ -197,7 +200,7 @@ struct icu_chain_step
     enum icu_chain_step_type type;
     union {
        struct icu_casemap * casemap;
-       struct icu_normalizer * normalizer;
+       struct icu_transform * transform;
        struct icu_tokenizer * tokenizer;  
     } u;
     /* temporary post-action utf16 buffer */
@@ -210,7 +213,7 @@ struct icu_chain_step
 
 struct icu_chain;
 
-struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
+struct icu_chain_step * icu_chain_step_create(yaz_icu_chain_t chain,
                                               enum icu_chain_step_type type,
                                               const uint8_t * rule,
                                               struct icu_buf_utf16 * buf16,
@@ -222,7 +225,7 @@ void icu_chain_step_destroy(struct icu_chain_step * step);
 
 struct icu_chain
 {
-    uint8_t locale[16];
+    char *locale;
     int sort;
 
     const char * src8cstr;
@@ -244,49 +247,32 @@ struct icu_chain
     struct icu_chain_step * steps;
 };
 
-struct icu_chain * icu_chain_create(const uint8_t * locale,
-                                    int sort,
-                                    UErrorCode * status);
-
-void icu_chain_destroy(struct icu_chain * chain);
-
-struct icu_chain * icu_chain_xml_config(xmlNode *xml_node,
-                                        const uint8_t * locale,
-                                        int sort,
-                                        UErrorCode * status);
-
-struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
+struct icu_chain_step * icu_chain_insert_step(yaz_icu_chain_t chain,
                                               enum icu_chain_step_type type,
                                               const uint8_t * rule,
                                               UErrorCode *status);
 
-int icu_chain_step_next_token(struct icu_chain * chain,
+int icu_chain_step_next_token(yaz_icu_chain_t chain,
                               struct icu_chain_step * step,
                               UErrorCode *status);
 
-int icu_chain_assign_cstr(struct icu_chain * chain,
-                          const char * src8cstr, 
-                          UErrorCode *status);
-
-int icu_chain_next_token(struct icu_chain * chain,
-                         UErrorCode *status);
+int icu_chain_token_number(yaz_icu_chain_t chain);
 
-int icu_chain_token_number(struct icu_chain * chain);
+const UCollator * icu_chain_get_coll(yaz_icu_chain_t chain);
 
-const char * icu_chain_token_display(struct icu_chain * chain);
+yaz_icu_chain_t icu_chain_create(const char * locale,
+                                 int sort,
+                                 UErrorCode * status);
 
-const char * icu_chain_token_norm(struct icu_chain * chain);
-
-const char * icu_chain_token_sortkey(struct icu_chain * chain);
-
-const UCollator * icu_chain_get_coll(struct icu_chain * chain);
 
 #endif /* ICU_I18NL_H */
 
 /*
  * Local variables:
  * c-basic-offset: 4
+ * c-file-style: "Stroustrup"
  * indent-tabs-mode: nil
  * End:
  * vim: shiftwidth=4 tabstop=8 expandtab
  */
+