New function: stop
[pazpar2-moved-to-github.git] / src / icu_I18N.h
index 2746f07..22d77af 100644 (file)
@@ -1,4 +1,4 @@
-/* $Id: icu_I18N.h,v 1.12 2007-05-14 13:51:24 marc Exp $
+/* $Id: icu_I18N.h,v 1.17 2007-05-25 13:27:21 marc Exp $
    Copyright (c) 2006-2007, Index Data.
 
    This file is part of Pazpar2.
@@ -26,6 +26,8 @@
 
 #include <yaz/nmem.h>
 
+#include <libxml/parser.h>
+#include <libxml/tree.h>
 
 #include <unicode/utypes.h>   /* Basic ICU data types */
 #include <unicode/uchar.h>    /* char names           */
@@ -87,6 +89,22 @@ UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
                              struct icu_buf_utf16 * src16,
                              UErrorCode * status);
 
+struct icu_casemap   /* case-mapping settings: a locale name plus an action code */
+{
+  char locale[16];   /* locale name stored inline; 16 bytes total, presumably NUL-terminated - confirm in icu_casemap_create */
+  char action;       /* single-character action selector; its valid values are not defined in this part of the header */
+};
+
+struct icu_casemap * icu_casemap_create(const char *locale, char action,
+                                            UErrorCode *status);
+
+void icu_casemap_destroy(struct icu_casemap * casemap);
+
+int icu_casemap_casemap(struct icu_casemap * casemap,
+                        struct icu_buf_utf16 * dest16,
+                        struct icu_buf_utf16 * src16,
+                        UErrorCode *status);
+
 int icu_utf16_casemap(struct icu_buf_utf16 * dest16,
                       struct icu_buf_utf16 * src16,
                       const char *locale, char action,
@@ -165,12 +183,13 @@ struct icu_token
 }
 #endif
 
+
 enum icu_chain_step_type {   // kind of a chain step; stored in the `type` field of struct icu_chain_step
     ICU_chain_step_type_none,      // no step type set
     ICU_chain_step_type_display,   // convert to utf8 display format
-    ICU_chain_step_type_norm,      // convert to utf8 norm format 
-    ICU_chain_step_type_sort,      // convert to utf8 sort format 
-    ICU_chain_step_type_charmap,   // apply utf16 charmap
+    ICU_chain_step_type_index,     // convert to utf8 index format
+    ICU_chain_step_type_sortkey,   // convert to utf8 sortkey format
+    ICU_chain_step_type_casemap,   // apply utf16 casemap
     ICU_chain_step_type_normalize, // apply utf16 normalization
     ICU_chain_step_type_tokenize   // apply utf16 tokenization
 };
@@ -182,15 +201,30 @@ struct icu_chain_step
   // type and action object
   enum icu_chain_step_type type;
   union {
+    struct icu_casemap * casemap;
     struct icu_normalizer * normalizer;
     struct icu_tokenizer * tokenizer;  
   } u;
   // temporary post-action utf16 buffer
   struct icu_buf_utf16 * buf16;  
-  struct icu_chain_step * next;
+  struct icu_chain_step * previous;
+  int more_tokens;
+  int need_new_token;
 };
 
 
+struct icu_chain;
+
+struct icu_chain_step * icu_chain_step_create(struct icu_chain * chain,
+                                              enum icu_chain_step_type type,
+                                              const uint8_t * rule,
+                                              struct icu_buf_utf16 * buf16,
+                                              UErrorCode *status);
+
+
+void icu_chain_step_destroy(struct icu_chain_step * step);
+
+
 struct icu_chain
 {
   uint8_t identifier[128];
@@ -216,11 +250,36 @@ struct icu_chain * icu_chain_create(const uint8_t * identifier,
 
 void icu_chain_destroy(struct icu_chain * chain);
 
-struct icu_chain_step * icu_chain_append_step(struct icu_chain * chain,
+struct icu_chain * icu_chain_xml_config(xmlNode *xml_node, 
+                                        UErrorCode * status);
+
+
+struct icu_chain_step * icu_chain_insert_step(struct icu_chain * chain,
                                               enum icu_chain_step_type type,
-                                              const uint8_t * rule);
+                                              const uint8_t * rule,
+                                              UErrorCode *status);
+
+
+int icu_chain_step_next_token(struct icu_chain * chain,
+                              struct icu_chain_step * step,
+                              UErrorCode *status);
+
+int icu_chain_assign_cstr(struct icu_chain * chain,
+                          const char * src8cstr, 
+                          UErrorCode *status);
+
+int icu_chain_next_token(struct icu_chain * chain,
+                         UErrorCode *status);
+
+int icu_chain_get_token_count(struct icu_chain * chain);
+
+const char * icu_chain_get_display(struct icu_chain * chain);
+
+const char * icu_chain_get_norm(struct icu_chain * chain);
+
+const char * icu_chain_get_sort(struct icu_chain * chain);
+
 
-void icu_chain_step_destroy(struct icu_chain_step * step);