/* This file is part of the YAZ toolkit.
- * Copyright (C) 1995-2008 Index Data
+ * Copyright (C) 1995-2009 Index Data
* See the file LICENSE for details.
*/
-struct icu_normalizer * icu_normalizer_create(const char *rules, char action,
- UErrorCode *status)
+struct icu_transform * icu_transform_create(const char *id, char action,
+ const char *rules,
+ UErrorCode *status)
{
+ struct icu_buf_utf16 *id16 = icu_buf_utf16_create(0);
+ struct icu_buf_utf16 *rules16 = icu_buf_utf16_create(0);
+
+ struct icu_transform * transform
+ = (struct icu_transform *) xmalloc(sizeof(struct icu_transform));
+
+ transform->action = action;
+ transform->trans = 0;
- struct icu_normalizer * normalizer
- = (struct icu_normalizer *) xmalloc(sizeof(struct icu_normalizer));
+ if (id)
+ icu_utf16_from_utf8_cstr(id16, id, status);
+ if (rules)
+ icu_utf16_from_utf8_cstr(rules16, rules, status);
- normalizer->action = action;
- normalizer->trans = 0;
- normalizer->rules16 = icu_buf_utf16_create(0);
- icu_utf16_from_utf8_cstr(normalizer->rules16, rules, status);
-
- switch(normalizer->action) {
+ switch(transform->action)
+ {
case 'f':
case 'F':
- normalizer->trans
- = utrans_openU(normalizer->rules16->utf16,
- normalizer->rules16->utf16_len,
+ transform->trans
+ = utrans_openU(id16->utf16,
+ id16->utf16_len,
UTRANS_FORWARD,
- 0, 0,
- &normalizer->parse_error, status);
+ rules16->utf16,
+ rules16->utf16_len,
+ &transform->parse_error, status);
break;
case 'r':
case 'R':
- normalizer->trans
- = utrans_openU(normalizer->rules16->utf16,
- normalizer->rules16->utf16_len,
+ transform->trans
+ = utrans_openU(id16->utf16,
+ id16->utf16_len,
UTRANS_REVERSE ,
- 0, 0,
- &normalizer->parse_error, status);
+ rules16->utf16,
+ rules16->utf16_len,
+ &transform->parse_error, status);
break;
default:
*status = U_UNSUPPORTED_ERROR;
- return 0;
break;
}
+ icu_buf_utf16_destroy(rules16);
+ icu_buf_utf16_destroy(id16);
if (U_SUCCESS(*status))
- return normalizer;
+ return transform;
/* freeing if failed */
- icu_normalizer_destroy(normalizer);
+ icu_transform_destroy(transform);
return 0;
}
-void icu_normalizer_destroy(struct icu_normalizer * normalizer){
- if (normalizer) {
- if (normalizer->rules16)
- icu_buf_utf16_destroy(normalizer->rules16);
- if (normalizer->trans)
- utrans_close(normalizer->trans);
- xfree(normalizer);
+void icu_transform_destroy(struct icu_transform * transform){
+ if (transform) {
+ if (transform->trans)
+ utrans_close(transform->trans);
+ xfree(transform);
}
}
-int icu_normalizer_normalize(struct icu_normalizer * normalizer,
- struct icu_buf_utf16 * dest16,
- struct icu_buf_utf16 * src16,
- UErrorCode *status)
+int icu_transform_trans(struct icu_transform * transform,
+ struct icu_buf_utf16 * dest16,
+ struct icu_buf_utf16 * src16,
+ UErrorCode *status)
{
- if (!normalizer || !normalizer->trans
+ if (!transform || !transform->trans
|| !src16
|| !dest16)
return 0;
return 0;
- utrans_transUChars (normalizer->trans,
+ utrans_transUChars (transform->trans,
dest16->utf16, &(dest16->utf16_len),
dest16->utf16_cap,
0, &(src16->utf16_len), status);
case ICU_chain_step_type_casemap:
step->u.casemap = icu_casemap_create(rule[0], status);
break;
- case ICU_chain_step_type_normalize:
- step->u.normalizer = icu_normalizer_create((char *) rule, 'f', status);
+ case ICU_chain_step_type_transform:
+ /* no rules are passed: the rule text is used as the transliterator ID */
+ step->u.transform = icu_transform_create((const char *) rule, 'f',
+ 0, status);
break;
case ICU_chain_step_type_tokenize:
step->u.tokenizer = icu_tokenizer_create((char *) chain->locale,
(char) rule[0], status);
break;
+ case ICU_chain_step_type_transliterate:
+ /* the rule text is passed as the rules; utrans_openU still takes an ID, so pass a dummy one */
+ step->u.transform = icu_transform_create("custom", 'f',
+ (const char *) rule, status);
+ break;
default:
break;
}
icu_casemap_destroy(step->u.casemap);
icu_buf_utf16_destroy(step->buf16);
break;
- case ICU_chain_step_type_normalize:
- icu_normalizer_destroy(step->u.normalizer);
+ case ICU_chain_step_type_transform:
+ case ICU_chain_step_type_transliterate:
+ icu_transform_destroy(step->u.transform);
icu_buf_utf16_destroy(step->buf16);
break;
case ICU_chain_step_type_tokenize:
step = icu_chain_insert_step(chain, ICU_chain_step_type_casemap,
(const uint8_t *) xml_rule, status);
else if (!strcmp((const char *) node->name, "transform"))
- step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
+ step = icu_chain_insert_step(chain, ICU_chain_step_type_transform,
+ (const uint8_t *) xml_rule, status);
+ else if (!strcmp((const char *) node->name, "transliterate"))
+ step = icu_chain_insert_step(chain, ICU_chain_step_type_transliterate,
(const uint8_t *) xml_rule, status);
else if (!strcmp((const char *) node->name, "tokenize"))
step = icu_chain_insert_step(chain, ICU_chain_step_type_tokenize,
{
yaz_log(YLOG_WARN, "Element %s is deprecated. "
"Use transform instead", node->name);
- step = icu_chain_insert_step(chain, ICU_chain_step_type_normalize,
+ step = icu_chain_insert_step(chain, ICU_chain_step_type_transform,
(const uint8_t *) xml_rule, status);
}
else if (!strcmp((const char *) node->name, "index")
case ICU_chain_step_type_casemap:
buf16 = icu_buf_utf16_create(0);
break;
- case ICU_chain_step_type_normalize:
+ case ICU_chain_step_type_transform:
+ case ICU_chain_step_type_transliterate:
buf16 = icu_buf_utf16_create(0);
break;
case ICU_chain_step_type_tokenize:
buf16 = icu_buf_utf16_create(0);
break;
+ break;
default:
break;
}
step->buf16, src16, status,
chain->locale);
break;
- case ICU_chain_step_type_normalize:
- icu_normalizer_normalize(step->u.normalizer,
- step->buf16, src16, status);
+ case ICU_chain_step_type_transform:
+ case ICU_chain_step_type_transliterate:
+ icu_transform_trans(step->u.transform,
+ step->buf16, src16, status);
break;
case ICU_chain_step_type_tokenize:
/* attach to new src16 token only first time during splitting */
/*
* Local variables:
* c-basic-offset: 4
+ * c-file-style: "Stroustrup"
* indent-tabs-mode: nil
* End:
* vim: shiftwidth=4 tabstop=8 expandtab
*/
+