1 /* $Id: icu_I18N.c,v 1.6 2007-05-07 12:18:34 marc Exp $
2 Copyright (c) 2006-2007, Index Data.
4 This file is part of Pazpar2.
6 Pazpar2 is free software; you can redistribute it and/or modify it under
7 the terms of the GNU General Public License as published by the Free
8 Software Foundation; either version 2, or (at your option) any later
11 Pazpar2 is distributed in the hope that it will be useful, but WITHOUT ANY
12 WARRANTY; without even the implied warranty of MERCHANTABILITY or
13 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 You should have received a copy of the GNU General Public License
17 along with Pazpar2; see the file LICENSE. If not, write to the
18 Free Software Foundation, 59 Temple Place - Suite 330, Boston, MA
28 #include <yaz/timing.h>
41 #include <unicode/ustring.h> /* some more string fcns*/
42 #include <unicode/uchar.h> /* char names */
45 //#include <unicode/ustdio.h>
46 //#include <unicode/utypes.h> /* Basic ICU data types */
47 #include <unicode/ucol.h>
48 //#include <unicode/ucnv.h> /* C Converter API */
49 //#include <unicode/uloc.h>
50 //#include <unicode/ubrk.h>
51 /* #include <unicode/unistr.h> */
// Inspect an ICU status code and report it when it signals a failure.
// When U_SUCCESS(status) is false, the numeric code and the symbolic name
// from u_errorName() are formatted with "ICU: %d %s\n" for a diagnostic.
// NOTE(review): callers elsewhere in this file compare the result against
// U_ZERO_ERROR, so the function presumably hands `status` back -- TODO confirm.
56 int icu_check_status (UErrorCode status)
58 //if(U_FAILURE(status))
// In ICU, !U_SUCCESS(x) and U_FAILURE(x) select the same codes; warning
// codes count as success and are therefore not reported.
59 if(!U_SUCCESS(status))
61 "ICU: %d %s\n", status, u_errorName(status));
// Allocate a UTF-16 buffer object.
// The struct itself and an array of `capacity` UChar elements both come
// from malloc(); the first element is set to 0 (empty string) and the
// requested capacity is recorded in utf16_cap.
67 struct icu_buf_utf16 * icu_buf_utf16_create(size_t capacity)
69 struct icu_buf_utf16 * buf16
70 = (struct icu_buf_utf16 *) malloc(sizeof(struct icu_buf_utf16));
77 buf16->utf16 = (UChar *) malloc(sizeof(UChar) * capacity);
// NOTE(review): the malloc() result above is written through immediately,
// without a NULL check; a failed allocation would crash here. The write
// also needs capacity >= 1 -- TODO confirm a guard covers capacity == 0.
78 buf16->utf16[0] = (UChar) 0;
79 buf16->utf16_cap = capacity;
// Change the storage size of an existing UTF-16 buffer object.
// If no UChar storage is attached yet it is obtained with malloc();
// the other visible allocation path uses realloc() on the current storage.
// Afterwards the first element is reset to 0 and utf16_cap is updated, so
// callers should treat the previous string content as discarded.
85 struct icu_buf_utf16 * icu_buf_utf16_resize(struct icu_buf_utf16 * buf16,
90 if (0 == buf16->utf16)
91 buf16->utf16 = (UChar *) malloc(sizeof(UChar) * capacity);
// NOTE(review): neither allocation result is checked before the write to
// utf16[0] below; if the realloc() result is stored straight back into
// buf16->utf16, a failure also loses the old block -- TODO confirm.
94 = (UChar *) realloc(buf16->utf16, sizeof(UChar) * capacity);
95 buf16->utf16[0] = (UChar) 0;
97 buf16->utf16_cap = capacity;
// Alternative path: length and capacity are both zeroed (presumably the
// branch taken when the requested capacity is 0 -- TODO confirm).
103 buf16->utf16_len = 0;
104 buf16->utf16_cap = 0;
// Dispose of a UTF-16 buffer object; nothing is returned to the caller.
// NOTE(review): presumably the counterpart of icu_buf_utf16_create() that
// releases both the UChar storage and the struct -- TODO confirm.
112 void icu_buf_utf16_destroy(struct icu_buf_utf16 * buf16)
// Allocate a UTF-8 (byte) buffer object.
// The struct itself and an array of `capacity` uint8_t elements both come
// from malloc(); the first byte is set to 0 (empty string) and the
// requested capacity is recorded in utf8_cap.
126 struct icu_buf_utf8 * icu_buf_utf8_create(size_t capacity)
128 struct icu_buf_utf8 * buf8
129 = (struct icu_buf_utf8 *) malloc(sizeof(struct icu_buf_utf8));
136 buf8->utf8 = (uint8_t *) malloc(sizeof(uint8_t) * capacity);
// NOTE(review): the malloc() result above is written through immediately,
// without a NULL check; a failed allocation would crash here. The write
// also needs capacity >= 1 -- TODO confirm a guard covers capacity == 0.
137 buf8->utf8[0] = (uint8_t) 0;
138 buf8->utf8_cap = capacity;
// Change the storage size of an existing UTF-8 buffer object.
// One visible path obtains fresh storage with malloc(), the other resizes
// the current storage with realloc(). Afterwards the first byte is reset
// to 0 and utf8_cap is updated, so the previous content is not preserved
// as a string.
145 struct icu_buf_utf8 * icu_buf_utf8_resize(struct icu_buf_utf8 * buf8,
151 buf8->utf8 = (uint8_t *) malloc(sizeof(uint8_t) * capacity);
// NOTE(review): neither allocation result is checked before the write to
// utf8[0] below; if the realloc() result is stored straight back into
// buf8->utf8, a failure also loses the old block -- TODO confirm.
154 = (uint8_t *) realloc(buf8->utf8, sizeof(uint8_t) * capacity);
155 buf8->utf8[0] = (uint8_t) 0;
157 buf8->utf8_cap = capacity;
// Dispose of a UTF-8 buffer object; nothing is returned to the caller.
// NOTE(review): presumably the counterpart of icu_buf_utf8_create() that
// releases both the byte storage and the struct -- TODO confirm.
173 void icu_buf_utf8_destroy(struct icu_buf_utf8 * buf8)
// Convert the UTF-8 bytes held in src8 (utf8 / utf8_len) into UTF-16 stored
// in dest16, reporting the ICU outcome through *status.
//  - The first u_strFromUTF8() call writes into the existing dest16 storage.
//  - On U_BUFFER_OVERFLOW_ERROR dest16 is resized to twice utf16_len,
//    *status is cleared and the conversion is attempted once more.
//  - dest16->utf16_len is only updated when the final status is a success
//    code and the result is strictly shorter than the capacity; the other
//    visible path resets dest16 to an empty string.
// NOTE(review): utf16_len is presumably the length reported back by
// u_strFromUTF8() (its pDestLength argument) -- TODO confirm.
184 UErrorCode icu_utf16_from_utf8(struct icu_buf_utf16 * dest16,
185 struct icu_buf_utf8 * src8,
188 int32_t utf16_len = 0;
190 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
192 (const char *) src8->utf8, src8->utf8_len, status);
194 // check for buffer overflow, resize and retry
195 if (*status == U_BUFFER_OVERFLOW_ERROR
196 //|| dest16->utf16_len > dest16->utf16_cap
// Doubling the reported length leaves headroom, so a single retry suffices.
198 icu_buf_utf16_resize(dest16, utf16_len * 2);
199 *status = U_ZERO_ERROR;
200 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
202 (const char *) src8->utf8, src8->utf8_len, status);
205 //if (*status != U_BUFFER_OVERFLOW_ERROR
206 if (U_SUCCESS(*status)
// Strict '<': a result that fills the whole capacity leaves no room for a
// terminating zero (ICU then only issues a warning, which still counts as
// success), so such a result is not accepted.
207 && utf16_len < dest16->utf16_cap)
208 dest16->utf16_len = utf16_len;
210 dest16->utf16[0] = (UChar) 0;
211 dest16->utf16_len = 0;
// Convert a NUL-terminated UTF-8 C string into UTF-16 stored in dest16,
// reporting the ICU outcome through *status.
// Same flow as the struct-to-struct conversion: one u_strFromUTF8() call,
// a single resize-and-retry on U_BUFFER_OVERFLOW_ERROR, then either the new
// length is recorded or dest16 is reset to an empty string.
// src8cstr must not be NULL: its length is taken with strlen() up front.
219 UErrorCode icu_utf16_from_utf8_cstr(struct icu_buf_utf16 * dest16,
220 const char * src8cstr,
223 size_t src8cstr_len = 0;
224 int32_t utf16_len = 0;
226 src8cstr_len = strlen(src8cstr);
228 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
230 src8cstr, src8cstr_len, status);
232 // check for buffer overflow, resize and retry
233 if (*status == U_BUFFER_OVERFLOW_ERROR
234 //|| dest16->utf16_len > dest16->utf16_cap
// NOTE(review): utf16_len is presumably the required length reported by
// the failed call above -- TODO confirm.
236 icu_buf_utf16_resize(dest16, utf16_len * 2);
237 *status = U_ZERO_ERROR;
238 u_strFromUTF8(dest16->utf16, dest16->utf16_cap,
240 src8cstr, src8cstr_len, status);
243 // if (*status != U_BUFFER_OVERFLOW_ERROR
244 if (U_SUCCESS(*status)
// Strict '<' keeps one UChar free for the terminating zero.
245 && utf16_len < dest16->utf16_cap)
246 dest16->utf16_len = utf16_len;
248 dest16->utf16[0] = (UChar) 0;
249 dest16->utf16_len = 0;
// Convert the UTF-16 text held in src16 (utf16 / utf16_len) into UTF-8
// bytes stored in dest8, reporting the ICU outcome through *status.
//  - The first u_strToUTF8() call writes into the existing dest8 storage.
//  - On U_BUFFER_OVERFLOW_ERROR dest8 is resized to twice utf8_len,
//    *status is cleared and the conversion is attempted once more.
//  - dest8->utf8_len is only updated when the final status is a success
//    code and the result is strictly shorter than the capacity; the other
//    visible path writes a 0 byte at the start of dest8.
// NOTE(review): utf8_len is presumably the length reported back by
// u_strToUTF8() (its pDestLength argument) -- TODO confirm.
258 UErrorCode icu_utf16_to_utf8(struct icu_buf_utf8 * dest8,
259 struct icu_buf_utf16 * src16,
262 int32_t utf8_len = 0;
264 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
266 src16->utf16, src16->utf16_len, status);
268 // check for buffer overflow, resize and retry
269 if (*status == U_BUFFER_OVERFLOW_ERROR
270 //|| dest8->utf8_len > dest8->utf8_cap
272 icu_buf_utf8_resize(dest8, utf8_len * 2);
273 *status = U_ZERO_ERROR;
274 u_strToUTF8((char *) dest8->utf8, dest8->utf8_cap,
276 src16->utf16, src16->utf16_len, status);
280 //if (*status != U_BUFFER_OVERFLOW_ERROR
281 if (U_SUCCESS(*status)
// Strict '<' keeps one byte free for the terminating zero.
282 && utf8_len < dest8->utf8_cap)
283 dest8->utf8_len = utf8_len;
285 dest8->utf8[0] = (uint8_t) 0;
// Build the collation sort key for the UTF-16 text in src16 with collator
// `coll` and store the key bytes in dest8.
// ucol_getSortKey() returns the number of bytes the complete key needs, so
// a return value above the current capacity means the key was truncated:
// dest8 is then resized to twice that size and the key generated again.
// NOTE(review): ucol_getSortKey() takes no UErrorCode argument, so the
// U_SUCCESS(*status) test below appears to reflect only the value the
// caller passed in -- TODO confirm.
295 UErrorCode icu_sortkey8_from_utf16(UCollator *coll,
296 struct icu_buf_utf8 * dest8,
297 struct icu_buf_utf16 * src16,
301 int32_t sortkey_len = 0;
303 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
304 dest8->utf8, dest8->utf8_cap);
306 // check for buffer overflow, resize and retry
307 if (sortkey_len > dest8->utf8_cap) {
308 icu_buf_utf8_resize(dest8, sortkey_len * 2);
309 sortkey_len = ucol_getSortKey(coll, src16->utf16, src16->utf16_len,
310 dest8->utf8, dest8->utf8_cap);
313 if (U_SUCCESS(*status)
315 dest8->utf8_len = sortkey_len;
// NOTE(review): the cast names UChar although the buffer holds uint8_t
// elements; the stored value is 0 either way.
317 dest8->utf8[0] = (UChar) 0;
328 /// OBSOLETE CODE FOLLOWING HERE: older raw-buffer variants of the helpers above ...
332 // forward declarations for helper functions
// NOTE(review): icu_utf16_from_utf8 and icu_utf16_to_utf8 are declared here
// with raw UChar * and char * signatures, which differ from the
// struct-buffer definitions of the same names earlier in this file; both
// sets cannot be compiled together, so this section is presumably disabled
// by the preprocessor -- TODO confirm.
334 int icu_check_status (UErrorCode status);
// UTF-8 C string -> UTF-16 into a caller-supplied UChar buffer.
336 UChar* icu_utf16_from_utf8(UChar *utf16,
// Same conversion with an explicit UTF-8 length.
341 UChar* icu_utf16_from_utf8n(UChar *utf16,
// UTF-16 -> UTF-8 into a caller-supplied char buffer.
348 char* icu_utf16_to_utf8(char *utf8,
// Case mapping of UTF-16 text; `action` selects the mapping and the
// mapped length is returned as int32_t.
355 int32_t icu_utf16_casemap(UChar *dest16, int32_t dest16_cap,
356 const UChar *src16, int32_t src16_len,
357 const char *locale, char action);
360 // source code of all functions
// Report a failing ICU status code.
// For any status where U_SUCCESS() is false, the numeric value and the
// u_errorName() text are formatted with "ICU: %d %s\n".
// NOTE(review): the callers below test the result with
// `U_ZERO_ERROR != icu_check_status(status)`, so the function presumably
// returns `status` itself -- TODO confirm.
362 int icu_check_status (UErrorCode status)
364 //if(U_FAILURE(status))
365 if(!U_SUCCESS(status))
367 "ICU: %d %s\n", status, u_errorName(status));
// Convenience wrapper for NUL-terminated input: measures the UTF-8 string
// with strlen() and delegates the conversion to icu_utf16_from_utf8n(),
// returning whatever that function returns.
// The utf8 argument must not be NULL because of the strlen() call.
372 UChar* icu_utf16_from_utf8(UChar *utf16,
377 size_t utf8_len = strlen(utf8);
378 return icu_utf16_from_utf8n(utf16, utf16_cap, utf16_len,
// Convert utf8_len bytes of UTF-8 into the caller-supplied UChar buffer
// `utf16` (capacity utf16_cap) with u_strFromUTF8(); the resulting length
// is written through utf16_len by ICU.
// A local UErrorCode is used and passed to icu_check_status(); any result
// other than U_ZERO_ERROR takes the error branch.
383 UChar* icu_utf16_from_utf8n(UChar *utf16,
389 UErrorCode status = U_ZERO_ERROR;
// utf8_len is narrowed to int32_t for the ICU call.
390 u_strFromUTF8(utf16, utf16_cap, utf16_len, utf8, (int32_t) utf8_len,
// NOTE(review): ICU warning codes are negative and also differ from
// U_ZERO_ERROR; whether they reach this branch depends on what
// icu_check_status() returns -- TODO confirm.
392 if (U_ZERO_ERROR != icu_check_status(status))
// Convert utf16_len UChars of UTF-16 into the caller-supplied char buffer
// `utf8` (capacity utf8_cap) with u_strToUTF8(), using a local UErrorCode
// that is then passed to icu_check_status().
399 char* icu_utf16_to_utf8(char *utf8,
405 UErrorCode status = U_ZERO_ERROR;
// NOTE(review): utf8_len is cast to int32_t * before ICU writes through
// it, and the caller in this file passes a pointer cast to size_t *. If
// the pointed-to object is wider than int32_t, only part of it gets
// written -- TODO confirm the declared types.
406 u_strToUTF8(utf8, (int32_t) utf8_cap, (int32_t *)utf8_len,
407 utf16, utf16_len, &status);
408 if (U_ZERO_ERROR != icu_check_status(status))
// Apply one case mapping to UTF-16 text.
// Four ICU mappings are available in the body: u_strToLower(),
// u_strToUpper(), u_strToTitle() and u_strFoldCase() (with
// U_FOLD_CASE_DEFAULT); each stores its result length in dest16_len.
// NOTE(review): `action` presumably selects which of the four runs; the
// selector values are not shown here -- TODO confirm.
415 int32_t icu_utf16_casemap(UChar *dest16, int32_t dest16_cap,
416 const UChar *src16, int32_t src16_len,
417 const char *locale, char action)
419 UErrorCode status = U_ZERO_ERROR;
420 int32_t dest16_len = 0;
424 dest16_len = u_strToLower(dest16, dest16_cap, src16, src16_len,
428 dest16_len = u_strToUpper(dest16, dest16_cap, src16, src16_len,
432 dest16_len = u_strToTitle(dest16, dest16_cap, src16, src16_len,
436 dest16_len = u_strFoldCase(dest16, dest16_cap, src16, src16_len,
437 U_FOLD_CASE_DEFAULT, &status);
// The status from the chosen mapping goes through icu_check_status(); a
// result other than U_ZERO_ERROR takes the error branch.
445 if (U_ZERO_ERROR != icu_check_status(status))
// Case-map the UTF-8 string src8 using the caller-supplied scratch buffer
// `buf`: UTF-8 -> UTF-16, case mapping via icu_utf16_casemap(), UTF-16 ->
// UTF-8, then a copy of the result is made in `nmem` with nmem_strdup().
// The length held in buf_len after the last step is stored in *dest8_len.
452 char * icu_casemap(NMEM nmem, char *buf, size_t buf_cap,
453 size_t *dest8_len, const char *src8,
454 const char *locale, char action)
// NOTE(review): strlen() runs before any argument check, so src8 must not
// be NULL.
456 size_t src8_len = strlen(src8);
// Guard: missing buffer, zero capacity or empty input.
463 if (!buf || !(buf_cap > 0) || !src8_len)
466 // converting buf to utf16
// NOTE(review): buf is reinterpreted as UChar * while buf_cap is passed on
// unchanged as the capacity; if buf_cap counts bytes, the UTF-16 capacity
// is overstated by a factor of sizeof(UChar) -- TODO confirm.
467 buf = (char *)icu_utf16_from_utf8n((UChar *) buf,
468 (int32_t) buf_cap, &buf_len,
// Case mapping runs in place: buf is both the source and the destination.
472 buf_len = (size_t) icu_utf16_casemap((UChar *)buf, (int32_t) buf_cap,
473 (const UChar *)buf, (int32_t) buf_len,
476 // converting buf to utf8
// NOTE(review): this conversion also reads from and writes to the same
// memory -- TODO confirm that overlap is acceptable for u_strToUTF8().
477 buf = icu_utf16_to_utf8(buf, buf_cap, (size_t *) &buf_len,
478 (const UChar *) buf, (int32_t) buf_len);
481 // copying out to nmem
485 *dest8_len = buf_len;
487 dest8 = nmem_strdup(nmem, buf);
// Create a struct icu_termmap whose storage is obtained from the `nmem`
// allocator via nmem_malloc(), sized for exactly one struct.
492 struct icu_termmap * icu_termmap_create(NMEM nmem)
494 struct icu_termmap *itmp = nmem_malloc(nmem, sizeof(*itmp));
// Comparison callback for arrays whose elements are `struct icu_termmap *`.
// Each argument is the address of one array element, hence the extra level
// of indirection. Returns the strcmp() result of the two sort_key strings:
// negative, zero or positive.
501 int icu_termmap_cmp(const void *vp1, const void *vp2)
503 struct icu_termmap *itmp1 = *(struct icu_termmap **) vp1;
504 struct icu_termmap *itmp2 = *(struct icu_termmap **) vp2;
// strcmp() compares bytes up to the first zero byte, so sort_key must be
// a NUL-terminated byte string.
506 return strcmp(itmp1->sort_key, itmp2->sort_key);
// Produce a collation sort key for the UTF-8 string src8 using the
// caller-supplied scratch buffer `buf`: UTF-8 -> UTF-16, then a collator
// opened for `locale` writes the sort key bytes back into buf, and a copy
// is made in `nmem` with nmem_strdup(). buf_len is stored in *dest8_len.
511 char * icu_sortmap(NMEM nmem, char *buf, size_t buf_cap,
512 size_t *dest8_len, const char *src8,
// NOTE(review): strlen() runs before any argument check, so src8 must not
// be NULL.
515 size_t src8_len = strlen(src8);
// Guard: missing buffer, zero capacity or empty input.
522 if (!buf || !(buf_cap > 0) || !src8_len)
525 // converting buf to utf16
// NOTE(review): buf is reinterpreted as UChar * while buf_cap is passed on
// unchanged as the capacity; if buf_cap counts bytes, the UTF-16 capacity
// is overstated -- TODO confirm.
526 buf = (char *)icu_utf16_from_utf8n((UChar *) buf,
527 (int32_t) buf_cap, &buf_len,
531 //buf_len = (size_t) icu_utf16_casemap((UChar *)buf, (int32_t) buf_cap,
532 // (const UChar *)buf, (int32_t) buf_len,
537 UErrorCode status = U_ZERO_ERROR;
539 UCollator * coll = ucol_open (locale, &status);
540 if (U_ZERO_ERROR != icu_check_status(status))
// The sort key is generated in place: buf is both the UTF-16 source and
// the byte destination.
// NOTE(review): the key length returned by ucol_getSortKey() does not
// appear to be captured, so truncation would go unnoticed, and no matching
// ucol_close(coll) is visible -- TODO confirm both.
543 ucol_getSortKey(coll, (const UChar *) buf, (int32_t) buf_len,
544 (uint8_t *) buf, (int32_t) buf_cap);
550 // copying out to nmem
554 *dest8_len = buf_len;
// nmem_strdup() copies up to the first zero byte of buf.
556 dest8 = nmem_strdup(nmem, buf);
572 * indent-tabs-mode: nil
574 * vim: shiftwidth=4 tabstop=8 expandtab