1 // $Id: UnicodeToIso5426.java,v 1.3 2008/10/17 06:47:06 haschart Exp $
\r
3 * Copyright (C) 2002 Bas Peters (mail@bpeters.com)
\r
5 * This file is part of MARC4J
\r
7 * MARC4J is free software; you can redistribute it and/or
\r
8 * modify it under the terms of the GNU Lesser General Public
\r
9 * License as published by the Free Software Foundation; either
\r
10 * version 2.1 of the License, or (at your option) any later version.
\r
12 * MARC4J is distributed in the hope that it will be useful,
\r
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
15 * Lesser General Public License for more details.
\r
17 * You should have received a copy of the GNU Lesser General Public
\r
18 * License along with MARC4J; if not, write to the Free Software
\r
19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
21 package org.marc4j.converter.impl;
\r
23 import org.marc4j.converter.CharConverter;
\r
27 * A utility to convert UCS/Unicode data to UNIMARC (ISO 5426 charset).
\r
30 * @author Bas Peters
\r
31 * @author Yves Pratter
\r
32 * @version $Revision: 1.3 $
\r
34 public class UnicodeToIso5426 extends CharConverter {
\r
38 * Converts UCS/Unicode data to UNIMARC (ISO 5426 charset).
\r
42 * A question mark (0x3F) is returned if there is no match.
\r
45 * @param data - the UCS/Unicode data in an array of char
\r
46 * @return {@link String} - the UNIMARC (ISO 5426 charset) data
\r
48 public String convert(char data[]) {
\r
49 StringBuffer sb = new StringBuffer();
\r
50 for (int i = 0; i < data.length; i++) {
\r
57 sb.append((char) d);
\r
59 sb.append((char) (d / 256));
\r
60 sb.append((char) (d % 256));
\r
64 return sb.toString();
\r
67 private int convert(int i) {
\r
70 return 0xA4; // 2/4 dollar sign
\r
72 return 0xD820; // underline
\r
74 return 0xA1; // 2/1 inverted exclamation mark
\r
76 return 0xA3; // 2/3 pound sign
\r
78 return 0xA5; // 2/5 yen sign
\r
80 return 0xA7; // 2/7 paragraph (section)
\r
82 return 0xC820; // diaeresis
\r
84 return 0xAD; // 2/13 copyright sign
\r
86 return 0xAB; // 2/11 left angle quotation mark
\r
88 return 0xAF; // 2/15 trade mark sign
\r
90 return 0xB7; // 3/7 middle dot
\r
92 return 0xD020; // cedilla
\r
94 return 0xBB; // 3/11 right angle quotation mark
\r
96 return 0xBF; // 3/15 inverted question mark
\r
98 return 0xC141; // CAPITAL A WITH GRAVE ACCENT
\r
100 return 0xC241; // CAPITAL A WITH ACUTE ACCENT
\r
102 return 0xC341; // CAPITAL A WITH CIRCUMFLEX ACCENT
\r
104 return 0xC441; // CAPITAL A WITH TILDE
\r
106 return 0xC841; // CAPITAL A WITH DIAERESIS
\r
108 return 0xCA41; // CAPITAL A WITH RING ABOVE
\r
110 return 0xE1; // 6/1 CAPITAL DIPHTHONG A WITH E
\r
112 return 0xD043; // CAPITAL C WITH CEDILLA
\r
114 return 0xC145; // CAPITAL E WITH GRAVE ACCENT
\r
116 return 0xC245; // CAPITAL E WITH ACUTE ACCENT
\r
118 return 0xC345; // CAPITAL E WITH CIRCUMFLEX ACCENT
\r
120 return 0xC845; // CAPITAL E WITH DIAERESIS
\r
122 return 0xC149; // CAPITAL I WITH GRAVE ACCENT
\r
124 return 0xC249; // CAPITAL I WITH ACUTE ACCENT
\r
126 return 0xC349; // CAPITAL I WITH CIRCUMFLEX ACCENT
\r
128 return 0xC849; // CAPITAL I WITH DIAERESIS
\r
130 return 0xC44E; // CAPITAL N WITH TILDE
\r
132 return 0xC14F; // CAPITAL O WITH GRAVE ACCENT
\r
134 return 0xC24F; // CAPITAL O WITH ACUTE ACCENT
\r
136 return 0xC34F; // CAPITAL O WITH CIRCUMFLEX ACCENT
\r
138 return 0xC44F; // CAPITAL O WITH TILDE
\r
140 return 0xC84F; // CAPITAL O WITH DIAERESIS
\r
142 return 0xE9; // 6/9 CAPITAL LETTER O WITH SOLIDUS [oblique stroke]
\r
144 return 0xC155; // CAPITAL U WITH GRAVE ACCENT
\r
146 return 0xC255; // CAPITAL U WITH ACUTE ACCENT
\r
148 return 0xC355; // CAPITAL U WITH CIRCUMFLEX
\r
150 return 0xC855; // CAPITAL U WITH DIAERESIS
\r
152 return 0xC259; // CAPITAL Y WITH ACUTE ACCENT
\r
154 return 0xEC; // 6/12 CAPITAL LETTER THORN
\r
156 return 0xFB; // 7/11 small letter sharp s
\r
158 return 0xC161; // small a with grave accent
\r
160 return 0xC261; // small a with acute accent
\r
162 return 0xC361; // small a with circumflex accent
\r
164 return 0xC461; // small a with tilde
\r
166 return 0xC861; // small a with diaeresis
\r
168 return 0xCA61; // small a with ring above
\r
170 return 0xF1; // 7/1 small diphthong a with e
\r
172 return 0xD063; // small c with cedilla
\r
174 return 0xC165; // small e with grave accent
\r
176 return 0xC265; // small e with acute accent
\r
178 return 0xC365; // small e with circumflex accent
\r
180 return 0xC865; // small e with diaeresis
\r
182 return 0xC169; // small i with grave accent
\r
184 return 0xC269; // small i with acute accent
\r
186 return 0xC369; // small i with circumflex accent
\r
188 return 0xC869; // small i with diaeresis
\r
190 return 0xC46E; // small n with tilde
\r
192 return 0xC16F; // small o with grave accent
\r
194 return 0xC26F; // small o with acute accent
\r
196 return 0xC36F; // small o with circumflex accent
\r
198 return 0xC46F; // small o with tilde
\r
200 return 0xC86F; // small o with diaeresis
\r
202 return 0xF9; // 7/9 small letter o with solidus (oblique stroke)
\r
204 return 0xC175; // small u with grave accent
\r
206 return 0xC275; // small u with acute accent
\r
208 return 0xC375; // small u with circumflex
\r
210 return 0xC875; // small u with diaeresis
\r
212 return 0xC279; // small y with acute accent
\r
214 return 0xFC; // 7/12 small letter thorn
\r
216 return 0xC879; // small y with diaeresis
\r
218 return 0xC541; // CAPITAL A WITH MACRON
\r
220 return 0xC561; // small a with macron
\r
222 return 0xC641; // CAPITAL A WITH BREVE
\r
224 return 0xC661; // small a with breve
\r
226 return 0xD341; // CAPITAL A WITH OGONEK
\r
228 return 0xD361; // small a with ogonek
\r
230 return 0xC243; // CAPITAL C WITH ACUTE ACCENT
\r
232 return 0xC263; // small c with acute accent
\r
234 return 0xC343; // CAPITAL C WITH CIRCUMFLEX
\r
236 return 0xC363; // small c with circumflex
\r
238 return 0xC743; // CAPITAL C WITH DOT ABOVE
\r
240 return 0xC763; // small c with dot above
\r
242 return 0xCF43; // CAPITAL C WITH CARON
\r
244 return 0xCF63; // small c with caron
\r
246 return 0xCF44; // CAPITAL D WITH CARON
\r
248 return 0xCF64; // small d with caron
\r
250 return 0xE2; // 6/2 CAPITAL LETTER D WITH STROKE
\r
252 return 0xC545; // CAPITAL E WITH MACRON
\r
254 return 0xC565; // small e with macron
\r
256 return 0xC645; // CAPITAL E WITH BREVE
\r
258 return 0xC665; // small e with breve
\r
260 return 0xC745; // CAPITAL E WITH DOT ABOVE
\r
262 return 0xC765; // small e with dot above
\r
264 return 0xD345; // CAPITAL E WITH OGONEK
\r
266 return 0xD365; // small e with ogonek
\r
268 return 0xCF45; // CAPITAL E WITH CARON
\r
270 return 0xCF65; // small e with caron
\r
272 return 0xC347; // CAPITAL G WITH CIRCUMFLEX
\r
274 return 0xC367; // small g with circumflex
\r
276 return 0xC647; // CAPITAL G WITH BREVE
\r
278 return 0xC667; // small g with breve
\r
280 return 0xC747; // CAPITAL G WITH DOT ABOVE
\r
282 return 0xC767; // small g with dot above
\r
284 return 0xD047; // CAPITAL G WITH CEDILLA
\r
286 return 0xD067; // small g with cedilla
\r
288 return 0xC348; // CAPITAL H WITH CIRCUMFLEX
\r
290 return 0xC368; // small h with circumflex
\r
292 return 0xC449; // CAPITAL I WITH TILDE
\r
294 return 0xC469; // small i with tilde
\r
296 return 0xC549; // CAPITAL I WITH MACRON
\r
298 return 0xC569; // small i with macron
\r
300 return 0xC649; // CAPITAL I WITH BREVE
\r
302 return 0xC669; // small i with breve
\r
304 return 0xD349; // CAPITAL I WITH OGONEK
\r
306 return 0xD369; // small i with ogonek
\r
308 return 0xC749; // CAPITAL I WITH DOT ABOVE
\r
310 return 0xF5; // 7/5 small letter i without dot
\r
312 return 0xE6; // 6/6 CAPITAL LETTER IJ
\r
314 return 0xF6; // 7/6 small letter ij
\r
316 return 0xC34A; // CAPITAL J WITH CIRCUMFLEX
\r
318 return 0xC36A; // small j with circumflex
\r
320 return 0xD04B; // CAPITAL K WITH CEDILLA
\r
322 return 0xD06B; // small k with cedilla
\r
324 return 0xC24C; // CAPITAL L WITH ACUTE ACCENT
\r
326 return 0xC26C; // small l with acute accent
\r
328 return 0xD04C; // CAPITAL L WITH CEDILLA
\r
330 return 0xD06C; // small l with cedilla
\r
332 return 0xCF4C; // CAPITAL L WITH CARON
\r
334 return 0xCF6C; // small l with caron
\r
336 return 0xE8; // 6/8 CAPITAL LETTER L WITH STROKE
\r
338 return 0xF8; // 7/8 small letter l with stroke
\r
340 return 0xC24E; // CAPITAL N WITH ACUTE ACCENT
\r
342 return 0xC26E; // small n with acute accent
\r
344 return 0xD04E; // CAPITAL N WITH CEDILLA
\r
346 return 0xD06E; // small n with cedilla
\r
348 return 0xCF4E; // CAPITAL N WITH CARON
\r
350 return 0xCF6E; // small n with caron
\r
352 return 0xC54F; // CAPITAL O WITH MACRON
\r
354 return 0xC56F; // small o with macron
\r
356 return 0xC64F; // CAPITAL O WITH BREVE
\r
358 return 0xC66F; // small o with breve
\r
360 return 0xCD4F; // CAPITAL O WITH DOUBLE ACUTE
\r
362 return 0xCD6F; // small o with double acute
\r
364 return 0xEA; // 6/10 CAPITAL DIPHTHONG OE
\r
366 return 0xFA; // 7/10 small diphthong oe
\r
368 return 0xC252; // CAPITAL R WITH ACUTE ACCENT
\r
370 return 0xC272; // small r with acute accent
\r
372 return 0xD052; // CAPITAL R WITH CEDILLA
\r
374 return 0xD072; // small r with cedilla
\r
376 return 0xCF52; // CAPITAL R WITH CARON
\r
378 return 0xCF72; // small r with caron
\r
380 return 0xC253; // CAPITAL S WITH ACUTE ACCENT
\r
382 return 0xC273; // small s with acute accent
\r
384 return 0xC353; // CAPITAL S WITH CIRCUMFLEX
\r
386 return 0xC373; // small s with circumflex
\r
388 return 0xD053; // CAPITAL S WITH CEDILLA
\r
390 return 0xD073; // small s with cedilla
\r
392 return 0xCF53; // CAPITAL S WITH CARON
\r
394 return 0xCF73; // small s with caron
\r
396 return 0xD054; // CAPITAL T WITH CEDILLA
\r
398 return 0xD074; // small t with cedilla
\r
400 return 0xCF54; // CAPITAL T WITH CARON
\r
402 return 0xCF74; // small t with caron
\r
404 return 0xC455; // CAPITAL U WITH TILDE
\r
406 return 0xC475; // small u with tilde
\r
408 return 0xC555; // CAPITAL U WITH MACRON
\r
410 return 0xC575; // small u with macron
\r
412 return 0xC655; // CAPITAL U WITH BREVE
\r
414 return 0xC675; // small u with breve
\r
416 return 0xCAAD; // CAPITAL U WITH RING ABOVE -- NOTE(review): low byte 0xAD is not 'U' (0x55); 0xCA55 looks intended -- confirm
\r
418 return 0xCA75; // small u with ring above
\r
420 return 0xCD55; // CAPITAL U WITH DOUBLE ACUTE
\r
422 return 0xCD75; // small u with double acute
\r
424 return 0xD355; // CAPITAL U WITH OGONEK
\r
426 return 0xD375; // small u with ogonek
\r
428 return 0xC357; // CAPITAL W WITH CIRCUMFLEX
\r
430 return 0xC377; // small w with circumflex
\r
432 return 0xC359; // CAPITAL Y WITH CIRCUMFLEX
\r
434 return 0xC379; // small y with circumflex
\r
436 return 0xC859; // CAPITAL Y WITH DIAERESIS
\r
438 return 0xC25A; // CAPITAL Z WITH ACUTE ACCENT
\r
440 return 0xC27A; // small z with acute accent
\r
442 return 0xC75A; // CAPITAL Z WITH DOT ABOVE
\r
444 return 0xC77A; // small z with dot above
\r
446 return 0xCF5A; // CAPITAL Z WITH CARON
\r
448 return 0xCF7A; // small z with caron
\r
450 return 0xCE54; // LATIN CAPITAL LETTER O WITH HORN -- NOTE(review): low byte 0x54 is 'T', not 'O' (0x4F); 0xCE4F looks intended -- confirm
\r
452 return 0xCE74; // latin small letter o with horn -- NOTE(review): low byte 0x74 is 't', not 'o' (0x6F); 0xCE6F looks intended -- confirm
\r
454 return 0xCE55; // LATIN CAPITAL LETTER U WITH HORN
\r
456 return 0xCE75; // latin small letter u with horn
\r
458 return 0xCF41; // CAPITAL A WITH CARON
\r
460 return 0xCF61; // small a with caron
\r
462 return 0xCF49; // CAPITAL I WITH CARON
\r
464 return 0xCF69; // small i with caron
\r
466 return 0xCF4F; // CAPITAL O WITH CARON
\r
468 return 0xCF6F; // small o with caron
\r
470 return 0xCF55; // CAPITAL U WITH CARON
\r
472 return 0xCF75; // small u with caron
\r
474 return 0xC5E1; // CAPITAL AE WITH MACRON
\r
476 return 0xC5F1; // small ae with macron
\r
478 return 0xCF47; // CAPITAL G WITH CARON
\r
480 return 0xCF67; // small g with caron
\r
482 return 0xCF4B; // CAPITAL K WITH CARON
\r
484 return 0xCF6B; // small k with caron
\r
486 return 0xD34F; // CAPITAL O WITH OGONEK
\r
488 return 0xD36F; // small o with ogonek
\r
490 return 0xCF6A; // small j with caron
\r
492 return 0xC247; // CAPITAL G WITH ACUTE
\r
494 return 0xC267; // small g with acute
\r
496 return 0xC2E1; // CAPITAL AE WITH ACUTE
\r
498 return 0xC2F1; // small ae with acute
\r
500 return 0xBD; // 3/13 mjagkij znak
\r
502 return 0xBE; // 3/14 tverdyj znak
\r
504 return 0xDA20; // small low vertical bar
\r
506 return 0xD320; // ogonek
\r
508 return 0xB1; // 3/1 alif/hamzah [alef with hamza above]
\r
510 return 0xB0; // 3/0 ayn [ain]
\r
512 return 0xD441; // CAPITAL A WITH RING BELOW
\r
514 return 0xD461; // small a with ring below
\r
516 return 0xC742; // CAPITAL B WITH DOT ABOVE
\r
518 return 0xC762; // small b with dot above
\r
520 return 0xD642; // CAPITAL B WITH DOT BELOW
\r
522 return 0xD662; // small b with dot below
\r
524 return 0xC744; // CAPITAL D WITH DOT ABOVE
\r
526 return 0xC764; // small d with dot above
\r
528 return 0xD644; // CAPITAL D WITH DOT BELOW
\r
530 return 0xD664; // small d with dot below
\r
532 return 0xD044; // CAPITAL D WITH CEDILLA
\r
534 return 0xD064; // small d with cedilla
\r
536 return 0xC746; // CAPITAL F WITH DOT ABOVE
\r
538 return 0xC766; // small f with dot above
\r
540 return 0xC547; // CAPITAL G WITH MACRON
\r
542 return 0xC567; // small g with macron
\r
544 return 0xC748; // CAPITAL H WITH DOT ABOVE
\r
546 return 0xC768; // small h with dot above
\r
548 return 0xD648; // CAPITAL H WITH DOT BELOW
\r
550 return 0xD668; // small h with dot below
\r
552 return 0xC848; // CAPITAL H WITH DIAERESIS
\r
554 return 0xC868; // small h with diaeresis
\r
556 return 0xD048; // CAPITAL H WITH CEDILLA
\r
558 return 0xD068; // small h with cedilla
\r
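560 return 0xF948; // CAPITAL H WITH BREVE BELOW -- NOTE(review): 0xF9 is also returned on its own for small o with solidus; verify the combining byte for breve below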
\r
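562 return 0xF968; // small h with breve below -- NOTE(review): 0xF9 is also returned on its own for small o with solidus; verify the combining byte for breve below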
\r
564 return 0xC24B; // CAPITAL K WITH ACUTE
\r
566 return 0xC26B; // small k with acute
\r
568 return 0xD64B; // CAPITAL K WITH DOT BELOW
\r
570 return 0xD66B; // small k with dot below
\r
572 return 0xD64C; // CAPITAL L WITH DOT BELOW
\r
574 return 0xD66C; // small l with dot below
\r
576 return 0xC24D; // CAPITAL M WITH ACUTE
\r
578 return 0xC26D; // small m with acute
\r
580 return 0xC74D; // CAPITAL M WITH DOT ABOVE
\r
582 return 0xC76D; // small m with dot above
\r
584 return 0xD64D; // CAPITAL M WITH DOT BELOW
\r
586 return 0xD66D; // small m with dot below
\r
588 return 0xC74E; // CAPITAL N WITH DOT ABOVE
\r
590 return 0xC76E; // small n with dot above
\r
592 return 0xD64E; // CAPITAL N WITH DOT BELOW
\r
594 return 0xD66E; // small n with dot below
\r
596 return 0xC250; // CAPITAL P WITH ACUTE
\r
598 return 0xC270; // small p with acute
\r
600 return 0xC750; // CAPITAL P WITH DOT ABOVE
\r
602 return 0xC770; // small p with dot above
\r
604 return 0xC752; // CAPITAL R WITH DOT ABOVE
\r
606 return 0xC772; // small r with dot above
\r
608 return 0xD652; // CAPITAL R WITH DOT BELOW
\r
610 return 0xD672; // small r with dot below
\r
612 return 0xC753; // CAPITAL S WITH DOT ABOVE
\r
614 return 0xC773; // small s with dot above
\r
616 return 0xD653; // CAPITAL S WITH DOT BELOW
\r
618 return 0xD673; // small s with dot below
\r
620 return 0xC754; // CAPITAL T WITH DOT ABOVE
\r
622 return 0xC774; // small t with dot above
\r
624 return 0xD654; // CAPITAL T WITH DOT BELOW
\r
626 return 0xD674; // small t with dot below
\r
628 return 0xD755; // CAPITAL U WITH DIAERESIS BELOW
\r
630 return 0xD775; // small u with diaeresis below
\r
632 return 0xC456; // CAPITAL V WITH TILDE
\r
634 return 0xC476; // small v with tilde
\r
636 return 0xD656; // CAPITAL V WITH DOT BELOW
\r
638 return 0xD676; // small v with dot below
\r
640 return 0xC157; // CAPITAL W WITH GRAVE
\r
642 return 0xC177; // small w with grave
\r
644 return 0xC257; // CAPITAL W WITH ACUTE
\r
646 return 0xC277; // small w with acute
\r
648 return 0xC857; // CAPITAL W WITH DIAERESIS
\r
650 return 0xC877; // small w with diaeresis
\r
652 return 0xC757; // CAPITAL W WITH DOT ABOVE
\r
654 return 0xC777; // small w with dot above
\r
656 return 0xD657; // CAPITAL W WITH DOT BELOW
\r
658 return 0xD677; // small w with dot below
\r
660 return 0xC758; // CAPITAL X WITH DOT ABOVE
\r
662 return 0xC778; // small x with dot above
\r
664 return 0xC858; // CAPITAL X WITH DIAERESIS
\r
666 return 0xC878; // small x with diaeresis
\r
668 return 0xC759; // CAPITAL Y WITH DOT ABOVE
\r
670 return 0xC779; // small y with dot above
\r
672 return 0xC35A; // CAPITAL Z WITH CIRCUMFLEX
\r
674 return 0xC37A; // small z with circumflex
\r
676 return 0xD65A; // CAPITAL Z WITH DOT BELOW
\r
678 return 0xD67A; // small z with dot below
\r
680 return 0xC874; // small t with diaeresis
\r
682 return 0xCA77; // small w with ring above
\r
684 return 0xCA79; // small y with ring above
\r
686 return 0xD641; // CAPITAL A WITH DOT BELOW
\r
688 return 0xD661; // small a with dot below
\r
690 return 0xC041; // CAPITAL A WITH HOOK ABOVE
\r
692 return 0xC061; // small a with hook above
\r
694 return 0xD645; // CAPITAL E WITH DOT BELOW
\r
696 return 0xD665; // small e with dot below
\r
698 return 0xC045; // CAPITAL E WITH HOOK ABOVE
\r
700 return 0xC065; // small e with hook above
\r
702 return 0xC445; // CAPITAL E WITH TILDE
\r
704 return 0xC465; // small e with tilde
\r
706 return 0xC049; // CAPITAL I WITH HOOK ABOVE
\r
708 return 0xC069; // small i with hook above
\r
710 return 0xD649; // CAPITAL I WITH DOT BELOW
\r
712 return 0xD669; // small i with dot below
\r
714 return 0xD64F; // CAPITAL O WITH DOT BELOW
\r
716 return 0xD66F; // small o with dot below
\r
718 return 0xC04F; // CAPITAL O WITH HOOK ABOVE
\r
720 return 0xC06F; // small o with hook above
\r
722 return 0xD655; // CAPITAL U WITH DOT BELOW
\r
724 return 0xD675; // small u with dot below
\r
726 return 0xC055; // CAPITAL U WITH HOOK ABOVE
\r
728 return 0xC075; // small u with hook above
\r
730 return 0xC159; // CAPITAL Y WITH GRAVE
\r
732 return 0xC179; // small y with grave
\r
734 return 0xD659; // CAPITAL Y WITH DOT BELOW
\r
736 return 0xD679; // small y with dot below
\r
738 return 0xC059; // CAPITAL Y WITH HOOK ABOVE
\r
740 return 0xC079; // small y with hook above
\r
742 return 0xC459; // CAPITAL Y WITH TILDE
\r
744 return 0xC479; // small y with tilde
\r
746 return 0xD920; // double underline
\r
748 return 0xA9; // 2/9 left high single quotation mark
\r
749 // case 0x2018: return 0xB2; // 3/2 left low single quotation mark
\r
751 return 0xB9; // 3/9 right high single quotation mark
\r
753 return 0xA2; // 2/2 left low double quotation mark
\r
754 // case 0x201C: return 0xAA; // 2/10 left high double quotation mark
\r
756 return 0xBA; // 3/10 right high double quotation mark
\r
758 return 0xA6; // 2/6 single dagger
\r
760 return 0xB6; // 3/6 double dagger
\r
762 return 0xA8; // 2/8 prime
\r
764 return 0xB8; // 3/8 double prime
\r
766 return 0xAE; // 2/14 sound recording copyright sign
\r
768 return 0xAC; // 2/12 music flat
\r
770 return 0xBC; // 3/12 musical sharp
\r
773 return 0x3F; // if no match, return question mark
\r