1 // $Id: Iso5426ToUnicode.java,v 1.3 2008/10/17 06:47:06 haschart Exp $
\r
3 * Copyright (C) 2002 Bas Peters (mail@bpeters.com)
\r
4 * Copyright (C) 2002 Yves Pratter (ypratter@club-internet.fr)
\r
6 * This file is part of MARC4J
\r
8 * MARC4J is free software; you can redistribute it and/or
\r
9 * modify it under the terms of the GNU Lesser General Public
\r
10 * License as published by the Free Software Foundation; either
\r
11 * version 2.1 of the License, or (at your option) any later version.
\r
13 * MARC4J is distributed in the hope that it will be useful,
\r
14 * but WITHOUT ANY WARRANTY; without even the implied warranty of
\r
15 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
\r
16 * Lesser General Public License for more details.
\r
18 * You should have received a copy of the GNU Lesser General Public
\r
19 * License along with MARC4J; if not, write to the Free Software
\r
20 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
\r
22 package org.marc4j.converter.impl;
\r
24 import org.marc4j.converter.CharConverter;
\r
28 * A utility to convert UNIMARC data to UCS/Unicode.
\r
31 * @author Bas Peters
\r
32 * @author Yves Pratter
\r
33 * @version $Revision: 1.3 $
\r
35 public class Iso5426ToUnicode extends CharConverter {
\r
39 * Converts UNIMARC (ISO 5426 charset) data to UCS/Unicode.
\r
42 * @param data - the UNIMARC data in an array of char
\r
43 * @return {@link String} - the UCS/Unicode data
\r
45 public String convert(char data[]) {
\r
46 StringBuffer sb = new StringBuffer();
\r
48 for (int i = 0; i < data.length; i++) {
\r
50 int len = data.length;
\r
53 else if (isCombining(c) && hasNext(i, len)) {
\r
54 char d = getCombiningChar(c * 256 + data[i + 1]);
\r
59 sb.append(getChar(c));
\r
62 sb.append(getChar(c));
\r
64 return sb.toString();
\r
67 private boolean hasNext(int pos, int len) {
\r
68 if (pos < (len - 1))
\r
73 private boolean isAscii(int i) {
\r
74 if (i >= 0x00 && i <= 0x7F)
\r
79 private boolean isCombining(int i) {
\r
80 // if (i > 0xE0 && i < 0xFF)
\r
81 if (i >= 0xC0 && i <= 0xDF)
\r
86 // Source : http://www.itscj.ipsj.or.jp/ISO-IR/053.pdf
\r
87 private char getChar(int i) {
\r
90 return 0x00A1; // 2/1 inverted exclamation mark
\r
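92 return 0x201C; // 2/2 left low double quotation mark -- NOTE(review): U+201C is the left HIGH double mark (same value as 2/10); the low-9 double mark is U+201E -- confirm against ISO-IR 53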
\r
94 return 0x00A3; // 2/3 pound sign
\r
96 return 0x0024; // 2/4 dollar sign
\r
98 return 0x00A5; // 2/5 yen sign
\r
100 return 0x2020; // 2/6 single dagger
\r
102 return 0x00A7; // 2/7 paragraph (section)
\r
104 return 0x2032; // 2/8 prime
\r
106 return 0x2018; // 2/9 left high single quotation mark
\r
108 return 0x201C; // 2/10 left high double quotation mark
\r
110 return 0x00AB; // 2/11 left angle quotation mark
\r
112 return 0x266D; // 2/12 music flat
\r
114 return 0x00A9; // 2/13 copyright sign
\r
116 return 0x2117; // 2/14 sound recording copyright sign
\r
118 return 0x00AE; // 2/15 trade mark sign
\r
121 return 0x0639; // 3/0 ayn [ain]
\r
123 return 0x0623; // 3/1 alif/hamzah [alef with hamza above]
\r
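125 return 0x2018; // 3/2 left low single quotation mark -- NOTE(review): U+2018 is the left HIGH single mark (same value as 2/9); the low-9 single mark is U+201A -- confirm against ISO-IR 53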
\r
126 // 3/3 (this position shall not be used)
\r
127 // 3/4 (this position shall not be used)
\r
128 // 3/5 (this position shall not be used)
\r
130 return 0x2021; // 3/6 double dagger
\r
132 return 0x00B7; // 3/7 middle dot
\r
134 return 0x2033; // 3/8 double prime
\r
136 return 0x2019; // 3/9 right high single quotation mark
\r
138 return 0x201D; // 3/10 right high double quotation mark
\r
140 return 0x00BB; // 3/11 right angle quotation mark
\r
142 return 0x266F; // 3/12 musical sharp
\r
144 return 0x02B9; // 3/13 mjagkij znak
\r
146 return 0x02BA; // 3/14 tverdyj znak
\r
148 return 0x00BF; // 3/15 inverted question mark
\r
150 // 4/0 to 5/15 diacritic characters
\r
152 // 6/0 (this position shall not be used)
\r
154 return 0x00C6; // 6/1 CAPITAL DIPHTHONG A WITH E
\r
156 return 0x0110; // 6/2 CAPITAL LETTER D WITH STROKE
\r
157 // 6/3 (this position shall not be used)
\r
158 // 6/4 (this position shall not be used)
\r
159 // 6/5 (this position shall not be used)
\r
161 return 0x0132; // 6/6 CAPITAL LETTER IJ
\r
162 // 6/7 (this position shall not be used)
\r
164 return 0x0141; // 6/8 CAPITAL LETTER L WITH STROKE
\r
166 return 0x00D8; // 6/9 CAPITAL LETTER O WITH SOLIDUS [oblique stroke]
\r
168 return 0x0152; // 6/10 CAPITAL DIPHTHONG OE
\r
169 // 6/11 (this position shall not be used)
\r
171 return 0x00DE; // 6/12 CAPITAL LETTER THORN
\r
172 // 6/13 (this position shall not be used)
\r
173 // 6/14 (this position shall not be used)
\r
174 // 6/15 (this position shall not be used)
\r
176 // 7/0 (this position shall not be used)
\r
178 return 0x00E6; // 7/1 small diphthong a with e
\r
179 // 7/4 (this position shall not be used)
\r
181 return 0x0131; // 7/5 small letter i without dot
\r
183 return 0x0133; // 7/6 small letter ij
\r
184 // 7/7 (this position shall not be used)
\r
186 return 0x0142; // 7/8 small letter l with stroke
\r
188 return 0x00F8; // 7/9 small letter o with solidus (oblique stroke)
\r
190 return 0x0153; // 7/10 small diphthong oe
\r
192 return 0x00DF; // 7/11 small letter sharp s
\r
194 return 0x00FE; // 7/12 small letter thorn
\r
195 // 7/13 (this position shall not be used)
\r
196 // 7/14 (this position shall not be used)
\r
202 private char getCombiningChar(int i) {
\r
204 // 4/0 low rising tone mark
\r
206 return 0x1EA2; // CAPITAL A WITH HOOK ABOVE
\r
208 return 0x1EBA; // CAPITAL E WITH HOOK ABOVE
\r
210 return 0x1EC8; // CAPITAL I WITH HOOK ABOVE
\r
212 return 0x1ECE; // CAPITAL O WITH HOOK ABOVE
\r
214 return 0x1EE6; // CAPITAL U WITH HOOK ABOVE
\r
216 return 0x1EF6; // CAPITAL Y WITH HOOK ABOVE
\r
218 return 0x1EA3; // small a with hook above
\r
220 return 0x1EBB; // small e with hook above
\r
222 return 0x1EC9; // small i with hook above
\r
224 return 0x1ECF; // small o with hook above
\r
226 return 0x1EE7; // small u with hook above
\r
228 return 0x1EF7; // small y with hook above
\r
230 // 4/1 grave accent
\r
232 return 0x00C0; // CAPITAL A WITH GRAVE ACCENT
\r
234 return 0x00C8; // CAPITAL E WITH GRAVE ACCENT
\r
236 return 0x00CC; // CAPITAL I WITH GRAVE ACCENT
\r
238 return 0x00D2; // CAPITAL O WITH GRAVE ACCENT
\r
240 return 0x00D9; // CAPITAL U WITH GRAVE ACCENT
\r
242 return 0x1E80; // CAPITAL W WITH GRAVE
\r
244 return 0x1EF2; // CAPITAL Y WITH GRAVE
\r
246 return 0x00E0; // small a with grave accent
\r
248 return 0x00E8; // small e with grave accent
\r
250 return 0x00EC; // small i with grave accent
\r
252 return 0x00F2; // small o with grave accent
\r
254 return 0x00F9; // small u with grave accent
\r
256 return 0x1E81; // small w with grave
\r
258 return 0x1EF3; // small y with grave
\r
260 // 4/2 acute accent
\r
262 return 0x00C1; // CAPITAL A WITH ACUTE ACCENT
\r
264 return 0x0106; // CAPITAL C WITH ACUTE ACCENT
\r
266 return 0x00C9; // CAPITAL E WITH ACUTE ACCENT
\r
268 return 0x01F4; // CAPITAL G WITH ACUTE
\r
270 return 0x00CD; // CAPITAL I WITH ACUTE ACCENT
\r
272 return 0x1E30; // CAPITAL K WITH ACUTE
\r
274 return 0x0139; // CAPITAL L WITH ACUTE ACCENT
\r
276 return 0x1E3E; // CAPITAL M WITH ACUTE
\r
278 return 0x0143; // CAPITAL N WITH ACUTE ACCENT
\r
280 return 0x00D3; // CAPITAL O WITH ACUTE ACCENT
\r
282 return 0x1E54; // CAPITAL P WITH ACUTE
\r
284 return 0x0154; // CAPITAL R WITH ACUTE ACCENT
\r
286 return 0x015A; // CAPITAL S WITH ACUTE ACCENT
\r
288 return 0x00DA; // CAPITAL U WITH ACUTE ACCENT
\r
290 return 0x1E82; // CAPITAL W WITH ACUTE
\r
292 return 0x00DD; // CAPITAL Y WITH ACUTE ACCENT
\r
294 return 0x0179; // CAPITAL Z WITH ACUTE ACCENT
\r
296 return 0x00E1; // small a with acute accent
\r
298 return 0x0107; // small c with acute accent
\r
300 return 0x00E9; // small e with acute accent
\r
302 return 0x01F5; // small g with acute
\r
304 return 0x00ED; // small i with acute accent
\r
306 return 0x1E31; // small k with acute
\r
308 return 0x013A; // small l with acute accent
\r
310 return 0x1E3F; // small m with acute
\r
312 return 0x0144; // small n with acute accent
\r
314 return 0x00F3; // small o with acute accent
\r
316 return 0x1E55; // small p with acute
\r
318 return 0x0155; // small r with acute accent
\r
320 return 0x015B; // small s with acute accent
\r
322 return 0x00FA; // small u with acute accent
\r
324 return 0x1E83; // small w with acute
\r
326 return 0x00FD; // small y with acute accent
\r
328 return 0x017A; // small z with acute accent
\r
330 return 0x01FC; // CAPITAL AE WITH ACUTE
\r
332 return 0x01FD; // small ae with acute
\r
334 // 4/3 circumflex accent
\r
336 return 0x00C2; // CAPITAL A WITH CIRCUMFLEX ACCENT
\r
338 return 0x0108; // CAPITAL C WITH CIRCUMFLEX
\r
340 return 0x00CA; // CAPITAL E WITH CIRCUMFLEX ACCENT
\r
342 return 0x011C; // CAPITAL G WITH CIRCUMFLEX
\r
344 return 0x0124; // CAPITAL H WITH CIRCUMFLEX
\r
346 return 0x00CE; // CAPITAL I WITH CIRCUMFLEX ACCENT
\r
348 return 0x0134; // CAPITAL J WITH CIRCUMFLEX
\r
350 return 0x00D4; // CAPITAL O WITH CIRCUMFLEX ACCENT
\r
352 return 0x015C; // CAPITAL S WITH CIRCUMFLEX
\r
354 return 0x00DB; // CAPITAL U WITH CIRCUMFLEX
\r
356 return 0x0174; // CAPITAL W WITH CIRCUMFLEX
\r
358 return 0x0176; // CAPITAL Y WITH CIRCUMFLEX
\r
360 return 0x1E90; // CAPITAL Z WITH CIRCUMFLEX
\r
362 return 0x00E2; // small a with circumflex accent
\r
364 return 0x0109; // small c with circumflex
\r
366 return 0x00EA; // small e with circumflex accent
\r
368 return 0x011D; // small g with circumflex
\r
370 return 0x0125; // small h with circumflex
\r
372 return 0x00EE; // small i with circumflex accent
\r
374 return 0x0135; // small j with circumflex
\r
376 return 0x00F4; // small o with circumflex accent
\r
378 return 0x015D; // small s with circumflex
\r
380 return 0x00FB; // small u with circumflex
\r
382 return 0x0175; // small w with circumflex
\r
384 return 0x0177; // small y with circumflex
\r
386 return 0x1E91; // small z with circumflex
\r
390 return 0x00C3; // CAPITAL A WITH TILDE
\r
392 return 0x1EBC; // CAPITAL E WITH TILDE
\r
394 return 0x0128; // CAPITAL I WITH TILDE
\r
396 return 0x00D1; // CAPITAL N WITH TILDE
\r
398 return 0x00D5; // CAPITAL O WITH TILDE
\r
400 return 0x0168; // CAPITAL U WITH TILDE
\r
402 return 0x1E7C; // CAPITAL V WITH TILDE
\r
404 return 0x1EF8; // CAPITAL Y WITH TILDE
\r
406 return 0x00E3; // small a with tilde
\r
408 return 0x1EBD; // small e with tilde
\r
410 return 0x0129; // small i with tilde
\r
412 return 0x00F1; // small n with tilde
\r
414 return 0x00F5; // small o with tilde
\r
416 return 0x0169; // small u with tilde
\r
418 return 0x1E7D; // small v with tilde
\r
420 return 0x1EF9; // small y with tilde
\r
424 return 0x0100; // CAPITAL A WITH MACRON
\r
426 return 0x0112; // CAPITAL E WITH MACRON
\r
428 return 0x1E20; // CAPITAL G WITH MACRON
\r
430 return 0x012A; // CAPITAL I WITH MACRON
\r
432 return 0x014C; // CAPITAL O WITH MACRON
\r
434 return 0x016A; // CAPITAL U WITH MACRON
\r
436 return 0x0101; // small a with macron
\r
438 return 0x0113; // small e with macron
\r
440 return 0x1E21; // small g with macron
\r
442 return 0x012B; // small i with macron
\r
444 return 0x014D; // small o with macron
\r
446 return 0x016B; // small u with macron
\r
448 return 0x01E2; // CAPITAL AE WITH MACRON
\r
450 return 0x01E3; // small ae with macron
\r
454 return 0x0102; // CAPITAL A WITH BREVE
\r
456 return 0x0114; // CAPITAL E WITH BREVE
\r
458 return 0x011E; // CAPITAL G WITH BREVE
\r
460 return 0x012C; // CAPITAL I WITH BREVE
\r
462 return 0x014E; // CAPITAL O WITH BREVE
\r
464 return 0x016C; // CAPITAL U WITH BREVE
\r
466 return 0x0103; // small a with breve
\r
468 return 0x0115; // small e with breve
\r
470 return 0x011F; // small g with breve
\r
472 return 0x012D; // small i with breve
\r
474 return 0x014F; // small o with breve
\r
476 return 0x016D; // small u with breve
\r
480 return 0x1E02; // CAPITAL B WITH DOT ABOVE
\r
482 return 0x010A; // CAPITAL C WITH DOT ABOVE
\r
484 return 0x1E0A; // CAPITAL D WITH DOT ABOVE
\r
486 return 0x0116; // CAPITAL E WITH DOT ABOVE
\r
488 return 0x1E1E; // CAPITAL F WITH DOT ABOVE
\r
490 return 0x0120; // CAPITAL G WITH DOT ABOVE
\r
492 return 0x1E22; // CAPITAL H WITH DOT ABOVE
\r
494 return 0x0130; // CAPITAL I WITH DOT ABOVE
\r
496 return 0x1E40; // CAPITAL M WITH DOT ABOVE
\r
498 return 0x1E44; // CAPITAL N WITH DOT ABOVE
\r
500 return 0x1E56; // CAPITAL P WITH DOT ABOVE
\r
502 return 0x1E58; // CAPITAL R WITH DOT ABOVE
\r
504 return 0x1E60; // CAPITAL S WITH DOT ABOVE
\r
506 return 0x1E6A; // CAPITAL T WITH DOT ABOVE
\r
508 return 0x1E86; // CAPITAL W WITH DOT ABOVE
\r
510 return 0x1E8A; // CAPITAL X WITH DOT ABOVE
\r
512 return 0x1E8E; // CAPITAL Y WITH DOT ABOVE
\r
514 return 0x017B; // CAPITAL Z WITH DOT ABOVE
\r
516 return 0x1E03; // small b with dot above
\r
518 return 0x010B; // small c with dot above
\r
520 return 0x1E0B; // small d with dot above
\r
522 return 0x0117; // small e with dot above
\r
524 return 0x1E1F; // small f with dot above
\r
526 return 0x0121; // small g with dot above
\r
528 return 0x1E23; // small h with dot above
\r
530 return 0x1E41; // small m with dot above
\r
532 return 0x1E45; // small n with dot above
\r
534 return 0x1E57; // small p with dot above
\r
536 return 0x1E59; // small r with dot above
\r
538 return 0x1E61; // small s with dot above
\r
540 return 0x1E6B; // small t with dot above
\r
542 return 0x1E87; // small w with dot above
\r
544 return 0x1E8B; // small x with dot above
\r
546 return 0x1E8F; // small y with dot above
\r
548 return 0x017C; // small z with dot above
\r
550 // 4/8 trema, diaeresis
\r
552 return 0x00A8; // diaeresis
\r
554 return 0x00C4; // CAPITAL A WITH DIAERESIS
\r
556 return 0x00CB; // CAPITAL E WITH DIAERESIS
\r
558 return 0x1E26; // CAPITAL H WITH DIAERESIS
\r
560 return 0x00CF; // CAPITAL I WITH DIAERESIS
\r
562 return 0x00D6; // CAPITAL O WITH DIAERESIS
\r
564 return 0x00DC; // CAPITAL U WITH DIAERESIS
\r
566 return 0x1E84; // CAPITAL W WITH DIAERESIS
\r
568 return 0x1E8C; // CAPITAL X WITH DIAERESIS
\r
570 return 0x0178; // CAPITAL Y WITH DIAERESIS
\r
572 return 0x00E4; // small a with diaeresis
\r
574 return 0x00EB; // small e with diaeresis
\r
576 return 0x1E27; // small h with diaeresis
\r
578 return 0x00EF; // small i with diaeresis
\r
580 return 0x00F6; // small o with diaeresis
\r
582 return 0x1E97; // small t with diaeresis
\r
584 return 0x00FC; // small u with diaeresis
\r
586 return 0x1E85; // small w with diaeresis
\r
588 return 0x1E8D; // small x with diaeresis
\r
590 return 0x00FF; // small y with diaeresis
\r
594 return 0x00A8; // [diaeresis]
\r
596 // 4/10 circle above
\r
598 return 0x00C5; // CAPITAL A WITH RING ABOVE
\r
600 return 0x016E; // CAPITAL U WITH RING ABOVE
\r
602 return 0x00E5; // small a with ring above
\r
604 return 0x016F; // small u with ring above
\r
606 return 0x1E98; // small w with ring above
\r
608 return 0x1E99; // small y with ring above
\r
610 // 4/11 high comma off centre
\r
612 // 4/12 inverted high comma centred
\r
614 // 4/13 double acute accent
\r
616 return 0x0150; // CAPITAL O WITH DOUBLE ACUTE
\r
618 return 0x0170; // CAPITAL U WITH DOUBLE ACUTE
\r
620 return 0x0151; // small o with double acute
\r
622 return 0x0171; // small u with double acute
\r
626 return 0x01A0; // LATIN CAPITAL LETTER O WITH HORN
\r
628 return 0x01AF; // LATIN CAPITAL LETTER U WITH HORN
\r
630 return 0x01A1; // latin small letter o with horn
\r
632 return 0x01B0; // latin small letter u with horn
\r
634 // 4/15 caron (hacek)
\r
636 return 0x01CD; // CAPITAL A WITH CARON
\r
638 return 0x010C; // CAPITAL C WITH CARON
\r
640 return 0x010E; // CAPITAL D WITH CARON
\r
642 return 0x011A; // CAPITAL E WITH CARON
\r
644 return 0x01E6; // CAPITAL G WITH CARON
\r
646 return 0x01CF; // CAPITAL I WITH CARON
\r
648 return 0x01E8; // CAPITAL K WITH CARON
\r
650 return 0x013D; // CAPITAL L WITH CARON
\r
652 return 0x0147; // CAPITAL N WITH CARON
\r
654 return 0x01D1; // CAPITAL O WITH CARON
\r
656 return 0x0158; // CAPITAL R WITH CARON
\r
658 return 0x0160; // CAPITAL S WITH CARON
\r
660 return 0x0164; // CAPITAL T WITH CARON
\r
662 return 0x01D3; // CAPITAL U WITH CARON
\r
664 return 0x017D; // CAPITAL Z WITH CARON
\r
666 return 0x01CE; // small a with caron
\r
668 return 0x010D; // small c with caron
\r
670 return 0x010F; // small d with caron
\r
672 return 0x011B; // small e with caron
\r
674 return 0x01E7; // small g with caron
\r
676 return 0x01D0; // small i with caron
\r
678 return 0x01F0; // small j with caron
\r
680 return 0x01E9; // small k with caron
\r
682 return 0x013E; // small l with caron
\r
684 return 0x0148; // small n with caron
\r
686 return 0x01D2; // small o with caron
\r
688 return 0x0159; // small r with caron
\r
690 return 0x0161; // small s with caron
\r
692 return 0x0165; // small t with caron
\r
694 return 0x01D4; // small u with caron
\r
696 return 0x017E; // small z with caron
\r
700 return 0x00B8; // cedilla
\r
702 return 0x00C7; // CAPITAL C WITH CEDILLA
\r
704 return 0x1E10; // CAPITAL D WITH CEDILLA
\r
706 return 0x0122; // CAPITAL G WITH CEDILLA
\r
708 return 0x1E28; // CAPITAL H WITH CEDILLA
\r
710 return 0x0136; // CAPITAL K WITH CEDILLA
\r
712 return 0x013B; // CAPITAL L WITH CEDILLA
\r
714 return 0x0145; // CAPITAL N WITH CEDILLA
\r
716 return 0x0156; // CAPITAL R WITH CEDILLA
\r
718 return 0x015E; // CAPITAL S WITH CEDILLA
\r
720 return 0x0162; // CAPITAL T WITH CEDILLA
\r
722 return 0x00E7; // small c with cedilla
\r
724 return 0x1E11; // small d with cedilla
\r
726 return 0x0123; // small g with cedilla
\r
728 return 0x1E29; // small h with cedilla
\r
730 return 0x0137; // small k with cedilla
\r
732 return 0x013C; // small l with cedilla
\r
734 return 0x0146; // small n with cedilla
\r
736 return 0x0157; // small r with cedilla
\r
738 return 0x015F; // small s with cedilla
\r
740 return 0x0163; // small t with cedilla
\r
744 // 5/2 hook to left
\r
746 // 5/3 ogonek (hook to right)
\r
748 return 0x02DB; // ogonek
\r
750 return 0x0104; // CAPITAL A WITH OGONEK
\r
752 return 0x0118; // CAPITAL E WITH OGONEK
\r
754 return 0x012E; // CAPITAL I WITH OGONEK
\r
756 return 0x01EA; // CAPITAL O WITH OGONEK
\r
758 return 0x0172; // CAPITAL U WITH OGONEK
\r
760 return 0x0105; // small a with ogonek
\r
762 return 0x0119; // small e with ogonek
\r
764 return 0x012F; // small i with ogonek
\r
766 return 0x01EB; // small o with ogonek
\r
768 return 0x0173; // small u with ogonek
\r
770 // 5/4 circle below
\r
772 return 0x1E00; // CAPITAL A WITH RING BELOW
\r
774 return 0x1E01; // small a with ring below
\r
776 // 5/5 half circle below
\r
778 return 0x1E2A; // CAPITAL H WITH BREVE BELOW
\r
780 return 0x1E2B; // small h with breve below
\r
784 return 0x1EA0; // CAPITAL A WITH DOT BELOW
\r
786 return 0x1E04; // CAPITAL B WITH DOT BELOW
\r
788 return 0x1E0C; // CAPITAL D WITH DOT BELOW
\r
790 return 0x1EB8; // CAPITAL E WITH DOT BELOW
\r
792 return 0x1E24; // CAPITAL H WITH DOT BELOW
\r
794 return 0x1ECA; // CAPITAL I WITH DOT BELOW
\r
796 return 0x1E32; // CAPITAL K WITH DOT BELOW
\r
798 return 0x1E36; // CAPITAL L WITH DOT BELOW
\r
800 return 0x1E42; // CAPITAL M WITH DOT BELOW
\r
802 return 0x1E46; // CAPITAL N WITH DOT BELOW
\r
804 return 0x1ECC; // CAPITAL O WITH DOT BELOW
\r
806 return 0x1E5A; // CAPITAL R WITH DOT BELOW
\r
808 return 0x1E62; // CAPITAL S WITH DOT BELOW
\r
810 return 0x1E6C; // CAPITAL T WITH DOT BELOW
\r
812 return 0x1EE4; // CAPITAL U WITH DOT BELOW
\r
814 return 0x1E7E; // CAPITAL V WITH DOT BELOW
\r
816 return 0x1E88; // CAPITAL W WITH DOT BELOW
\r
818 return 0x1EF4; // CAPITAL Y WITH DOT BELOW
\r
820 return 0x1E92; // CAPITAL Z WITH DOT BELOW
\r
822 return 0x1EA1; // small a with dot below
\r
824 return 0x1E05; // small b with dot below
\r
826 return 0x1E0D; // small d with dot below
\r
828 return 0x1EB9; // small e with dot below
\r
830 return 0x1E25; // small h with dot below
\r
832 return 0x1ECB; // small i with dot below
\r
834 return 0x1E33; // small k with dot below
\r
836 return 0x1E37; // small l with dot below
\r
838 return 0x1E43; // small m with dot below
\r
840 return 0x1E47; // small n with dot below
\r
842 return 0x1ECD; // small o with dot below
\r
844 return 0x1E5B; // small r with dot below
\r
846 return 0x1E63; // small s with dot below
\r
848 return 0x1E6D; // small t with dot below
\r
850 return 0x1EE5; // small u with dot below
\r
852 return 0x1E7F; // small v with dot below
\r
854 return 0x1E89; // small w with dot below
\r
856 return 0x1EF5; // small y with dot below
\r
858 return 0x1E93; // small z with dot below
\r
860 // 5/7 double dot below
\r
862 return 0x1E72; // CAPITAL U WITH DIAERESIS BELOW
\r
864 return 0x1E73; // small u with diaeresis below
\r
868 return 0x005F; // underline
\r
870 // 5/9 double underline
\r
872 return 0x2017; // double underline
\r
874 // 5/10 small low vertical bar
\r
878 // 5/11 circumflex below
\r
880 // 5/12 (this position shall not be used)
\r
882 // 5/13 left half of ligature sign and of double tilde
\r
884 // 5/14 right half of ligature sign
\r
886 // 5/15 right half of double tilde
\r