Implement real ISO5426 character set, first version.
author Wolfram Schneider <wosch@indexdata.dk>
Tue, 18 Nov 2008 22:32:50 +0000 (23:32 +0100)
committer Wolfram Schneider <wosch@indexdata.dk>
Tue, 18 Nov 2008 22:32:50 +0000 (23:32 +0100)
src/codetables-iso5426.xml

index 6dd818c..abcae3d 100644 (file)
@@ -2,9 +2,9 @@
 <!-- Switched <marc>212320</marc> and <marc>212320</marc> -->
 <codeTables>
        <codeTable name="MAB2-Zeichensatz ISO 646 (IRV) + ISO 5426-1983" date="Nov 2008" number="1">
 <!-- Switched <marc>212320</marc> and <marc>212320</marc> -->
 <codeTables>
        <codeTable name="MAB2-Zeichensatz ISO 646 (IRV) + ISO 5426-1983" date="Nov 2008" number="1">
-               <note>The first column in this table contains the MARC-8 code (in hex) for
+               <note>The first column in this table contains the ISO5426 code (in hex) for
      the character as coming from the G0 graphic set, the second column
      the character as coming from the G0 graphic set, the second column
-     contains the MARC-8 code (in hex) for the character as coming from the G1
+     contains the ISO5426 code (in hex) for the character as coming from the G1
      graphic set, the third column contains the UCS/Unicode 16-bit code (in
      hex), the fourth column contains the UTF-8 code (in hex) for the UCS
      characters, the fifth column contains a representation of the character (where possible), 
      graphic set, the third column contains the UCS/Unicode 16-bit code (in
      hex), the fourth column contains the UTF-8 code (in hex) for the UCS
      characters, the fifth column contains a representation of the character (where possible), 
         in Unicode and UTF-8 are given.  When that occurs the alternate Unicode and 
         alternate UTF-8 columns follow the character name. </note>
                <characterSet name="MAB2-Zeichensatz ISO 646 (IRV) + ISO 5426-1983" ISOcode="42">
         in Unicode and UTF-8 are given.  When that occurs the alternate Unicode and 
         alternate UTF-8 columns follow the character name. </note>
                <characterSet name="MAB2-Zeichensatz ISO 646 (IRV) + ISO 5426-1983" ISOcode="42">
+                       <!-- ???
                        <code>
                                <marc>1B</marc>
                                <ucs>001B</ucs>
                                <utf-8>1B</utf-8>
                                <name>ESCAPE (Unlikely to occur in UCS/Unicode)</name>
                        </code>
                        <code>
                                <marc>1B</marc>
                                <ucs>001B</ucs>
                                <utf-8>1B</utf-8>
                                <name>ESCAPE (Unlikely to occur in UCS/Unicode)</name>
                        </code>
+                       -->
                        <code>
                                <marc>1D</marc>
                                <ucs>001D</ucs>
                        <code>
                                <marc>1D</marc>
                                <ucs>001D</ucs>
@@ -612,8 +614,14 @@ BRACKET</name>
                                <utf-8>7E</utf-8>
                                <name>SPACING TILDE / TILDE</name>
                        </code>
                                <utf-8>7E</utf-8>
                                <name>SPACING TILDE / TILDE</name>
                        </code>
+                       <!-- 7F -->
+                       
                </characterSet>
                <characterSet name="MAB2-Zeichensatz ISO 646 (IRV) + ISO 5426-1983" date="1-Feb-2005, Updated Nov 2008" ISOcode="45">
                </characterSet>
                <characterSet name="MAB2-Zeichensatz ISO 646 (IRV) + ISO 5426-1983" date="1-Feb-2005, Updated Nov 2008" ISOcode="45">
+                       <note>See also Zeichentabelle MAB2 (ISO 5426-1983), http://www.gymel.com/charsets/MAB2.html</note>
+                       <note>See also MAB2-Zeichensatz ISO 646 (IRV) + ISO 5426-1983, http://www.d-nb.de/standardisierung/pdf/mab_unic.pdf</note>
+                       <note>See also Zeichenkonkordanz MAB2-Zeichensatz - MARC-8, http://www.d-nb.de/standardisierung/pdf/mab_marc.pdf</note>
+                       <!-- 80 - 87 -->
                        <code>
                                <marc>88</marc>
                                <ucs>0098</ucs>
                        <code>
                                <marc>88</marc>
                                <ucs>0098</ucs>
@@ -626,504 +634,566 @@ BRACKET</name>
                                <utf-8>C29C</utf-8>
                                <name>NON-SORT END / STRING TERMINATOR</name>
                        </code>
                                <utf-8>C29C</utf-8>
                                <name>NON-SORT END / STRING TERMINATOR</name>
                        </code>
+                       <!-- 8A - 8F -->
                        <code>
                        <code>
-                               <marc>8D</marc>
-                               <ucs>200D</ucs>
-                               <utf-8>E2808D</utf-8>
-                               <name>JOINER / ZERO WIDTH JOINER</name>
-                       </code>
-
-                       <!-- fix sort order later -->
-                       <code>
-                               <marc>E8</marc>
-                               <ucs>0141</ucs>
-                               <utf-8>C581</utf-8>
-                               <name>UPPERCASE POLISH L / LATIN CAPITAL LETTER L WITH
-STROKE</name>
+                               <marc>A1</marc>
+                               <ucs>00A1</ucs>
+                               <utf-8>C2A1</utf-8>
+                               <name>INVERTED EXCLAMATION MARK</name>
                        </code>
                        </code>
+               
                        <code>
                        <code>
-                               <marc>E9</marc>
-                               <ucs>00D8</ucs>
-                               <utf-8>C398</utf-8>
-                               <name>UPPERCASE SCANDINAVIAN O / LATIN CAPITAL LETTER
-O WITH STROKE</name>
+                               <marc>A2</marc>
+                               <ucs>201E</ucs>
+                               <utf-8>E2809E</utf-8>
+                               <name>LOW DOUBLE COMMA QUOTATION MARK</name>
                        </code>
                        </code>
+               
                        <code>
                        <code>
-                               <marc>E2</marc>
-                               <ucs>0110</ucs>
-                               <utf-8>C490</utf-8>
-                               <name>UPPERCASE D WITH CROSSBAR / LATIN CAPITAL LETTER
-D WITH STROKE</name>
+                               <marc>A3</marc>
+                               <ucs>00A3</ucs>
+                               <utf-8>C2A3</utf-8>
+                               <name>BRITISH POUND / POUND SIGN</name>
                        </code>
                        <code>
                        </code>
                        <code>
-                               <marc>EC</marc>
-                               <ucs>00DE</ucs>
-                               <utf-8>C39E</utf-8>
-                               <name>UPPERCASE ICELANDIC THORN / LATIN CAPITAL LETTER
-THORN (Icelandic)</name>
+                               <marc>A4</marc>
+                               <ucs>0024</ucs>
+                               <utf-8>24</utf-8>
+                               <name>DOLLAR SIGN</name>
                        </code>
                        </code>
+               
                        <code>
                        <code>
-                               <marc>E1</marc>
-                               <ucs>00C6</ucs>
-                               <utf-8>C386</utf-8>
-                               <name>UPPERCASE DIGRAPH AE / LATIN CAPITAL LIGATURE
-AE</name>
-                       </code>
+                               <marc>A5</marc>
+                               <ucs>00A5</ucs>
+                               <utf-8>C2A5</utf-8>
+                               <name>YEN SIGN</name>
+                       </code>         
                        <code>
                                <marc>A6</marc>
                        <code>
                                <marc>A6</marc>
-                               <ucs>0152</ucs>
-                               <utf-8>C592</utf-8>
-                               <name>UPPERCASE DIGRAPH OE / LATIN CAPITAL LIGATURE
-OE</name>
-                       </code>
+                               <ucs>2020</ucs>
+                               <utf-8>E280A0</utf-8>
+                               <name>DAGGER</name>
+                       </code>         
                        <code>
                        <code>
-                               <marc>EA</marc>
-                               <ucs>02B9</ucs>
-                               <utf-8>CAB9</utf-8>
-                               <name>SOFT SIGN, PRIME / MODIFIER LETTER PRIME</name>
+                               <marc>A7</marc>
+                               <ucs>00A7</ucs>
+                               <utf-8>C2A7</utf-8>
+                               <name>SECTION SIGN</name>
                        </code>
                        <code>
                        </code>
                        <code>
-                               <marc>B7</marc>
-                               <ucs>00B7</ucs>
-                               <utf-8>C2B7</utf-8>
-                               <name>MIDDLE DOT</name>
+                               <marc>A8</marc>
+                               <ucs>2032</ucs>
+                               <utf-8>E280B2</utf-8>
+                               <name>PRIME</name>
                        </code>
                        </code>
+               
                        <code>
                        <code>
-                               <marc>AC</marc>
-                               <ucs>266D</ucs>
-                               <utf-8>E299AD</utf-8>
-                               <name>MUSIC FLAT SIGN</name>
-                       </code>
+                               <marc>A9</marc>
+                               <ucs>2018</ucs>
+                               <utf-8>E28098</utf-8>
+                               <name>SINGLE TURNED COMMA QUOTATION MARK</name>
+                       </code> 
                        <code>
                        <code>
-                               <marc>AF</marc>
-                               <ucs>00AE</ucs>
-                               <utf-8>C2AE</utf-8>
-                               <name>PATENT MARK / REGISTERED SIGN</name>
-                       </code>
-
-                       <!-- unknown
+                               <marc>AA</marc>
+                               <ucs>201C</ucs>
+                               <utf-8>E2809C</utf-8>
+                               <name>DOUBLE TURNED COMMA QUOTATION MARK</name>
+                       </code>                 
                        <code>
                                <marc>AB</marc>
                        <code>
                                <marc>AB</marc>
-                               <ucs>00B1</ucs>
-                               <utf-8>C2B1</utf-8>
-                               <name>PLUS OR MINUS / PLUS-MINUS SIGN</name>
-                       </code>
-                       -->
+                               <ucs>00AB</ucs>
+                               <utf-8>C2AB</utf-8>
+                               <name>LEFT-POINTING DOUBLE ANGLE QUOTATION MARK (LEFT POINTING GUILLEMET)</name>
+                       </code>                 
                        <code>
                                <marc>AC</marc>
                        <code>
                                <marc>AC</marc>
-                               <ucs>01A0</ucs>
-                               <utf-8>C6A0</utf-8>
-                               <name>UPPERCASE O-HOOK / LATIN CAPITAL LETTER O WITH
-HORN</name>
-                       </code>
+                               <ucs>266D</ucs>
+                               <utf-8>E299AD</utf-8>
+                               <name>MUSIC FLAT SIGN (FLAT)</name>
+                       </code>                 
                        <code>
                                <marc>AD</marc>
                        <code>
                                <marc>AD</marc>
-                               <ucs>01AF</ucs>
-                               <utf-8>C6AF</utf-8>
-                               <name>UPPERCASE U-HOOK / LATIN CAPITAL LETTER U WITH
-HORN</name>
+                               <ucs>00A9</ucs>
+                               <utf-8>C2A9</utf-8>
+                               <name>COPYRIGHT SIGN</name>
                        </code>
                        <code>
                                <marc>AE</marc>
                        </code>
                        <code>
                                <marc>AE</marc>
-                               <ucs>02BC</ucs>
-                               <utf-8>CABC</utf-8>
-                               <altutf-8>CABE</altutf-8>
-                               <name>ALIF / MODIFIER LETTER APOSTROPHE</name>
+                               <ucs>2117</ucs>
+                               <utf-8>E28497</utf-8>
+                               <name>SOUND RECORDING COPYRIGHT</name>
+                       </code>
+                       <code>
+                               <marc>AF</marc>
+                               <ucs>00AE</ucs>
+                               <utf-8>C2AE</utf-8>
+                               <name>PATENT MARK / REGISTERED SIGN</name>
                        </code>
                        </code>
+               
+               
+               
+                       
                        <code>
                                <marc>B0</marc>
                                <ucs>02BB</ucs>
                                <utf-8>CABB</utf-8>
                                <name>AYN / MODIFIER LETTER TURNED COMMA</name>
                        </code>
                        <code>
                                <marc>B0</marc>
                                <ucs>02BB</ucs>
                                <utf-8>CABB</utf-8>
                                <name>AYN / MODIFIER LETTER TURNED COMMA</name>
                        </code>
+                       
                        <code>
                                <marc>B1</marc>
                        <code>
                                <marc>B1</marc>
-                               <ucs>0142</ucs>
-                               <utf-8>C582</utf-8>
-                               <name>LOWERCASE POLISH L / LATIN SMALL LETTER L WITH
-STROKE</name>
+                               <ucs>02BC</ucs>
+                               <utf-8>CABC</utf-8>
+                               <altutf-8>CABE</altutf-8>
+                               <name>ALIF / MODIFIER LETTER APOSTROPHE</name>
                        </code>
                        </code>
+                       
                        <code>
                                <marc>B2</marc>
                        <code>
                                <marc>B2</marc>
-                               <ucs>00F8</ucs>
-                               <utf-8>C3B8</utf-8>
-                               <name>LOWERCASE SCANDINAVIAN O / LATIN SMALL LETTER O
-WITH STROKE</name>
-                       </code>
-                       <code>
-                               <marc>B3</marc>
-                               <ucs>0111</ucs>
-                               <utf-8>C491</utf-8>
-                               <name>LOWERCASE D WITH CROSSBAR / LATIN SMALL LETTER
-D WITH STROKE</name>
-                       </code>
-                       <code>
-                               <marc>B4</marc>
-                               <ucs>00FE</ucs>
-                               <utf-8>C3BE</utf-8>
-                               <name>LOWERCASE ICELANDIC THORN / LATIN SMALL LETTER
-THORN (Icelandic)</name>
-                       </code>
-                       <code>
-                               <marc>B5</marc>
-                               <ucs>00E6</ucs>
-                               <utf-8>C3A6</utf-8>
-                               <name>LOWERCASE DIGRAPH AE / LATIN SMALL LIGATURE
-AE</name>
+                               <ucs>201A</ucs>
+                               <utf-8>E2809A</utf-8>
+                               <name>SINGLE LOW-9 QUOTATION MARK (LOW SINGLE COMMA QUOTATION MARK)</name>
                        </code>
                        </code>
+                       <!-- B3, B4, B5 -->
                        <code>
                                <marc>B6</marc>
                        <code>
                                <marc>B6</marc>
-                               <ucs>0153</ucs>
-                               <utf-8>C593</utf-8>
-                               <name>LOWERCASE DIGRAPH OE / LATIN SMALL LIGATURE
-OE</name>
+                               <ucs>2021</ucs>
+                               <utf-8>E280A1</utf-8>
+                               <name>DOUBLE DAGGER</name>
                        </code>
                        <code>
                                <marc>B7</marc>
                        </code>
                        <code>
                                <marc>B7</marc>
-                               <ucs>02BA</ucs>
-                               <utf-8>CABA</utf-8>
-                               <name>HARD SIGN, DOUBLE PRIME / MODIFIER LETTER DOUBLE
-PRIME</name>
+                               <ucs>00B7</ucs>
+                               <utf-8>C2B7</utf-8>
+                               <name>MIDDLE DOT</name>
                        </code>
                        <code>
                                <marc>B8</marc>
                        </code>
                        <code>
                                <marc>B8</marc>
-                               <ucs>0131</ucs>
-                               <utf-8>C4B1</utf-8>
-                               <name>LOWERCASE TURKISH I / LATIN SMALL LETTER DOTLESS
-I</name>
-                       </code>
+                               <ucs>2033</ucs>
+                               <utf-8>E280B3</utf-8>
+                               <name>DOUBLE PRIME</name>
+                       </code>                 
                        <code>
                                <marc>B9</marc>
                        <code>
                                <marc>B9</marc>
-                               <ucs>00A3</ucs>
-                               <utf-8>C2A3</utf-8>
-                               <name>BRITISH POUND / POUND SIGN</name>
-                       </code>
+                               <ucs>2019</ucs>
+                               <utf-8>E28099</utf-8>
+                               <name>RIGHT SINGLE QUOTATION MARK (SINGLE COMMA QUOTATION MARK)</name>
+                       </code>                 
                        <code>
                                <marc>BA</marc>
                        <code>
                                <marc>BA</marc>
-                               <ucs>00F0</ucs>
-                               <utf-8>C3B0</utf-8>
-                               <name>LOWERCASE ETH / LATIN SMALL LETTER ETH
-(Icelandic)</name>
-                       </code>
-                       <code>
-                               <marc>BC</marc>
-                               <ucs>01A1</ucs>
-                               <utf-8>C6A1</utf-8>
-                               <name>LOWERCASE O-HOOK / LATIN SMALL LETTER O WITH
-HORN</name>
-                       </code>
-                       <code>
-                               <marc>BD</marc>
-                               <ucs>01B0</ucs>
-                               <utf-8>C6B0</utf-8>
-                               <name>LOWERCASE U-HOOK / LATIN SMALL LETTER U WITH
-HORN</name>
-                       </code>
-                       <code>
-                               <marc>C0</marc>
-                               <ucs>00B0</ucs>
-                               <utf-8>C2B0</utf-8>
-                               <name>DEGREE SIGN</name>
-                       </code>
-                       <code>
-                               <marc>C1</marc>
-                               <ucs>2113</ucs>
-                               <utf-8>E28493</utf-8>
-                               <name>SCRIPT SMALL L</name>
-                       </code>
-                       <code>
-                               <marc>C2</marc>
-                               <ucs>2117</ucs>
-                               <utf-8>E28497</utf-8>
-                               <name>SOUND RECORDING COPYRIGHT</name>
-                       </code>
-                       <code>
-                               <marc>C3</marc>
-                               <ucs>00A9</ucs>
-                               <utf-8>C2A9</utf-8>
-                               <name>COPYRIGHT SIGN</name>
-                       </code>
-                       <code>
-                               <marc>C4</marc>
-                               <ucs>266F</ucs>
-                               <utf-8>E299AF</utf-8>
-                               <name>MUSIC SHARP SIGN</name>
-                       </code>
-                       <code>
-                               <marc>C5</marc>
-                               <ucs>00BF</ucs>
-                               <utf-8>C2BF</utf-8>
-                               <name>INVERTED QUESTION MARK</name>
-                       </code>
-                       <code>
-                               <marc>C6</marc>
-                               <ucs>00A1</ucs>
-                               <utf-8>C2A1</utf-8>
-                               <name>INVERTED EXCLAMATION MARK</name>
-                       </code>
-                       <code>
-                               <marc>C7</marc>
-                               <ucs>00DF</ucs>
-                               <utf-8>C39F</utf-8>
-                               <name>ESZETT SYMBOL</name>
-                       </code>
-                       <code>
-                               <marc>C8</marc>
-                               <ucs>20AC</ucs>
-                               <utf-8>E282AC</utf-8>
-                               <name>EURO SIGN</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E0</marc>
-                               <ucs>0309</ucs>
-                               <utf-8>CC89</utf-8>
-                               <name>PSEUDO QUESTION MARK / COMBINING HOOK
-ABOVE</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E1</marc>
-                               <ucs>0300</ucs>
-                               <utf-8>CC80</utf-8>
-                               <name>GRAVE / COMBINING GRAVE ACCENT (Varia)</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E2</marc>
-                               <ucs>0301</ucs>
-                               <utf-8>CC81</utf-8>
-                               <name>ACUTE / COMBINING ACUTE ACCENT (Oxia)</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E3</marc>
-                               <ucs>0302</ucs>
-                               <utf-8>CC82</utf-8>
-                               <name>CIRCUMFLEX / COMBINING CIRCUMFLEX
-ACCENT</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E4</marc>
-                               <ucs>0303</ucs>
-                               <utf-8>CC83</utf-8>
-                               <name>TILDE / COMBINING TILDE</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E5</marc>
-                               <ucs>0304</ucs>
-                               <utf-8>CC84</utf-8>
-                               <name>MACRON / COMBINING MACRON</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E6</marc>
-                               <ucs>0306</ucs>
-                               <utf-8>CC86</utf-8>
-                               <name>BREVE / COMBINING BREVE (Vrachy)</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E7</marc>
-                               <ucs>0307</ucs>
-                               <utf-8>CC87</utf-8>
-                               <name>SUPERIOR DOT / COMBINING DOT ABOVE</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E8</marc>
-                               <ucs>0308</ucs>
-                               <utf-8>CC88</utf-8>
-                               <name>UMLAUT, DIAERESIS / COMBINING DIAERESIS
-(Dialytika)</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>E9</marc>
-                               <ucs>030C</ucs>
-                               <utf-8>CC8C</utf-8>
-                               <name>HACEK / COMBINING CARON</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>EA</marc>
-                               <ucs>030A</ucs>
-                               <utf-8>CC8A</utf-8>
-                               <name>CIRCLE ABOVE, ANGSTROM / COMBINING RING
-ABOVE</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>EB</marc>
-                               <ucs>0361</ucs>
-                               <utf-8>CDA1</utf-8>
-                               <alt>FE20</alt>
-                               <altutf-8>EFB8A0</altutf-8>
-                               <name>LIGATURE, FIRST HALF / COMBINING DOUBLE 
-                               INVERTED BREVE</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>EC</marc>
-                               <ucs></ucs>
-                               <utf-8></utf-8>
-                               <alt>FE21</alt>
-                               <altutf-8>EFB8A1</altutf-8>
-                               <name>LIGATURE, SECOND HALF / COMBINING LIGATURE RIGHT HALF</name>
-                               <note>The Ligature that spans two characters 
-                               is constructed of two halves in MARC-8: EB 
-                               (Ligature, first half) and EC (Ligature, second 
-                               half).  The preferred Unicode/UTF-8 mapping is to 
-                               the single character Ligature that spans two characters,
-                               U+0361.  The single character Ligature is encoded
-                               following the second of the two characters to be spanned.  
-                               The two half Ligatures in Unicode, to which the 
-                               Ligature has been mapped since 1996, are indicted 
-                               in the mapping as alternatives, but their use is not 
-                               recommended.  It is expected that font support for 
-                               the single character Ligature mark will be more 
-                               easily obtained than for the two halves.</note>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>ED</marc>
-                               <ucs>0315</ucs>
-                               <utf-8>CC95</utf-8>
-                               <name>HIGH COMMA, OFF CENTER / COMBINING COMMA ABOVE
-RIGHT</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>EE</marc>
-                               <ucs>030B</ucs>
-                               <utf-8>CC8B</utf-8>
-                               <name>DOUBLE ACUTE / COMBINING DOUBLE ACUTE
-ACCENT</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>EF</marc>
-                               <ucs>0310</ucs>
-                               <utf-8>CC90</utf-8>
-                               <name>CANDRABINDU / COMBINING CANDRABINDU</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F0</marc>
-                               <ucs>0327</ucs>
-                               <utf-8>CCA7</utf-8>
-                               <name>CEDILLA / COMBINING CEDILLA</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F1</marc>
-                               <ucs>0328</ucs>
-                               <utf-8>CCA8</utf-8>
-                               <name>RIGHT HOOK, OGONEK / COMBINING OGONEK</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F2</marc>
-                               <ucs>0323</ucs>
-                               <utf-8>CCA3</utf-8>
-                               <name>DOT BELOW / COMBINING DOT BELOW</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F3</marc>
-                               <ucs>0324</ucs>
-                               <utf-8>CCA4</utf-8>
-                               <name>DOUBLE DOT BELOW / COMBINING DIAERESIS
-BELOW</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F4</marc>
-                               <ucs>0325</ucs>
-                               <utf-8>CCA5</utf-8>
-                               <name>CIRCLE BELOW / COMBINING RING BELOW</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F5</marc>
-                               <ucs>0333</ucs>
-                               <utf-8>CCB3</utf-8>
-                               <name>DOUBLE UNDERSCORE / COMBINING DOUBLE LOW
-LINE</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F6</marc>
-                               <ucs>0332</ucs>
-                               <utf-8>CCB2</utf-8>
-                               <name>UNDERSCORE / COMBINING LOW LINE</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F7</marc>
-                               <ucs>0326</ucs>
-                               <utf-8>CCA6</utf-8>
-                               <name>LEFT HOOK (COMMA BELOW) / COMBINING COMMA
-BELOW</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F8</marc>
-                               <ucs>031C</ucs>
-                               <utf-8>CC9C</utf-8>
-                               <name>RIGHT CEDILLA / COMBINING LEFT HALF RING
-BELOW</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>F9</marc>
-                               <ucs>032E</ucs>
-                               <utf-8>CCAE</utf-8>
-                               <name>UPADHMANIYA / COMBINING BREVE BELOW</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>FA</marc>
-                               <ucs>0360</ucs>
-                               <utf-8>CDA0</utf-8>
-                               <alt>FE22</alt>
-                               <altutf-8>EFB8A2</altutf-8>
-                               <name>DOUBLE TILDE, FIRST HALF / COMBINING DOUBLE TILDE</name>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>FB</marc>
-                               <ucs></ucs>
-                               <utf-8></utf-8>
-                               <alt>FE23</alt>
-                               <altutf-8>EFB8A3</altutf-8>
-                               <name>DOUBLE TILDE, SECOND HALF / COMBINING DOUBLE TILDE RIGHT HALF</name>
-                               <note>The Double Tilde that spans two characters is 
-                               constructed of two halves in MARC-8: FA (Double 
-                               Tilde, first half) and FB (Double Tilde, second 
-                               half).  The preferred Unicode/UTF-8 mapping 
-                               is to the single character Double Tilde that 
-                               spans two characters, U+0360.  The single 
-                               character Double Tilde is encoded following 
-                               the second of the two characters to be spanned.  
-                               The two half Double Tildes in Unicode, to 
-                               which the MARC8 Double Tilde has been 
-                               mapped since 1996, are indicted in the 
-                               mapping as alternatives, but their use is not 
-                               recommended.  It is expected that font support 
-                               for the single character Double Tilde mark will 
-                               be more easily obtained than for the two halves.</note>
-                       </code>
-                       <code>
-                               <isCombining>true</isCombining>
-                               <marc>FE</marc>
-                               <ucs>0313</ucs>
-                               <utf-8>CC93</utf-8>
-                               <name>HIGH COMMA, CENTERED / COMBINING COMMA ABOVE
-(Psili)</name>
-                       </code>
+                               <ucs>201D</ucs>
+                               <utf-8>E2809D</utf-8>
+                               <name>RIGHT DOUBLE QUOTATION MARK (DOUBLE COMMA QUOTATION MARK)</name>
+                       </code>                 
+                       <code>
+                               <marc>BB</marc>
+                               <ucs>00BB</ucs>
+                               <utf-8>C2BB</utf-8>
+                               <name>RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK (RIGHT POINTING GUILLEMET)</name>
+                       </code>                 
+                       <code>          
+                                <marc>BC</marc>
+                                <ucs>266F</ucs>
+                                <utf-8>E299AF</utf-8>
+                                <name>MUSIC SHARP SIGN</name>
+                        </code>
+                        <code>
+                                <marc>BD</marc>
+                                <ucs>02B9</ucs>
+                                <utf-8>CAB9</utf-8>
+                                <name>SOFT SIGN, PRIME / MODIFIER LETTER PRIME</name>
+                        </code>
+                        <code>
+                                <marc>BE</marc>
+                                <ucs>02BA</ucs>
+                                <utf-8>CABA</utf-8>
+                                <name>HARD SIGN, DOUBLE PRIME / MODIFIER LETTER DOUBLE PRIME</name>            
+                        </code>
+                        <code>
+                                <marc>BF</marc>
+                                <ucs>00BF</ucs>
+                                <utf-8>C2BF</utf-8>
+                                <name>INVERTED QUESTION MARK</name>
+                        </code>
+               
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C0</marc>
+                                <ucs>0309</ucs>
+                                <utf-8>CC89</utf-8>
+                                <name>PSEUDO QUESTION MARK / COMBINING HOOK ABOVE</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C1</marc>
+                                <ucs>0300</ucs>
+                                <utf-8>CC80</utf-8>
+                                <name>GRAVE / COMBINING GRAVE ACCENT (Varia)</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C2</marc>
+                                <ucs>0301</ucs>
+                                <utf-8>CC81</utf-8>
+                                <name>ACUTE / COMBINING ACUTE ACCENT (Oxia)</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C3</marc>
+                                <ucs>0302</ucs>
+                                <utf-8>CC82</utf-8>
+                                <name>CIRCUMFLEX / COMBINING CIRCUMFLEX ACCENT</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C4</marc>
+                                <ucs>0303</ucs>
+                                <utf-8>CC83</utf-8>
+                                <name>TILDE / COMBINING TILDE</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C5</marc>
+                                <ucs>0304</ucs>
+                                <utf-8>CC84</utf-8>
+                                <name>MACRON / COMBINING MACRON</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C6</marc>
+                                <ucs>0306</ucs>
+                                <utf-8>CC86</utf-8>
+                                <name>BREVE / COMBINING BREVE (Vrachy)</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C7</marc>
+                                <ucs>0307</ucs>
+                                <utf-8>CC87</utf-8>
+                                <name>SUPERIOR DOT / COMBINING DOT ABOVE</name>
+                        </code>
+                        <code>
+                               <note>Unicode FAQ: "Unicode doesn't seem to distinguish between tréma and umlaut,
+                               but I need to distinguish. What shall I do?" Here the tréma is coded as CGJ (U+034F) + U+0308; see
+                               http://www.unicode.org/faq/char_combmark.html#18</note>
+                                <isCombining>true</isCombining>
+                                <marc>C8</marc>
+                                <ucs>034F0308</ucs>
+                                <utf-8>CC88</utf-8>
+                                <name>U+034F COMBINING GRAPHEME JOINER (CGJ) / tréma</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>C9</marc>
+                                <ucs>0308</ucs>
+                                <utf-8>CC88</utf-8>
+                                <name>UMLAUT, DIAERESIS / COMBINING DIAERESIS (Dialytika)</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>CA</marc>
+                                <ucs>030A</ucs>
+                                <utf-8>CC8A</utf-8>
+                                <name>CIRCLE ABOVE, ANGSTROM / COMBINING RING ABOVE</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>CB</marc>
+                                <ucs>0315</ucs>
+                                <utf-8>CC95</utf-8>
+                                <name>HIGH COMMA, OFF CENTER / COMBINING COMMA ABOVE RIGHT</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>CC</marc>
+                                <ucs>0313</ucs>
+                                <utf-8>CC93</utf-8>
+                                <name>HIGH COMMA, CENTERED / COMBINING COMMA ABOVE (Psili)</name>              
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>CD</marc>
+                                <ucs>030B</ucs>
+                                <utf-8>CC8B</utf-8>
+                                <name>DOUBLE ACUTE / COMBINING DOUBLE ACUTE ACCENT</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>CE</marc>
+                                <ucs>031B</ucs>
+                                <utf-8>CC9B</utf-8>
+                                <name>COMBINING HORN (NON-SPACING HORN)</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>CF</marc>
+                                <ucs>030C</ucs>
+                                <utf-8>CC8C</utf-8>
+                                <name>HACEK / COMBINING CARON</name>
+                        </code>
+               
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D0</marc>
+                                <ucs>0327</ucs>
+                                <utf-8>CCA7</utf-8>
+                                <name>CEDILLA / COMBINING CEDILLA</name>
+                        </code>                
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D1</marc>
+                                <ucs>031C</ucs>
+                                <utf-8>CC9C</utf-8>
+                                <name>RIGHT CEDILLA / COMBINING LEFT HALF RING BELOW</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D2</marc>
+                                <ucs>0326</ucs>
+                                <utf-8>CCA6</utf-8>
+                                <name>LEFT HOOK (COMMA BELOW) / COMBINING COMMA BELOW</name>
+                        </code>                
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D3</marc>
+                                <ucs>0328</ucs>
+                                <utf-8>CCA8</utf-8>
+                                <name>RIGHT HOOK, OGONEK / COMBINING OGONEK</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D4</marc>
+                                <ucs>0325</ucs>
+                                <utf-8>CCA5</utf-8>
+                                <name>CIRCLE BELOW / COMBINING RING BELOW</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D5</marc>
+                                <ucs>032E</ucs>
+                                <utf-8>CCAE</utf-8>
+                                <name>UPADHMANIYA / COMBINING BREVE BELOW</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D6</marc>
+                                <ucs>0323</ucs>
+                                <utf-8>CCA3</utf-8>
+                                <name>DOT BELOW / COMBINING DOT BELOW</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D7</marc>
+                                <ucs>0324</ucs>
+                                <utf-8>CCA4</utf-8>
+                                <name>DOUBLE DOT BELOW / COMBINING DIAERESIS BELOW</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D8</marc>
+                                <ucs>0332</ucs>
+                                <utf-8>CCB2</utf-8>
+                                <name>UNDERSCORE / COMBINING LOW LINE</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>D9</marc>
+                                <ucs>0333</ucs>
+                                <utf-8>CCB3</utf-8>
+                                <name>DOUBLE UNDERSCORE / COMBINING DOUBLE LOW LINE</name>
+                        </code>
+
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>DA</marc>
+                                <ucs>0329</ucs>
+                                <utf-8>CCA9</utf-8>
+                                <name>COMBINING VERTICAL LINE BELOW (NON-SPACING VERTICAL LINE BELOW)</name>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>DB</marc>
+                                <ucs>032D</ucs>
+                                <utf-8>CCAD</utf-8>
+                                <name>COMBINING CIRCUMFLEX ACCENT BELOW (NON-SPACING CIRCUMFLEX BELOW)</name>
+                        </code>
+                       <!-- DC -->
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>DD</marc>
+                                <ucs>0360</ucs>
+                                <utf-8>CDA0</utf-8>
+                                <alt>FE22</alt>
+                                <altutf-8>EFB8A2</altutf-8>
+                                <name>DOUBLE TILDE, FIRST HALF / COMBINING DOUBLE TILDE</name>
+                        </code>                
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>DE</marc>
+                                <ucs></ucs>
+                                <utf-8></utf-8>
+                                <alt>FE21</alt>
+                                <altutf-8>EFB8A1</altutf-8>
+                                <name>LIGATURE, SECOND HALF / COMBINING LIGATURE RIGHT HALF</name>
+                                <note>The Ligature that spans two characters 
+                                is constructed of two halves in MARC-8: EB 
+                                (Ligature, first half) and EC (Ligature, second 
+                                half; coded DE in this table).  The preferred Unicode/UTF-8 mapping is to 
+                                the single character Ligature that spans two characters,
+                                U+0361.  The single character Ligature is encoded
+                                following the second of the two characters to be spanned.  
+                                The two half Ligatures in Unicode, to which the 
+                                Ligature has been mapped since 1996, are indicated 
+                                in the mapping as alternatives, but their use is not 
+                                recommended.  It is expected that font support for 
+                                the single character Ligature mark will be more 
+                                easily obtained than for the two halves.</note>
+                        </code>
+                        <code>
+                                <isCombining>true</isCombining>
+                                <marc>DF</marc>
+                                <ucs></ucs>
+                                <utf-8></utf-8>
+                                <alt>FE23</alt>
+                                <altutf-8>EFB8A3</altutf-8>
+                                <name>DOUBLE TILDE, SECOND HALF / COMBINING DOUBLE TILDE RIGHT HALF</name>
+                                <note>The Double Tilde that spans two characters is 
+                                constructed of two halves in MARC-8: FA (Double 
+                                Tilde, first half) and FB (Double Tilde, second 
+                                half; coded DD and DF in this table).  The preferred Unicode/UTF-8 mapping 
+                                is to the single character Double Tilde that 
+                                spans two characters, U+0360.  The single 
+                                character Double Tilde is encoded following 
+                                the second of the two characters to be spanned.  
+                                The two half Double Tildes in Unicode, to 
+                                which the MARC-8 Double Tilde has been 
+                                mapped since 1996, are indicated in the 
+                                mapping as alternatives, but their use is not 
+                                recommended.  It is expected that font support 
+                                for the single character Double Tilde mark will 
+                                be more easily obtained than for the two halves.</note>
+                        </code>
+               
+               
+                       <!-- E0 -->
+                        <code>
+                                <marc>E1</marc>
+                                <ucs>00C6</ucs>
+                                <utf-8>C386</utf-8>
+                                <name>UPPERCASE DIGRAPH AE / LATIN CAPITAL LIGATURE AE</name>
+                        </code>
+                        <code>
+                                <marc>E2</marc>
+                                <ucs>0110</ucs>
+                                <utf-8>C490</utf-8>
+                                <name>UPPERCASE D WITH CROSSBAR / LATIN CAPITAL LETTER D WITH STROKE</name>
+                        </code>
+                       <!-- E3, E4, E5 -->
+                        <code>
+                                <marc>E6</marc>
+                                <ucs>0132</ucs>
+                                <utf-8>C4B2</utf-8>
+                                <name>LATIN CAPITAL LIGATURE IJ (LATIN CAPITAL LETTER I J)</name>
+                        </code>
+                       <!-- E7 -->
+                        <code>
+                                <marc>E8</marc>
+                                <ucs>0141</ucs>
+                                <utf-8>C581</utf-8>
+                                <name>UPPERCASE POLISH L / LATIN CAPITAL LETTER L WITH STROKE</name>                   
+                        </code>
+                        <code>
+                                <marc>E9</marc>
+                                <ucs>00D8</ucs>
+                                <utf-8>C398</utf-8>
+                                <name>UPPERCASE SCANDINAVIAN O / LATIN CAPITAL LETTER O WITH STROKE</name>
+                        </code>
+                        <code>
+                                <marc>EA</marc>
+                                <ucs>0152</ucs>
+                                <utf-8>C592</utf-8>
+                                <name>UPPERCASE DIGRAPH OE / LATIN CAPITAL LIGATURE OE</name>
+                        </code>
+                       <!-- EB -->
+                        <code>
+                                <marc>EC</marc>
+                                <ucs>00DE</ucs>
+                                <utf-8>C39E</utf-8>
+                                <name>UPPERCASE ICELANDIC THORN / LATIN CAPITAL LETTER THORN (Icelandic)</name>
+                        </code>
+                       <!-- ED, EE, EF -->
+                        <code>
+                                <marc>F1</marc>
+                                <ucs>00E6</ucs>
+                                <utf-8>C3A6</utf-8>
+                                <name>LOWERCASE DIGRAPH AE / LATIN SMALL LIGATURE AE</name>
+                        </code>
+                        <code>
+                                <marc>F2</marc>
+                                <ucs>0111</ucs>
+                                <utf-8>C491</utf-8>
+                                <name>LOWERCASE D WITH CROSSBAR / LATIN SMALL LETTER D WITH STROKE</name>
+                        </code>
+                        <code>
+                                <marc>F3</marc>
+                                <ucs>00F0</ucs>
+                                <utf-8>C3B0</utf-8>
+                                <name>LOWERCASE ETH / LATIN SMALL LETTER ETH (Icelandic)</name>
+                        </code>
+                       <!-- F4 -->
+                        <code>
+                                <marc>F5</marc>
+                                <ucs>0131</ucs>
+                                <utf-8>C4B1</utf-8>
+                                <name>LOWERCASE TURKISH I / LATIN SMALL LETTER DOTLESS I</name>
+                        </code>
+                        <code>
+                                <marc>F6</marc>
+                                <ucs>0133</ucs>
+                                <utf-8>C4B3</utf-8>
+                                <name>LATIN SMALL LIGATURE IJ (LATIN SMALL LETTER I J)</name>
+                        </code>                        
+                       <!-- F7 -->
+                        <code>
+                                <marc>F8</marc>
+                                <ucs>0142</ucs>
+                                <utf-8>C582</utf-8>
+                                <name>LOWERCASE POLISH L / LATIN SMALL LETTER L WITH STROKE</name>
+                        </code>
+                        <code>
+                                <marc>F9</marc>
+                                <ucs>00F8</ucs>
+                                <utf-8>C3B8</utf-8>
+                                <name>LOWERCASE SCANDINAVIAN O / LATIN SMALL LETTER O WITH STROKE</name>
+                        </code>
+                        <code>
+                                <marc>FA</marc>
+                                <ucs>0153</ucs>
+                                <utf-8>C593</utf-8>
+                                <name>LOWERCASE DIGRAPH OE / LATIN SMALL LIGATURE OE</name>
+                        </code>
+                        <code>
+                                <marc>FB</marc>
+                                <ucs>00DF</ucs>
+                                <utf-8>C39F</utf-8>
+                                <name>ESZETT SYMBOL</name>
+                        </code>
+                        <code>
+                                <marc>FC</marc>
+                                <ucs>00FE</ucs>
+                                <utf-8>C3BE</utf-8>
+                                <name>LOWERCASE ICELANDIC THORN / LATIN SMALL LETTER THORN (Icelandic)</name>
+                        </code>
+                       <!-- FD, FE, FF -->
+                       
+                       <!-- not yet defined
+                        <code>
+                                <marc>??</marc>
+                                <ucs>20AC</ucs>
+                                <utf-8>E282AC</utf-8>
+                                <name>EURO SIGN</name>
+                        </code>
+                       -->
                </characterSet>
        </codeTable>
 </codeTables>
                </characterSet>
        </codeTable>
 </codeTables>