<?php
/**
 * UTF8 helper functions
 *
 * @license    GPL 2 (http://www.gnu.org/licenses/gpl.html)
 * @author     Andreas Gohr <andi@splitbrain.org>
 */
/**
 * URL-encode a filename so that it may contain Unicode characters
 *
 * Slashes are not encoded, so path separators survive.
 *
 * When $safe is true (the default) the string is returned untouched if it
 * consists only of ASCII letters, digits and the characters / _ - . %
 * This makes it safe to run the function several times on the same string,
 * because an already encoded name is not encoded a second time.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urlencode()
 *
 * @param  string $file filename to encode
 * @param  bool   $safe skip encoding when only safe characters are present
 * @return string the encoded filename
 */
function utf8_encodeFN($file,$safe=true){
  if($safe && preg_match('#^[a-zA-Z0-9/_\-.%]+$#',$file)){
    return $file;
  }

  // urlencode() escapes "/" as well, so turn those back into real slashes
  return str_replace('%2F','/',urlencode($file));
}
/**
 * URL-decode a filename
 *
 * This is just a wrapper around urldecode() and the counterpart of
 * utf8_encodeFN().
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    urldecode()
 *
 * @param  string $file URL-encoded filename
 * @return string the decoded filename
 */
function utf8_decodeFN($file){
  return urldecode($file);
}
/**
 * Checks if a string contains 7bit ASCII only
 *
 * An empty string counts as ASCII.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $str string to inspect
 * @return bool   true when no byte has a value above 127
 */
function utf8_isASCII($str){
  // Byte-wise (non-/u) match: any byte with the high bit set is non-ASCII.
  // This replaces the $str{$i} offset syntax, which PHP 8 no longer accepts.
  return !preg_match('/[\x80-\xFF]/',$str);
}
/**
 * Tries to detect if a string is in Unicode (UTF-8) encoding
 *
 * Walks the bytes and verifies that every lead byte is followed by the
 * number of continuation bytes (10bbbbbb) its bit pattern announces.
 * Only the structure is checked: the historical 5 and 6 byte forms are
 * accepted, and overlong encodings are not rejected.
 *
 * @author <bmorel@ssi.fr>
 * @link   http://www.php.net/manual/en/function.utf8-encode.php
 *
 * @param  string $Str string to inspect
 * @return bool   true if the byte structure is valid, false otherwise
 */
function utf8_check($Str) {
  $len = strlen($Str);
  for ($i=0; $i<$len; $i++) {
    $byte = ord($Str[$i]);
    if ($byte < 0x80) continue;              # 0bbbbbbb
    elseif (($byte & 0xE0) == 0xC0) $n=1;    # 110bbbbb
    elseif (($byte & 0xF0) == 0xE0) $n=2;    # 1110bbbb
    elseif (($byte & 0xF8) == 0xF0) $n=3;    # 11110bbb
    elseif (($byte & 0xFC) == 0xF8) $n=4;    # 111110bb
    elseif (($byte & 0xFE) == 0xFC) $n=5;    # 1111110b
    else return false;                       # Does not match any model

    for ($j=0; $j<$n; $j++) {                # n bytes matching 10bbbbbb follow ?
      if ((++$i == $len) || ((ord($Str[$i]) & 0xC0) != 0x80)) {
        return false;
      }
    }
  }
  return true;
}
/**
 * This is a unicode aware replacement for strlen()
 *
 * Uses the mb_string extension if available (and UTF8_NOMBSTRING is not
 * defined); otherwise the string is decoded to code points and counted.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strlen()
 *
 * @param  string $string UTF-8 encoded string
 * @return int    number of characters
 */
function utf8_strlen($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strlen')){
    return mb_strlen($string,'utf-8');
  }

  return count(utf8_to_unicode($string));
}
/**
 * This is a unicode aware replacement for substr()
 *
 * Uses the mb_string extension if available (and UTF8_NOMBSTRING is not
 * defined); otherwise the string is decoded to code points, sliced with
 * array_slice() and encoded again.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    substr()
 *
 * @param  string   $str    UTF-8 encoded string
 * @param  int      $start  character offset to start at
 * @param  int|null $length number of characters, null for "up to the end"
 * @return string   the requested part of the string
 */
function utf8_substr($str, $start, $length=null){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_substr')){
    return mb_substr($str,$start,$length,'utf-8');
  }

  $uni = utf8_to_unicode($str);
  return unicode_to_utf8(array_slice($uni,$start,$length));
}
/**
 * This is a unicode aware replacement for strtolower()
 *
 * Uses the mb_string extension if available (and UTF8_NOMBSTRING is not
 * defined); otherwise every code point is looked up in the global
 * $UTF8_UPPER_TO_LOWER table and replaced when a mapping exists.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtolower()
 * @see    utf8_strtoupper()
 *
 * @param  string $string UTF-8 encoded string
 * @return string the lower cased string
 */
function utf8_strtolower($string){
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtolower')){
    return mb_strtolower($string,'utf-8');
  }

  global $UTF8_UPPER_TO_LOWER;
  $uni = utf8_to_unicode($string);
  foreach($uni as $idx => $code){
    // isset() keeps unmapped code points from raising undefined-offset notices
    if(isset($UTF8_UPPER_TO_LOWER[$code])){
      $uni[$idx] = $UTF8_UPPER_TO_LOWER[$code];
    }
  }
  return unicode_to_utf8($uni);
}
/**
 * This is a unicode aware replacement for strtoupper()
 *
 * Uses the mb_string extension if available (and UTF8_NOMBSTRING is not
 * defined); otherwise every code point is looked up in the global
 * $UTF8_LOWER_TO_UPPER table and replaced when a mapping exists.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    strtoupper()
 * @see    utf8_strtolower()
 *
 * @param  string $string UTF-8 encoded string
 * @return string the upper cased string
 */
function utf8_strtoupper($string){
  // must be mb_strtoupper here - calling mb_strtolower would lower case the input
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strtoupper')){
    return mb_strtoupper($string,'utf-8');
  }

  global $UTF8_LOWER_TO_UPPER;
  $uni = utf8_to_unicode($string);
  foreach($uni as $idx => $code){
    // isset() keeps unmapped code points from raising undefined-offset notices
    if(isset($UTF8_LOWER_TO_UPPER[$code])){
      $uni[$idx] = $UTF8_LOWER_TO_UPPER[$code];
    }
  }
  return unicode_to_utf8($uni);
}
/**
 * Replace accented UTF-8 characters by unaccented ASCII-7 equivalents
 *
 * Use the optional parameter to just deaccent lower ($case = -1) or upper
 * ($case = 1) letters. Default is to deaccent both cases ($case = 0).
 * The replacements come from the global $UTF8_LOWER_ACCENTS and
 * $UTF8_UPPER_ACCENTS tables.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 *
 * @param  string $string UTF-8 encoded string
 * @param  int    $case   -1 lower case only, 1 upper case only, 0 both
 * @return string the string with the known accented letters replaced
 */
function utf8_deaccent($string,$case=0){
  global $UTF8_LOWER_ACCENTS;
  global $UTF8_UPPER_ACCENTS;

  if($case <= 0){
    $string = str_replace(array_keys($UTF8_LOWER_ACCENTS),array_values($UTF8_LOWER_ACCENTS),$string);
  }
  if($case >= 0){
    $string = str_replace(array_keys($UTF8_UPPER_ACCENTS),array_values($UTF8_UPPER_ACCENTS),$string);
  }
  return $string;
}
/**
 * Removes special characters (nonalphanumeric) from a UTF-8 string
 *
 * Be sure to specify all specialchars you give in $repl in $keep, too,
 * otherwise the replacement string itself contains characters that this
 * function is meant to strip.
 *
 * This function adds the controlchars 0x00 to 0x19 to the set of
 * stripped chars (they are not included in $UTF8_SPECIAL_CHARS).
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @param  string $string The UTF8 string to strip of special chars
 * @param  string $repl   Replace special with this string
 * @param  string $keep   Special chars to keep (in UTF8)
 * @return string|null    the cleaned string; preg_replace() yields null on
 *                        error, e.g. when $string is not valid UTF-8
 */
function utf8_stripspecials($string,$repl='',$keep=''){
  global $UTF8_SPECIAL_CHARS;

  if($keep != ''){
    // drop the code points the caller wants to keep from the strip list
    $specials = array_diff($UTF8_SPECIAL_CHARS, utf8_to_unicode($keep));
  }else{
    $specials = $UTF8_SPECIAL_CHARS;
  }

  // build one character class out of all remaining special characters
  $specials = unicode_to_utf8($specials);
  $specials = preg_quote($specials, '/');

  return preg_replace('/[\x00-\x19'.$specials.']/u',$repl,$string);
}
/**
 * This is an Unicode aware replacement for strpos()
 *
 * Uses the mb_string extension if available (and UTF8_NOMBSTRING is not
 * defined). The fallback decodes both strings to code points and compares
 * them position by position.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @author Andreas Gohr <andi@splitbrain.org>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    strpos()
 *
 * @param  string $haystack UTF-8 string to search in
 * @param  string $needle   UTF-8 string to search for
 * @param  int    $offset   character offset at which the search starts
 * @return int|false character position of the first match, false if none
 */
function utf8_strpos($haystack, $needle,$offset=0) {
  if(!defined('UTF8_NOMBSTRING') && function_exists('mb_strpos')){
    return mb_strpos($haystack,$needle,$offset,'utf-8');
  }

  $hay    = utf8_to_unicode($haystack);
  $ndl    = utf8_to_unicode($needle);
  $hayLen = count($hay);
  $ndlLen = count($ndl);

  // an empty needle never matched in the fallback; keep that, minus the notice
  if($ndlLen == 0) return false;
  // a negative offset used to behave like 0 here (after emitting notices)
  if($offset < 0) $offset = 0;

  // stop early enough that the needle always fits into the remaining haystack
  $last = $hayLen - $ndlLen;
  for($position = $offset; $position <= $last; $position++){
    for($i = 0; $i < $ndlLen; $i++){
      if($ndl[$i] != $hay[$position + $i]) break;
    }
    if($i == $ndlLen) return $position;
  }
  return false;
}
/**
 * This function will take any UTF-8 encoded text and return it as
 * a list of Unicode values (code points).
 *
 * Sequences of one to four bytes are decoded. The input is not validated:
 * malformed input yields meaningless values, and an incomplete sequence at
 * the end of the string is dropped.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    unicode_to_utf8()
 *
 * @param  string $str UTF-8 encoded string
 * @return array  list of integer code points
 */
function utf8_to_unicode( $str ) {
  $unicode    = array();
  $values     = array(); // bytes collected for the current multi byte sequence
  $lookingFor = 1;       // total number of bytes the current sequence needs
  $len        = strlen( $str );

  for ($i = 0; $i < $len; $i++ ) {
    $thisValue = ord( $str[ $i ] );

    if ( $thisValue < 128 ) {
      $unicode[] = $thisValue;
      continue;
    }

    if ( count( $values ) == 0 ) {
      // lead byte: its value range tells how long the sequence is
      if ( $thisValue < 224 )     $lookingFor = 2;
      elseif ( $thisValue < 240 ) $lookingFor = 3;
      else                        $lookingFor = 4;
    }
    $values[] = $thisValue;

    if ( count( $values ) == $lookingFor ) {
      if ( $lookingFor == 4 ) {
        $number = ( ( $values[0] % 8 ) * 262144 ) + ( ( $values[1] % 64 ) * 4096 ) +
                  ( ( $values[2] % 64 ) * 64 ) + ( $values[3] % 64 );
      } elseif ( $lookingFor == 3 ) {
        $number = ( ( $values[0] % 16 ) * 4096 ) + ( ( $values[1] % 64 ) * 64 ) + ( $values[2] % 64 );
      } else {
        $number = ( ( $values[0] % 32 ) * 64 ) + ( $values[1] % 64 );
      }
      $unicode[]  = $number;
      $values     = array();
      $lookingFor = 1;
    }
  }
  return $unicode;
}
/**
 * This function will convert a Unicode array back to its UTF-8 representation
 *
 * Code points below 0x80 become one byte, below 0x800 two bytes, below
 * 0x10000 three bytes and everything above four bytes.
 *
 * @author Scott Michael Reynen <scott@randomchaos.com>
 * @link   http://www.randomchaos.com/document.php?source=php_and_unicode
 * @see    utf8_to_unicode()
 *
 * @param  array  $str list of integer code points (array keys are ignored)
 * @return string the UTF-8 encoded string
 */
function unicode_to_utf8( $str ) {
  $utf8 = '';
  foreach( $str as $unicode ) {
    if ( $unicode < 128 ) {
      $utf8.= chr( $unicode );
    } elseif ( $unicode < 2048 ) {
      $utf8.= chr( 192 | ( $unicode >> 6 ) );
      $utf8.= chr( 128 | ( $unicode & 63 ) );
    } elseif ( $unicode < 65536 ) {
      $utf8.= chr( 224 | ( $unicode >> 12 ) );
      $utf8.= chr( 128 | ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 | ( $unicode & 63 ) );
    } else {
      // beyond the Basic Multilingual Plane a four byte sequence is needed
      $utf8.= chr( 240 | ( $unicode >> 18 ) );
      $utf8.= chr( 128 | ( ( $unicode >> 12 ) & 63 ) );
      $utf8.= chr( 128 | ( ( $unicode >> 6 ) & 63 ) );
      $utf8.= chr( 128 | ( $unicode & 63 ) );
    }
  }
  return $utf8;
}
/**
 * UTF-8 Case lookup table
 *
 * This lookup table maps the code point of a lower case letter to the code
 * point of its corresponding upper case letter. It is used by the fallback
 * of utf8_strtoupper() and is flipped to build $UTF8_UPPER_TO_LOWER.
 *
 * A few keys are listed more than once, always with the same value; the
 * later entry simply overwrites the earlier one.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
$UTF8_LOWER_TO_UPPER = array(
  0x0061=>0x0041, 0x03C6=>0x03A6, 0x0163=>0x0162, 0x00E5=>0x00C5, 0x0062=>0x0042,
  0x013A=>0x0139, 0x00E1=>0x00C1, 0x0142=>0x0141, 0x03CD=>0x038E, 0x0101=>0x0100,
  0x0491=>0x0490, 0x03B4=>0x0394, 0x015B=>0x015A, 0x0064=>0x0044, 0x03B3=>0x0393,
  0x00F4=>0x00D4, 0x044A=>0x042A, 0x0439=>0x0419, 0x0113=>0x0112, 0x043C=>0x041C,
  0x015F=>0x015E, 0x0144=>0x0143, 0x00EE=>0x00CE, 0x045E=>0x040E, 0x044F=>0x042F,
  0x03BA=>0x039A, 0x0155=>0x0154, 0x0069=>0x0049, 0x0073=>0x0053, 0x1E1F=>0x1E1E,
  0x0135=>0x0134, 0x0447=>0x0427, 0x03C0=>0x03A0, 0x0438=>0x0418, 0x00F3=>0x00D3,
  0x0440=>0x0420, 0x0454=>0x0404, 0x0435=>0x0415, 0x0449=>0x0429, 0x014B=>0x014A,
  0x0431=>0x0411, 0x0459=>0x0409, 0x1E03=>0x1E02, 0x00F6=>0x00D6, 0x00F9=>0x00D9,
  0x006E=>0x004E, 0x0451=>0x0401, 0x03C4=>0x03A4, 0x0443=>0x0423, 0x015D=>0x015C,
  0x0453=>0x0403, 0x03C8=>0x03A8, 0x0159=>0x0158, 0x0067=>0x0047, 0x00E4=>0x00C4,
  0x03AC=>0x0386, 0x03AE=>0x0389, 0x0167=>0x0166, 0x03BE=>0x039E, 0x0165=>0x0164,
  0x0117=>0x0116, 0x0109=>0x0108, 0x0076=>0x0056, 0x00FE=>0x00DE, 0x0157=>0x0156,
  0x00FA=>0x00DA, 0x1E61=>0x1E60, 0x1E83=>0x1E82, 0x00E2=>0x00C2, 0x0119=>0x0118,
  0x0146=>0x0145, 0x0070=>0x0050, 0x0151=>0x0150, 0x044E=>0x042E, 0x0129=>0x0128,
  0x03C7=>0x03A7, 0x013E=>0x013D, 0x0442=>0x0422, 0x007A=>0x005A, 0x0448=>0x0428,
  0x03C1=>0x03A1, 0x1E81=>0x1E80, 0x016D=>0x016C, 0x00F5=>0x00D5, 0x0075=>0x0055,
  0x0177=>0x0176, 0x00FC=>0x00DC, 0x1E57=>0x1E56, 0x03C3=>0x03A3, 0x043A=>0x041A,
  0x006D=>0x004D, 0x016B=>0x016A, 0x0171=>0x0170, 0x0444=>0x0424, 0x00EC=>0x00CC,
  0x0169=>0x0168, 0x03BF=>0x039F, 0x006B=>0x004B, 0x00F2=>0x00D2, 0x00E0=>0x00C0,
  0x0434=>0x0414, 0x03C9=>0x03A9, 0x1E6B=>0x1E6A, 0x00E3=>0x00C3, 0x044D=>0x042D,
  0x0436=>0x0416, 0x01A1=>0x01A0, 0x010D=>0x010C, 0x011D=>0x011C, 0x00F0=>0x00D0,
  0x013C=>0x013B, 0x045F=>0x040F, 0x045A=>0x040A, 0x00E8=>0x00C8, 0x03C5=>0x03A5,
  0x0066=>0x0046, 0x00FD=>0x00DD, 0x0063=>0x0043, 0x021B=>0x021A, 0x00EA=>0x00CA,
  0x03B9=>0x0399, 0x017A=>0x0179, 0x00EF=>0x00CF, 0x01B0=>0x01AF, 0x0065=>0x0045,
  0x03BB=>0x039B, 0x03B8=>0x0398, 0x03BC=>0x039C, 0x045C=>0x040C, 0x043F=>0x041F,
  0x044C=>0x042C, 0x00FE=>0x00DE, 0x00F0=>0x00D0, 0x1EF3=>0x1EF2, 0x0068=>0x0048,
  0x00EB=>0x00CB, 0x0111=>0x0110, 0x0433=>0x0413, 0x012F=>0x012E, 0x00E6=>0x00C6,
  0x0078=>0x0058, 0x0161=>0x0160, 0x016F=>0x016E, 0x03B1=>0x0391, 0x0457=>0x0407,
  0x0173=>0x0172, 0x00FF=>0x0178, 0x006F=>0x004F, 0x043B=>0x041B, 0x03B5=>0x0395,
  0x0445=>0x0425, 0x0121=>0x0120, 0x017E=>0x017D, 0x017C=>0x017B, 0x03B6=>0x0396,
  0x03B2=>0x0392, 0x03AD=>0x0388, 0x1E85=>0x1E84, 0x0175=>0x0174, 0x0071=>0x0051,
  0x0437=>0x0417, 0x1E0B=>0x1E0A, 0x0148=>0x0147, 0x0105=>0x0104, 0x0458=>0x0408,
  0x014D=>0x014C, 0x00ED=>0x00CD, 0x0079=>0x0059, 0x010B=>0x010A, 0x03CE=>0x038F,
  0x0072=>0x0052, 0x0430=>0x0410, 0x0455=>0x0405, 0x0452=>0x0402, 0x0127=>0x0126,
  0x0137=>0x0136, 0x012B=>0x012A, 0x03AF=>0x038A, 0x044B=>0x042B, 0x006C=>0x004C,
  0x03B7=>0x0397, 0x0125=>0x0124, 0x0219=>0x0218, 0x00FB=>0x00DB, 0x011F=>0x011E,
  0x043E=>0x041E, 0x1E41=>0x1E40, 0x03BD=>0x039D, 0x0107=>0x0106, 0x03CB=>0x03AB,
  0x0446=>0x0426, 0x00FE=>0x00DE, 0x00E7=>0x00C7, 0x03CA=>0x03AA, 0x0441=>0x0421,
  0x0432=>0x0412, 0x010F=>0x010E, 0x00F8=>0x00D8, 0x0077=>0x0057, 0x011B=>0x011A,
  0x0074=>0x0054, 0x006A=>0x004A, 0x045B=>0x040B, 0x0456=>0x0406, 0x0103=>0x0102,
  0x03BB=>0x039B, 0x00F1=>0x00D1, 0x043D=>0x041D, 0x03CC=>0x038C, 0x00E9=>0x00C9,
  0x00F0=>0x00D0, 0x0457=>0x0407, 0x0123=>0x0122,
);
/**
 * UTF-8 Case lookup table
 *
 * This lookup table maps the code point of an upper case letter to the code
 * point of its corresponding lower case letter. It is built by flipping
 * $UTF8_LOWER_TO_UPPER, so that table has to be defined before this line.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 */
$UTF8_UPPER_TO_LOWER = array_flip($UTF8_LOWER_TO_UPPER);
/**
 * UTF-8 lookup table for lower case accented letters
 *
 * This lookup table defines replacements for accented characters from the
 * ASCII-7 range. These are lower case letters only. Keys are the UTF-8
 * encoded characters, values their ASCII replacement (which may be longer
 * than one character, e.g. 'ä' => 'ae').
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
$UTF8_LOWER_ACCENTS = array(
  'à' => 'a', 'ô' => 'o', 'ď' => 'd', 'ḟ' => 'f', 'ë' => 'e', 'š' => 's', 'ơ' => 'o',
  'ß' => 'ss', 'ă' => 'a', 'ř' => 'r', 'ț' => 't', 'ň' => 'n', 'ā' => 'a', 'ķ' => 'k',
  'ŝ' => 's', 'ỳ' => 'y', 'ņ' => 'n', 'ĺ' => 'l', 'ħ' => 'h', 'ṗ' => 'p', 'ó' => 'o',
  'ú' => 'u', 'ě' => 'e', 'é' => 'e', 'ç' => 'c', 'ẁ' => 'w', 'ċ' => 'c', 'õ' => 'o',
  'ṡ' => 's', 'ø' => 'o', 'ģ' => 'g', 'ŧ' => 't', 'ș' => 's', 'ė' => 'e', 'ĉ' => 'c',
  'ś' => 's', 'î' => 'i', 'ű' => 'u', 'ć' => 'c', 'ę' => 'e', 'ŵ' => 'w', 'ṫ' => 't',
  'ū' => 'u', 'č' => 'c', 'ö' => 'oe', 'è' => 'e', 'ŷ' => 'y', 'ą' => 'a', 'ł' => 'l',
  'ų' => 'u', 'ů' => 'u', 'ş' => 's', 'ğ' => 'g', 'ļ' => 'l', 'ƒ' => 'f', 'ž' => 'z',
  'ẃ' => 'w', 'ḃ' => 'b', 'å' => 'a', 'ì' => 'i', 'ï' => 'i', 'ḋ' => 'd', 'ť' => 't',
  'ŗ' => 'r', 'ä' => 'ae', 'í' => 'i', 'ŕ' => 'r', 'ê' => 'e', 'ü' => 'ue', 'ò' => 'o',
  'ē' => 'e', 'ñ' => 'n', 'ń' => 'n', 'ĥ' => 'h', 'ĝ' => 'g', 'đ' => 'd', 'ĵ' => 'j',
  'ÿ' => 'y', 'ũ' => 'u', 'ŭ' => 'u', 'ư' => 'u', 'ţ' => 't', 'ý' => 'y', 'ő' => 'o',
  'â' => 'a', 'ľ' => 'l', 'ẅ' => 'w', 'ż' => 'z', 'ī' => 'i', 'ã' => 'a', 'ġ' => 'g',
  'ṁ' => 'm', 'ō' => 'o', 'ĩ' => 'i', 'ù' => 'u', 'į' => 'i', 'ź' => 'z', 'á' => 'a',
  'û' => 'u', 'þ' => 'th', 'ð' => 'dh', 'æ' => 'ae', 'µ' => 'u',
);
/**
 * UTF-8 lookup table for upper case accented letters
 *
 * This lookup table defines replacements for accented characters from the
 * ASCII-7 range. These are upper case letters only. Keys are the UTF-8
 * encoded characters, values their ASCII replacement (which may be longer
 * than one character, e.g. 'Ä' => 'Ae').
 *
 * The keys have to be the upper case letters themselves: with lower case
 * keys utf8_deaccent() never touches upper case input and, for $case = 1,
 * rewrites lower case letters instead. 'ß' has no entry here because it is
 * a lower case letter; $UTF8_LOWER_ACCENTS takes care of it.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_deaccent()
 */
$UTF8_UPPER_ACCENTS = array(
  'À' => 'A', 'Ô' => 'O', 'Ď' => 'D', 'Ḟ' => 'F', 'Ë' => 'E', 'Š' => 'S', 'Ơ' => 'O',
  'Ă' => 'A', 'Ř' => 'R', 'Ț' => 'T', 'Ň' => 'N', 'Ā' => 'A', 'Ķ' => 'K',
  'Ŝ' => 'S', 'Ỳ' => 'Y', 'Ņ' => 'N', 'Ĺ' => 'L', 'Ħ' => 'H', 'Ṗ' => 'P', 'Ó' => 'O',
  'Ú' => 'U', 'Ě' => 'E', 'É' => 'E', 'Ç' => 'C', 'Ẁ' => 'W', 'Ċ' => 'C', 'Õ' => 'O',
  'Ṡ' => 'S', 'Ø' => 'O', 'Ģ' => 'G', 'Ŧ' => 'T', 'Ș' => 'S', 'Ė' => 'E', 'Ĉ' => 'C',
  'Ś' => 'S', 'Î' => 'I', 'Ű' => 'U', 'Ć' => 'C', 'Ę' => 'E', 'Ŵ' => 'W', 'Ṫ' => 'T',
  'Ū' => 'U', 'Č' => 'C', 'Ö' => 'Oe', 'È' => 'E', 'Ŷ' => 'Y', 'Ą' => 'A', 'Ł' => 'L',
  'Ų' => 'U', 'Ů' => 'U', 'Ş' => 'S', 'Ğ' => 'G', 'Ļ' => 'L', 'Ƒ' => 'F', 'Ž' => 'Z',
  'Ẃ' => 'W', 'Ḃ' => 'B', 'Å' => 'A', 'Ì' => 'I', 'Ï' => 'I', 'Ḋ' => 'D', 'Ť' => 'T',
  'Ŗ' => 'R', 'Ä' => 'Ae', 'Í' => 'I', 'Ŕ' => 'R', 'Ê' => 'E', 'Ü' => 'Ue', 'Ò' => 'O',
  'Ē' => 'E', 'Ñ' => 'N', 'Ń' => 'N', 'Ĥ' => 'H', 'Ĝ' => 'G', 'Đ' => 'D', 'Ĵ' => 'J',
  'Ÿ' => 'Y', 'Ũ' => 'U', 'Ŭ' => 'U', 'Ư' => 'U', 'Ţ' => 'T', 'Ý' => 'Y', 'Ő' => 'O',
  'Â' => 'A', 'Ľ' => 'L', 'Ẅ' => 'W', 'Ż' => 'Z', 'Ī' => 'I', 'Ã' => 'A', 'Ġ' => 'G',
  'Ṁ' => 'M', 'Ō' => 'O', 'Ĩ' => 'I', 'Ù' => 'U', 'Į' => 'I', 'Ź' => 'Z', 'Á' => 'A',
  'Û' => 'U', 'Þ' => 'Th', 'Ð' => 'Dh', 'Æ' => 'Ae',
);
/**
 * UTF-8 array of common special characters
 *
 * This array should contain all special characters (not a letter or digit)
 * defined in the various local charsets - it's not a complete list of
 * non-alphanum characters in UTF-8. It's not perfect but should match most
 * cases of special chars.
 *
 * The entries are Unicode code points. The controlchars 0x00 to 0x19 are
 * _not_ included in this array. The space 0x20 is!
 *
 * NOTE(review): 0x0142 sits between 0x0060 and 0x007b although it is the
 * letter LATIN SMALL LETTER L WITH STROKE - it looks misplaced; confirm
 * before relying on this table for Polish text.
 *
 * @author Andreas Gohr <andi@splitbrain.org>
 * @see    utf8_stripspecials()
 */
$UTF8_SPECIAL_CHARS = array(
  0x001a, 0x001b, 0x001c, 0x001d, 0x001e, 0x001f, 0x0020, 0x0021, 0x0022, 0x0023,
  0x0024, 0x0025, 0x0026, 0x0027, 0x0028, 0x0029, 0x002a, 0x002b, 0x002c, 0x002d,
  0x002e, 0x002f, 0x003a, 0x003b, 0x003c, 0x003d, 0x003e, 0x003f, 0x0040, 0x005b,
  0x005c, 0x005d, 0x005e, 0x005f, 0x0060, 0x0142, 0x007b, 0x007c, 0x007d, 0x007e,
  0x007f, 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087, 0x0088,
  0x0089, 0x008a, 0x008b, 0x008c, 0x008d, 0x008e, 0x008f, 0x0090, 0x0091, 0x0092,
  0x0093, 0x0094, 0x0095, 0x0096, 0x0097, 0x0098, 0x0099, 0x009a, 0x009b, 0x009c,
  0x009d, 0x009e, 0x009f, 0x00a0, 0x00a1, 0x00a2, 0x00a3, 0x00a4, 0x00a5, 0x00a6,
  0x00a7, 0x00a8, 0x00a9, 0x00aa, 0x00ab, 0x00ac, 0x00ad, 0x00ae, 0x00af, 0x00b0,
  0x00b1, 0x00b2, 0x00b3, 0x00b4, 0x00b5, 0x00b6, 0x00b7, 0x00b8, 0x00b9, 0x00ba,
  0x00bb, 0x00bc, 0x00bd, 0x00be, 0x00bf, 0x00d7, 0x00f7, 0x02c7, 0x02d8, 0x02d9,
  0x02da, 0x02db, 0x02dc, 0x02dd, 0x0300, 0x0301, 0x0303, 0x0309, 0x0323, 0x0384,
  0x0385, 0x0387, 0x03b2, 0x03c6, 0x03d1, 0x03d2, 0x03d5, 0x03d6, 0x05b0, 0x05b1,
  0x05b2, 0x05b3, 0x05b4, 0x05b5, 0x05b6, 0x05b7, 0x05b8, 0x05b9, 0x05bb, 0x05bc,
  0x05bd, 0x05be, 0x05bf, 0x05c0, 0x05c1, 0x05c2, 0x05c3, 0x05f3, 0x05f4, 0x060c,
  0x061b, 0x061f, 0x0640, 0x064b, 0x064c, 0x064d, 0x064e, 0x064f, 0x0650, 0x0651,
  0x0652, 0x066a, 0x0e3f, 0x200c, 0x200d, 0x200e, 0x200f, 0x2013, 0x2014, 0x2015,
  0x2017, 0x2018, 0x2019, 0x201a, 0x201c, 0x201d, 0x201e, 0x2020, 0x2021, 0x2022,
  0x2026, 0x2030, 0x2032, 0x2033, 0x2039, 0x203a, 0x2044, 0x20a7, 0x20aa, 0x20ab,
  0x20ac, 0x2116, 0x2118, 0x2122, 0x2126, 0x2135, 0x2190, 0x2191, 0x2192, 0x2193,
  0x2194, 0x2195, 0x21b5, 0x21d0, 0x21d1, 0x21d2, 0x21d3, 0x21d4, 0x2200, 0x2202,
  0x2203, 0x2205, 0x2206, 0x2207, 0x2208, 0x2209, 0x220b, 0x220f, 0x2211, 0x2212,
  0x2215, 0x2217, 0x2219, 0x221a, 0x221d, 0x221e, 0x2220, 0x2227, 0x2228, 0x2229,
  0x222a, 0x222b, 0x2234, 0x223c, 0x2245, 0x2248, 0x2260, 0x2261, 0x2264, 0x2265,
  0x2282, 0x2283, 0x2284, 0x2286, 0x2287, 0x2295, 0x2297, 0x22a5, 0x22c5, 0x2310,
  0x2320, 0x2321, 0x2329, 0x232a, 0x2469, 0x2500, 0x2502, 0x250c, 0x2510, 0x2514,
  0x2518, 0x251c, 0x2524, 0x252c, 0x2534, 0x253c, 0x2550, 0x2551, 0x2552, 0x2553,
  0x2554, 0x2555, 0x2556, 0x2557, 0x2558, 0x2559, 0x255a, 0x255b, 0x255c, 0x255d,
  0x255e, 0x255f, 0x2560, 0x2561, 0x2562, 0x2563, 0x2564, 0x2565, 0x2566, 0x2567,
  0x2568, 0x2569, 0x256a, 0x256b, 0x256c, 0x2580, 0x2584, 0x2588, 0x258c, 0x2590,
  0x2591, 0x2592, 0x2593, 0x25a0, 0x25b2, 0x25bc, 0x25c6, 0x25ca, 0x25cf, 0x25d7,
  0x2605, 0x260e, 0x261b, 0x261e, 0x2660, 0x2663, 0x2665, 0x2666, 0x2701, 0x2702,
  0x2703, 0x2704, 0x2706, 0x2707, 0x2708, 0x2709, 0x270c, 0x270d, 0x270e, 0x270f,
  0x2710, 0x2711, 0x2712, 0x2713, 0x2714, 0x2715, 0x2716, 0x2717, 0x2718, 0x2719,
  0x271a, 0x271b, 0x271c, 0x271d, 0x271e, 0x271f, 0x2720, 0x2721, 0x2722, 0x2723,
  0x2724, 0x2725, 0x2726, 0x2727, 0x2729, 0x272a, 0x272b, 0x272c, 0x272d, 0x272e,
  0x272f, 0x2730, 0x2731, 0x2732, 0x2733, 0x2734, 0x2735, 0x2736, 0x2737, 0x2738,
  0x2739, 0x273a, 0x273b, 0x273c, 0x273d, 0x273e, 0x273f, 0x2740, 0x2741, 0x2742,
  0x2743, 0x2744, 0x2745, 0x2746, 0x2747, 0x2748, 0x2749, 0x274a, 0x274b, 0x274d,
  0x274f, 0x2750, 0x2751, 0x2752, 0x2756, 0x2758, 0x2759, 0x275a, 0x275b, 0x275c,
  0x275d, 0x275e, 0x2761, 0x2762, 0x2763, 0x2764, 0x2765, 0x2766, 0x2767, 0x277f,
  0x2789, 0x2793, 0x2794, 0x2798, 0x2799, 0x279a, 0x279b, 0x279c, 0x279d, 0x279e,
  0x279f, 0x27a0, 0x27a1, 0x27a2, 0x27a3, 0x27a4, 0x27a5, 0x27a6, 0x27a7, 0x27a8,
  0x27a9, 0x27aa, 0x27ab, 0x27ac, 0x27ad, 0x27ae, 0x27af, 0x27b1, 0x27b2, 0x27b3,
  0x27b4, 0x27b5, 0x27b6, 0x27b7, 0x27b8, 0x27b9, 0x27ba, 0x27bb, 0x27bc, 0x27bd,
  0x27be, 0xf6d9, 0xf6da, 0xf6db, 0xf8d7, 0xf8d8, 0xf8d9, 0xf8da, 0xf8db, 0xf8dc,
  0xf8dd, 0xf8de, 0xf8df, 0xf8e0, 0xf8e1, 0xf8e2, 0xf8e3, 0xf8e4, 0xf8e5, 0xf8e6,
  0xf8e7, 0xf8e8, 0xf8e9, 0xf8ea, 0xf8eb, 0xf8ec, 0xf8ed, 0xf8ee, 0xf8ef, 0xf8f0,
  0xf8f1, 0xf8f2, 0xf8f3, 0xf8f4, 0xf8f5, 0xf8f6, 0xf8f7, 0xf8f8, 0xf8f9, 0xf8fa,
  0xf8fb, 0xf8fc, 0xf8fd, 0xf8fe, 0xfe7c, 0xfe7d,
);