bpo-40328: Add tool for generating cjk mapping headers (GH-19602)

This commit is contained in:
Dong-hee Na 2020-04-30 02:34:24 +09:00 committed by GitHub
parent 2d8757758d
commit 113feb3ec2
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
15 changed files with 51015 additions and 3 deletions

View file

@ -0,0 +1 @@
Add tools for generating mappings headers for CJKCodecs.

View file

@ -1,8 +1,6 @@
To generate or modify mapping headers
-------------------------------------
Mapping headers are imported from CJKCodecs as pre-generated form.
If you need to tweak or add something on it, please look at tools/
subdirectory of CJKCodecs' distribution.
Mapping headers are generated from Tools/unicode/genmap_*.py

View file

@ -1,3 +1,4 @@
// AUTO-GENERATED FILE FROM genmap_schinese.py: DO NOT EDIT
static const ucs2_t __gb2312_decmap[7482] = {
12288,12289,12290,12539,713,711,168,12291,12293,8213,65374,8214,8230,8216,
8217,8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,

View file

@ -1,3 +1,4 @@
// AUTO-GENERATED FILE FROM genmap_japanese.py: DO NOT EDIT
#define JISX0213_ENCPAIRS 46
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;

View file

@ -1,3 +1,4 @@
// AUTO-GENERATED FILE FROM genmap_japanese.py: DO NOT EDIT
static const ucs2_t __jisx0208_decmap[6956] = {
12288,12289,12290,65292,65294,12539,65306,65307,65311,65281,12443,12444,180,
65344,168,65342,65507,65343,12541,12542,12445,12446,12291,20189,12293,12294,

View file

@ -1,3 +1,4 @@
// AUTO-GENERATED FILE FROM genmap_korean.py: DO NOT EDIT
static const ucs2_t __ksx1001_decmap[8264] = {
12288,12289,12290,183,8229,8230,168,12291,173,8213,8741,65340,8764,8216,8217,
8220,8221,12308,12309,12296,12297,12298,12299,12300,12301,12302,12303,12304,
@ -3249,3 +3250,4 @@ __cp949_encmap+31959,0,255},{__cp949_encmap+32215,0,255},{__cp949_encmap+32471
__cp949_encmap+32891,0,11},{0,0,0},{0,0,0},{0,0,0},{0,0,0},{__cp949_encmap+
32903,1,230},
};

View file

@ -0,0 +1,251 @@
#
# genmap_ja_codecs.py: Japanese Codecs Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#
import os
from genmap_support import *
# Inclusive (low, high) byte ranges handed to DecodeMapWriter.update_decode_map()
# in main(): a *_C1 tuple bounds the first byte of a two-byte code and the
# matching *_C2 tuple bounds the second byte.
JISX0208_C1 = (0x21, 0x74)
JISX0208_C2 = (0x21, 0x7e)
JISX0212_C1 = (0x22, 0x6d)
JISX0212_C2 = (0x21, 0x7e)
JISX0213_C1 = (0x21, 0x7e)
JISX0213_C2 = (0x21, 0x7e)
# The three CP932 range pairs below are all fed into the single "cp932ext"
# decode map.
CP932P0_C1 = (0x81, 0x81) # patches between shift-jis and cp932
CP932P0_C2 = (0x5f, 0xca)
CP932P1_C1 = (0x87, 0x87) # CP932 P1
CP932P1_C2 = (0x40, 0x9c)
CP932P2_C1 = (0xed, 0xfc) # CP932 P2
CP932P2_C2 = (0x40, 0xfc)
# Reference locations of the mapping tables.  main() reads local copies from
# python-mappings/ and passes these URLs to open_mapping_file() as the
# "source" of each table; nothing in this script downloads them.
MAPPINGS_JIS0208 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0208.TXT'
MAPPINGS_JIS0212 = 'http://www.unicode.org/Public/MAPPINGS/OBSOLETE/EASTASIA/JIS/JIS0212.TXT'
MAPPINGS_CP932 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP932.TXT'
MAPPINGS_JISX0213_2004 = 'http://wakaba-web.hp.infoseek.co.jp/table/jisx0213-2004-std.txt'
def loadmap_jisx0213(fo):
    """Parse a JIS X 0213 mapping table into its five decode maps.

    Each data line is expected to look like ``3-2124 U+FF0C`` (one code
    point) or ``3-2477 U+304B+309A`` (a base character plus a modifier);
    anything after ``#`` is ignored, as are lines with fewer than two columns.

    Args:
        fo: iterable of text lines (an open file or a list of strings).

    Returns:
        A tuple ``(decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair)``.
        The first four are ``{c1: {c2: code}}`` dicts for level 3 / level 4
        codes mapping into the BMP (``decmap3``/``decmap4``) or into
        U+2xxxx with the leading 0x20000 removed (``*_2``).
        ``decmap3_pair`` is ``{base: {c1: {c2: modifier}}}``.

    Raises:
        ValueError: if a line names a level other than 3 or 4, a pair on a
            level other than 3, a code point outside the BMP and U+2xxxx, or
            a field that is not valid hexadecimal/decimal.
    """
    decmap3, decmap4 = {}, {}  # maps to BMP for level 3 and 4
    decmap3_2, decmap4_2 = {}, {}  # maps to U+2xxxx for level 3 and 4
    decmap3_pair = {}  # maps to BMP-pair for level 3
    bmp_maps = {3: decmap3, 4: decmap4}
    plane2_maps = {3: decmap3_2, 4: decmap4_2}

    for line in fo:
        row = line.split('#', 1)[0].split()
        if len(row) < 2:
            continue

        # row[0] is "<level>-<hex code>"; parse with int() rather than
        # eval() so table text is never executed as Python.
        loc = int(row[0][2:], 16)
        level = int(row[0][0])
        m = None
        if len(row[1].split('+')) == 2:  # single unicode
            uni = int(row[1][2:], 16)
            if uni < 0x10000:
                m = bmp_maps.get(level)
            elif 0x20000 <= uni < 0x30000:
                uni -= 0x20000
                m = plane2_maps.get(level)
        else:  # pair
            uniprefix = int(row[1][2:6], 16)  # body
            uni = int(row[1][7:11], 16)  # modifier
            if level != 3:
                raise ValueError("invalid map")
            m = decmap3_pair.setdefault(uniprefix, {})

        # Check before touching ``m`` so that an unsupported level or plane
        # is reported as ValueError rather than AttributeError on None.
        if m is None:
            raise ValueError("invalid map")
        m.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap3, decmap4, decmap3_2, decmap4_2, decmap3_pair
def main():
    """Generate mappings_jp.h and mappings_jisx0213_pair.h.

    Reads the JIS X 0208, JIS X 0212, CP932 and JIS X 0213:2004 tables from
    ``python-mappings/``, derives the encode maps from the decode maps and
    writes the C tables into the current working directory.

    Raises:
        SystemExit: if a mapping file cannot be opened, or if the JIS X 0213
            table does not map 3-2124 to U+FF0C (i.e. it was not patched).
        ValueError: if JIS X 0208 and JIS X 0213 plane 1 disagree on a code.
    """
    # The mapping files are only needed while loading, so close them as soon
    # as the decode maps are built.
    with open_mapping_file('python-mappings/JIS0208.TXT', MAPPINGS_JIS0208) as jisx0208file, \
            open_mapping_file('python-mappings/JIS0212.TXT', MAPPINGS_JIS0212) as jisx0212file, \
            open_mapping_file('python-mappings/CP932.TXT', MAPPINGS_CP932) as cp932file, \
            open_mapping_file('python-mappings/jisx0213-2004-std.txt', MAPPINGS_JISX0213_2004) as jisx0213file:
        print("Loading Mapping File...")

        # JIS0208.TXT is read twice: column 0 is the Shift-JIS code and
        # column 1 the JIS X 0208 code; column 2 is Unicode in both cases.
        sjisdecmap = loadmap(jisx0208file, natcol=0, unicol=2)
        jisx0208decmap = loadmap(jisx0208file, natcol=1, unicol=2)
        jisx0212decmap = loadmap(jisx0212file)
        cp932decmap = loadmap(cp932file)
        jis3decmap, jis4decmap, jis3_2_decmap, jis4_2_decmap, jis3_pairdecmap = loadmap_jisx0213(jisx0213file)

    if jis3decmap[0x21][0x24] != 0xff0c:
        raise SystemExit('Please adjust your JIS X 0213 map using jisx0213-2000-std.txt.diff')

    sjisencmap, cp932encmap = {}, {}
    jisx0208_0212encmap = {}
    for c1, m in sjisdecmap.items():
        for c2, code in m.items():
            sjisencmap.setdefault(code >> 8, {})
            sjisencmap[code >> 8][code & 0xff] = c1 << 8 | c2
    for c1, m in cp932decmap.items():
        for c2, code in m.items():
            cp932encmap.setdefault(code >> 8, {})
            # Keep the first CP932 code seen for a Unicode code point.
            if (code & 0xff) not in cp932encmap[code >> 8]:
                cp932encmap[code >> 8][code & 0xff] = c1 << 8 | c2
    # Drop every CP932 encoding that is identical to the Shift-JIS one so
    # that only the extension entries remain.
    for c1, m in cp932encmap.copy().items():
        for c2, code in m.copy().items():
            if c1 in sjisencmap and c2 in sjisencmap[c1] and sjisencmap[c1][c2] == code:
                del cp932encmap[c1][c2]
                if not cp932encmap[c1]:
                    del cp932encmap[c1]

    jisx0213pairdecmap = {}
    jisx0213pairencmap = []
    for unibody, m1 in jis3_pairdecmap.items():
        for c1, m2 in m1.items():
            for c2, modifier in m2.items():
                jisx0213pairencmap.append((unibody, modifier, c1 << 8 | c2))
                jisx0213pairdecmap.setdefault(c1, {})
                jisx0213pairdecmap[c1][c2] = unibody << 16 | modifier

    # Twinmap for both of JIS X 0208 (MSB unset) and JIS X 0212 (MSB set)
    for c1, m in jisx0208decmap.items():
        for c2, code in m.items():
            jisx0208_0212encmap.setdefault(code >> 8, {})
            jisx0208_0212encmap[code >> 8][code & 0xff] = c1 << 8 | c2

    for c1, m in jisx0212decmap.items():
        for c2, code in m.items():
            jisx0208_0212encmap.setdefault(code >> 8, {})
            if (code & 0xff) in jisx0208_0212encmap[code >> 8]:
                print("OOPS!!!", (code))
            jisx0208_0212encmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213bmpencmap = {}
    # Iterate over copies because entries shared with JIS X 0208 are deleted
    # from jis3decmap inside the loop.
    for c1, m in jis3decmap.copy().items():
        for c2, code in m.copy().items():
            if c1 in jisx0208decmap and c2 in jisx0208decmap[c1]:
                if code in jis3_pairdecmap:
                    # The row for this code may not exist yet; create it
                    # instead of failing with KeyError.
                    jisx0213bmpencmap.setdefault(code >> 8, {})
                    jisx0213bmpencmap[code >> 8][code & 0xff] = (0,)  # pair
                    jisx0213pairencmap.append((code, 0, c1 << 8 | c2))
                elif jisx0208decmap[c1][c2] == code:
                    del jis3decmap[c1][c2]
                    if not jis3decmap[c1]:
                        del jis3decmap[c1]
                else:
                    raise ValueError("Difference between JIS X 0208 and JIS X 0213 Plane 1 is found.")
            else:
                jisx0213bmpencmap.setdefault(code >> 8, {})
                if code not in jis3_pairdecmap:
                    jisx0213bmpencmap[code >> 8][code & 0xff] = c1 << 8 | c2
                else:
                    jisx0213bmpencmap[code >> 8][code & 0xff] = (0,)  # pair
                    jisx0213pairencmap.append((code, 0, c1 << 8 | c2))

    for c1, m in jis4decmap.items():
        for c2, code in m.items():
            jisx0213bmpencmap.setdefault(code >> 8, {})
            jisx0213bmpencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    jisx0213empencmap = {}
    for c1, m in jis3_2_decmap.items():
        for c2, code in m.items():
            jisx0213empencmap.setdefault(code >> 8, {})
            jisx0213empencmap[code >> 8][code & 0xff] = c1 << 8 | c2

    for c1, m in jis4_2_decmap.items():
        for c2, code in m.items():
            jisx0213empencmap.setdefault(code >> 8, {})
            jisx0213empencmap[code >> 8][code & 0xff] = 0x8000 | c1 << 8 | c2

    with open("mappings_jp.h", "w") as fp:
        print_autogen(fp, os.path.basename(__file__))
        print("Generating JIS X 0208 decode map...")
        writer = DecodeMapWriter(fp, "jisx0208", jisx0208decmap)
        writer.update_decode_map(JISX0208_C1, JISX0208_C2)
        writer.generate()

        print("Generating JIS X 0212 decode map...")
        writer = DecodeMapWriter(fp, "jisx0212", jisx0212decmap)
        writer.update_decode_map(JISX0212_C1, JISX0212_C2)
        writer.generate()

        print("Generating JIS X 0208 && JIS X 0212 encode map...")
        writer = EncodeMapWriter(fp, "jisxcommon", jisx0208_0212encmap)
        writer.generate()

        print("Generating CP932 Extension decode map...")
        writer = DecodeMapWriter(fp, "cp932ext", cp932decmap)
        writer.update_decode_map(CP932P0_C1, CP932P0_C2)
        writer.update_decode_map(CP932P1_C1, CP932P1_C2)
        writer.update_decode_map(CP932P2_C1, CP932P2_C2)
        writer.generate()

        print("Generating CP932 Extension encode map...")
        writer = EncodeMapWriter(fp, "cp932ext", cp932encmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_bmp", jis3decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 BMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_bmp", jis4decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 BMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_bmp", jisx0213bmpencmap)
        writer.generate()

        print("Generating JIS X 0213 Plane 1 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_1_emp", jis3_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 Plane 2 EMP decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_2_emp", jis4_2_decmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate()

        print("Generating JIS X 0213 EMP encode map...")
        writer = EncodeMapWriter(fp, "jisx0213_emp", jisx0213empencmap)
        writer.generate()

    with open('mappings_jisx0213_pair.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))
        fp.write(f"#define JISX0213_ENCPAIRS {len(jisx0213pairencmap)}\n")
        fp.write("""\
#ifdef EXTERN_JISX0213_PAIR
static const struct widedbcs_index *jisx0213_pair_decmap;
static const struct pair_encodemap *jisx0213_pair_encmap;
#else
""")

        print("Generating JIS X 0213 unicode-pair decode map...")
        writer = DecodeMapWriter(fp, "jisx0213_pair", jisx0213pairdecmap)
        writer.update_decode_map(JISX0213_C1, JISX0213_C2)
        writer.generate(wide=True)

        print("Generating JIS X 0213 unicode-pair encode map...")
        # Emit the pair table ordered by (body, modifier, code).
        jisx0213pairencmap.sort()
        fp.write("static const struct pair_encodemap jisx0213_pair_encmap[JISX0213_ENCPAIRS] = {\n")
        filler = BufferedFiller()
        for body, modifier, jis in jisx0213pairencmap:
            filler.write('{', '0x%04x%04x,' % (body, modifier), '0x%04x' % jis, '},')
        filler.printout(fp)
        fp.write("};\n")
        fp.write("#endif\n")

    print("Done!")


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,62 @@
#
# genmap_korean.py: Korean Codecs Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#
import os
from genmap_support import *
# Inclusive (low, high) byte ranges handed to DecodeMapWriter.update_decode_map()
# in main(): a *_C1 tuple bounds the first byte of a two-byte code and the
# matching *_C2 tuple bounds the second byte.
KSX1001_C1 = (0x21, 0x7e)
KSX1001_C2 = (0x21, 0x7e)
# Both UHC range pairs below are fed into the single "cp949ext" decode map.
UHCL1_C1 = (0x81, 0xa0)
UHCL1_C2 = (0x41, 0xfe)
UHCL2_C1 = (0xa1, 0xfe)
UHCL2_C2 = (0x41, 0xa0)
# Reference location of the mapping table.  main() reads a local copy from
# python-mappings/ and passes this URL to open_mapping_file() as its "source".
MAPPINGS_CP949 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP949.TXT'
def main():
    """Generate mappings_kr.h from the CP949 mapping table.

    Reads ``python-mappings/CP949.TXT``, splits it into the KS X 1001 part
    (both bytes >= 0xa1, stored with the high bits cleared) and the remaining
    UHC part, and writes the decode/encode tables into ``mappings_kr.h`` in
    the current working directory.

    Raises:
        SystemExit: if the mapping file cannot be opened.
    """
    # The mapping file is only needed while loading; close it right after.
    with open_mapping_file('python-mappings/CP949.TXT', MAPPINGS_CP949) as mapfile:
        print("Loading Mapping File...")
        decmap = loadmap(mapfile)

    uhcdecmap, ksx1001decmap, cp949encmap = {}, {}, {}
    for c1, c2map in decmap.items():
        for c2, code in c2map.items():
            if c1 >= 0xa1 and c2 >= 0xa1:
                # KS X 1001: store the 7-bit form of both bytes (MSB unset).
                ksx1001decmap.setdefault(c1 & 0x7f, {})
                ksx1001decmap[c1 & 0x7f][c2 & 0x7f] = code
                cp949encmap.setdefault(code >> 8, {})
                cp949encmap[code >> 8][code & 0xFF] = (c1 << 8 | c2) & 0x7f7f
            else:
                # uhc
                uhcdecmap.setdefault(c1, {})
                uhcdecmap[c1][c2] = code
                cp949encmap.setdefault(code >> 8, {})
                cp949encmap[code >> 8][code & 0xFF] = (c1 << 8 | c2)  # MSB set

    with open('mappings_kr.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))

        print("Generating KS X 1001 decode map...")
        writer = DecodeMapWriter(fp, "ksx1001", ksx1001decmap)
        writer.update_decode_map(KSX1001_C1, KSX1001_C2)
        writer.generate()

        print("Generating UHC decode map...")
        writer = DecodeMapWriter(fp, "cp949ext", uhcdecmap)
        writer.update_decode_map(UHCL1_C1, UHCL1_C2)
        writer.update_decode_map(UHCL2_C1, UHCL2_C2)
        writer.generate()

        print("Generating CP949 (includes KS X 1001) encode map...")
        writer = EncodeMapWriter(fp, "cp949", cp949encmap)
        writer.generate()

    print("Done!")


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,149 @@
#
# genmap_schinese.py: Simplified Chinese Codecs Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#
import os
import re
from genmap_support import *
# Inclusive (low, high) byte ranges handed to DecodeMapWriter.update_decode_map()
# in main(): a *_C1 tuple bounds the first byte of a two-byte code and the
# matching *_C2 tuple bounds the second byte.
GB2312_C1 = (0x21, 0x7e)
GB2312_C2 = (0x21, 0x7e)
# Both GBK range pairs below are fed into the single "gbkext" decode map.
GBKL1_C1 = (0x81, 0xa8)
GBKL1_C2 = (0x40, 0xfe)
GBKL2_C1 = (0xa9, 0xfe)
GBKL2_C2 = (0x40, 0xa0)
# The five GB18030EXTP<n> range pairs are all fed into the single
# "gb18030ext" decode map, in order P1..P5.
GB18030EXTP1_C1 = (0xa1, 0xa9)
GB18030EXTP1_C2 = (0x40, 0xfe)
GB18030EXTP2_C1 = (0xaa, 0xaf)
GB18030EXTP2_C2 = (0xa1, 0xfe)
GB18030EXTP3_C1 = (0xd7, 0xd7)
GB18030EXTP3_C2 = (0xfa, 0xfe)
GB18030EXTP4_C1 = (0xf8, 0xfd)
GB18030EXTP4_C2 = (0xa1, 0xfe)
GB18030EXTP5_C1 = (0xfe, 0xfe)
GB18030EXTP5_C2 = (0x50, 0xfe)
# Reference locations of the mapping tables.  main() reads local copies from
# python-mappings/ and passes these URLs to open_mapping_file() as the
# "source" of each table; nothing in this script downloads them.
MAPPINGS_GB2312 = 'http://people.freebsd.org/~perky/i18n/GB2312.TXT'
MAPPINGS_CP936 = 'http://www.unicode.org/Public/MAPPINGS/VENDORS/MICSFT/WINDOWS/CP936.TXT'
MAPPINGS_GB18030 = 'http://oss.software.ibm.com/cvs/icu/~checkout~/charset/data/xml/gb-18030-2000.xml'
# Matches one assignment element: group 1 is the four-hex-digit Unicode code
# point, group 2 the space-separated hex bytes of the native encoding.
re_gb18030ass = re.compile('<a u="([A-F0-9]{4})" b="([0-9A-F ]+)"/>')


def parse_gb18030map(fo):
    """Parse the GB 18030 XML mapping into a two-byte map and a linear list.

    Args:
        fo: file-like object; its whole content is read with ``read()``.

    Returns:
        A tuple ``(m, gbuni)``. ``m`` is ``{byte1: {byte2: code_point}}`` for
        every assignment whose native form is exactly two bytes. ``gbuni`` is
        the sorted list of BMP code points (surrogates excluded) that have no
        assignment of two bytes or fewer in the file.

    Raises:
        KeyError: if a code point with a one- or two-byte assignment is a
            surrogate or appears more than once.
    """
    m = {}
    # Start from every BMP code point outside the surrogate area and remove
    # the ones that have a short (<= 2 byte) native encoding.
    gbuni = {i for i in range(65536) if i < 0xd800 or i > 0xdfff}
    for uni, native in re_gb18030ass.findall(fo.read()):
        # The regex only lets hex digits through, so int(..., 16) gives the
        # same values the previous eval('0x' + ...) did without evaluating
        # file content as Python.
        uni = int(uni, 16)
        native = [int(u, 16) for u in native.split()]
        if len(native) <= 2:
            gbuni.remove(uni)
        if len(native) == 2:  # we can decode algorithmically for 1 or 4 bytes
            m.setdefault(native[0], {})[native[1]] = uni

    return m, sorted(gbuni)
def main():
    """Generate mappings_cn.h from the GB2312, CP936 and GB 18030 tables.

    Reads the three tables from ``python-mappings/``, reduces each decode map
    to the entries not already covered by the smaller character set, derives
    the encode maps and writes all tables plus the GB 18030 BMP range list
    into ``mappings_cn.h`` in the current working directory.

    Raises:
        SystemExit: if a mapping file cannot be opened.
        KeyError: if CP936 contains a code missing from the GB 18030 table,
            or GB2312 contains a code missing from CP936.
    """
    print("Loading Mapping File...")
    # The mapping files are only needed while loading; close them right after.
    with open_mapping_file('python-mappings/GB2312.TXT', MAPPINGS_GB2312) as gb2312map, \
            open_mapping_file('python-mappings/CP936.TXT', MAPPINGS_CP936) as cp936map, \
            open_mapping_file('python-mappings/gb-18030-2000.xml', MAPPINGS_GB18030) as gb18030map:
        gb18030decmap, gb18030unilinear = parse_gb18030map(gb18030map)
        gbkdecmap = loadmap(cp936map)
        gb2312decmap = loadmap(gb2312map)

    # Remove everything GBK already covers from the GB 18030 extension map.
    for c1, m in gbkdecmap.items():
        for c2, code in m.items():
            del gb18030decmap[c1][c2]
            if not gb18030decmap[c1]:
                del gb18030decmap[c1]
    # Remove everything GB2312 maps identically from the GBK extension map.
    for c1, m in gb2312decmap.items():
        for c2, code in m.items():
            gbkc1, gbkc2 = c1 | 0x80, c2 | 0x80
            if gbkdecmap[gbkc1][gbkc2] == code:
                del gbkdecmap[gbkc1][gbkc2]
                if not gbkdecmap[gbkc1]:
                    del gbkdecmap[gbkc1]

    gb2312_gbkencmap, gb18030encmap = {}, {}
    for c1, m in gbkdecmap.items():
        for c2, code in m.items():
            gb2312_gbkencmap.setdefault(code >> 8, {})
            gb2312_gbkencmap[code >> 8][code & 0xff] = c1 << 8 | c2  # MSB set
    for c1, m in gb2312decmap.items():
        for c2, code in m.items():
            gb2312_gbkencmap.setdefault(code >> 8, {})
            gb2312_gbkencmap[code >> 8][code & 0xff] = c1 << 8 | c2  # MSB unset
    for c1, m in gb18030decmap.items():
        for c2, code in m.items():
            gb18030encmap.setdefault(code >> 8, {})
            gb18030encmap[code >> 8][code & 0xff] = c1 << 8 | c2

    # Listed explicitly instead of being looked up by name with eval().
    gb18030ext_ranges = (
        (GB18030EXTP1_C1, GB18030EXTP1_C2),
        (GB18030EXTP2_C1, GB18030EXTP2_C2),
        (GB18030EXTP3_C1, GB18030EXTP3_C2),
        (GB18030EXTP4_C1, GB18030EXTP4_C2),
        (GB18030EXTP5_C1, GB18030EXTP5_C2),
    )

    with open('mappings_cn.h', 'w') as fp:
        print_autogen(fp, os.path.basename(__file__))

        print("Generating GB2312 decode map...")
        writer = DecodeMapWriter(fp, "gb2312", gb2312decmap)
        writer.update_decode_map(GB2312_C1, GB2312_C2)
        writer.generate()

        print("Generating GBK decode map...")
        writer = DecodeMapWriter(fp, "gbkext", gbkdecmap)
        writer.update_decode_map(GBKL1_C1, GBKL1_C2)
        writer.update_decode_map(GBKL2_C1, GBKL2_C2)
        writer.generate()

        print("Generating GB2312 && GBK encode map...")
        writer = EncodeMapWriter(fp, "gbcommon", gb2312_gbkencmap)
        writer.generate()

        print("Generating GB18030 extension decode map...")
        writer = DecodeMapWriter(fp, "gb18030ext", gb18030decmap)
        for c1range, c2range in gb18030ext_ranges:
            writer.update_decode_map(c1range, c2range)
        writer.generate()

        print("Generating GB18030 extension encode map...")
        writer = EncodeMapWriter(fp, "gb18030ext", gb18030encmap)
        writer.generate()

        print("Generating GB18030 Unicode BMP Mapping Ranges...")
        # Each entry is [first, last, base]; the leading [-1, -1, -1] is a
        # placeholder so ranges[-1] always exists, and is skipped on output.
        ranges = [[-1, -1, -1]]
        gblinnum = 0
        fp.write("""
static const struct _gb18030_to_unibmp_ranges {
Py_UCS4 first, last;
DBCHAR base;
} gb18030_to_unibmp_ranges[] = {
""")

        for uni in gb18030unilinear:
            if uni == ranges[-1][1] + 1:
                # Contiguous with the current range: extend it.
                ranges[-1][1] = uni
            else:
                # Gap: start a new range whose base is the running index.
                ranges.append([uni, uni, gblinnum])
            gblinnum += 1

        filler = BufferedFiller()
        for first, last, base in ranges[1:]:
            filler.write('{', str(first), ',', str(last), ',', str(base), '},')

        # Terminating entry: base is one past the last linear index.
        filler.write('{', '0,', '0,', str(
            ranges[-1][2] + ranges[-1][1] - ranges[-1][0] + 1), '}', '};')
        filler.printout(fp)

    print("Done!")


if __name__ == '__main__':
    main()

View file

@ -0,0 +1,198 @@
#
# genmap_support.py: Multibyte Codec Map Generator
#
# Original Author: Hye-Shik Chang <perky@FreeBSD.org>
# Modified Author: Dong-hee Na <donghee.na92@gmail.com>
#
class BufferedFiller:
    """Collect string tokens and pack them into lines of limited width.

    Tokens are concatenated without separators.  A line is closed as soon as
    the next token would make it longer than ``column`` characters.
    ``len(filler)`` is the number of tokens written so far.
    """

    def __init__(self, column=78):
        self.column = column  # maximum line width in characters
        self.buffered = []    # finished lines, waiting for printout()
        self.cline = []       # tokens of the line being assembled
        self.clen = 0         # total length of the tokens in cline
        self.count = 0        # number of tokens ever written

    def write(self, *data):
        """Append each token in *data*, wrapping to a new line when needed.

        Raises:
            ValueError: if a single token is longer than ``column``.
        """
        for token in data:
            width = len(token)
            if width > self.column:
                raise ValueError("token is too long")
            if self.clen + width > self.column:
                self.flush()
            self.cline.append(token)
            self.clen += width
            self.count += 1

    def flush(self):
        """Move the line being assembled, if any, to the finished lines."""
        if self.cline:
            self.buffered.append(''.join(self.cline))
            self.clen = 0
            self.cline.clear()

    def printout(self, fp):
        """Write every pending line to *fp*, one per line, and forget them."""
        self.flush()
        for text in self.buffered:
            fp.write(f'{text}\n')
        self.buffered.clear()

    def __len__(self):
        return self.count
class DecodeMapWriter:
    """Write a two-level decode table (data array plus 256-entry index) as C.

    ``decode_map`` is a ``{c1: {c2: value}}`` dict.  update_decode_map()
    appends the values of selected rows to the data array and records each
    row's position in the row dict itself, under the string keys
    ``prefix``, ``'min'``, ``'max'`` and ``'midx'``.
    """

    filler_class = BufferedFiller

    def __init__(self, fp, prefix, decode_map):
        self.fp = fp
        self.prefix = prefix
        self.decode_map = decode_map
        self.filler = self.filler_class()

    def update_decode_map(self, c1range, c2range, onlymask=(), wide=0):
        """Append the rows in *c1range*, limited to *c2range*, to the data.

        Both ranges are inclusive ``(low, high)`` pairs.  If *onlymask* is
        non-empty, only first bytes contained in it are used.  *wide* is
        accepted but not used here.
        """
        second_bytes = range(c2range[0], c2range[1] + 1)
        for lead in range(c1range[0], c1range[1] + 1):
            if lead not in self.decode_map:
                continue
            if onlymask and lead not in onlymask:
                continue

            row = self.decode_map[lead]
            present = [b for b in second_bytes if b in row]
            if not present:
                continue

            lowest, highest = present[0], present[-1]
            row[self.prefix] = True
            row['min'] = lowest
            row['max'] = highest
            # Offset of this row inside the data array.
            row['midx'] = len(self.filler)

            # Emit the whole span lowest..highest; holes become 'U,'.
            for b in range(lowest, highest + 1):
                self.filler.write(('%d,' % row[b]) if b in row else 'U,')

    def generate(self, wide=False):
        """Write the data array and the index table to ``self.fp``.

        With *wide* true the C types are ``Py_UCS4`` / ``struct
        widedbcs_index``; otherwise ``ucs2_t`` / ``struct dbcs_index``.
        """
        if wide:
            data_header = f"static const Py_UCS4 __{self.prefix}_decmap[{len(self.filler)}] = {{\n"
            index_header = f"static const struct widedbcs_index {self.prefix}_decmap[256] = {{\n"
        else:
            data_header = f"static const ucs2_t __{self.prefix}_decmap[{len(self.filler)}] = {{\n"
            index_header = f"static const struct dbcs_index {self.prefix}_decmap[256] = {{\n"

        self.fp.write(data_header)
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(index_header)
        for i in range(256):
            # Rows never touched by update_decode_map() get an empty entry.
            if i not in self.decode_map or self.prefix not in self.decode_map[i]:
                self.filler.write("{", "0,", "0,", "0", "},")
                continue
            row = self.decode_map[i]
            self.filler.write("{", "__%s_decmap" % self.prefix, "+", "%d" % row['midx'],
                              ",", "%d," % row['min'], "%d" % row['max'], "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
class EncodeMapWriter:
    """Write a two-level encode table (data array plus 256-entry index) as C.

    ``encode_map`` is a ``{high_byte: {low_byte: value}}`` dict.  buildmap()
    records each row's position in the row dict itself, under the string
    keys ``prefix``, ``'min'``, ``'max'`` and ``'midx'``.
    """

    filler_class = BufferedFiller
    elemtype = 'DBCHAR'               # C type of the data array elements
    indextype = 'struct unim_index'   # C type of the index table entries

    def __init__(self, fp, prefix, encode_map):
        self.fp = fp
        self.prefix = prefix
        self.encode_map = encode_map
        self.filler = self.filler_class()

    def generate(self):
        """Build the data array, then write both tables to ``self.fp``."""
        self.buildmap()
        self.printmap()

    def buildmap(self):
        """Fill the data array from every non-empty row of the encode map.

        Raises:
            ValueError: if a value is neither an int nor a tuple.
        """
        for high in range(0, 256):
            if high not in self.encode_map:
                continue

            row = self.encode_map[high]
            lows = sorted(row.keys())
            if not lows:
                continue

            row[self.prefix] = True
            row['min'] = lows[0]
            row['max'] = lows[-1]
            # Offset of this row inside the data array.
            row['midx'] = len(self.filler)

            for low in range(lows[0], lows[-1] + 1):
                if low not in row:
                    self.write_nochar()
                    continue
                value = row[low]
                if isinstance(value, int):
                    self.write_char(value)
                elif isinstance(value, tuple):
                    self.write_multic(value)
                else:
                    raise ValueError

    def write_nochar(self):
        """Emit the marker for a code point with no encoding."""
        self.filler.write('N,')

    def write_multic(self, point):
        """Emit the marker for a tuple value; *point* itself is not written."""
        self.filler.write('M,')

    def write_char(self, point):
        """Emit an integer value in decimal."""
        self.filler.write(str(point) + ',')

    def printmap(self):
        """Write the data array followed by the 256-entry index table."""
        self.fp.write(f"static const {self.elemtype} __{self.prefix}_encmap[{len(self.filler)}] = {{\n")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")

        self.fp.write(f"static const {self.indextype} {self.prefix}_encmap[256] = {{\n")
        for i in range(256):
            # Rows never filled in by buildmap() get an empty entry.
            if i not in self.encode_map or self.prefix not in self.encode_map[i]:
                self.filler.write("{", "0,", "0,", "0", "},")
                continue
            row = self.encode_map[i]
            self.filler.write("{", "__%s_encmap" % self.prefix, "+",
                              "%d" % row['midx'], ",",
                              "%d," % row['min'],
                              "%d" % row['max'], "},")
        self.filler.printout(self.fp)
        self.fp.write("};\n\n")
def open_mapping_file(path, source):
    """Open the mapping file at *path* for reading in text mode.

    Args:
        path: location of the local copy of the mapping file.
        source: where the file comes from; only used in the error message.

    Returns:
        The open file object.

    Raises:
        SystemExit: with the message ``"<source> is needed"`` if the file
            cannot be opened.
    """
    try:
        return open(path)
    except OSError:
        raise SystemExit(f'{source} is needed')
def print_autogen(fo, source):
    """Write the one-line "auto-generated, do not edit" C comment to *fo*.

    *source* is the name of the generating script quoted in the comment.
    """
    banner = f'// AUTO-GENERATED FILE FROM {source}: DO NOT EDIT\n'
    fo.write(banner)
def loadmap(fo, natcol=0, unicol=1, sbcs=0):
    """Load a whitespace-separated mapping table into a two-level dict.

    Text after ``#`` is ignored, as are lines with fewer than two columns.
    The file is rewound first, so one file object can be loaded repeatedly
    with different column choices.

    Args:
        fo: seekable text file object holding the table.
        natcol: index of the column with the native (multibyte) code.
        unicol: index of the column with the Unicode code point.
        sbcs: if true, codes below 0x100 are kept too (under first byte 0).

    Returns:
        ``{code >> 8: {code & 0xff: unicode}}`` for every accepted row.

    Raises:
        ValueError: if a selected column is not an integer literal.
        IndexError: if a row has fewer columns than *natcol* / *unicol* need.
    """
    print("Loading from", fo)
    fo.seek(0, 0)
    decmap = {}
    for line in fo:
        row = line.split('#', 1)[0].split()
        if len(row) < 2:
            continue
        # Parse the literals with int(..., 0) (accepts 0x.. and decimal)
        # rather than eval(), which would execute table text as Python.
        loc = int(row[natcol], 0)
        uni = int(row[unicol], 0)
        if loc >= 0x100 or sbcs:
            decmap.setdefault(loc >> 8, {})[loc & 0xff] = uni

    return decmap

File diff suppressed because it is too large Load diff

View file

@ -0,0 +1,271 @@
--- jisx0213-2000-std.txt.orig Tue Apr 16 23:32:38 2002
+++ jisx0213-2000-std.txt Wed Jun 16 14:49:05 2004
@@ -23,21 +23,21 @@
3-2121 U+3000 # IDEOGRAPHIC SPACE
3-2122 U+3001 # IDEOGRAPHIC COMMA
3-2123 U+3002 # IDEOGRAPHIC FULL STOP
-3-2124 U+002C # COMMA Fullwidth: U+FF0C
-3-2125 U+002E # FULL STOP Fullwidth: U+FF0E
+3-2124 U+FF0C # COMMA Fullwidth: U+FF0C
+3-2125 U+FF0E # FULL STOP Fullwidth: U+FF0E
3-2126 U+30FB # KATAKANA MIDDLE DOT
-3-2127 U+003A # COLON Fullwidth: U+FF1A
-3-2128 U+003B # SEMICOLON Fullwidth: U+FF1B
-3-2129 U+003F # QUESTION MARK Fullwidth: U+FF1F
-3-212A U+0021 # EXCLAMATION MARK Fullwidth: U+FF01
+3-2127 U+FF1A # COLON Fullwidth: U+FF1A
+3-2128 U+FF1B # SEMICOLON Fullwidth: U+FF1B
+3-2129 U+FF1F # QUESTION MARK Fullwidth: U+FF1F
+3-212A U+FF01 # EXCLAMATION MARK Fullwidth: U+FF01
3-212B U+309B # KATAKANA-HIRAGANA VOICED SOUND MARK
3-212C U+309C # KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
3-212D U+00B4 # ACUTE ACCENT
-3-212E U+0060 # GRAVE ACCENT Fullwidth: U+FF40
+3-212E U+FF40 # GRAVE ACCENT Fullwidth: U+FF40
3-212F U+00A8 # DIAERESIS
-3-2130 U+005E # CIRCUMFLEX ACCENT Fullwidth: U+FF3E
-3-2131 U+203E # OVERLINE Windows: U+FFE3
-3-2132 U+005F # LOW LINE Fullwidth: U+FF3F
+3-2130 U+FF3E # CIRCUMFLEX ACCENT Fullwidth: U+FF3E
+3-2131 U+FFE3 # OVERLINE Windows: U+FFE3
+3-2132 U+FF3F # LOW LINE Fullwidth: U+FF3F
3-2133 U+30FD # KATAKANA ITERATION MARK
3-2134 U+30FE # KATAKANA VOICED ITERATION MARK
3-2135 U+309D # HIRAGANA ITERATION MARK
@@ -48,27 +48,27 @@
3-213A U+3006 # IDEOGRAPHIC CLOSING MARK
3-213B U+3007 # IDEOGRAPHIC NUMBER ZERO
3-213C U+30FC # KATAKANA-HIRAGANA PROLONGED SOUND MARK
-3-213D U+2014 # EM DASH Windows: U+2015
+3-213D U+2015 # EM DASH Windows: U+2015
3-213E U+2010 # HYPHEN
-3-213F U+002F # SOLIDUS Fullwidth: U+FF0F
+3-213F U+FF0F # SOLIDUS Fullwidth: U+FF0F
3-2140 U+005C # REVERSE SOLIDUS Fullwidth: U+FF3C
3-2141 U+301C # WAVE DASH Windows: U+FF5E
3-2142 U+2016 # DOUBLE VERTICAL LINE Windows: U+2225
-3-2143 U+007C # VERTICAL LINE Fullwidth: U+FF5C
+3-2143 U+FF5C # VERTICAL LINE Fullwidth: U+FF5C
3-2144 U+2026 # HORIZONTAL ELLIPSIS
3-2145 U+2025 # TWO DOT LEADER
3-2146 U+2018 # LEFT SINGLE QUOTATION MARK
3-2147 U+2019 # RIGHT SINGLE QUOTATION MARK
3-2148 U+201C # LEFT DOUBLE QUOTATION MARK
3-2149 U+201D # RIGHT DOUBLE QUOTATION MARK
-3-214A U+0028 # LEFT PARENTHESIS Fullwidth: U+FF08
-3-214B U+0029 # RIGHT PARENTHESIS Fullwidth: U+FF09
+3-214A U+FF08 # LEFT PARENTHESIS Fullwidth: U+FF08
+3-214B U+FF09 # RIGHT PARENTHESIS Fullwidth: U+FF09
3-214C U+3014 # LEFT TORTOISE SHELL BRACKET
3-214D U+3015 # RIGHT TORTOISE SHELL BRACKET
-3-214E U+005B # LEFT SQUARE BRACKET Fullwidth: U+FF3B
-3-214F U+005D # RIGHT SQUARE BRACKET Fullwidth: U+FF3D
-3-2150 U+007B # LEFT CURLY BRACKET Fullwidth: U+FF5B
-3-2151 U+007D # RIGHT CURLY BRACKET Fullwidth: U+FF5D
+3-214E U+FF3B # LEFT SQUARE BRACKET Fullwidth: U+FF3B
+3-214F U+FF3D # RIGHT SQUARE BRACKET Fullwidth: U+FF3D
+3-2150 U+FF5B # LEFT CURLY BRACKET Fullwidth: U+FF5B
+3-2151 U+FF5D # RIGHT CURLY BRACKET Fullwidth: U+FF5D
3-2152 U+3008 # LEFT ANGLE BRACKET
3-2153 U+3009 # RIGHT ANGLE BRACKET
3-2154 U+300A # LEFT DOUBLE ANGLE BRACKET
@@ -79,15 +79,15 @@
3-2159 U+300F # RIGHT WHITE CORNER BRACKET
3-215A U+3010 # LEFT BLACK LENTICULAR BRACKET
3-215B U+3011 # RIGHT BLACK LENTICULAR BRACKET
-3-215C U+002B # PLUS SIGN Fullwidth: U+FF0B
+3-215C U+FF0B # PLUS SIGN Fullwidth: U+FF0B
3-215D U+2212 # MINUS SIGN Windows: U+FF0D
3-215E U+00B1 # PLUS-MINUS SIGN
3-215F U+00D7 # MULTIPLICATION SIGN
3-2160 U+00F7 # DIVISION SIGN
-3-2161 U+003D # EQUALS SIGN Fullwidth: U+FF1D
+3-2161 U+FF1D # EQUALS SIGN Fullwidth: U+FF1D
3-2162 U+2260 # NOT EQUAL TO
-3-2163 U+003C # LESS-THAN SIGN Fullwidth: U+FF1C
-3-2164 U+003E # GREATER-THAN SIGN Fullwidth: U+FF1E
+3-2163 U+FF1C # LESS-THAN SIGN Fullwidth: U+FF1C
+3-2164 U+FF1E # GREATER-THAN SIGN Fullwidth: U+FF1E
3-2165 U+2266 # LESS-THAN OVER EQUAL TO
3-2166 U+2267 # GREATER-THAN OVER EQUAL TO
3-2167 U+221E # INFINITY
@@ -98,15 +98,15 @@
3-216C U+2032 # PRIME
3-216D U+2033 # DOUBLE PRIME
3-216E U+2103 # DEGREE CELSIUS
-3-216F U+00A5 # YEN SIGN Windows: U+FFE5
-3-2170 U+0024 # DOLLAR SIGN Fullwidth: U+FF04
+3-216F U+FFE5 # YEN SIGN Windows: U+FFE5
+3-2170 U+FF04 # DOLLAR SIGN Fullwidth: U+FF04
3-2171 U+00A2 # CENT SIGN Windows: U+FFE0
3-2172 U+00A3 # POUND SIGN Windows: U+FFE1
-3-2173 U+0025 # PERCENT SIGN Fullwidth: U+FF05
-3-2174 U+0023 # NUMBER SIGN Fullwidth: U+FF03
-3-2175 U+0026 # AMPERSAND Fullwidth: U+FF06
-3-2176 U+002A # ASTERISK Fullwidth: U+FF0A
-3-2177 U+0040 # COMMERCIAL AT Fullwidth: U+FF20
+3-2173 U+FF05 # PERCENT SIGN Fullwidth: U+FF05
+3-2174 U+FF03 # NUMBER SIGN Fullwidth: U+FF03
+3-2175 U+FF06 # AMPERSAND Fullwidth: U+FF06
+3-2176 U+FF0A # ASTERISK Fullwidth: U+FF0A
+3-2177 U+FF20 # COMMERCIAL AT Fullwidth: U+FF20
3-2178 U+00A7 # SECTION SIGN
3-2179 U+2606 # WHITE STAR
3-217A U+2605 # BLACK STAR
@@ -128,9 +128,9 @@
3-222C U+2191 # UPWARDS ARROW
3-222D U+2193 # DOWNWARDS ARROW
3-222E U+3013 # GETA MARK
-3-222F U+0027 # APOSTROPHE Fullwidth: U+FF07
-3-2230 U+0022 # QUOTATION MARK [2000] Fullwidth: U+FF02
-3-2231 U+002D # HYPHEN-MINUS [2000] Fullwidth: U+FF0D
+3-222F U+FF07 # APOSTROPHE Fullwidth: U+FF07
+3-2230 U+FF02 # QUOTATION MARK [2000] Fullwidth: U+FF02
+3-2231 U+FF0D # HYPHEN-MINUS [2000] Fullwidth: U+FF0D
3-2232 U+007E # TILDE [2000] Fullwidth: U+FF5E
3-2233 U+3033 # VERTICAL KANA REPEAT MARK UPPER HALF [2000]
3-2234 U+3034 # VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF [2000]
@@ -223,16 +223,16 @@
3-232D U+21E9 # DOWNWARDS WHITE ARROW [2000]
3-232E U+2934 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS [2000] [Unicode3.2]
3-232F U+2935 # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS [2000] [Unicode3.2]
-3-2330 U+0030 # DIGIT ZERO Fullwidth: U+FF10
-3-2331 U+0031 # DIGIT ONE Fullwidth: U+FF11
-3-2332 U+0032 # DIGIT TWO Fullwidth: U+FF12
-3-2333 U+0033 # DIGIT THREE Fullwidth: U+FF13
-3-2334 U+0034 # DIGIT FOUR Fullwidth: U+FF14
-3-2335 U+0035 # DIGIT FIVE Fullwidth: U+FF15
-3-2336 U+0036 # DIGIT SIX Fullwidth: U+FF16
-3-2337 U+0037 # DIGIT SEVEN Fullwidth: U+FF17
-3-2338 U+0038 # DIGIT EIGHT Fullwidth: U+FF18
-3-2339 U+0039 # DIGIT NINE Fullwidth: U+FF19
+3-2330 U+FF10 # DIGIT ZERO Fullwidth: U+FF10
+3-2331 U+FF11 # DIGIT ONE Fullwidth: U+FF11
+3-2332 U+FF12 # DIGIT TWO Fullwidth: U+FF12
+3-2333 U+FF13 # DIGIT THREE Fullwidth: U+FF13
+3-2334 U+FF14 # DIGIT FOUR Fullwidth: U+FF14
+3-2335 U+FF15 # DIGIT FIVE Fullwidth: U+FF15
+3-2336 U+FF16 # DIGIT SIX Fullwidth: U+FF16
+3-2337 U+FF17 # DIGIT SEVEN Fullwidth: U+FF17
+3-2338 U+FF18 # DIGIT EIGHT Fullwidth: U+FF18
+3-2339 U+FF19 # DIGIT NINE Fullwidth: U+FF19
3-233A U+29BF # CIRCLED BULLET [2000] [Unicode3.2]
3-233B U+25C9 # FISHEYE [2000]
3-233C U+303D # PART ALTERNATION MARK [2000] [Unicode3.2]
@@ -240,64 +240,64 @@
3-233E U+FE45 # SESAME DOT [2000] [Unicode3.2]
3-233F U+25E6 # WHITE BULLET [2000]
3-2340 U+2022 # BULLET [2000]
-3-2341 U+0041 # LATIN CAPITAL LETTER A Fullwidth: U+FF21
-3-2342 U+0042 # LATIN CAPITAL LETTER B Fullwidth: U+FF22
-3-2343 U+0043 # LATIN CAPITAL LETTER C Fullwidth: U+FF23
-3-2344 U+0044 # LATIN CAPITAL LETTER D Fullwidth: U+FF24
-3-2345 U+0045 # LATIN CAPITAL LETTER E Fullwidth: U+FF25
-3-2346 U+0046 # LATIN CAPITAL LETTER F Fullwidth: U+FF26
-3-2347 U+0047 # LATIN CAPITAL LETTER G Fullwidth: U+FF27
-3-2348 U+0048 # LATIN CAPITAL LETTER H Fullwidth: U+FF28
-3-2349 U+0049 # LATIN CAPITAL LETTER I Fullwidth: U+FF29
-3-234A U+004A # LATIN CAPITAL LETTER J Fullwidth: U+FF2A
-3-234B U+004B # LATIN CAPITAL LETTER K Fullwidth: U+FF2B
-3-234C U+004C # LATIN CAPITAL LETTER L Fullwidth: U+FF2C
-3-234D U+004D # LATIN CAPITAL LETTER M Fullwidth: U+FF2D
-3-234E U+004E # LATIN CAPITAL LETTER N Fullwidth: U+FF2E
-3-234F U+004F # LATIN CAPITAL LETTER O Fullwidth: U+FF2F
-3-2350 U+0050 # LATIN CAPITAL LETTER P Fullwidth: U+FF30
-3-2351 U+0051 # LATIN CAPITAL LETTER Q Fullwidth: U+FF31
-3-2352 U+0052 # LATIN CAPITAL LETTER R Fullwidth: U+FF32
-3-2353 U+0053 # LATIN CAPITAL LETTER S Fullwidth: U+FF33
-3-2354 U+0054 # LATIN CAPITAL LETTER T Fullwidth: U+FF34
-3-2355 U+0055 # LATIN CAPITAL LETTER U Fullwidth: U+FF35
-3-2356 U+0056 # LATIN CAPITAL LETTER V Fullwidth: U+FF36
-3-2357 U+0057 # LATIN CAPITAL LETTER W Fullwidth: U+FF37
-3-2358 U+0058 # LATIN CAPITAL LETTER X Fullwidth: U+FF38
-3-2359 U+0059 # LATIN CAPITAL LETTER Y Fullwidth: U+FF39
-3-235A U+005A # LATIN CAPITAL LETTER Z Fullwidth: U+FF3A
+3-2341 U+FF21 # LATIN CAPITAL LETTER A Fullwidth: U+FF21
+3-2342 U+FF22 # LATIN CAPITAL LETTER B Fullwidth: U+FF22
+3-2343 U+FF23 # LATIN CAPITAL LETTER C Fullwidth: U+FF23
+3-2344 U+FF24 # LATIN CAPITAL LETTER D Fullwidth: U+FF24
+3-2345 U+FF25 # LATIN CAPITAL LETTER E Fullwidth: U+FF25
+3-2346 U+FF26 # LATIN CAPITAL LETTER F Fullwidth: U+FF26
+3-2347 U+FF27 # LATIN CAPITAL LETTER G Fullwidth: U+FF27
+3-2348 U+FF28 # LATIN CAPITAL LETTER H Fullwidth: U+FF28
+3-2349 U+FF29 # LATIN CAPITAL LETTER I Fullwidth: U+FF29
+3-234A U+FF2A # LATIN CAPITAL LETTER J Fullwidth: U+FF2A
+3-234B U+FF2B # LATIN CAPITAL LETTER K Fullwidth: U+FF2B
+3-234C U+FF2C # LATIN CAPITAL LETTER L Fullwidth: U+FF2C
+3-234D U+FF2D # LATIN CAPITAL LETTER M Fullwidth: U+FF2D
+3-234E U+FF2E # LATIN CAPITAL LETTER N Fullwidth: U+FF2E
+3-234F U+FF2F # LATIN CAPITAL LETTER O Fullwidth: U+FF2F
+3-2350 U+FF30 # LATIN CAPITAL LETTER P Fullwidth: U+FF30
+3-2351 U+FF31 # LATIN CAPITAL LETTER Q Fullwidth: U+FF31
+3-2352 U+FF32 # LATIN CAPITAL LETTER R Fullwidth: U+FF32
+3-2353 U+FF33 # LATIN CAPITAL LETTER S Fullwidth: U+FF33
+3-2354 U+FF34 # LATIN CAPITAL LETTER T Fullwidth: U+FF34
+3-2355 U+FF35 # LATIN CAPITAL LETTER U Fullwidth: U+FF35
+3-2356 U+FF36 # LATIN CAPITAL LETTER V Fullwidth: U+FF36
+3-2357 U+FF37 # LATIN CAPITAL LETTER W Fullwidth: U+FF37
+3-2358 U+FF38 # LATIN CAPITAL LETTER X Fullwidth: U+FF38
+3-2359 U+FF39 # LATIN CAPITAL LETTER Y Fullwidth: U+FF39
+3-235A U+FF3A # LATIN CAPITAL LETTER Z Fullwidth: U+FF3A
3-235B U+2213 # MINUS-OR-PLUS SIGN [2000]
3-235C U+2135 # ALEF SYMBOL [2000]
3-235D U+210F # PLANCK CONSTANT OVER TWO PI [2000]
3-235E U+33CB # SQUARE HP [2000]
3-235F U+2113 # SCRIPT SMALL L [2000]
3-2360 U+2127 # INVERTED OHM SIGN [2000]
-3-2361 U+0061 # LATIN SMALL LETTER A Fullwidth: U+FF41
-3-2362 U+0062 # LATIN SMALL LETTER B Fullwidth: U+FF42
-3-2363 U+0063 # LATIN SMALL LETTER C Fullwidth: U+FF43
-3-2364 U+0064 # LATIN SMALL LETTER D Fullwidth: U+FF44
-3-2365 U+0065 # LATIN SMALL LETTER E Fullwidth: U+FF45
-3-2366 U+0066 # LATIN SMALL LETTER F Fullwidth: U+FF46
-3-2367 U+0067 # LATIN SMALL LETTER G Fullwidth: U+FF47
-3-2368 U+0068 # LATIN SMALL LETTER H Fullwidth: U+FF48
-3-2369 U+0069 # LATIN SMALL LETTER I Fullwidth: U+FF49
-3-236A U+006A # LATIN SMALL LETTER J Fullwidth: U+FF4A
-3-236B U+006B # LATIN SMALL LETTER K Fullwidth: U+FF4B
-3-236C U+006C # LATIN SMALL LETTER L Fullwidth: U+FF4C
-3-236D U+006D # LATIN SMALL LETTER M Fullwidth: U+FF4D
-3-236E U+006E # LATIN SMALL LETTER N Fullwidth: U+FF4E
-3-236F U+006F # LATIN SMALL LETTER O Fullwidth: U+FF4F
-3-2370 U+0070 # LATIN SMALL LETTER P Fullwidth: U+FF50
-3-2371 U+0071 # LATIN SMALL LETTER Q Fullwidth: U+FF51
-3-2372 U+0072 # LATIN SMALL LETTER R Fullwidth: U+FF52
-3-2373 U+0073 # LATIN SMALL LETTER S Fullwidth: U+FF53
-3-2374 U+0074 # LATIN SMALL LETTER T Fullwidth: U+FF54
-3-2375 U+0075 # LATIN SMALL LETTER U Fullwidth: U+FF55
-3-2376 U+0076 # LATIN SMALL LETTER V Fullwidth: U+FF56
-3-2377 U+0077 # LATIN SMALL LETTER W Fullwidth: U+FF57
-3-2378 U+0078 # LATIN SMALL LETTER X Fullwidth: U+FF58
-3-2379 U+0079 # LATIN SMALL LETTER Y Fullwidth: U+FF59
-3-237A U+007A # LATIN SMALL LETTER Z Fullwidth: U+FF5A
+3-2361 U+FF41 # LATIN SMALL LETTER A Fullwidth: U+FF41
+3-2362 U+FF42 # LATIN SMALL LETTER B Fullwidth: U+FF42
+3-2363 U+FF43 # LATIN SMALL LETTER C Fullwidth: U+FF43
+3-2364 U+FF44 # LATIN SMALL LETTER D Fullwidth: U+FF44
+3-2365 U+FF45 # LATIN SMALL LETTER E Fullwidth: U+FF45
+3-2366 U+FF46 # LATIN SMALL LETTER F Fullwidth: U+FF46
+3-2367 U+FF47 # LATIN SMALL LETTER G Fullwidth: U+FF47
+3-2368 U+FF48 # LATIN SMALL LETTER H Fullwidth: U+FF48
+3-2369 U+FF49 # LATIN SMALL LETTER I Fullwidth: U+FF49
+3-236A U+FF4A # LATIN SMALL LETTER J Fullwidth: U+FF4A
+3-236B U+FF4B # LATIN SMALL LETTER K Fullwidth: U+FF4B
+3-236C U+FF4C # LATIN SMALL LETTER L Fullwidth: U+FF4C
+3-236D U+FF4D # LATIN SMALL LETTER M Fullwidth: U+FF4D
+3-236E U+FF4E # LATIN SMALL LETTER N Fullwidth: U+FF4E
+3-236F U+FF4F # LATIN SMALL LETTER O Fullwidth: U+FF4F
+3-2370 U+FF50 # LATIN SMALL LETTER P Fullwidth: U+FF50
+3-2371 U+FF51 # LATIN SMALL LETTER Q Fullwidth: U+FF51
+3-2372 U+FF52 # LATIN SMALL LETTER R Fullwidth: U+FF52
+3-2373 U+FF53 # LATIN SMALL LETTER S Fullwidth: U+FF53
+3-2374 U+FF54 # LATIN SMALL LETTER T Fullwidth: U+FF54
+3-2375 U+FF55 # LATIN SMALL LETTER U Fullwidth: U+FF55
+3-2376 U+FF56 # LATIN SMALL LETTER V Fullwidth: U+FF56
+3-2377 U+FF57 # LATIN SMALL LETTER W Fullwidth: U+FF57
+3-2378 U+FF58 # LATIN SMALL LETTER X Fullwidth: U+FF58
+3-2379 U+FF59 # LATIN SMALL LETTER Y Fullwidth: U+FF59
+3-237A U+FF5A # LATIN SMALL LETTER Z Fullwidth: U+FF5A
3-237B U+30A0 # KATAKANA-HIRAGANA DOUBLE HYPHEN [2000] [Unicode3.2]
3-237C U+2013 # EN DASH [2000]
3-237D U+29FA # DOUBLE PLUS [2000] [Unicode3.2]

View file

@ -0,0 +1,351 @@
--- jisx0213-2000-std.txt.orig Tue Apr 16 23:32:38 2002
+++ jisx0213-2004-std.txt Thu Jul 8 11:51:54 2004
@@ -1,6 +1,6 @@
-## JIS X 0213:2000 vs Unicode mapping table
+## JIS X 0213:2004 vs Unicode mapping table
##
-## Date: 16 Apr 2002 13:09:49 GMT
+## Date: 7 Jul 2004 13:09:49 GMT
## License:
## Copyright (C) 2001 earthian@tama.or.jp, All Rights Reserved.
## Copyright (C) 2001 I'O, All Rights Reserved.
@@ -23,21 +23,21 @@
3-2121 U+3000 # IDEOGRAPHIC SPACE
3-2122 U+3001 # IDEOGRAPHIC COMMA
3-2123 U+3002 # IDEOGRAPHIC FULL STOP
-3-2124 U+002C # COMMA Fullwidth: U+FF0C
-3-2125 U+002E # FULL STOP Fullwidth: U+FF0E
+3-2124 U+FF0C # COMMA Fullwidth: U+FF0C
+3-2125 U+FF0E # FULL STOP Fullwidth: U+FF0E
3-2126 U+30FB # KATAKANA MIDDLE DOT
-3-2127 U+003A # COLON Fullwidth: U+FF1A
-3-2128 U+003B # SEMICOLON Fullwidth: U+FF1B
-3-2129 U+003F # QUESTION MARK Fullwidth: U+FF1F
-3-212A U+0021 # EXCLAMATION MARK Fullwidth: U+FF01
+3-2127 U+FF1A # COLON Fullwidth: U+FF1A
+3-2128 U+FF1B # SEMICOLON Fullwidth: U+FF1B
+3-2129 U+FF1F # QUESTION MARK Fullwidth: U+FF1F
+3-212A U+FF01 # EXCLAMATION MARK Fullwidth: U+FF01
3-212B U+309B # KATAKANA-HIRAGANA VOICED SOUND MARK
3-212C U+309C # KATAKANA-HIRAGANA SEMI-VOICED SOUND MARK
3-212D U+00B4 # ACUTE ACCENT
-3-212E U+0060 # GRAVE ACCENT Fullwidth: U+FF40
+3-212E U+FF40 # GRAVE ACCENT Fullwidth: U+FF40
3-212F U+00A8 # DIAERESIS
-3-2130 U+005E # CIRCUMFLEX ACCENT Fullwidth: U+FF3E
-3-2131 U+203E # OVERLINE Windows: U+FFE3
-3-2132 U+005F # LOW LINE Fullwidth: U+FF3F
+3-2130 U+FF3E # CIRCUMFLEX ACCENT Fullwidth: U+FF3E
+3-2131 U+FFE3 # OVERLINE Windows: U+FFE3
+3-2132 U+FF3F # LOW LINE Fullwidth: U+FF3F
3-2133 U+30FD # KATAKANA ITERATION MARK
3-2134 U+30FE # KATAKANA VOICED ITERATION MARK
3-2135 U+309D # HIRAGANA ITERATION MARK
@@ -48,27 +48,27 @@
3-213A U+3006 # IDEOGRAPHIC CLOSING MARK
3-213B U+3007 # IDEOGRAPHIC NUMBER ZERO
3-213C U+30FC # KATAKANA-HIRAGANA PROLONGED SOUND MARK
-3-213D U+2014 # EM DASH Windows: U+2015
+3-213D U+2015 # EM DASH Windows: U+2015
3-213E U+2010 # HYPHEN
-3-213F U+002F # SOLIDUS Fullwidth: U+FF0F
+3-213F U+FF0F # SOLIDUS Fullwidth: U+FF0F
3-2140 U+005C # REVERSE SOLIDUS Fullwidth: U+FF3C
3-2141 U+301C # WAVE DASH Windows: U+FF5E
3-2142 U+2016 # DOUBLE VERTICAL LINE Windows: U+2225
-3-2143 U+007C # VERTICAL LINE Fullwidth: U+FF5C
+3-2143 U+FF5C # VERTICAL LINE Fullwidth: U+FF5C
3-2144 U+2026 # HORIZONTAL ELLIPSIS
3-2145 U+2025 # TWO DOT LEADER
3-2146 U+2018 # LEFT SINGLE QUOTATION MARK
3-2147 U+2019 # RIGHT SINGLE QUOTATION MARK
3-2148 U+201C # LEFT DOUBLE QUOTATION MARK
3-2149 U+201D # RIGHT DOUBLE QUOTATION MARK
-3-214A U+0028 # LEFT PARENTHESIS Fullwidth: U+FF08
-3-214B U+0029 # RIGHT PARENTHESIS Fullwidth: U+FF09
+3-214A U+FF08 # LEFT PARENTHESIS Fullwidth: U+FF08
+3-214B U+FF09 # RIGHT PARENTHESIS Fullwidth: U+FF09
3-214C U+3014 # LEFT TORTOISE SHELL BRACKET
3-214D U+3015 # RIGHT TORTOISE SHELL BRACKET
-3-214E U+005B # LEFT SQUARE BRACKET Fullwidth: U+FF3B
-3-214F U+005D # RIGHT SQUARE BRACKET Fullwidth: U+FF3D
-3-2150 U+007B # LEFT CURLY BRACKET Fullwidth: U+FF5B
-3-2151 U+007D # RIGHT CURLY BRACKET Fullwidth: U+FF5D
+3-214E U+FF3B # LEFT SQUARE BRACKET Fullwidth: U+FF3B
+3-214F U+FF3D # RIGHT SQUARE BRACKET Fullwidth: U+FF3D
+3-2150 U+FF5B # LEFT CURLY BRACKET Fullwidth: U+FF5B
+3-2151 U+FF5D # RIGHT CURLY BRACKET Fullwidth: U+FF5D
3-2152 U+3008 # LEFT ANGLE BRACKET
3-2153 U+3009 # RIGHT ANGLE BRACKET
3-2154 U+300A # LEFT DOUBLE ANGLE BRACKET
@@ -79,15 +79,15 @@
3-2159 U+300F # RIGHT WHITE CORNER BRACKET
3-215A U+3010 # LEFT BLACK LENTICULAR BRACKET
3-215B U+3011 # RIGHT BLACK LENTICULAR BRACKET
-3-215C U+002B # PLUS SIGN Fullwidth: U+FF0B
+3-215C U+FF0B # PLUS SIGN Fullwidth: U+FF0B
3-215D U+2212 # MINUS SIGN Windows: U+FF0D
3-215E U+00B1 # PLUS-MINUS SIGN
3-215F U+00D7 # MULTIPLICATION SIGN
3-2160 U+00F7 # DIVISION SIGN
-3-2161 U+003D # EQUALS SIGN Fullwidth: U+FF1D
+3-2161 U+FF1D # EQUALS SIGN Fullwidth: U+FF1D
3-2162 U+2260 # NOT EQUAL TO
-3-2163 U+003C # LESS-THAN SIGN Fullwidth: U+FF1C
-3-2164 U+003E # GREATER-THAN SIGN Fullwidth: U+FF1E
+3-2163 U+FF1C # LESS-THAN SIGN Fullwidth: U+FF1C
+3-2164 U+FF1E # GREATER-THAN SIGN Fullwidth: U+FF1E
3-2165 U+2266 # LESS-THAN OVER EQUAL TO
3-2166 U+2267 # GREATER-THAN OVER EQUAL TO
3-2167 U+221E # INFINITY
@@ -98,15 +98,15 @@
3-216C U+2032 # PRIME
3-216D U+2033 # DOUBLE PRIME
3-216E U+2103 # DEGREE CELSIUS
-3-216F U+00A5 # YEN SIGN Windows: U+FFE5
-3-2170 U+0024 # DOLLAR SIGN Fullwidth: U+FF04
+3-216F U+FFE5 # YEN SIGN Windows: U+FFE5
+3-2170 U+FF04 # DOLLAR SIGN Fullwidth: U+FF04
3-2171 U+00A2 # CENT SIGN Windows: U+FFE0
3-2172 U+00A3 # POUND SIGN Windows: U+FFE1
-3-2173 U+0025 # PERCENT SIGN Fullwidth: U+FF05
-3-2174 U+0023 # NUMBER SIGN Fullwidth: U+FF03
-3-2175 U+0026 # AMPERSAND Fullwidth: U+FF06
-3-2176 U+002A # ASTERISK Fullwidth: U+FF0A
-3-2177 U+0040 # COMMERCIAL AT Fullwidth: U+FF20
+3-2173 U+FF05 # PERCENT SIGN Fullwidth: U+FF05
+3-2174 U+FF03 # NUMBER SIGN Fullwidth: U+FF03
+3-2175 U+FF06 # AMPERSAND Fullwidth: U+FF06
+3-2176 U+FF0A # ASTERISK Fullwidth: U+FF0A
+3-2177 U+FF20 # COMMERCIAL AT Fullwidth: U+FF20
3-2178 U+00A7 # SECTION SIGN
3-2179 U+2606 # WHITE STAR
3-217A U+2605 # BLACK STAR
@@ -128,9 +128,9 @@
3-222C U+2191 # UPWARDS ARROW
3-222D U+2193 # DOWNWARDS ARROW
3-222E U+3013 # GETA MARK
-3-222F U+0027 # APOSTROPHE Fullwidth: U+FF07
-3-2230 U+0022 # QUOTATION MARK [2000] Fullwidth: U+FF02
-3-2231 U+002D # HYPHEN-MINUS [2000] Fullwidth: U+FF0D
+3-222F U+FF07 # APOSTROPHE Fullwidth: U+FF07
+3-2230 U+FF02 # QUOTATION MARK [2000] Fullwidth: U+FF02
+3-2231 U+FF0D # HYPHEN-MINUS [2000] Fullwidth: U+FF0D
3-2232 U+007E # TILDE [2000] Fullwidth: U+FF5E
3-2233 U+3033 # VERTICAL KANA REPEAT MARK UPPER HALF [2000]
3-2234 U+3034 # VERTICAL KANA REPEAT WITH VOICED SOUND MARK UPPER HALF [2000]
@@ -223,16 +223,16 @@
3-232D U+21E9 # DOWNWARDS WHITE ARROW [2000]
3-232E U+2934 # ARROW POINTING RIGHTWARDS THEN CURVING UPWARDS [2000] [Unicode3.2]
3-232F U+2935 # ARROW POINTING RIGHTWARDS THEN CURVING DOWNWARDS [2000] [Unicode3.2]
-3-2330 U+0030 # DIGIT ZERO Fullwidth: U+FF10
-3-2331 U+0031 # DIGIT ONE Fullwidth: U+FF11
-3-2332 U+0032 # DIGIT TWO Fullwidth: U+FF12
-3-2333 U+0033 # DIGIT THREE Fullwidth: U+FF13
-3-2334 U+0034 # DIGIT FOUR Fullwidth: U+FF14
-3-2335 U+0035 # DIGIT FIVE Fullwidth: U+FF15
-3-2336 U+0036 # DIGIT SIX Fullwidth: U+FF16
-3-2337 U+0037 # DIGIT SEVEN Fullwidth: U+FF17
-3-2338 U+0038 # DIGIT EIGHT Fullwidth: U+FF18
-3-2339 U+0039 # DIGIT NINE Fullwidth: U+FF19
+3-2330 U+FF10 # DIGIT ZERO Fullwidth: U+FF10
+3-2331 U+FF11 # DIGIT ONE Fullwidth: U+FF11
+3-2332 U+FF12 # DIGIT TWO Fullwidth: U+FF12
+3-2333 U+FF13 # DIGIT THREE Fullwidth: U+FF13
+3-2334 U+FF14 # DIGIT FOUR Fullwidth: U+FF14
+3-2335 U+FF15 # DIGIT FIVE Fullwidth: U+FF15
+3-2336 U+FF16 # DIGIT SIX Fullwidth: U+FF16
+3-2337 U+FF17 # DIGIT SEVEN Fullwidth: U+FF17
+3-2338 U+FF18 # DIGIT EIGHT Fullwidth: U+FF18
+3-2339 U+FF19 # DIGIT NINE Fullwidth: U+FF19
3-233A U+29BF # CIRCLED BULLET [2000] [Unicode3.2]
3-233B U+25C9 # FISHEYE [2000]
3-233C U+303D # PART ALTERNATION MARK [2000] [Unicode3.2]
@@ -240,64 +240,64 @@
3-233E U+FE45 # SESAME DOT [2000] [Unicode3.2]
3-233F U+25E6 # WHITE BULLET [2000]
3-2340 U+2022 # BULLET [2000]
-3-2341 U+0041 # LATIN CAPITAL LETTER A Fullwidth: U+FF21
-3-2342 U+0042 # LATIN CAPITAL LETTER B Fullwidth: U+FF22
-3-2343 U+0043 # LATIN CAPITAL LETTER C Fullwidth: U+FF23
-3-2344 U+0044 # LATIN CAPITAL LETTER D Fullwidth: U+FF24
-3-2345 U+0045 # LATIN CAPITAL LETTER E Fullwidth: U+FF25
-3-2346 U+0046 # LATIN CAPITAL LETTER F Fullwidth: U+FF26
-3-2347 U+0047 # LATIN CAPITAL LETTER G Fullwidth: U+FF27
-3-2348 U+0048 # LATIN CAPITAL LETTER H Fullwidth: U+FF28
-3-2349 U+0049 # LATIN CAPITAL LETTER I Fullwidth: U+FF29
-3-234A U+004A # LATIN CAPITAL LETTER J Fullwidth: U+FF2A
-3-234B U+004B # LATIN CAPITAL LETTER K Fullwidth: U+FF2B
-3-234C U+004C # LATIN CAPITAL LETTER L Fullwidth: U+FF2C
-3-234D U+004D # LATIN CAPITAL LETTER M Fullwidth: U+FF2D
-3-234E U+004E # LATIN CAPITAL LETTER N Fullwidth: U+FF2E
-3-234F U+004F # LATIN CAPITAL LETTER O Fullwidth: U+FF2F
-3-2350 U+0050 # LATIN CAPITAL LETTER P Fullwidth: U+FF30
-3-2351 U+0051 # LATIN CAPITAL LETTER Q Fullwidth: U+FF31
-3-2352 U+0052 # LATIN CAPITAL LETTER R Fullwidth: U+FF32
-3-2353 U+0053 # LATIN CAPITAL LETTER S Fullwidth: U+FF33
-3-2354 U+0054 # LATIN CAPITAL LETTER T Fullwidth: U+FF34
-3-2355 U+0055 # LATIN CAPITAL LETTER U Fullwidth: U+FF35
-3-2356 U+0056 # LATIN CAPITAL LETTER V Fullwidth: U+FF36
-3-2357 U+0057 # LATIN CAPITAL LETTER W Fullwidth: U+FF37
-3-2358 U+0058 # LATIN CAPITAL LETTER X Fullwidth: U+FF38
-3-2359 U+0059 # LATIN CAPITAL LETTER Y Fullwidth: U+FF39
-3-235A U+005A # LATIN CAPITAL LETTER Z Fullwidth: U+FF3A
+3-2341 U+FF21 # LATIN CAPITAL LETTER A Fullwidth: U+FF21
+3-2342 U+FF22 # LATIN CAPITAL LETTER B Fullwidth: U+FF22
+3-2343 U+FF23 # LATIN CAPITAL LETTER C Fullwidth: U+FF23
+3-2344 U+FF24 # LATIN CAPITAL LETTER D Fullwidth: U+FF24
+3-2345 U+FF25 # LATIN CAPITAL LETTER E Fullwidth: U+FF25
+3-2346 U+FF26 # LATIN CAPITAL LETTER F Fullwidth: U+FF26
+3-2347 U+FF27 # LATIN CAPITAL LETTER G Fullwidth: U+FF27
+3-2348 U+FF28 # LATIN CAPITAL LETTER H Fullwidth: U+FF28
+3-2349 U+FF29 # LATIN CAPITAL LETTER I Fullwidth: U+FF29
+3-234A U+FF2A # LATIN CAPITAL LETTER J Fullwidth: U+FF2A
+3-234B U+FF2B # LATIN CAPITAL LETTER K Fullwidth: U+FF2B
+3-234C U+FF2C # LATIN CAPITAL LETTER L Fullwidth: U+FF2C
+3-234D U+FF2D # LATIN CAPITAL LETTER M Fullwidth: U+FF2D
+3-234E U+FF2E # LATIN CAPITAL LETTER N Fullwidth: U+FF2E
+3-234F U+FF2F # LATIN CAPITAL LETTER O Fullwidth: U+FF2F
+3-2350 U+FF30 # LATIN CAPITAL LETTER P Fullwidth: U+FF30
+3-2351 U+FF31 # LATIN CAPITAL LETTER Q Fullwidth: U+FF31
+3-2352 U+FF32 # LATIN CAPITAL LETTER R Fullwidth: U+FF32
+3-2353 U+FF33 # LATIN CAPITAL LETTER S Fullwidth: U+FF33
+3-2354 U+FF34 # LATIN CAPITAL LETTER T Fullwidth: U+FF34
+3-2355 U+FF35 # LATIN CAPITAL LETTER U Fullwidth: U+FF35
+3-2356 U+FF36 # LATIN CAPITAL LETTER V Fullwidth: U+FF36
+3-2357 U+FF37 # LATIN CAPITAL LETTER W Fullwidth: U+FF37
+3-2358 U+FF38 # LATIN CAPITAL LETTER X Fullwidth: U+FF38
+3-2359 U+FF39 # LATIN CAPITAL LETTER Y Fullwidth: U+FF39
+3-235A U+FF3A # LATIN CAPITAL LETTER Z Fullwidth: U+FF3A
3-235B U+2213 # MINUS-OR-PLUS SIGN [2000]
3-235C U+2135 # ALEF SYMBOL [2000]
3-235D U+210F # PLANCK CONSTANT OVER TWO PI [2000]
3-235E U+33CB # SQUARE HP [2000]
3-235F U+2113 # SCRIPT SMALL L [2000]
3-2360 U+2127 # INVERTED OHM SIGN [2000]
-3-2361 U+0061 # LATIN SMALL LETTER A Fullwidth: U+FF41
-3-2362 U+0062 # LATIN SMALL LETTER B Fullwidth: U+FF42
-3-2363 U+0063 # LATIN SMALL LETTER C Fullwidth: U+FF43
-3-2364 U+0064 # LATIN SMALL LETTER D Fullwidth: U+FF44
-3-2365 U+0065 # LATIN SMALL LETTER E Fullwidth: U+FF45
-3-2366 U+0066 # LATIN SMALL LETTER F Fullwidth: U+FF46
-3-2367 U+0067 # LATIN SMALL LETTER G Fullwidth: U+FF47
-3-2368 U+0068 # LATIN SMALL LETTER H Fullwidth: U+FF48
-3-2369 U+0069 # LATIN SMALL LETTER I Fullwidth: U+FF49
-3-236A U+006A # LATIN SMALL LETTER J Fullwidth: U+FF4A
-3-236B U+006B # LATIN SMALL LETTER K Fullwidth: U+FF4B
-3-236C U+006C # LATIN SMALL LETTER L Fullwidth: U+FF4C
-3-236D U+006D # LATIN SMALL LETTER M Fullwidth: U+FF4D
-3-236E U+006E # LATIN SMALL LETTER N Fullwidth: U+FF4E
-3-236F U+006F # LATIN SMALL LETTER O Fullwidth: U+FF4F
-3-2370 U+0070 # LATIN SMALL LETTER P Fullwidth: U+FF50
-3-2371 U+0071 # LATIN SMALL LETTER Q Fullwidth: U+FF51
-3-2372 U+0072 # LATIN SMALL LETTER R Fullwidth: U+FF52
-3-2373 U+0073 # LATIN SMALL LETTER S Fullwidth: U+FF53
-3-2374 U+0074 # LATIN SMALL LETTER T Fullwidth: U+FF54
-3-2375 U+0075 # LATIN SMALL LETTER U Fullwidth: U+FF55
-3-2376 U+0076 # LATIN SMALL LETTER V Fullwidth: U+FF56
-3-2377 U+0077 # LATIN SMALL LETTER W Fullwidth: U+FF57
-3-2378 U+0078 # LATIN SMALL LETTER X Fullwidth: U+FF58
-3-2379 U+0079 # LATIN SMALL LETTER Y Fullwidth: U+FF59
-3-237A U+007A # LATIN SMALL LETTER Z Fullwidth: U+FF5A
+3-2361 U+FF41 # LATIN SMALL LETTER A Fullwidth: U+FF41
+3-2362 U+FF42 # LATIN SMALL LETTER B Fullwidth: U+FF42
+3-2363 U+FF43 # LATIN SMALL LETTER C Fullwidth: U+FF43
+3-2364 U+FF44 # LATIN SMALL LETTER D Fullwidth: U+FF44
+3-2365 U+FF45 # LATIN SMALL LETTER E Fullwidth: U+FF45
+3-2366 U+FF46 # LATIN SMALL LETTER F Fullwidth: U+FF46
+3-2367 U+FF47 # LATIN SMALL LETTER G Fullwidth: U+FF47
+3-2368 U+FF48 # LATIN SMALL LETTER H Fullwidth: U+FF48
+3-2369 U+FF49 # LATIN SMALL LETTER I Fullwidth: U+FF49
+3-236A U+FF4A # LATIN SMALL LETTER J Fullwidth: U+FF4A
+3-236B U+FF4B # LATIN SMALL LETTER K Fullwidth: U+FF4B
+3-236C U+FF4C # LATIN SMALL LETTER L Fullwidth: U+FF4C
+3-236D U+FF4D # LATIN SMALL LETTER M Fullwidth: U+FF4D
+3-236E U+FF4E # LATIN SMALL LETTER N Fullwidth: U+FF4E
+3-236F U+FF4F # LATIN SMALL LETTER O Fullwidth: U+FF4F
+3-2370 U+FF50 # LATIN SMALL LETTER P Fullwidth: U+FF50
+3-2371 U+FF51 # LATIN SMALL LETTER Q Fullwidth: U+FF51
+3-2372 U+FF52 # LATIN SMALL LETTER R Fullwidth: U+FF52
+3-2373 U+FF53 # LATIN SMALL LETTER S Fullwidth: U+FF53
+3-2374 U+FF54 # LATIN SMALL LETTER T Fullwidth: U+FF54
+3-2375 U+FF55 # LATIN SMALL LETTER U Fullwidth: U+FF55
+3-2376 U+FF56 # LATIN SMALL LETTER V Fullwidth: U+FF56
+3-2377 U+FF57 # LATIN SMALL LETTER W Fullwidth: U+FF57
+3-2378 U+FF58 # LATIN SMALL LETTER X Fullwidth: U+FF58
+3-2379 U+FF59 # LATIN SMALL LETTER Y Fullwidth: U+FF59
+3-237A U+FF5A # LATIN SMALL LETTER Z Fullwidth: U+FF5A
3-237B U+30A0 # KATAKANA-HIRAGANA DOUBLE HYPHEN [2000] [Unicode3.2]
3-237C U+2013 # EN DASH [2000]
3-237D U+29FA # DOUBLE PLUS [2000] [Unicode3.2]
@@ -1242,7 +1242,7 @@
3-2D7C # <reserved> Windows: U+222A
3-2D7D U+2756 # BLACK DIAMOND MINUS WHITE X [2000]
3-2D7E U+261E # WHITE RIGHT POINTING INDEX [2000]
-3-2E21 # <reserved>
+3-2E21 U+4FF1 # <cjk> [2004]
3-2E22 U+2000B # <cjk> [2000] [Unicode3.1] Private: U+F780
3-2E23 U+3402 # <cjk> [2000]
3-2E24 U+4E28 # <cjk> [2000]
@@ -1429,7 +1429,7 @@
3-2F7B U+218BD # <cjk> [2000] [Unicode3.1] Private: U+F78F
3-2F7C U+5B19 # <cjk> [2000]
3-2F7D U+5B25 # <cjk> [2000]
-3-2F7E # <reserved>
+3-2F7E U+525D # <cjk> [2004]
3-3021 U+4E9C # <cjk>
3-3022 U+5516 # <cjk>
3-3023 U+5A03 # <cjk>
@@ -4395,7 +4395,7 @@
3-4F51 U+6E7E # <cjk>
3-4F52 U+7897 # <cjk>
3-4F53 U+8155 # <cjk>
-3-4F54 # <reserved>
+3-4F54 U+20B9F # <cjk> [2004]
3-4F55 U+5B41 # <cjk> [2000]
3-4F56 U+5B56 # <cjk> [2000]
3-4F57 U+5B7D # <cjk> [2000]
@@ -4437,7 +4437,7 @@
3-4F7B U+5DA7 # <cjk> [2000]
3-4F7C U+5DB8 # <cjk> [2000]
3-4F7D U+5DCB # <cjk> [2000]
-3-4F7E # <reserved>
+3-4F7E U+541E # <cjk> [2004]
3-5021 U+5F0C # <cjk>
3-5022 U+4E10 # <cjk>
3-5023 U+4E15 # <cjk>
@@ -7828,7 +7828,7 @@
3-7424 U+7464 # <cjk> [1983]
3-7425 U+51DC # <cjk> [1990]
3-7426 U+7199 # <cjk> [1990]
-3-7427 # <reserved>
+3-7427 U+5653 # <cjk> [2004]
3-7428 U+5DE2 # <cjk> [2000]
3-7429 U+5E14 # <cjk> [2000]
3-742A U+5E18 # <cjk> [2000]
@@ -8851,11 +8851,11 @@
3-7E77 U+9F94 # <cjk> [2000]
3-7E78 U+9F97 # <cjk> [2000]
3-7E79 U+9FA2 # <cjk> [2000]
-3-7E7A # <reserved>
-3-7E7B # <reserved>
-3-7E7C # <reserved>
-3-7E7D # <reserved>
-3-7E7E # <reserved>
+3-7E7A U+59F8 # <cjk> [2004]
+3-7E7B U+5C5B # <cjk> [2004]
+3-7E7C U+5E77 # <cjk> [2004]
+3-7E7D U+7626 # <cjk> [2004]
+3-7E7E U+7E6B # <cjk> [2004]
4-2121 U+20089 # <cjk> [2000] [Unicode3.1] Private: U+F7D1
4-2122 U+4E02 # <cjk> [2000]
4-2123 U+4E0F # <cjk> [2000]
@@ -11138,7 +11138,7 @@
4-7D38 U+9B10 # <cjk> [2000]
4-7D39 U+9B12 # <cjk> [2000]
4-7D3A U+9B16 # <cjk> [2000]
-4-7D3B U+9B1D # <cjk> [2000]
+4-7D3B U+9B1C # <cjk> [2000]
4-7D3C U+9B2B # <cjk> [2000]
4-7D3D U+9B33 # <cjk> [2000]
4-7D3E U+9B3D # <cjk> [2000]

File diff suppressed because it is too large Load diff

File diff suppressed because it is too large Load diff