Add merged charset
This commit is contained in:
@@ -0,0 +1,47 @@
|
|||||||
|
from pathlib import Path
|
||||||
|
import re
|
||||||
|
|
||||||
|
def load_existing_list(path) -> str:
    """Return the entire contents of the UTF-8 text file at ``path``.

    The file is opened with ``newline=''`` so line endings are returned
    exactly as they are stored on disk instead of being translated.

    Args:
        path: Location of the file to read; passed straight to ``open``.

    Returns:
        The decoded file contents as a single string.
    """
    with open(path, encoding='utf-8', newline='') as f:
        return f.read()
|
||||||
|
|
||||||
|
# Input character lists and the merged output file, relative to the
# directory the script is run from.
japanese_list = '../msgothic_0_charset_Japanese.txt'
multilang_list = '../msgothic_2_charset_OtherLang.txt'
out_char_list = '../msgothic_2_charset_JP_and_OtherLang.txt'


def merge_charsets(base, extra, report=print):
    """Merge the characters of ``extra`` into ``base`` without duplicates.

    Every character of ``base`` is kept, in its original order.  Each
    character of ``extra`` that ``base`` does not already contain is placed
    immediately before the first ``base`` character that compares greater
    than it; characters greater than everything they are compared against
    are appended at the end.  New characters are handled in sorted order so
    the result is the same on every run.

    Args:
        base: String whose characters form the backbone of the result.
        extra: String whose characters should be merged in.
        report: Callable invoked with one progress/warning message at a
            time.  Defaults to ``print``.

    Returns:
        The merged characters joined into a single string.
    """
    existing_chars = set(base)
    candidates = set(extra)

    # Characters present in both inputs must not be written a second time.
    for char in sorted(candidates & existing_chars):
        report(f"WARNING: character {char} already exists, skipping")

    # Sorting lets a single cursor replace a rescan of every remaining
    # character for each base character, and makes the output deterministic.
    pending = sorted(candidates - existing_chars)
    merged = []
    next_pending = 0
    for i, c in enumerate(base):
        # Emit the smaller new characters *before* c so they really end up
        # at position i, as the message states.
        while next_pending < len(pending) and pending[next_pending] < c:
            new_character = pending[next_pending]
            merged.append(new_character)
            report(f"Inserting new character {new_character} at position {i} as it is less than {c}")
            next_pending += 1
        merged.append(c)

    # Whatever is left was never smaller than a base character it was
    # compared with, so it goes at the end.
    merged.extend(pending[next_pending:])
    return ''.join(merged)


def main():
    """Merge the Japanese charset into the multi-language charset file.

    Reads both input lists, merges them with ``merge_charsets`` and writes
    the result to ``out_char_list``.

    Raises:
        Exception: If any character of the Japanese list is missing from the
            merged result.
    """
    jp = load_existing_list(japanese_list)
    multi = load_existing_list(multilang_list)

    merged = merge_charsets(multi, jp)

    # Sanity check, performed before the output file is touched.
    chars_to_add = set(jp) - set(merged)
    if chars_to_add:
        raise Exception(f"One or more characters were not added {chars_to_add}")

    with open(out_char_list, 'w', encoding='utf-8', newline='') as f:
        f.write(merged)


if __name__ == "__main__":
    main()
|
||||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user