From 68f8bdd9ae5eba0a3a5fef7d15c03f97c8634ba8 Mon Sep 17 00:00:00 2001 From: drojf <1249449+drojf@users.noreply.github.com> Date: Sun, 14 Jan 2024 19:08:04 +1100 Subject: [PATCH] =?UTF-8?q?Update=20otherlang=20charset=20and=20add=20pyth?= =?UTF-8?q?on=20script=20for=20extract=20characters=20rei:=20=E5=87=B8=20h?= =?UTF-8?q?ou:=20=E2=96=B2=20and=20=E2=96=BC=20Note:=20there=20is=20alread?= =?UTF-8?q?y=20a=20swift=20script,=20but=20this=20rudimentary=20python=20s?= =?UTF-8?q?cript=20should=20be=20good=20enough=20The=20missing=20=E5=87=B8?= =?UTF-8?q?=20was=20reported=20in=20https://github.com/07th-mod/higurashi-?= =?UTF-8?q?rei/issues/22?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../PythonTextExtractor/extract.py | 65 +++++++++++++++++++ .../msgothic_2_charset_OtherLang.txt | 2 +- 2 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 scripts/CharacterInfoExtraction/PythonTextExtractor/extract.py diff --git a/scripts/CharacterInfoExtraction/PythonTextExtractor/extract.py b/scripts/CharacterInfoExtraction/PythonTextExtractor/extract.py new file mode 100644 index 0000000..249436e --- /dev/null +++ b/scripts/CharacterInfoExtraction/PythonTextExtractor/extract.py @@ -0,0 +1,65 @@ +from pathlib import Path +import re + +en_regex = re.compile(r'OutputLine\([^,]*,\s*[^,]*,\s*[^,]*,\s*([^,]*)') + +def load_existing_list(path): + with open(path, encoding='utf-8', newline='') as f: + return f.read() + + +existing_char_list = Path('C:/drojf/large_projects/umineko/ui-editing-scripts/scripts/CharacterInfoExtraction/msgothic_2_charset_OtherLang.txt') +out_char_list = existing_char_list.with_suffix(existing_char_list.suffix + '.out') +source_directory = Path('C:/drojf/large_projects/umineko/HIGURASHI_REPOS') + +existing_char_list_text = load_existing_list(existing_char_list) +existing_font_set = set(existing_char_list_text) + +all_chars = set() + +for file in source_directory.rglob("*.txt"): + print(file) + with open(file, encoding='utf-8') as f: + whole_file_string = f.read() + for match in en_regex.finditer(whole_file_string): + if match: + outputline_english_arg = match.group(1) + for c in outputline_english_arg: + all_chars.add(c) + +all_chars_list = list(all_chars) +all_chars_list.sort() + +chars_to_add = [] +new_char_found = False +for char in all_chars_list: + if char not in existing_font_set: + print(f'NEW CHAR: {char}') + new_char_found = True + chars_to_add.append(char) + +if not new_char_found: + print("No new characters found!") + +final_list = list(existing_font_set.union(all_chars)) +final_list.sort() + +for c in final_list: + print(c, end='') + +print() + + +with open(out_char_list, 'w', encoding='utf-8', newline='') as f: + for i, c in enumerate(existing_char_list_text): + f.write(c) + + # This is very bad for performance if there are lots of new chars found, but it works for now to maintain ordering + for new_character in chars_to_add: + if new_character < c: + f.write(new_character) + chars_to_add.remove(new_character) + print(f"Inserting new character {new_character} at position {i} as it is less than {c}") + +if chars_to_add: + raise Exception(f"One or more characters were not added {chars_to_add}") \ No newline at end of file diff --git a/scripts/CharacterInfoExtraction/msgothic_2_charset_OtherLang.txt b/scripts/CharacterInfoExtraction/msgothic_2_charset_OtherLang.txt index ba8406b..6c2c9a8 100644 --- a/scripts/CharacterInfoExtraction/msgothic_2_charset_OtherLang.txt +++ b/scripts/CharacterInfoExtraction/msgothic_2_charset_OtherLang.txt @@ -1,2 +1,2 @@ - !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃDŽDždžLJLjljNJNjnjǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ΄΅Ά·ΈΉΊΌΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϏϐϑϒϓϔϕϖϗϘϙϚϛϜϝϞϟϠϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϰϱϲϳϴϵ϶ϷϸϹϺϻϼϽϾϿЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѠѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿҀҁ҂҃҄҅҆҇҈҉ҊҋҌҍҎҏҐґҒғҔҕҖҗҘҙҚқҜҝҞҟҠҡҢңҤҥҦҧҨҩҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿӀӁӂӃӄӅӆӇӈӉӊӋӌӍӎӏӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹӺӻӼӽӾӿԀԁԂԃԄԅԆԇԈԉԊԋԌԍԎԏԐԑԒԓԔԕԖԗԘԙԚԛԜԝԞԟԠԡԢԣԤԥԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖՙ՚՛՜՝՞՟աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև։֊ḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏḐḑḒḓḔḕḖḗḘḙḚḛḜḝḞḟḠḡḢḣḤḥḦḧḨḩḪḫḬḭḮḯḰḱḲḳḴḵḶḷḸḹḺḻḼḽḾḿṀṁṂṃṄṅṆṇṈṉṊṋṌṍṎṏṐṑṒṓṔṕṖṗṘṙṚṛṜṝṞṟṠṡṢṣṤṥṦṧṨṩṪṫṬṭṮṯṰṱṲṳṴṵṶṷṸṹṺṻṼṽṾṿẀẁẂẃẄẅẆẇẈẉẊẋẌẍẎẏẐẑẒẓẔẕẖẗẘẙẚẛẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ—―“”…※ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ←↑→↓−■△▽◆○◎●◯★☆♪ 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〰〱〲〳〴〵〶〷〻〼〽ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ゙゚゛゜ゝゞゟ゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヲンヴヵヶ・ー一三上下両中主予事二人介以件休会伝何例便優元入全公内冒出刊刑初制刺前勘北占却厚原叔古号名味喜営回園在地場夏夜夢大天夫姉娘婦子学害家察局屋崎席帳年幸店座建当後御心志急恋恨悪感戻手担拶挨捜推撲故敷文断新既日明昼暇暗書服望末本条来東板林染査格検業標歯死殺母気求沙沢派流深混港準滅潰火災無照父版牲特犠犯獄現生用由画疲病療発盥目省石研破礁示社神祟祭科稿積究空第箱粉紅紙級紹終組統絵綿緊線編罪署老考者聞脅脚自臼茜茶荘落蒐蜜表要見規覧親言計記設訳診詩話誌誘語読課調請講賓路車転迫通週進達選遺部都重鈴録長閉間際隠雀集雛雨雪雲電非面音順頭顛鬼魅魎魔麦機!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚¥ \ No newline at end of file + !"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~ ¡¢£¤¥¦§¨©ª«¬­®¯°±²³´µ¶·¸¹º»¼½¾¿ÀÁÂÃÄÅÆÇÈÉÊËÌÍÎÏÐÑÒÓÔÕÖרÙÚÛÜÝÞßàáâãäåæçèéêëìíîïðñòóôõö÷øùúûüýþÿĀāĂ㥹ĆćĈĉĊċČčĎďĐđĒēĔĕĖėĘęĚěĜĝĞğĠġĢģĤĥĦħĨĩĪīĬĭĮįİıIJijĴĵĶķĸĹĺĻļĽľĿŀŁłŃńŅņŇňʼnŊŋŌōŎŏŐőŒœŔŕŖŗŘřŚśŜŝŞşŠšŢţŤťŦŧŨũŪūŬŭŮůŰűŲųŴŵŶŷŸŹźŻżŽžſƀƁƂƃƄƅƆƇƈƉƊƋƌƍƎƏƐƑƒƓƔƕƖƗƘƙƚƛƜƝƞƟƠơƢƣƤƥƦƧƨƩƪƫƬƭƮƯưƱƲƳƴƵƶƷƸƹƺƻƼƽƾƿǀǁǂǃDŽDždžLJLjljNJNjnjǍǎǏǐǑǒǓǔǕǖǗǘǙǚǛǜǝǞǟǠǡǢǣǤǥǦǧǨǩǪǫǬǭǮǯǰDZDzdzǴǵǶǷǸǹǺǻǼǽǾǿȀȁȂȃȄȅȆȇȈȉȊȋȌȍȎȏȐȑȒȓȔȕȖȗȘșȚțȜȝȞȟȠȡȢȣȤȥȦȧȨȩȪȫȬȭȮȯȰȱȲȳȴȵȶȷȸȹȺȻȼȽȾȿɀɁɂɃɄɅɆɇɈɉɊɋɌɍɎɏͰͱͲͳʹ͵Ͷͷͺͻͼͽ;Ϳ΄΅Ά·ΈΉΊΌΎΏΐΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩΪΫάέήίΰαβγδεζηθικλμνξοπρςστυφχψωϊϋόύώϏϐϑϒϓϔϕϖϗϘϙϚϛϜϝϞϟϠϡϢϣϤϥϦϧϨϩϪϫϬϭϮϯϰϱϲϳϴϵ϶ϷϸϹϺϻϼϽϾϿЀЁЂЃЄЅІЇЈЉЊЋЌЍЎЏАБВГДЕЖЗИЙКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯабвгдежзийклмнопрстуфхцчшщъыьэюяѐёђѓєѕіїјљњћќѝўџѠѡѢѣѤѥѦѧѨѩѪѫѬѭѮѯѰѱѲѳѴѵѶѷѸѹѺѻѼѽѾѿҀҁ҂҃҄҅҆҇҈҉ҊҋҌҍҎҏҐґҒғҔҕҖҗҘҙҚқҜҝҞҟҠҡҢңҤҥҦҧҨҩҪҫҬҭҮүҰұҲҳҴҵҶҷҸҹҺһҼҽҾҿӀӁӂӃӄӅӆӇӈӉӊӋӌӍӎӏӐӑӒӓӔӕӖӗӘәӚӛӜӝӞӟӠӡӢӣӤӥӦӧӨөӪӫӬӭӮӯӰӱӲӳӴӵӶӷӸӹӺӻӼӽӾӿԀԁԂԃԄԅԆԇԈԉԊԋԌԍԎԏԐԑԒԓԔԕԖԗԘԙԚԛԜԝԞԟԠԡԢԣԤԥԱԲԳԴԵԶԷԸԹԺԻԼԽԾԿՀՁՂՃՄՅՆՇՈՉՊՋՌՍՎՏՐՑՒՓՔՕՖՙ՚՛՜՝՞՟աբգդեզէըթժիլխծկհձղճմյնշոչպջռսվտրցւփքօֆև։֊ḀḁḂḃḄḅḆḇḈḉḊḋḌḍḎḏḐḑḒḓḔḕḖḗḘḙḚḛḜḝḞḟḠḡḢḣḤḥḦḧḨḩḪḫḬḭḮḯḰḱḲḳḴḵḶḷḸḹḺḻḼḽḾḿṀṁṂṃṄṅṆṇṈṉṊṋṌṍṎṏṐṑṒṓṔṕṖṗṘṙṚṛṜṝṞṟṠṡṢṣṤṥṦṧṨṩṪṫṬṭṮṯṰṱṲṳṴṵṶṷṸṹṺṻṼṽṾṿẀẁẂẃẄẅẆẇẈẉẊẋẌẍẎẏẐẑẒẓẔẕẖẗẘẙẚẛẠạẢảẤấẦầẨẩẪẫẬậẮắẰằẲẳẴẵẶặẸẹẺẻẼẽẾếỀềỂểỄễỆệỈỉỊịỌọỎỏỐốỒồỔổỖỗỘộỚớỜờỞởỠỡỢợỤụỦủỨứỪừỬửỮữỰựỲỳỴỵỶỷỸỹ—―“”…※ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩ←↑→↓−■△▲▽▼◆○◎●◯★☆♪ 、。〃〄々〆〇〈〉《》「」『』【】〒〓〔〕〖〗〘〙〚〛〜〝〞〟〠〰〱〲〳〴〵〶〷〻〼〽ぁあぃいぅうぇえぉおかがきぎくぐけげこごさざしじすずせぜそぞただちぢっつづてでとどなにぬねのはばぱひびぴふぶぷへべぺほぼぽまみむめもゃやゅゆょよらりるれろゎわゐゑをんゔゕゖ゙゚゛゜ゝゞゟ゠ァアィイゥウェエォオカガキギクグケゲコゴサザシジスズセゼソゾタダチヂッツヅテデトドナニヌネノハバパヒビピフブプヘベペホボポマミムメモャヤュユョヨラリルレロヮワヲンヴヵヶ・ー一三上下両中主予事二人介以件休会伝何例便優元入全公内冒出凸刊刑初制刺前勘北占却厚原叔古号名味喜営回園在地場夏夜夢大天夫姉娘婦子学害家察局屋崎席帳年幸店座建当後御心志急恋恨悪感戻手担拶挨捜推撲故敷文断新既日明昼暇暗書服望末本条来東板林染査格検業標歯死殺母気求沙沢派流深混港準滅潰火災無照父版牲特犠犯獄現生用由画疲病療発盥目省石研破礁示社神祟祭科稿積究空第箱粉紅紙級紹終組統絵綿緊線編罪署老考者聞脅脚自臼茜茶荘落蒐蜜表要見規覧親言計記設訳診詩話誌誘語読課調請講賓路車転迫通週進達選遺部都重鈴録長閉間際隠雀集雛雨雪雲電非面音順頭顛鬼魅魎魔麦機!"#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\]^_`abcdefghijklmnopqrstuvwxyz{|}~⦅⦆。「」、・ヲァィゥェォャュョッーアイウエオカキクケコサシスセソタチツテトナニヌネノハヒフヘホマミムメモヤユヨラリルレロワン゙゚¥ \ No newline at end of file