"""Generate Go source fragments for Windows resource language identifiers.

Origin: github.com/saferwall/pe@v1.5.2/scripts/extract-rsrc-lang.py

The script reads the language table named by ``spec`` plus ``lang_ids.txt``
and writes four text files (``out.txt``, ``langs.txt``, ``sub_langs.txt``
and ``map.txt``) containing constants and lookup-table entries.
"""

# Text file containing languages and sub-languages extracted from:
# Language Identifier Constants and Strings for Microsoft Windows doc.
spec = "ms-lcid.txt"

# Characters dropped from a display name to turn it into an identifier.
# Examples: "U.A.E.", "(Latin)", "People's Republic of China",
# "Cocos [Keeling] Islands", "Guinea-Bissau", "Congo, DRC",
# "Pseudo locale for east Asian/complex script localization testing".
_STRIP_TABLE = str.maketrans("", "", ".()'[]-/ ,")

# A word may start with one of these; the letter after it is capitalized.
_OPENING_BRACKETS = ("(", "[")

# Entries carrying this ID are reported and skipped by parse_txt_file.
_UNSUPPORTED_ID = "0x1000"


class Language:
    """One row of the language table."""

    # Display name with punctuation stripped, usable inside an identifier.
    language = ""
    # Display name with every word capitalized.
    originalLanguage = ""
    # Language ID; parse_txt_file stores it as the hex string read from
    # the file (e.g. "0x0409"), the class default is the integer 0.
    id = 0
    # Last field of the row (e.g. "en-US").
    tag = ""
    # True when the row is emitted as a ``SubLang...`` entry.
    isSubLang = False

    def __str__(self) -> str:
        return f"{self.originalLanguage} : {self.id} : {self.tag}"


def sanitize_lang(language):
    """Return *language* with punctuation and whitespace removed.

    The characters ``. ( ) ' [ ] - / ,`` and the space are deleted so the
    result can be appended to ``Lang`` / ``SubLang`` to form an identifier.
    """
    return language.translate(_STRIP_TABLE)


def read_lang_ids(filename):
    """Return the first whitespace-separated field of every line.

    Blank lines are skipped instead of raising ``IndexError``.
    """
    with open(filename, "r", encoding="utf-8") as f:
        return [fields[0] for fields in map(str.split, f) if fields]


def _capitalize_word(word):
    """Capitalize *word*, looking past a leading ``(`` or ``[``."""
    if word.startswith(_OPENING_BRACKETS):
        return word[0] + word[1:].capitalize()
    return word.capitalize()


def _capitalize_after_hyphen(name):
    """Capitalize the text right after the first hyphen in *name*.

    Word-level capitalization lowercases everything after the first
    letter, so "guinea-bissau" becomes "Guinea-bissau"; this restores
    "Guinea-Bissau". A hyphen at index 0 is left alone.
    """
    begin = name.find("-")
    if begin <= 0:
        return name
    return (
        name[:begin + 1]
        + name[begin + 1:begin + 3].capitalize()
        + name[begin + 3:]
    )


def parse_txt_file(filename, lang_ids):
    """Parse the language table in *filename* into ``Language`` objects.

    Each non-blank line is split on whitespace: the last field is the
    tag, the one before it is the ID, and everything before that is the
    language name. A row is a sub-language when its tag contains ``-``
    and its ID is not listed in *lang_ids*.

    Rows whose ID is ``0x1000`` are printed and skipped. Blank lines are
    ignored. A non-blank line with fewer than two fields raises
    ``IndexError``.
    """
    known_ids = set(lang_ids)  # built once: O(1) membership per row
    languages = []

    with open(filename, "r", encoding="utf-8") as f:
        for line in f:
            elements = line.split()
            if not elements:
                continue

            lang = Language()
            lang.tag = elements[-1]
            lang.id = elements[-2]
            lang.isSubLang = "-" in lang.tag and lang.id not in known_ids

            # Capitalize words so golang is happy.
            name = " ".join(_capitalize_word(word) for word in elements[:-2])
            lang.originalLanguage = _capitalize_after_hyphen(name)
            lang.language = sanitize_lang(lang.originalLanguage)

            # Skip unsupported locales.
            if lang.id == _UNSUPPORTED_ID:
                print (f"skipping {lang}")
                continue

            languages.append(lang)

    return languages


def generate_go_code(languages: list[Language]) -> str:
    """Return constant declarations: all languages, then all sub-languages.

    Every entry is preceded by a ``// name (tag)`` comment line. Language
    entries carry an explicit ``ResourceLang = <id>``; sub-language
    entries are the bare name.
    """
    parts = []

    # Generate langs constants
    for lang in languages:
        if not lang.isSubLang:
            parts.append(f"// {lang.originalLanguage} ({lang.tag})\n")
            parts.append(f"Lang{lang.language} ResourceLang = {lang.id}\n")

    # Generate sub-langs constants
    for lang in languages:
        if lang.isSubLang:
            parts.append(f"// {lang.originalLanguage} ({lang.tag})\n")
            parts.append(f"SubLang{lang.language}\n")

    return "".join(parts)


def generate_lang_string(languages: list[Language]) -> str:
    """Return ``LangX : "name (tag)",`` lines for every non-sub-language."""
    return "".join(
        f'Lang{lang.language} : "{lang.originalLanguage} ({lang.tag})",\n'
        for lang in languages
        if not lang.isSubLang
    )


def generate_sub_lang_string(languages: list[Language]) -> str:
    """Return ``SubLangX : "name (tag)",`` lines for every sub-language."""
    return "".join(
        f'SubLang{lang.language} : "{lang.originalLanguage} ({lang.tag})",\n'
        for lang in languages
        if lang.isSubLang
    )


def generate_lang_sub_lang_map_string(languages: list[Language]) -> str:
    """Return nested map entries grouping sub-languages under languages.

    Each language opens a ``LangX : {`` entry; the sub-languages that
    follow it in *languages* are listed inside, keyed by the ID shifted
    right by 10 bits. Every opened entry is closed with ``},``, including
    the last one.
    """
    # The following tags don't have a location.
    ignore_list = frozenset(
        ["0x0476", "0x05FE", "0x0501", "0x09FF",
         "0x043D", "0x0471", "0x045F", "0x7C67"]
    )

    parts = []
    curly_bracket_is_open = False
    for lang in languages:
        if lang.id in ignore_list:
            continue
        if lang.isSubLang:
            sub_id = int(lang.id, 0) >> 10
            parts.append(f' 0x{sub_id:x} : SubLang{lang.language}.String(),\n')
            continue
        if curly_bracket_is_open:
            parts.append("},\n")
        parts.append(f"Lang{lang.language} : {{\n")
        curly_bracket_is_open = True

    # Close the final entry; otherwise the generated map is unbalanced.
    if curly_bracket_is_open:
        parts.append("},\n")

    return "".join(parts)


def write_generated_code(code, filename):
    """Write *code* to *filename* as UTF-8, replacing existing content."""
    with open(filename, "w", encoding="utf-8") as f:
        f.write(code)


def main() -> None:
    """Read the input tables and write the four generated text files."""
    lang_ids = read_lang_ids("lang_ids.txt")
    languages = parse_txt_file(spec, lang_ids)

    write_generated_code(generate_go_code(languages), "out.txt")
    write_generated_code(generate_lang_string(languages), "langs.txt")
    write_generated_code(generate_sub_lang_string(languages), "sub_langs.txt")
    write_generated_code(
        generate_lang_sub_lang_map_string(languages), "map.txt"
    )


if __name__ == "__main__":
    main()