github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/cmd/lst2json/main.go (about) 1 // The lst2json tool extracts information for decomp from IDA assembly listings 2 // (*.lst -> *.json). 3 package main 4 5 import ( 6 "encoding/json" 7 "flag" 8 "fmt" 9 "io/ioutil" 10 "log" 11 "os" 12 "regexp" 13 "sort" 14 "strconv" 15 16 "github.com/decomp/exp/bin" 17 "github.com/mewkiz/pkg/term" 18 "github.com/pkg/errors" 19 ) 20 21 // dbg represents a logger with the "lst2json:" prefix, which logs debug 22 // messages to standard error. 23 var dbg = log.New(os.Stderr, term.RedBold("lst2json:")+" ", 0) 24 25 func usage() { 26 const use = ` 27 Extract information for decomp from IDA assembly listings (*.lst -> *.json). 28 29 Usage: 30 31 lst2json [OPTION]... FILE.lst 32 33 Flags: 34 ` 35 fmt.Fprint(os.Stderr, use[1:]) 36 flag.PrintDefaults() 37 } 38 39 func main() { 40 // Parse command line flags. 41 flag.Parse() 42 flag.Usage = usage 43 flag.Parse() 44 if flag.NArg() != 1 { 45 flag.Usage() 46 os.Exit(1) 47 } 48 lstPath := flag.Arg(0) 49 50 if err := extract(lstPath); err != nil { 51 log.Fatalf("%+v", err) 52 } 53 } 54 55 // extract extracts information for decomp from the given IDA assembly listing. 56 func extract(lstPath string) error { 57 // Read file. 58 input, err := ioutil.ReadFile(lstPath) 59 if err != nil { 60 return errors.WithStack(err) 61 } 62 63 // Regular expressions for locating addresses. 64 const ( 65 // Functions (and basic blocks). 66 regFunc = `[\n](:?[.]text|ROM)[:]([0-9a-fA-F]+)[\t ;#]+=============== S U B R O U T I N E =======================================` 67 // Basic blocks. 68 regFallthrough = `[ \t]+(loop|loope|loopne|ja|jb|jbe|jecxz|jg|jge|jl|jle|jnb|jns|jnz|jp|js|jz)[ \t]+[^\n]*\n[.]text[:]00([0-9a-fA-F]+)` 69 regTarget = `[.]text[:]00([0-9a-fA-F]+)[ \t][$@_a-zA-Z][$@_a-zA-Z0-9]+:` 70 // Instructions. 71 regInst = `[\n](:?[.]text|ROM)[:]([0-9a-fA-F]+)[ \t]*[ ]{7}[a-z]` 72 regTextData = `[\n](:?[.]text|ROM)[:]([0-9a-fA-F]+)[ \t]*[ ]{7}(?:db|dw|dd|dq|align|assume|include|public)[ ]` 73 // Data. 74 regJumpTable = `[a-zA-Z]+[:]00([0-9a-fA-F]+)[^\n]*;[ \t]jump[ \t]table` 75 regIndirectTable = `[a-zA-Z]+[:]00([0-9a-fA-F]+)[^\n]*;[ \t]indirect[ \t]table` 76 regJumpPastData = `[ \t]+jmp[ \t]+[^\n]*\n[.][a-zA-Z]+[a-zA-Z]+[:]00([0-9a-fA-F]+)[ \t]+; ---------------------------------------------------------------------------[\n][.][a-zA-Z]+[:]00([0-9a-fA-F]+)[ \t]+` 77 regAlign = `; ---------------------------------------------------------------------------[\n][.][a-zA-Z]+[:]00([0-9a-fA-F]+)[ \t]+align[ \t]+` 78 ) 79 80 // Function, basic block, instruction and data addresses. 81 var ( 82 funcAddrs []bin.Address 83 blockAddrs []bin.Address 84 instAddrs []bin.Address 85 dataAddrs []bin.Address 86 ) 87 88 // Locate function addresses. 89 m := make(map[bin.Address]bool) 90 if err := locateAddrs(input, m, regFunc); err != nil { 91 return errors.WithStack(err) 92 } 93 for funcAddr := range m { 94 funcAddrs = append(funcAddrs, funcAddr) 95 } 96 sort.Sort(bin.Addresses(funcAddrs)) 97 98 // Locate basic block addresses. 99 // 100 // Don't reset m, since the address of each function is the address of its 101 // entry basic block. 102 if err := locateAddrs(input, m, regFallthrough); err != nil { 103 return errors.WithStack(err) 104 } 105 if err := locateAddrs(input, m, regTarget); err != nil { 106 return errors.WithStack(err) 107 } 108 for blockAddr := range m { 109 blockAddrs = append(blockAddrs, blockAddr) 110 } 111 sort.Sort(bin.Addresses(blockAddrs)) 112 113 // Locate instruction addresses. 114 // 115 // Don't reset m, since the address of each function and basic block is used 116 // to remove false negatives in instruction address tagging. 117 instAddrSet := make(map[bin.Address]bool) 118 if err := locateAddrs(input, instAddrSet, regInst); err != nil { 119 return errors.WithStack(err) 120 } 121 textDataAddrSet := make(map[bin.Address]bool) 122 if err := locateAddrs(input, textDataAddrSet, regTextData); err != nil { 123 return errors.WithStack(err) 124 } 125 // Remove data directives from instruction addresses (e.g. "dd 0x00"), except 126 // if the address is that of a function or basic block, since a given 127 // instruction may appear at the same address as a data declaration at the 128 // start of some functions; e.g. 129 // 130 // .text:00401000 assume cs:_text ; <== data declaration 131 // .text:00401000 j__crt_cpp_init proc near 132 // .text:00401000 jmp $+5 ; <== instruction 133 for dataAddr := range textDataAddrSet { 134 if !m[dataAddr] { 135 delete(instAddrSet, dataAddr) 136 } 137 } 138 for instAddr := range instAddrSet { 139 instAddrs = append(instAddrs, instAddr) 140 } 141 sort.Sort(bin.Addresses(instAddrs)) 142 143 // Locate data addresses. 144 tableAddrs := make(map[bin.Address]bool) 145 if err := locateAddrs(input, tableAddrs, regJumpTable); err != nil { 146 return errors.WithStack(err) 147 } 148 for dataAddr := range tableAddrs { 149 dataAddrs = append(dataAddrs, dataAddr) 150 } 151 // Reset m. 152 m = make(map[bin.Address]bool) 153 if err := locateAddrs(input, m, regIndirectTable); err != nil { 154 return errors.WithStack(err) 155 } 156 if err := locateAddrs(input, m, regJumpPastData); err != nil { 157 return errors.WithStack(err) 158 } 159 if err := locateAddrs(input, m, regAlign); err != nil { 160 return errors.WithStack(err) 161 } 162 for dataAddr := range m { 163 dataAddrs = append(dataAddrs, dataAddr) 164 } 165 sort.Sort(bin.Addresses(dataAddrs)) 166 167 // Locate targets of jump tables. 168 tables, err := locateTargets(input, tableAddrs) 169 if err != nil { 170 return errors.WithStack(err) 171 } 172 173 // Locate function signatures. 174 sigs, err := locateFuncSigs(input) 175 if err != nil { 176 return errors.WithStack(err) 177 } 178 for _, funcAddr := range funcAddrs { 179 if _, ok := sigs[funcAddr]; !ok { 180 dbg.Printf("WARNING: unable to locate function signature for function at %v", funcAddr) 181 } 182 } 183 184 // Locate imports. 185 imports, err := locateImports(input) 186 if err != nil { 187 return errors.WithStack(err) 188 } 189 190 // Locate function chunks. 191 chunks, err := locateFuncChunks(input) 192 if err != nil { 193 return errors.WithStack(err) 194 } 195 196 // Store JSON files. 197 if err := storeJSON("funcs.json", funcAddrs); err != nil { 198 return errors.WithStack(err) 199 } 200 if err := storeJSON("blocks.json", blockAddrs); err != nil { 201 return errors.WithStack(err) 202 } 203 if err := storeJSON("insts.json", instAddrs); err != nil { 204 return errors.WithStack(err) 205 } 206 if err := storeJSON("data.json", dataAddrs); err != nil { 207 return errors.WithStack(err) 208 } 209 if err := storeJSON("tables.json", tables); err != nil { 210 return errors.WithStack(err) 211 } 212 if err := storeJSON("sigs.json", sigs); err != nil { 213 return errors.WithStack(err) 214 } 215 if err := storeJSON("imports.json", imports); err != nil { 216 return errors.WithStack(err) 217 } 218 if err := storeJSON("chunks.json", chunks); err != nil { 219 return errors.WithStack(err) 220 } 221 222 return nil 223 } 224 225 // FuncSig represents a function signature. 226 type FuncSig struct { 227 // Function name. 228 Name string `json:"name"` 229 // Function signature. 230 Sig string `json:"sig"` 231 } 232 233 // locateFuncSigs locates function signatures in the input IDA assembly listing. 234 func locateFuncSigs(input []byte) (map[bin.Address]FuncSig, error) { 235 const regFuncSig = `(;[ \t]*([^\n]+))?[\n][.]text[:]00([0-9a-fA-F]+)[ \t]+([a-zA-Z0-9_?@$]+)[ \t]+proc[ \t]near` 236 re, err := regexp.Compile(regFuncSig) 237 if err != nil { 238 return nil, errors.WithStack(err) 239 } 240 subs := re.FindAllSubmatch(input, -1) 241 sigs := make(map[bin.Address]FuncSig) 242 for _, sub := range subs { 243 var sig FuncSig 244 // parse function signature. 245 sig.Sig = string(sub[2]) 246 // parse address. 247 s := string(sub[3]) 248 x, err := strconv.ParseUint(s, 16, 64) 249 if err != nil { 250 return nil, errors.WithStack(err) 251 } 252 addr := bin.Address(x) 253 // parse function name. 254 sig.Name = string(sub[4]) 255 sigs[addr] = sig 256 } 257 return sigs, nil 258 } 259 260 // locateImports locates imports in the input IDA assembly listing. 261 func locateImports(input []byte) (map[bin.Address]FuncSig, error) { 262 const regImport = `([.]idata[:]00[0-9a-fA-F]+[ \t];[ \t]*([^\n]+))?[\n][.]idata[:]00([0-9a-fA-F]+)[ \t]+extrn[ \t]+([a-zA-Z0-9_?@$]+)` 263 re, err := regexp.Compile(regImport) 264 if err != nil { 265 return nil, errors.WithStack(err) 266 } 267 subs := re.FindAllSubmatch(input, -1) 268 sigs := make(map[bin.Address]FuncSig) 269 for _, sub := range subs { 270 var sig FuncSig 271 // parse function signature. 272 sig.Sig = string(sub[2]) 273 // parse address. 274 s := string(sub[3]) 275 x, err := strconv.ParseUint(s, 16, 64) 276 if err != nil { 277 return nil, errors.WithStack(err) 278 } 279 addr := bin.Address(x) 280 // parse function name. 281 sig.Name = string(sub[4]) 282 sigs[addr] = sig 283 } 284 return sigs, nil 285 } 286 287 // locateFuncChunks locates addresses of function chunks belonging to parent 288 // functions. 289 func locateFuncChunks(input []byte) (map[bin.Address]map[bin.Address]bool, error) { 290 const regFuncChunk = `[.]text[:]00([0-9a-fA-F]+)[ \t];[ \t]FUNCTION[ \t]CHUNK[ \t]AT[ \t][.]text[:]00([0-9a-fA-F]+)` 291 re, err := regexp.Compile(regFuncChunk) 292 if err != nil { 293 return nil, errors.WithStack(err) 294 } 295 chunks := make(map[bin.Address]map[bin.Address]bool) 296 subs := re.FindAllSubmatch(input, -1) 297 for _, sub := range subs { 298 // Parent function address. 299 var parent bin.Address 300 // Function chunk address. 301 var chunk bin.Address 302 if err := parent.Set(fmt.Sprintf("0x%s", sub[1])); err != nil { 303 return nil, errors.WithStack(err) 304 } 305 if err := chunk.Set(fmt.Sprintf("0x%s", sub[2])); err != nil { 306 return nil, errors.WithStack(err) 307 } 308 if _, ok := chunks[chunk]; !ok { 309 chunks[chunk] = make(map[bin.Address]bool) 310 } 311 chunks[chunk][parent] = true 312 } 313 return chunks, nil 314 } 315 316 // locateTargets locates the targets of jump tables in the input IDA assembly 317 // listing. 318 func locateTargets(input []byte, tableAddrs map[bin.Address]bool) (map[bin.Address][]bin.Address, error) { 319 tables := make(map[bin.Address][]bin.Address) 320 for tableAddr := range tableAddrs { 321 present := make(map[bin.Address]bool) 322 s := fmt.Sprintf("%06X", uint64(tableAddr)) 323 regTargets := `[.][a-zA-Z]+[:]00` + s + `[^\n]*? dd (([^\n]*?offset[ \t]loc_([0-9a-fA-F]+))+)` 324 re, err := regexp.Compile(regTargets) 325 if err != nil { 326 return nil, errors.WithStack(err) 327 } 328 subs := re.FindAllSubmatch(input, -1) 329 for _, sub := range subs { 330 line := sub[1] 331 // line contains data formatted as follows. 332 // 333 // offset loc_422F0B, offset loc_422F0B, offset loc_422F1B 334 re, err := regexp.Compile("loc_([0-9a-fA-F]+)") 335 if err != nil { 336 return nil, errors.WithStack(err) 337 } 338 subs := re.FindAllSubmatch(line, -1) 339 for _, sub := range subs { 340 var target bin.Address 341 s := "0x" + string(sub[1]) 342 if err := target.Set(s); err != nil { 343 return nil, errors.WithStack(err) 344 } 345 if present[target] { 346 // skip if target already present. 347 continue 348 } 349 tables[tableAddr] = append(tables[tableAddr], target) 350 present[target] = true 351 } 352 } 353 } 354 return tables, nil 355 } 356 357 // locateAddrs locates addresses in the input IDA assembly listing based on the 358 // given regular expression. 359 func locateAddrs(input []byte, m map[bin.Address]bool, reg string) error { 360 re, err := regexp.Compile(reg) 361 if err != nil { 362 return errors.WithStack(err) 363 } 364 subs := re.FindAllSubmatch(input, -1) 365 for _, sub := range subs { 366 s := string(sub[len(sub)-1]) 367 x, err := strconv.ParseUint(s, 16, 64) 368 if err != nil { 369 return errors.WithStack(err) 370 } 371 addr := bin.Address(x) 372 m[addr] = true 373 } 374 return nil 375 } 376 377 // storeJSON stores a JSON encoded representation of the addresses to the given 378 // file. 379 func storeJSON(path string, v interface{}) error { 380 buf, err := json.MarshalIndent(v, "", "\t") 381 if err != nil { 382 return errors.WithStack(err) 383 } 384 buf = append(buf, '\n') 385 if err := ioutil.WriteFile(path, buf, 0644); err != nil { 386 return errors.WithStack(err) 387 } 388 return nil 389 }