github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/cmd/lst2json/main.go (about)

     1  // The lst2json tool extracts information for decomp from IDA assembly listings
     2  // (*.lst -> *.json).
     3  package main
     4  
     5  import (
     6  	"encoding/json"
     7  	"flag"
     8  	"fmt"
     9  	"io/ioutil"
    10  	"log"
    11  	"os"
    12  	"regexp"
    13  	"sort"
    14  	"strconv"
    15  
    16  	"github.com/decomp/exp/bin"
    17  	"github.com/mewkiz/pkg/term"
    18  	"github.com/pkg/errors"
    19  )
    20  
    21  // dbg represents a logger with the "lst2json:" prefix, which logs debug
    22  // messages to standard error.
    23  var dbg = log.New(os.Stderr, term.RedBold("lst2json:")+" ", 0)
    24  
    25  func usage() {
    26  	const use = `
    27  Extract information for decomp from IDA assembly listings (*.lst -> *.json).
    28  
    29  Usage:
    30  
    31  	lst2json [OPTION]... FILE.lst
    32  
    33  Flags:
    34  `
    35  	fmt.Fprint(os.Stderr, use[1:])
    36  	flag.PrintDefaults()
    37  }
    38  
    39  func main() {
    40  	// Parse command line flags.
    41  	flag.Parse()
    42  	flag.Usage = usage
    43  	flag.Parse()
    44  	if flag.NArg() != 1 {
    45  		flag.Usage()
    46  		os.Exit(1)
    47  	}
    48  	lstPath := flag.Arg(0)
    49  
    50  	if err := extract(lstPath); err != nil {
    51  		log.Fatalf("%+v", err)
    52  	}
    53  }
    54  
    55  // extract extracts information for decomp from the given IDA assembly listing.
    56  func extract(lstPath string) error {
    57  	// Read file.
    58  	input, err := ioutil.ReadFile(lstPath)
    59  	if err != nil {
    60  		return errors.WithStack(err)
    61  	}
    62  
    63  	// Regular expressions for locating addresses.
    64  	const (
    65  		// Functions (and basic blocks).
    66  		regFunc = `[\n](:?[.]text|ROM)[:]([0-9a-fA-F]+)[\t ;#]+=============== S U B R O U T I N E =======================================`
    67  		// Basic blocks.
    68  		regFallthrough = `[ \t]+(loop|loope|loopne|ja|jb|jbe|jecxz|jg|jge|jl|jle|jnb|jns|jnz|jp|js|jz)[ \t]+[^\n]*\n[.]text[:]00([0-9a-fA-F]+)`
    69  		regTarget      = `[.]text[:]00([0-9a-fA-F]+)[ \t][$@_a-zA-Z][$@_a-zA-Z0-9]+:`
    70  		// Instructions.
    71  		regInst     = `[\n](:?[.]text|ROM)[:]([0-9a-fA-F]+)[ \t]*[ ]{7}[a-z]`
    72  		regTextData = `[\n](:?[.]text|ROM)[:]([0-9a-fA-F]+)[ \t]*[ ]{7}(?:db|dw|dd|dq|align|assume|include|public)[ ]`
    73  		// Data.
    74  		regJumpTable     = `[a-zA-Z]+[:]00([0-9a-fA-F]+)[^\n]*;[ \t]jump[ \t]table`
    75  		regIndirectTable = `[a-zA-Z]+[:]00([0-9a-fA-F]+)[^\n]*;[ \t]indirect[ \t]table`
    76  		regJumpPastData  = `[ \t]+jmp[ \t]+[^\n]*\n[.][a-zA-Z]+[a-zA-Z]+[:]00([0-9a-fA-F]+)[ \t]+; ---------------------------------------------------------------------------[\n][.][a-zA-Z]+[:]00([0-9a-fA-F]+)[ \t]+`
    77  		regAlign         = `; ---------------------------------------------------------------------------[\n][.][a-zA-Z]+[:]00([0-9a-fA-F]+)[ \t]+align[ \t]+`
    78  	)
    79  
    80  	// Function, basic block, instruction and data addresses.
    81  	var (
    82  		funcAddrs  []bin.Address
    83  		blockAddrs []bin.Address
    84  		instAddrs  []bin.Address
    85  		dataAddrs  []bin.Address
    86  	)
    87  
    88  	// Locate function addresses.
    89  	m := make(map[bin.Address]bool)
    90  	if err := locateAddrs(input, m, regFunc); err != nil {
    91  		return errors.WithStack(err)
    92  	}
    93  	for funcAddr := range m {
    94  		funcAddrs = append(funcAddrs, funcAddr)
    95  	}
    96  	sort.Sort(bin.Addresses(funcAddrs))
    97  
    98  	// Locate basic block addresses.
    99  	//
   100  	// Don't reset m, since the address of each function is the address of its
   101  	// entry basic block.
   102  	if err := locateAddrs(input, m, regFallthrough); err != nil {
   103  		return errors.WithStack(err)
   104  	}
   105  	if err := locateAddrs(input, m, regTarget); err != nil {
   106  		return errors.WithStack(err)
   107  	}
   108  	for blockAddr := range m {
   109  		blockAddrs = append(blockAddrs, blockAddr)
   110  	}
   111  	sort.Sort(bin.Addresses(blockAddrs))
   112  
   113  	// Locate instruction addresses.
   114  	//
   115  	// Don't reset m, since the address of each function and basic block is used
   116  	// to remove false negatives in instruction address tagging.
   117  	instAddrSet := make(map[bin.Address]bool)
   118  	if err := locateAddrs(input, instAddrSet, regInst); err != nil {
   119  		return errors.WithStack(err)
   120  	}
   121  	textDataAddrSet := make(map[bin.Address]bool)
   122  	if err := locateAddrs(input, textDataAddrSet, regTextData); err != nil {
   123  		return errors.WithStack(err)
   124  	}
   125  	// Remove data directives from instruction addresses (e.g. "dd 0x00"), except
   126  	// if the address is that of a function or basic block, since a given
   127  	// instruction may appear at the same address as a data declaration at the
   128  	// start of some functions; e.g.
   129  	//
   130  	//    .text:00401000		       assume cs:_text ; <== data declaration
   131  	//    .text:00401000 j__crt_cpp_init proc near
   132  	//    .text:00401000		       jmp     $+5     ; <== instruction
   133  	for dataAddr := range textDataAddrSet {
   134  		if !m[dataAddr] {
   135  			delete(instAddrSet, dataAddr)
   136  		}
   137  	}
   138  	for instAddr := range instAddrSet {
   139  		instAddrs = append(instAddrs, instAddr)
   140  	}
   141  	sort.Sort(bin.Addresses(instAddrs))
   142  
   143  	// Locate data addresses.
   144  	tableAddrs := make(map[bin.Address]bool)
   145  	if err := locateAddrs(input, tableAddrs, regJumpTable); err != nil {
   146  		return errors.WithStack(err)
   147  	}
   148  	for dataAddr := range tableAddrs {
   149  		dataAddrs = append(dataAddrs, dataAddr)
   150  	}
   151  	// Reset m.
   152  	m = make(map[bin.Address]bool)
   153  	if err := locateAddrs(input, m, regIndirectTable); err != nil {
   154  		return errors.WithStack(err)
   155  	}
   156  	if err := locateAddrs(input, m, regJumpPastData); err != nil {
   157  		return errors.WithStack(err)
   158  	}
   159  	if err := locateAddrs(input, m, regAlign); err != nil {
   160  		return errors.WithStack(err)
   161  	}
   162  	for dataAddr := range m {
   163  		dataAddrs = append(dataAddrs, dataAddr)
   164  	}
   165  	sort.Sort(bin.Addresses(dataAddrs))
   166  
   167  	// Locate targets of jump tables.
   168  	tables, err := locateTargets(input, tableAddrs)
   169  	if err != nil {
   170  		return errors.WithStack(err)
   171  	}
   172  
   173  	// Locate function signatures.
   174  	sigs, err := locateFuncSigs(input)
   175  	if err != nil {
   176  		return errors.WithStack(err)
   177  	}
   178  	for _, funcAddr := range funcAddrs {
   179  		if _, ok := sigs[funcAddr]; !ok {
   180  			dbg.Printf("WARNING: unable to locate function signature for function at %v", funcAddr)
   181  		}
   182  	}
   183  
   184  	// Locate imports.
   185  	imports, err := locateImports(input)
   186  	if err != nil {
   187  		return errors.WithStack(err)
   188  	}
   189  
   190  	// Locate function chunks.
   191  	chunks, err := locateFuncChunks(input)
   192  	if err != nil {
   193  		return errors.WithStack(err)
   194  	}
   195  
   196  	// Store JSON files.
   197  	if err := storeJSON("funcs.json", funcAddrs); err != nil {
   198  		return errors.WithStack(err)
   199  	}
   200  	if err := storeJSON("blocks.json", blockAddrs); err != nil {
   201  		return errors.WithStack(err)
   202  	}
   203  	if err := storeJSON("insts.json", instAddrs); err != nil {
   204  		return errors.WithStack(err)
   205  	}
   206  	if err := storeJSON("data.json", dataAddrs); err != nil {
   207  		return errors.WithStack(err)
   208  	}
   209  	if err := storeJSON("tables.json", tables); err != nil {
   210  		return errors.WithStack(err)
   211  	}
   212  	if err := storeJSON("sigs.json", sigs); err != nil {
   213  		return errors.WithStack(err)
   214  	}
   215  	if err := storeJSON("imports.json", imports); err != nil {
   216  		return errors.WithStack(err)
   217  	}
   218  	if err := storeJSON("chunks.json", chunks); err != nil {
   219  		return errors.WithStack(err)
   220  	}
   221  
   222  	return nil
   223  }
   224  
   225  // FuncSig represents a function signature.
   226  type FuncSig struct {
   227  	// Function name.
   228  	Name string `json:"name"`
   229  	// Function signature.
   230  	Sig string `json:"sig"`
   231  }
   232  
   233  // locateFuncSigs locates function signatures in the input IDA assembly listing.
   234  func locateFuncSigs(input []byte) (map[bin.Address]FuncSig, error) {
   235  	const regFuncSig = `(;[ \t]*([^\n]+))?[\n][.]text[:]00([0-9a-fA-F]+)[ \t]+([a-zA-Z0-9_?@$]+)[ \t]+proc[ \t]near`
   236  	re, err := regexp.Compile(regFuncSig)
   237  	if err != nil {
   238  		return nil, errors.WithStack(err)
   239  	}
   240  	subs := re.FindAllSubmatch(input, -1)
   241  	sigs := make(map[bin.Address]FuncSig)
   242  	for _, sub := range subs {
   243  		var sig FuncSig
   244  		// parse function signature.
   245  		sig.Sig = string(sub[2])
   246  		// parse address.
   247  		s := string(sub[3])
   248  		x, err := strconv.ParseUint(s, 16, 64)
   249  		if err != nil {
   250  			return nil, errors.WithStack(err)
   251  		}
   252  		addr := bin.Address(x)
   253  		// parse function name.
   254  		sig.Name = string(sub[4])
   255  		sigs[addr] = sig
   256  	}
   257  	return sigs, nil
   258  }
   259  
   260  // locateImports locates imports in the input IDA assembly listing.
   261  func locateImports(input []byte) (map[bin.Address]FuncSig, error) {
   262  	const regImport = `([.]idata[:]00[0-9a-fA-F]+[ \t];[ \t]*([^\n]+))?[\n][.]idata[:]00([0-9a-fA-F]+)[ \t]+extrn[ \t]+([a-zA-Z0-9_?@$]+)`
   263  	re, err := regexp.Compile(regImport)
   264  	if err != nil {
   265  		return nil, errors.WithStack(err)
   266  	}
   267  	subs := re.FindAllSubmatch(input, -1)
   268  	sigs := make(map[bin.Address]FuncSig)
   269  	for _, sub := range subs {
   270  		var sig FuncSig
   271  		// parse function signature.
   272  		sig.Sig = string(sub[2])
   273  		// parse address.
   274  		s := string(sub[3])
   275  		x, err := strconv.ParseUint(s, 16, 64)
   276  		if err != nil {
   277  			return nil, errors.WithStack(err)
   278  		}
   279  		addr := bin.Address(x)
   280  		// parse function name.
   281  		sig.Name = string(sub[4])
   282  		sigs[addr] = sig
   283  	}
   284  	return sigs, nil
   285  }
   286  
   287  // locateFuncChunks locates addresses of function chunks belonging to parent
   288  // functions.
   289  func locateFuncChunks(input []byte) (map[bin.Address]map[bin.Address]bool, error) {
   290  	const regFuncChunk = `[.]text[:]00([0-9a-fA-F]+)[ \t];[ \t]FUNCTION[ \t]CHUNK[ \t]AT[ \t][.]text[:]00([0-9a-fA-F]+)`
   291  	re, err := regexp.Compile(regFuncChunk)
   292  	if err != nil {
   293  		return nil, errors.WithStack(err)
   294  	}
   295  	chunks := make(map[bin.Address]map[bin.Address]bool)
   296  	subs := re.FindAllSubmatch(input, -1)
   297  	for _, sub := range subs {
   298  		// Parent function address.
   299  		var parent bin.Address
   300  		// Function chunk address.
   301  		var chunk bin.Address
   302  		if err := parent.Set(fmt.Sprintf("0x%s", sub[1])); err != nil {
   303  			return nil, errors.WithStack(err)
   304  		}
   305  		if err := chunk.Set(fmt.Sprintf("0x%s", sub[2])); err != nil {
   306  			return nil, errors.WithStack(err)
   307  		}
   308  		if _, ok := chunks[chunk]; !ok {
   309  			chunks[chunk] = make(map[bin.Address]bool)
   310  		}
   311  		chunks[chunk][parent] = true
   312  	}
   313  	return chunks, nil
   314  }
   315  
   316  // locateTargets locates the targets of jump tables in the input IDA assembly
   317  // listing.
   318  func locateTargets(input []byte, tableAddrs map[bin.Address]bool) (map[bin.Address][]bin.Address, error) {
   319  	tables := make(map[bin.Address][]bin.Address)
   320  	for tableAddr := range tableAddrs {
   321  		present := make(map[bin.Address]bool)
   322  		s := fmt.Sprintf("%06X", uint64(tableAddr))
   323  		regTargets := `[.][a-zA-Z]+[:]00` + s + `[^\n]*? dd (([^\n]*?offset[ \t]loc_([0-9a-fA-F]+))+)`
   324  		re, err := regexp.Compile(regTargets)
   325  		if err != nil {
   326  			return nil, errors.WithStack(err)
   327  		}
   328  		subs := re.FindAllSubmatch(input, -1)
   329  		for _, sub := range subs {
   330  			line := sub[1]
   331  			// line contains data formatted as follows.
   332  			//
   333  			//    offset loc_422F0B, offset loc_422F0B, offset loc_422F1B
   334  			re, err := regexp.Compile("loc_([0-9a-fA-F]+)")
   335  			if err != nil {
   336  				return nil, errors.WithStack(err)
   337  			}
   338  			subs := re.FindAllSubmatch(line, -1)
   339  			for _, sub := range subs {
   340  				var target bin.Address
   341  				s := "0x" + string(sub[1])
   342  				if err := target.Set(s); err != nil {
   343  					return nil, errors.WithStack(err)
   344  				}
   345  				if present[target] {
   346  					// skip if target already present.
   347  					continue
   348  				}
   349  				tables[tableAddr] = append(tables[tableAddr], target)
   350  				present[target] = true
   351  			}
   352  		}
   353  	}
   354  	return tables, nil
   355  }
   356  
   357  // locateAddrs locates addresses in the input IDA assembly listing based on the
   358  // given regular expression.
   359  func locateAddrs(input []byte, m map[bin.Address]bool, reg string) error {
   360  	re, err := regexp.Compile(reg)
   361  	if err != nil {
   362  		return errors.WithStack(err)
   363  	}
   364  	subs := re.FindAllSubmatch(input, -1)
   365  	for _, sub := range subs {
   366  		s := string(sub[len(sub)-1])
   367  		x, err := strconv.ParseUint(s, 16, 64)
   368  		if err != nil {
   369  			return errors.WithStack(err)
   370  		}
   371  		addr := bin.Address(x)
   372  		m[addr] = true
   373  	}
   374  	return nil
   375  }
   376  
   377  // storeJSON stores a JSON encoded representation of the addresses to the given
   378  // file.
   379  func storeJSON(path string, v interface{}) error {
   380  	buf, err := json.MarshalIndent(v, "", "\t")
   381  	if err != nil {
   382  		return errors.WithStack(err)
   383  	}
   384  	buf = append(buf, '\n')
   385  	if err := ioutil.WriteFile(path, buf, 0644); err != nil {
   386  		return errors.WithStack(err)
   387  	}
   388  	return nil
   389  }