github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/cmd/bin2ll/main.go (about)

     1  // The bin2ll tool lifts binary executables to equivalent LLVM IR assembly
     2  // (*.exe -> *.ll).
     3  package main
     4  
     5  import (
     6  	"flag"
     7  	"fmt"
     8  	"io/ioutil"
     9  	"log"
    10  	"os"
    11  	"sort"
    12  
    13  	"github.com/decomp/exp/bin"
    14  	_ "github.com/decomp/exp/bin/elf" // register ELF decoder
    15  	_ "github.com/decomp/exp/bin/pe"  // register PE decoder
    16  	_ "github.com/decomp/exp/bin/pef" // register PEF decoder
    17  	"github.com/decomp/exp/bin/raw"
    18  	"github.com/decomp/exp/lift/x86"
    19  	"github.com/llir/llvm/ir"
    20  	"github.com/llir/llvm/ir/constant"
    21  	"github.com/llir/llvm/ir/types"
    22  	"github.com/mewkiz/pkg/term"
    23  	"github.com/pkg/errors"
    24  )
    25  
    26  // Loggers.
    27  var (
    28  	// dbg represents a logger with the "bin2ll:" prefix, which logs debug
    29  	// messages to standard error.
    30  	dbg = log.New(os.Stderr, term.MagentaBold("bin2ll:")+" ", 0)
    31  	// warn represents a logger with the "warning:" prefix, which logs warning
    32  	// messages to standard error.
    33  	warn = log.New(os.Stderr, term.RedBold("warning:")+" ", 0)
    34  )
    35  
    36  func usage() {
    37  	const use = `
    38  Lift binary executables to equivalent LLVM IR assembly (*.exe -> *.ll).
    39  
    40  Usage:
    41  
    42  	bin2ll [OPTION]... FILE
    43  
    44  Flags:
    45  `
    46  	fmt.Fprint(os.Stderr, use[1:])
    47  	flag.PrintDefaults()
    48  }
    49  
    50  func main() {
    51  	// Parse command line arguments.
    52  	var (
    53  		// blockAddr specifies a basic block address to lift.
    54  		blockAddr bin.Address
    55  		// TODO: Remove -first flag and firstAddr.
    56  		// firstAddr specifies the first function address to lift.
    57  		firstAddr bin.Address
    58  		// funcAddr specifies a function address to lift.
    59  		funcAddr bin.Address
    60  		// TODO: Remove -last flag and lastAddr.
    61  		// lastAddr specifies the last function address to disassemble.
    62  		lastAddr bin.Address
    63  		// output specifies the output path.
    64  		output string
    65  		// cfgonly specifies whether to output minimal LLVM IR needed for CFG generation.
    66  		cfgonly bool
    67  		// quiet specifies whether to suppress non-error messages.
    68  		quiet bool
    69  		// rawArch specifies the machine architecture of a raw binary executable.
    70  		rawArch bin.Arch
    71  		// rawEntry specifies the entry point of a raw binary executable.
    72  		rawEntry bin.Address
    73  		// rawBase specifies the base address of a raw binary executable.
    74  		rawBase bin.Address
    75  	)
    76  	flag.Usage = usage
    77  	flag.Var(&blockAddr, "block", "basic block address to lift")
    78  	flag.Var(&firstAddr, "first", "first function address to lift")
    79  	flag.Var(&funcAddr, "func", "function address to lift")
    80  	flag.Var(&lastAddr, "last", "last function address to lift")
    81  	flag.StringVar(&output, "o", "", "output path")
    82  	flag.BoolVar(&quiet, "q", false, "suppress non-error messages")
    83  	flag.BoolVar(&cfgonly, "cfg-only", false, "output minimal LLVM IR needed for CFG generation")
    84  	flag.Var(&rawArch, "raw", "machine architecture of raw binary executable (x86_32, x86_64, PowerPC_32, ...)")
    85  	flag.Var(&rawEntry, "rawentry", "entry point of raw binary executable")
    86  	flag.Var(&rawBase, "rawbase", "base address of raw binary executable")
    87  	flag.Parse()
    88  	if flag.NArg() != 1 {
    89  		flag.Usage()
    90  		os.Exit(1)
    91  	}
    92  	binPath := flag.Arg(0)
    93  	// Mute debug and warning messages if `-q` is set.
    94  	if quiet {
    95  		dbg.SetOutput(ioutil.Discard)
    96  		warn.SetOutput(ioutil.Discard)
    97  	}
    98  
    99  	// Prepare x86 to LLVM IR lifter for the binary executable.
   100  	l, err := newLifter(binPath, rawArch, rawEntry, rawBase)
   101  	if err != nil {
   102  		log.Fatalf("%+v", err)
   103  	}
   104  
   105  	// Lift basic block.
   106  	if blockAddr != 0 {
   107  		block, err := l.DecodeBlock(blockAddr)
   108  		if err != nil {
   109  			log.Fatalf("%+v", err)
   110  		}
   111  		_ = block
   112  		return
   113  	}
   114  
   115  	// Lift function specified by `-func` flag.
   116  	var funcAddrs bin.Addresses
   117  	if funcAddr != 0 {
   118  		funcAddrs = []bin.Address{funcAddr}
   119  	} else {
   120  		for _, funcAddr := range l.FuncAddrs {
   121  			if firstAddr != 0 && funcAddr < firstAddr {
   122  				// skip functions before first address.
   123  				continue
   124  			}
   125  			if lastAddr != 0 && funcAddr >= lastAddr {
   126  				// skip functions after last address.
   127  				break
   128  			}
   129  			funcAddrs = append(funcAddrs, funcAddr)
   130  		}
   131  	}
   132  
   133  	// Create function lifters.
   134  	for _, funcAddr := range funcAddrs {
   135  		asmFunc, err := l.DecodeFunc(funcAddr)
   136  		if err != nil {
   137  			log.Fatalf("%+v", err)
   138  		}
   139  		f := l.NewFunc(asmFunc)
   140  		l.Funcs[funcAddr] = f
   141  	}
   142  
   143  	// Lift functions.
   144  	for i, funcAddr := range funcAddrs {
   145  		if i != 0 {
   146  			dbg.Println()
   147  		}
   148  		f, ok := l.Funcs[funcAddr]
   149  		if !ok {
   150  			continue
   151  		}
   152  		f.Lift()
   153  		dbg.Println(f)
   154  	}
   155  
   156  	// Store LLVM IR output.
   157  	w := os.Stdout
   158  	if len(output) > 0 {
   159  		f, err := os.Create(output)
   160  		if err != nil {
   161  			log.Fatal(err)
   162  		}
   163  		defer f.Close()
   164  		w = f
   165  	}
   166  	var funcs []*ir.Func
   167  	var allFuncAddrs bin.Addresses
   168  	for funcAddr := range l.Funcs {
   169  		allFuncAddrs = append(allFuncAddrs, funcAddr)
   170  	}
   171  	sort.Sort(allFuncAddrs)
   172  	for _, funcAddr := range allFuncAddrs {
   173  		f := l.Funcs[funcAddr]
   174  		funcs = append(funcs, f.Func)
   175  	}
   176  	var globals []*ir.Global
   177  	var globalAddrs bin.Addresses
   178  	for globalAddr := range l.Globals {
   179  		globalAddrs = append(globalAddrs, globalAddr)
   180  	}
   181  	sort.Sort(globalAddrs)
   182  	for _, globalAddr := range globalAddrs {
   183  		g := l.Globals[globalAddr]
   184  		globals = append(globals, g)
   185  	}
   186  	m := &ir.Module{
   187  		TypeDefs: l.TypeDefs,
   188  		Globals:  globals,
   189  		Funcs:    funcs,
   190  	}
   191  	if cfgonly {
   192  		pruneModule(m)
   193  	}
   194  	if _, err := fmt.Fprintln(w, m); err != nil {
   195  		log.Fatalf("%+v", err)
   196  	}
   197  
   198  	// Create call graph.
   199  	if err := genCallGraph(l.Funcs); err != nil {
   200  		log.Fatalf("%+v", err)
   201  	}
   202  }
   203  
   204  // newLifter returns a new x86 to LLVM IR lifter for the given binary
   205  // executable.
   206  func newLifter(binPath string, rawArch bin.Arch, rawEntry, rawBase bin.Address) (*x86.Lifter, error) {
   207  	// Parse raw binary executable.
   208  	if rawArch != 0 {
   209  		file, err := raw.ParseFile(binPath, rawArch)
   210  		if err != nil {
   211  			return nil, errors.WithStack(err)
   212  		}
   213  		file.Entry = rawEntry
   214  		file.Sections[0].Addr = rawBase
   215  		return x86.NewLifter(file)
   216  	}
   217  	// Parse binary executable.
   218  	file, err := bin.ParseFile(binPath)
   219  	if err != nil {
   220  		return nil, errors.WithStack(err)
   221  	}
   222  	return x86.NewLifter(file)
   223  }
   224  
   225  // pruneModule prunes the LLVM IR module to the minimal needed for CFG
   226  // generation.
   227  func pruneModule(m *ir.Module) {
   228  	for _, f := range m.Funcs {
   229  		condNum := 0
   230  		xNum := 0
   231  		if len(f.Blocks) == 0 {
   232  			continue
   233  		}
   234  		entry := f.Blocks[0]
   235  		entry.Insts = entry.Insts[:0]
   236  		for _, block := range f.Blocks {
   237  			// Prune instructions.
   238  			block.Insts = block.Insts[:0]
   239  			switch term := block.Term.(type) {
   240  			case *ir.TermRet:
   241  				if term.X != nil {
   242  					term.X = constant.NewZeroInitializer(f.Sig.RetType)
   243  				}
   244  			case *ir.TermBr:
   245  				// nothing to do.
   246  			case *ir.TermCondBr:
   247  				// Allocate dummy condition variable.
   248  				condName := fmt.Sprintf("c%d", condNum)
   249  				condNum++
   250  				condMem := entry.NewAlloca(types.I1)
   251  				condMem.SetName(condName + "_mem")
   252  				cond := block.NewLoad(condMem)
   253  				cond.SetName(condName)
   254  				term.Cond = cond
   255  			case *ir.TermSwitch:
   256  				// Allocate dummy control variable.
   257  				xName := fmt.Sprintf("x%d", xNum)
   258  				xNum++
   259  				xMem := entry.NewAlloca(term.X.Type())
   260  				xMem.SetName(xName + "_mem")
   261  				x := block.NewLoad(xMem)
   262  				x.SetName(xName)
   263  				term.X = x
   264  			//case *ir.TermIndirectBr:
   265  			//case *ir.TermInvoke:
   266  			//case *ir.TermResume:
   267  			//case *ir.TermCatchSwitch:
   268  			//case *ir.TermCatchRet:
   269  			//case *ir.TermCleanupRet:
   270  			case *ir.TermUnreachable:
   271  				// nothing to do.
   272  			default:
   273  				panic(fmt.Errorf("support for terminator %T not yet implemented", term))
   274  			}
   275  		}
   276  	}
   277  }