github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/lift/x86/lift.go (about)

     1  // Package x86 implements x86 to LLVM IR lifting.
     2  package x86
     3  
     4  import (
     5  	"fmt"
     6  	"log"
     7  	"os"
     8  
     9  	"github.com/decomp/exp/bin"
    10  	"github.com/decomp/exp/disasm/x86"
    11  	"github.com/llir/llvm/asm"
    12  	"github.com/llir/llvm/ir"
    13  	"github.com/llir/llvm/ir/metadata"
    14  	"github.com/llir/llvm/ir/types"
    15  	"github.com/mewkiz/pkg/osutil"
    16  	"github.com/mewkiz/pkg/term"
    17  	"github.com/pkg/errors"
    18  )
    19  
    20  // TODO: Remove loggers once the library matures.
    21  
    22  // Loggers.
    23  var (
    24  	// dbg represents a logger with the "lift:" prefix, which logs debug
    25  	// messages to standard error.
    26  	dbg = log.New(os.Stderr, term.CyanBold("lift:")+" ", 0)
    27  	// warn represents a logger with the "warning:" prefix, which logs warning
    28  	// messages to standard error.
    29  	warn = log.New(os.Stderr, term.RedBold("warning:")+" ", 0)
    30  )
    31  
// A Lifter tracks information required to lift the assembly of a binary
// executable.
//
// Data should only be written to this structure during initialization. After
// initialization the structure is considered in read-only mode to allow for
// concurrent lifting of functions.
//
// NewLifter is the constructor visible in this file; it allocates every map
// field before populating it.
type Lifter struct {
	// Embedded x86 disassembler; its fields and methods (e.g. File) are
	// promoted onto the Lifter.
	*x86.Disasm
	// Type definitions; NewLifter copies these from the parsed LLVM IR module.
	TypeDefs []types.Type
	// Functions, keyed by address. NewLifter fills this from the "addr"
	// metadata of functions in the LLVM IR module, and from the import and
	// export tables of the binary for addresses not already present.
	Funcs map[bin.Address]*Func
	// Map from function name to function. May also contain external functions
	// without associated virtual addresses (e.g. loaded using GetProcAddress).
	//
	// NewLifter registers every function of the LLVM IR module here, including
	// those lacking "addr" metadata; stubs synthesized for imports and exports
	// are not added.
	FuncByName map[string]*ir.Func
	// Global variables, keyed by the address parsed from their "addr" metadata.
	Globals map[bin.Address]*ir.Global
}
    50  
    51  // NewLifter creates a new Lifter for accessing the assembly instructions of the
    52  // given binary executable, and the information contained within associated JSON
    53  // and LLVM IR files.
    54  //
    55  // Associated files of the generic disassembler.
    56  //
    57  //    funcs.json
    58  //    blocks.json
    59  //    tables.json
    60  //    chunks.json
    61  //    data.json
    62  //
    63  // Associated files of the x86 disassembler.
    64  //
    65  //    contexts.json
    66  //
    67  // Associated files of the x86 to LLVM IR lifter.
    68  //
    69  //    info.ll
    70  func NewLifter(file *bin.File) (*Lifter, error) {
    71  	// Prepare x86 to LLVM IR lifter.
    72  	dis, err := x86.NewDisasm(file)
    73  	if err != nil {
    74  		return nil, errors.WithStack(err)
    75  	}
    76  	l := &Lifter{
    77  		Disasm:     dis,
    78  		Funcs:      make(map[bin.Address]*Func),
    79  		FuncByName: make(map[string]*ir.Func),
    80  		Globals:    make(map[bin.Address]*ir.Global),
    81  	}
    82  
    83  	// Parse associated LLVM IR information.
    84  	llPath := "info.ll"
    85  	module, err := parseModule(llPath)
    86  	if err != nil {
    87  		return nil, errors.WithStack(err)
    88  	}
    89  
    90  	// Parse types.
    91  	l.TypeDefs = module.TypeDefs
    92  
    93  	// Parse globals.
    94  	for _, g := range module.Globals {
    95  		node, ok := findMetadataAttachment(g.Metadata, "addr")
    96  		if !ok {
    97  			return nil, errors.Errorf(`unable to locate "addr" metadata for global variable %q`, g.Ident())
    98  		}
    99  		addr, err := parseMetadataAddr(node)
   100  		if err != nil {
   101  			return nil, errors.WithStack(err)
   102  		}
   103  		l.Globals[addr] = g
   104  	}
   105  
   106  	// Parse function signatures.
   107  	for _, f := range module.Funcs {
   108  		l.FuncByName[f.Name()] = f
   109  		node, ok := findMetadataAttachment(f.Metadata, "addr")
   110  		if !ok {
   111  			warn.Printf(`unable to locate "addr" metadata for function %q; potentially external function without associated virtual addresses (e.g. loaded with GetProcAddress)`, f.Ident())
   112  			continue
   113  		}
   114  		entry, err := parseMetadataAddr(node)
   115  		if err != nil {
   116  			return nil, errors.WithStack(err)
   117  		}
   118  		fn := &Func{
   119  			Func: f,
   120  		}
   121  		l.Funcs[entry] = fn
   122  	}
   123  
   124  	// Parse imports.
   125  	addFunc := func(entry bin.Address, name string) {
   126  		// TODO: Mark function signature as unknown (using metadata), so that type
   127  		// analysis may replace it.
   128  		name = fmt.Sprintf("_imp_%s", name)
   129  		sig := types.NewFunc(types.Void)
   130  		typ := types.NewPointer(sig)
   131  		f := &ir.Func{
   132  			Typ: typ,
   133  			Sig: sig,
   134  		}
   135  		f.SetName(name)
   136  		md := &metadata.Attachment{
   137  			Name: "addr",
   138  			Node: &metadata.Tuple{
   139  				Fields: []metadata.Field{&metadata.String{Value: entry.String()}},
   140  			},
   141  		}
   142  		f.Metadata = append(f.Metadata, md)
   143  		fn := &Func{
   144  			Func: f,
   145  		}
   146  		l.Funcs[entry] = fn
   147  	}
   148  	for entry, fname := range l.File.Imports {
   149  		if _, ok := l.Funcs[entry]; ok {
   150  			// Skip import if already specified through function signature.
   151  			continue
   152  		}
   153  		dbg.Printf("function import at %v: %v\n", entry, fname)
   154  		addFunc(entry, fname)
   155  	}
   156  
   157  	// Parse exports.
   158  	for entry, fname := range dis.File.Exports {
   159  		if _, ok := l.Funcs[entry]; ok {
   160  			// Skip export if already specified through function signature.
   161  			continue
   162  		}
   163  		addFunc(entry, fname)
   164  	}
   165  
   166  	return l, nil
   167  }
   168  
   169  // ### [ Helper functions ] ####################################################
   170  
   171  // parseModule parses and returns the given LLVM IR module.
   172  func parseModule(llPath string) (*ir.Module, error) {
   173  	if !osutil.Exists(llPath) {
   174  		warn.Printf("unable to locate LLVM IR file %q", llPath)
   175  		return &ir.Module{}, nil
   176  	}
   177  	return asm.ParseFile(llPath)
   178  }
   179  
   180  // findMetadataAttachment locates the metadata node of the given metadata
   181  // attachment. The boolean return value indicates success.
   182  func findMetadataAttachment(mds []*metadata.Attachment, name string) (metadata.MDNode, bool) {
   183  	for _, md := range mds {
   184  		if md.Name == name {
   185  			return md.Node, true
   186  		}
   187  	}
   188  	return nil, false
   189  }
   190  
   191  // parseMetadataAddr returns the address corresponding to the given "addr"
   192  // metadata node.
   193  func parseMetadataAddr(node metadata.MDNode) (bin.Address, error) {
   194  	switch node := node.(type) {
   195  	case *metadata.Tuple:
   196  		if len(node.Fields) != 1 {
   197  			return 0, errors.Errorf(`invalid number of fields in "addr" metadata node, expected 1, got %d`, len(node.Fields))
   198  		}
   199  		field, ok := node.Fields[0].(*metadata.String)
   200  		if !ok {
   201  			panic(fmt.Errorf("invalid metadata field type; expected *metadata.String, got %T", node.Fields[0]))
   202  		}
   203  		var addr bin.Address
   204  		if err := addr.Set(field.Value); err != nil {
   205  			return 0, errors.WithStack(err)
   206  		}
   207  		return addr, nil
   208  	default:
   209  		panic(fmt.Errorf("support for metadata node %T not yet implemented", node))
   210  	}
   211  }