github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/lift/x86/lift.go (about) 1 // Package x86 implements x86 to LLVM IR lifting. 2 package x86 3 4 import ( 5 "fmt" 6 "log" 7 "os" 8 9 "github.com/decomp/exp/bin" 10 "github.com/decomp/exp/disasm/x86" 11 "github.com/llir/llvm/asm" 12 "github.com/llir/llvm/ir" 13 "github.com/llir/llvm/ir/metadata" 14 "github.com/llir/llvm/ir/types" 15 "github.com/mewkiz/pkg/osutil" 16 "github.com/mewkiz/pkg/term" 17 "github.com/pkg/errors" 18 ) 19 20 // TODO: Remove loggers once the library matures. 21 22 // Loggers. 23 var ( 24 // dbg represents a logger with the "lift:" prefix, which logs debug 25 // messages to standard error. 26 dbg = log.New(os.Stderr, term.CyanBold("lift:")+" ", 0) 27 // warn represents a logger with the "warning:" prefix, which logs warning 28 // messages to standard error. 29 warn = log.New(os.Stderr, term.RedBold("warning:")+" ", 0) 30 ) 31 32 // A Lifter tracks information required to lift the assembly of a binary 33 // executable. 34 // 35 // Data should only be written to this structure during initialization. After 36 // initialization the structure is considered in read-only mode to allow for 37 // concurrent lifting of functions. 38 type Lifter struct { 39 *x86.Disasm 40 // Type definitions. 41 TypeDefs []types.Type 42 // Functions. 43 Funcs map[bin.Address]*Func 44 // Map from function name to function. May also contain external functions 45 // without associated virtual addresses (e.g. loaded using GetProcAddress). 46 FuncByName map[string]*ir.Func 47 // Global variables. 48 Globals map[bin.Address]*ir.Global 49 } 50 51 // NewLifter creates a new Lifter for accessing the assembly instructions of the 52 // given binary executable, and the information contained within associated JSON 53 // and LLVM IR files. 54 // 55 // Associated files of the generic disassembler. 56 // 57 // funcs.json 58 // blocks.json 59 // tables.json 60 // chunks.json 61 // data.json 62 // 63 // Associated files of the x86 disassembler. 64 // 65 // contexts.json 66 // 67 // Associated files of the x86 to LLVM IR lifter. 68 // 69 // info.ll 70 func NewLifter(file *bin.File) (*Lifter, error) { 71 // Prepare x86 to LLVM IR lifter. 72 dis, err := x86.NewDisasm(file) 73 if err != nil { 74 return nil, errors.WithStack(err) 75 } 76 l := &Lifter{ 77 Disasm: dis, 78 Funcs: make(map[bin.Address]*Func), 79 FuncByName: make(map[string]*ir.Func), 80 Globals: make(map[bin.Address]*ir.Global), 81 } 82 83 // Parse associated LLVM IR information. 84 llPath := "info.ll" 85 module, err := parseModule(llPath) 86 if err != nil { 87 return nil, errors.WithStack(err) 88 } 89 90 // Parse types. 91 l.TypeDefs = module.TypeDefs 92 93 // Parse globals. 94 for _, g := range module.Globals { 95 node, ok := findMetadataAttachment(g.Metadata, "addr") 96 if !ok { 97 return nil, errors.Errorf(`unable to locate "addr" metadata for global variable %q`, g.Ident()) 98 } 99 addr, err := parseMetadataAddr(node) 100 if err != nil { 101 return nil, errors.WithStack(err) 102 } 103 l.Globals[addr] = g 104 } 105 106 // Parse function signatures. 107 for _, f := range module.Funcs { 108 l.FuncByName[f.Name()] = f 109 node, ok := findMetadataAttachment(f.Metadata, "addr") 110 if !ok { 111 warn.Printf(`unable to locate "addr" metadata for function %q; potentially external function without associated virtual addresses (e.g. loaded with GetProcAddress)`, f.Ident()) 112 continue 113 } 114 entry, err := parseMetadataAddr(node) 115 if err != nil { 116 return nil, errors.WithStack(err) 117 } 118 fn := &Func{ 119 Func: f, 120 } 121 l.Funcs[entry] = fn 122 } 123 124 // Parse imports. 125 addFunc := func(entry bin.Address, name string) { 126 // TODO: Mark function signature as unknown (using metadata), so that type 127 // analysis may replace it. 128 name = fmt.Sprintf("_imp_%s", name) 129 sig := types.NewFunc(types.Void) 130 typ := types.NewPointer(sig) 131 f := &ir.Func{ 132 Typ: typ, 133 Sig: sig, 134 } 135 f.SetName(name) 136 md := &metadata.Attachment{ 137 Name: "addr", 138 Node: &metadata.Tuple{ 139 Fields: []metadata.Field{&metadata.String{Value: entry.String()}}, 140 }, 141 } 142 f.Metadata = append(f.Metadata, md) 143 fn := &Func{ 144 Func: f, 145 } 146 l.Funcs[entry] = fn 147 } 148 for entry, fname := range l.File.Imports { 149 if _, ok := l.Funcs[entry]; ok { 150 // Skip import if already specified through function signature. 151 continue 152 } 153 dbg.Printf("function import at %v: %v\n", entry, fname) 154 addFunc(entry, fname) 155 } 156 157 // Parse exports. 158 for entry, fname := range dis.File.Exports { 159 if _, ok := l.Funcs[entry]; ok { 160 // Skip export if already specified through function signature. 161 continue 162 } 163 addFunc(entry, fname) 164 } 165 166 return l, nil 167 } 168 169 // ### [ Helper functions ] #################################################### 170 171 // parseModule parses and returns the given LLVM IR module. 172 func parseModule(llPath string) (*ir.Module, error) { 173 if !osutil.Exists(llPath) { 174 warn.Printf("unable to locate LLVM IR file %q", llPath) 175 return &ir.Module{}, nil 176 } 177 return asm.ParseFile(llPath) 178 } 179 180 // findMetadataAttachment locates the metadata node of the given metadata 181 // attachment. The boolean return value indicates success. 182 func findMetadataAttachment(mds []*metadata.Attachment, name string) (metadata.MDNode, bool) { 183 for _, md := range mds { 184 if md.Name == name { 185 return md.Node, true 186 } 187 } 188 return nil, false 189 } 190 191 // parseMetadataAddr returns the address corresponding to the given "addr" 192 // metadata node. 193 func parseMetadataAddr(node metadata.MDNode) (bin.Address, error) { 194 switch node := node.(type) { 195 case *metadata.Tuple: 196 if len(node.Fields) != 1 { 197 return 0, errors.Errorf(`invalid number of fields in "addr" metadata node, expected 1, got %d`, len(node.Fields)) 198 } 199 field, ok := node.Fields[0].(*metadata.String) 200 if !ok { 201 panic(fmt.Errorf("invalid metadata field type; expected *metadata.String, got %T", node.Fields[0])) 202 } 203 var addr bin.Address 204 if err := addr.Set(field.Value); err != nil { 205 return 0, errors.WithStack(err) 206 } 207 return addr, nil 208 default: 209 panic(fmt.Errorf("support for metadata node %T not yet implemented", node)) 210 } 211 }