github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/cmd/bin2ll/main.go (about) 1 // The bin2ll tool lifts binary executables to equivalent LLVM IR assembly 2 // (*.exe -> *.ll). 3 package main 4 5 import ( 6 "flag" 7 "fmt" 8 "io/ioutil" 9 "log" 10 "os" 11 "sort" 12 13 "github.com/decomp/exp/bin" 14 _ "github.com/decomp/exp/bin/elf" // register ELF decoder 15 _ "github.com/decomp/exp/bin/pe" // register PE decoder 16 _ "github.com/decomp/exp/bin/pef" // register PEF decoder 17 "github.com/decomp/exp/bin/raw" 18 "github.com/decomp/exp/lift/x86" 19 "github.com/llir/llvm/ir" 20 "github.com/llir/llvm/ir/constant" 21 "github.com/llir/llvm/ir/types" 22 "github.com/mewkiz/pkg/term" 23 "github.com/pkg/errors" 24 ) 25 26 // Loggers. 27 var ( 28 // dbg represents a logger with the "bin2ll:" prefix, which logs debug 29 // messages to standard error. 30 dbg = log.New(os.Stderr, term.MagentaBold("bin2ll:")+" ", 0) 31 // warn represents a logger with the "warning:" prefix, which logs warning 32 // messages to standard error. 33 warn = log.New(os.Stderr, term.RedBold("warning:")+" ", 0) 34 ) 35 36 func usage() { 37 const use = ` 38 Lift binary executables to equivalent LLVM IR assembly (*.exe -> *.ll). 39 40 Usage: 41 42 bin2ll [OPTION]... FILE 43 44 Flags: 45 ` 46 fmt.Fprint(os.Stderr, use[1:]) 47 flag.PrintDefaults() 48 } 49 50 func main() { 51 // Parse command line arguments. 52 var ( 53 // blockAddr specifies a basic block address to lift. 54 blockAddr bin.Address 55 // TODO: Remove -first flag and firstAddr. 56 // firstAddr specifies the first function address to lift. 57 firstAddr bin.Address 58 // funcAddr specifies a function address to lift. 59 funcAddr bin.Address 60 // TODO: Remove -last flag and lastAddr. 61 // lastAddr specifies the last function address to disassemble. 62 lastAddr bin.Address 63 // output specifies the output path. 64 output string 65 // cfgonly specifies whether to output minimal LLVM IR needed for CFG generation. 66 cfgonly bool 67 // quiet specifies whether to suppress non-error messages. 68 quiet bool 69 // rawArch specifies the machine architecture of a raw binary executable. 70 rawArch bin.Arch 71 // rawEntry specifies the entry point of a raw binary executable. 72 rawEntry bin.Address 73 // rawBase specifies the base address of a raw binary executable. 74 rawBase bin.Address 75 ) 76 flag.Usage = usage 77 flag.Var(&blockAddr, "block", "basic block address to lift") 78 flag.Var(&firstAddr, "first", "first function address to lift") 79 flag.Var(&funcAddr, "func", "function address to lift") 80 flag.Var(&lastAddr, "last", "last function address to lift") 81 flag.StringVar(&output, "o", "", "output path") 82 flag.BoolVar(&quiet, "q", false, "suppress non-error messages") 83 flag.BoolVar(&cfgonly, "cfg-only", false, "output minimal LLVM IR needed for CFG generation") 84 flag.Var(&rawArch, "raw", "machine architecture of raw binary executable (x86_32, x86_64, PowerPC_32, ...)") 85 flag.Var(&rawEntry, "rawentry", "entry point of raw binary executable") 86 flag.Var(&rawBase, "rawbase", "base address of raw binary executable") 87 flag.Parse() 88 if flag.NArg() != 1 { 89 flag.Usage() 90 os.Exit(1) 91 } 92 binPath := flag.Arg(0) 93 // Mute debug and warning messages if `-q` is set. 94 if quiet { 95 dbg.SetOutput(ioutil.Discard) 96 warn.SetOutput(ioutil.Discard) 97 } 98 99 // Prepare x86 to LLVM IR lifter for the binary executable. 100 l, err := newLifter(binPath, rawArch, rawEntry, rawBase) 101 if err != nil { 102 log.Fatalf("%+v", err) 103 } 104 105 // Lift basic block. 106 if blockAddr != 0 { 107 block, err := l.DecodeBlock(blockAddr) 108 if err != nil { 109 log.Fatalf("%+v", err) 110 } 111 _ = block 112 return 113 } 114 115 // Lift function specified by `-func` flag. 116 var funcAddrs bin.Addresses 117 if funcAddr != 0 { 118 funcAddrs = []bin.Address{funcAddr} 119 } else { 120 for _, funcAddr := range l.FuncAddrs { 121 if firstAddr != 0 && funcAddr < firstAddr { 122 // skip functions before first address. 123 continue 124 } 125 if lastAddr != 0 && funcAddr >= lastAddr { 126 // skip functions after last address. 127 break 128 } 129 funcAddrs = append(funcAddrs, funcAddr) 130 } 131 } 132 133 // Create function lifters. 134 for _, funcAddr := range funcAddrs { 135 asmFunc, err := l.DecodeFunc(funcAddr) 136 if err != nil { 137 log.Fatalf("%+v", err) 138 } 139 f := l.NewFunc(asmFunc) 140 l.Funcs[funcAddr] = f 141 } 142 143 // Lift functions. 144 for i, funcAddr := range funcAddrs { 145 if i != 0 { 146 dbg.Println() 147 } 148 f, ok := l.Funcs[funcAddr] 149 if !ok { 150 continue 151 } 152 f.Lift() 153 dbg.Println(f) 154 } 155 156 // Store LLVM IR output. 157 w := os.Stdout 158 if len(output) > 0 { 159 f, err := os.Create(output) 160 if err != nil { 161 log.Fatal(err) 162 } 163 defer f.Close() 164 w = f 165 } 166 var funcs []*ir.Func 167 var allFuncAddrs bin.Addresses 168 for funcAddr := range l.Funcs { 169 allFuncAddrs = append(allFuncAddrs, funcAddr) 170 } 171 sort.Sort(allFuncAddrs) 172 for _, funcAddr := range allFuncAddrs { 173 f := l.Funcs[funcAddr] 174 funcs = append(funcs, f.Func) 175 } 176 var globals []*ir.Global 177 var globalAddrs bin.Addresses 178 for globalAddr := range l.Globals { 179 globalAddrs = append(globalAddrs, globalAddr) 180 } 181 sort.Sort(globalAddrs) 182 for _, globalAddr := range globalAddrs { 183 g := l.Globals[globalAddr] 184 globals = append(globals, g) 185 } 186 m := &ir.Module{ 187 TypeDefs: l.TypeDefs, 188 Globals: globals, 189 Funcs: funcs, 190 } 191 if cfgonly { 192 pruneModule(m) 193 } 194 if _, err := fmt.Fprintln(w, m); err != nil { 195 log.Fatalf("%+v", err) 196 } 197 198 // Create call graph. 199 if err := genCallGraph(l.Funcs); err != nil { 200 log.Fatalf("%+v", err) 201 } 202 } 203 204 // newLifter returns a new x86 to LLVM IR lifter for the given binary 205 // executable. 206 func newLifter(binPath string, rawArch bin.Arch, rawEntry, rawBase bin.Address) (*x86.Lifter, error) { 207 // Parse raw binary executable. 208 if rawArch != 0 { 209 file, err := raw.ParseFile(binPath, rawArch) 210 if err != nil { 211 return nil, errors.WithStack(err) 212 } 213 file.Entry = rawEntry 214 file.Sections[0].Addr = rawBase 215 return x86.NewLifter(file) 216 } 217 // Parse binary executable. 218 file, err := bin.ParseFile(binPath) 219 if err != nil { 220 return nil, errors.WithStack(err) 221 } 222 return x86.NewLifter(file) 223 } 224 225 // pruneModule prunes the LLVM IR module to the minimal needed for CFG 226 // generation. 227 func pruneModule(m *ir.Module) { 228 for _, f := range m.Funcs { 229 condNum := 0 230 xNum := 0 231 if len(f.Blocks) == 0 { 232 continue 233 } 234 entry := f.Blocks[0] 235 entry.Insts = entry.Insts[:0] 236 for _, block := range f.Blocks { 237 // Prune instructions. 238 block.Insts = block.Insts[:0] 239 switch term := block.Term.(type) { 240 case *ir.TermRet: 241 if term.X != nil { 242 term.X = constant.NewZeroInitializer(f.Sig.RetType) 243 } 244 case *ir.TermBr: 245 // nothing to do. 246 case *ir.TermCondBr: 247 // Allocate dummy condition variable. 248 condName := fmt.Sprintf("c%d", condNum) 249 condNum++ 250 condMem := entry.NewAlloca(types.I1) 251 condMem.SetName(condName + "_mem") 252 cond := block.NewLoad(condMem) 253 cond.SetName(condName) 254 term.Cond = cond 255 case *ir.TermSwitch: 256 // Allocate dummy control variable. 257 xName := fmt.Sprintf("x%d", xNum) 258 xNum++ 259 xMem := entry.NewAlloca(term.X.Type()) 260 xMem.SetName(xName + "_mem") 261 x := block.NewLoad(xMem) 262 x.SetName(xName) 263 term.X = x 264 //case *ir.TermIndirectBr: 265 //case *ir.TermInvoke: 266 //case *ir.TermResume: 267 //case *ir.TermCatchSwitch: 268 //case *ir.TermCatchRet: 269 //case *ir.TermCleanupRet: 270 case *ir.TermUnreachable: 271 // nothing to do. 272 default: 273 panic(fmt.Errorf("support for terminator %T not yet implemented", term)) 274 } 275 } 276 } 277 }