github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/bin/elf/elf.go (about) 1 // Package elf provides access to Executable and Linkable Format (ELF) files. 2 package elf 3 4 import ( 5 "bytes" 6 "debug/elf" 7 "encoding/binary" 8 "fmt" 9 "io" 10 "io/ioutil" 11 "os" 12 "sort" 13 14 "github.com/decomp/exp/bin" 15 "github.com/pkg/errors" 16 ) 17 18 // Register ELF format. 19 func init() { 20 // Executable and Linkable Format (ELF) 21 // 22 // 7F 45 4C 46 |.ELF| 23 const magic = "\x7FELF" 24 bin.RegisterFormat("elf", magic, Parse) 25 } 26 27 // ParseFile parses the given ELF binary executable, reading from path. 28 func ParseFile(path string) (*bin.File, error) { 29 f, err := os.Open(path) 30 if err != nil { 31 return nil, errors.WithStack(err) 32 } 33 defer f.Close() 34 return Parse(f) 35 } 36 37 // Parse parses the given ELF binary executable, reading from r. 38 // 39 // Users are responsible for closing r. 40 func Parse(r io.ReaderAt) (*bin.File, error) { 41 // Open ELF file. 42 f, err := elf.NewFile(r) 43 if err != nil { 44 return nil, errors.WithStack(err) 45 } 46 47 // Parse machine architecture. 48 file := &bin.File{ 49 Imports: make(map[bin.Address]string), 50 Exports: make(map[bin.Address]string), 51 } 52 switch f.Machine { 53 case elf.EM_386: 54 file.Arch = bin.ArchX86_32 55 case elf.EM_X86_64: 56 file.Arch = bin.ArchX86_64 57 case elf.EM_PPC: 58 file.Arch = bin.ArchPowerPC_32 59 } 60 61 // Parse entry address. 62 file.Entry = bin.Address(f.Entry) 63 64 // Parse sections. 65 for _, s := range f.Sections { 66 perm := parseSectFlags(s.Flags) 67 var data []byte 68 if s.Type != elf.SHT_NOBITS { 69 data, err = s.Data() 70 if err != nil { 71 return nil, errors.WithStack(err) 72 } 73 if len(data) == 0 { 74 continue 75 } 76 } 77 sect := &bin.Section{ 78 Name: s.Name, 79 Addr: bin.Address(s.Addr), 80 Offset: s.Offset, 81 FileSize: int(s.FileSize), 82 MemSize: int(s.Size), 83 Data: data, 84 Perm: perm, 85 } 86 file.Sections = append(file.Sections, sect) 87 } 88 89 // Sort sections in ascending order. 90 less := func(i, j int) bool { 91 if file.Sections[i].Addr == file.Sections[j].Addr { 92 if len(file.Sections[i].Data) > len(file.Sections[j].Data) { 93 // prioritize longer sections with identical addresses. 94 return true 95 } 96 return file.Sections[i].Name < file.Sections[j].Name 97 } 98 return file.Sections[i].Addr < file.Sections[j].Addr 99 } 100 sort.Slice(file.Sections, less) 101 102 // Parse segments. 103 var segments []*bin.Section 104 for _, prog := range f.Progs { 105 if prog.Type != elf.PT_LOAD { 106 continue 107 } 108 r := prog.Open() 109 data, err := ioutil.ReadAll(r) 110 if err != nil { 111 return nil, errors.WithStack(err) 112 } 113 perm := parseProgFlags(prog.Flags) 114 seg := &bin.Section{ 115 Addr: bin.Address(prog.Vaddr), 116 Offset: prog.Off, 117 Data: data, 118 FileSize: int(prog.Filesz), 119 MemSize: int(prog.Memsz), 120 Perm: perm, 121 } 122 segments = append(segments, seg) 123 } 124 125 // Sort segments in ascending order. 126 sort.Slice(segments, less) 127 128 // Fix section permissions. 129 if len(segments) > 0 { 130 for _, sect := range file.Sections { 131 for _, seg := range segments { 132 end := seg.Addr + bin.Address(len(seg.Data)) 133 if seg.Addr <= sect.Addr && sect.Addr < end { 134 if sect.Perm == 0 { 135 sect.Perm = seg.Perm 136 } 137 } 138 } 139 } 140 } 141 142 // Append segments as sections. 143 file.Sections = append(file.Sections, segments...) 144 145 // Sort sections (and segments) in ascending order. 146 sort.Slice(segments, less) 147 148 // TODO: Parse imports. 149 150 // Parse imports. 151 gotplt := f.Section(".got.plt") 152 // TODO: Add support for reading .got.plt from segments when section 153 // information is missing. Locate using DT_PLTGOT in .dynamic. 154 if gotplt != nil { 155 gotpltData, err := gotplt.Data() 156 if err != nil { 157 return nil, errors.WithStack(err) 158 } 159 dynSyms, err := f.DynamicSymbols() 160 if err != nil { 161 return nil, errors.WithStack(err) 162 } 163 // Program Linkage Table example. 164 // 165 // plt: 166 // 167 // ... 168 // 169 // .printf: 170 // jmp [rel (BASE_DATA - BASE_CODE) + got_plt.printf] 171 // 172 // .resolve_printf: 173 // push QWORD dynsym.printf_idx 174 // jmp NEAR .resolve 175 // jmp [rel (BASE_DATA - BASE_CODE) + got_plt.printf] 176 // 177 // ... 178 // 179 // ref: https://github.com/mewrev/dissection/blob/master/elf.asm 180 181 // The length of the 32- and 64-bit JMP instruction. 182 // 183 // jmp [rel (BASE_DATA - BASE_CODE) + got_plt.printf] 184 const jmplen = 6 185 switch file.Arch.BitSize() { 186 case 32: 187 // skip .got.plt:dynamic (4 bytes) 188 // skip .got.plt:link_map (4 bytes) 189 // skip .got.plt:dl_runtime_resolve (4 bytes) 190 r := bytes.NewReader(gotpltData[4+4+4:]) 191 for _, dynSym := range dynSyms { 192 var v uint32 193 if err := binary.Read(r, binary.LittleEndian, &v); err != nil { 194 if errors.Cause(err) == io.EOF { 195 break 196 } 197 return nil, errors.WithStack(err) 198 } 199 // v points to .plt:resolve_printf, and .plt:printf is at the jmp 200 // instruction directly preceding; thus subtract the length of the 201 // jmp instruction from v to arrive at .plt:printf. 202 addr := bin.Address(v) - jmplen 203 file.Imports[addr] = dynSym.Name 204 } 205 case 64: 206 // skip .got.plt:dynamic (8 bytes) 207 // skip .got.plt:link_map (8 bytes) 208 // skip .got.plt:dl_runtime_resolve (8 bytes) 209 r := bytes.NewReader(gotpltData[8+8+8:]) 210 for _, dynSym := range dynSyms { 211 var v uint64 212 if err := binary.Read(r, binary.LittleEndian, &v); err != nil { 213 if errors.Cause(err) == io.EOF { 214 break 215 } 216 return nil, errors.WithStack(err) 217 } 218 // v points to .plt:resolve_printf, and .plt:printf is at the jmp 219 // instruction directly preceding; thus subtract the length of the 220 // jmp instruction from v to arrive at .plt:printf. 221 addr := bin.Address(v) - jmplen 222 file.Imports[addr] = dynSym.Name 223 } 224 default: 225 panic(fmt.Errorf("support for CPU bit size %d not yet implemented", file.Arch.BitSize())) 226 } 227 } 228 229 // Parse exports. 230 symtab := f.Section(".symtab") 231 strtab := f.Section(".strtab") 232 if symtab != nil && strtab != nil { 233 symtabData, err := symtab.Data() 234 if err != nil { 235 return nil, errors.WithStack(err) 236 } 237 strtabData, err := strtab.Data() 238 if err != nil { 239 return nil, errors.WithStack(err) 240 } 241 r := bytes.NewReader(symtabData) 242 // undef specifies that a symbol is not associated with a specific 243 // section. 244 const undef = 0 245 switch file.Arch.BitSize() { 246 case 32: 247 // Sym32 represents a 32-bit symbol descriptor. 248 type Sym32 struct { 249 // Index into the symbol string table. 250 Name uint32 251 // Value of the associated symbol. Depending on the context, this can 252 // be an absolute value, an address, etc. 253 Value uint32 254 // Size in bytes; or 0 if the symbol has no size or an unknown size. 255 Size uint32 256 // Symbol type and binding information. 257 Info uint8 258 // Symbol visibility. 259 Visibility SymVisibility 260 // Section header table index relevant for the symbol. 261 SectHdrIndex uint16 262 } 263 for { 264 var sym Sym32 265 if err := binary.Read(r, binary.LittleEndian, &sym); err != nil { 266 if errors.Cause(err) == io.EOF { 267 break 268 } 269 return nil, errors.WithStack(err) 270 } 271 //pretty.Println("sym:", sym) 272 name := parseString(strtabData[sym.Name:]) 273 addr := bin.Address(sym.Value) 274 typ := SymType(sym.Info & 0x0F) 275 //bind := SymBind(sym.Info >> 4) 276 // TODO: Remove debug output. 277 //fmt.Println("name:", name) 278 //fmt.Println("addr:", addr) 279 //fmt.Println("size:", sym.Size) 280 //fmt.Println("typ:", typ) 281 //fmt.Println("bind:", bind) 282 //fmt.Println("visibility:", sym.Visibility) 283 //fmt.Println() 284 if typ == SymTypeFunc && sym.SectHdrIndex != undef { 285 file.Exports[addr] = name 286 } 287 } 288 case 64: 289 // Sym64 represents a 64-bit symbol descriptor. 290 type Sym64 struct { 291 // Index into the symbol string table. 292 Name uint32 293 // Symbol type and binding information. 294 Info uint8 295 // Symbol visibility. 296 Visibility SymVisibility 297 // Section header table index relevant for the symbol. 298 SectHdrIndex uint16 299 // Value of the associated symbol. Depending on the context, this can 300 // be an absolute value, an address, etc. 301 Value uint64 302 // Size in bytes; or 0 if the symbol has no size or an unknown size. 303 Size uint64 304 } 305 for { 306 var sym Sym64 307 if err := binary.Read(r, binary.LittleEndian, &sym); err != nil { 308 if errors.Cause(err) == io.EOF { 309 break 310 } 311 return nil, errors.WithStack(err) 312 } 313 //pretty.Println("sym:", sym) 314 name := parseString(strtabData[sym.Name:]) 315 addr := bin.Address(sym.Value) 316 typ := SymType(sym.Info & 0x0F) 317 //bind := SymBind(sym.Info >> 4) 318 // TODO: Remove debug output. 319 //fmt.Println("name:", name) 320 //fmt.Println("addr:", addr) 321 //fmt.Println("size:", sym.Size) 322 //fmt.Println("typ:", typ) 323 //fmt.Println("bind:", bind) 324 //fmt.Println("visibility:", sym.Visibility) 325 //fmt.Println() 326 if typ == SymTypeFunc && sym.SectHdrIndex != undef { 327 file.Exports[addr] = name 328 } 329 } 330 default: 331 panic(fmt.Errorf("support for CPU bit size %d not yet implemented", file.Arch.BitSize())) 332 } 333 } 334 335 return file, nil 336 } 337 338 // SymType specifies a symbol type. 339 type SymType uint8 340 341 // String returns the string representation of the symbol type. 342 func (typ SymType) String() string { 343 m := map[SymType]string{ 344 SymTypeNone: "none", 345 SymTypeObject: "object", 346 SymTypeFunc: "function", 347 SymTypeSection: "section", 348 SymTypeFile: "file", 349 SymTypeCommon: "common", 350 SymTypeOS0: "OS 0", 351 SymTypeOS1: "OS 1", 352 SymTypeOS2: "OS 2", 353 SymTypeProc0: "processor 0", 354 SymTypeProc1: "processor 1", 355 SymTypeProc2: "processor 2", 356 } 357 if s, ok := m[typ]; ok { 358 return s 359 } 360 panic(fmt.Errorf("support for symbol type %v not yet implemented", uint8(typ))) 361 } 362 363 // Symbol types. 364 const ( 365 // The symbol type is not specified. 366 SymTypeNone SymType = 0 367 // This symbol is associated with a data object, such as a variable, an 368 // array, and so forth. 369 SymTypeObject SymType = 1 370 // This symbol is associated with a function or other executable code. 371 SymTypeFunc SymType = 2 372 // This symbol is associated with a section. 373 SymTypeSection SymType = 3 374 // Name of the source file associated with the object file 375 SymTypeFile SymType = 4 376 // This symbol labels an uninitialized common block. 377 SymTypeCommon SymType = 5 378 // Reserved for operating system-specific semantics. 379 SymTypeOS0 SymType = 10 380 // Reserved for operating system-specific semantics. 381 SymTypeOS1 SymType = 11 382 // Reserved for operating system-specific semantics. 383 SymTypeOS2 SymType = 12 384 // Reserved for processor-specific semantics. 385 SymTypeProc0 SymType = 13 386 // Reserved for processor-specific semantics. 387 SymTypeProc1 SymType = 14 388 // Reserved for processor-specific semantics. 389 SymTypeProc2 SymType = 15 390 ) 391 392 // SymBind specifies a symbol binding. 393 type SymBind uint8 394 395 // String returns the string representation of the symbol binding. 396 func (bind SymBind) String() string { 397 m := map[SymBind]string{ 398 SymBindLocal: "local", 399 SymBindGlobal: "global", 400 SymBindWeak: "weak", 401 SymBindOS0: "OS 0", 402 SymBindOS1: "OS 1", 403 SymBindOS2: "OS 2", 404 SymBindProc0: "processor 0", 405 SymBindProc1: "processor 1", 406 SymBindProc2: "processor 2", 407 } 408 if s, ok := m[bind]; ok { 409 return s 410 } 411 panic(fmt.Errorf("support for symbol binding %v not yet implemented", uint8(bind))) 412 } 413 414 // Symbol bindings. 415 const ( 416 // Local symbol. 417 SymBindLocal SymBind = 0 418 // Global symbol. 419 SymBindGlobal SymBind = 1 420 // Weak symbol. 421 SymBindWeak SymBind = 2 422 // Reserved for operating system-specific semantics. 423 SymBindOS0 SymBind = 10 424 // Reserved for operating system-specific semantics. 425 SymBindOS1 SymBind = 11 426 // Reserved for operating system-specific semantics. 427 SymBindOS2 SymBind = 12 428 // Reserved for processor-specific semantics. 429 SymBindProc0 SymBind = 13 430 // Reserved for processor-specific semantics. 431 SymBindProc1 SymBind = 14 432 // Reserved for processor-specific semantics. 433 SymBindProc2 SymBind = 15 434 ) 435 436 // SymVisibility specifies a symbol visibility. 437 type SymVisibility uint8 438 439 // String returns the string representation of the symbol binding. 440 func (v SymVisibility) String() string { 441 m := map[SymVisibility]string{ 442 SymVisibilityDefault: "default", 443 SymVisibilityInternal: "internal", 444 SymVisibilityHidden: "hidden", 445 SymVisibilityProtected: "protected", 446 } 447 if s, ok := m[v]; ok { 448 return s 449 } 450 panic(fmt.Errorf("support for symbol visibility %v not yet implemented", uint8(v))) 451 } 452 453 // Symbol visibility. 454 const ( 455 // Default symbol visiblity as specified by the symbol binding. 456 SymVisibilityDefault SymVisibility = 0 457 // Internal symbol visibility. 458 SymVisibilityInternal SymVisibility = 1 459 // Hidden symbol visibility. 460 SymVisibilityHidden SymVisibility = 2 461 // Protected symbol visibility. 462 SymVisibilityProtected SymVisibility = 3 463 ) 464 465 // parseSectFlags returns the memory access permissions represented by the given 466 // section header flags. 467 func parseSectFlags(flags elf.SectionFlag) bin.Perm { 468 var perm bin.Perm 469 if flags&elf.SHF_WRITE != 0 { 470 perm |= bin.PermW 471 } 472 if flags&elf.SHF_EXECINSTR != 0 { 473 perm |= bin.PermX 474 } 475 return perm 476 } 477 478 // parseProgFlags returns the memory access permissions represented by the given 479 // program header flags. 480 func parseProgFlags(flags elf.ProgFlag) bin.Perm { 481 var perm bin.Perm 482 if flags&elf.PF_R != 0 { 483 perm |= bin.PermR 484 } 485 if flags&elf.PF_W != 0 { 486 perm |= bin.PermW 487 } 488 if flags&elf.PF_X != 0 { 489 perm |= bin.PermX 490 } 491 return perm 492 } 493 494 // ### [ Helper functions ] #################################################### 495 496 // parseString parses the NULL-terminated string in the given data. 497 func parseString(data []byte) string { 498 pos := bytes.IndexByte(data, '\x00') 499 if pos == -1 { 500 panic(fmt.Errorf("unable to locate NULL-terminated string in % 02X", data)) 501 } 502 return string(data[:pos]) 503 }