github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/bin/elf/elf.go (about)

     1  // Package elf provides access to Executable and Linkable Format (ELF) files.
     2  package elf
     3  
     4  import (
     5  	"bytes"
     6  	"debug/elf"
     7  	"encoding/binary"
     8  	"fmt"
     9  	"io"
    10  	"io/ioutil"
    11  	"os"
    12  	"sort"
    13  
    14  	"github.com/decomp/exp/bin"
    15  	"github.com/pkg/errors"
    16  )
    17  
    18  // Register ELF format.
    19  func init() {
    20  	// Executable and Linkable Format (ELF)
    21  	//
    22  	//    7F 45 4C 46  |.ELF|
    23  	const magic = "\x7FELF"
    24  	bin.RegisterFormat("elf", magic, Parse)
    25  }
    26  
    27  // ParseFile parses the given ELF binary executable, reading from path.
    28  func ParseFile(path string) (*bin.File, error) {
    29  	f, err := os.Open(path)
    30  	if err != nil {
    31  		return nil, errors.WithStack(err)
    32  	}
    33  	defer f.Close()
    34  	return Parse(f)
    35  }
    36  
    37  // Parse parses the given ELF binary executable, reading from r.
    38  //
    39  // Users are responsible for closing r.
    40  func Parse(r io.ReaderAt) (*bin.File, error) {
    41  	// Open ELF file.
    42  	f, err := elf.NewFile(r)
    43  	if err != nil {
    44  		return nil, errors.WithStack(err)
    45  	}
    46  
    47  	// Parse machine architecture.
    48  	file := &bin.File{
    49  		Imports: make(map[bin.Address]string),
    50  		Exports: make(map[bin.Address]string),
    51  	}
    52  	switch f.Machine {
    53  	case elf.EM_386:
    54  		file.Arch = bin.ArchX86_32
    55  	case elf.EM_X86_64:
    56  		file.Arch = bin.ArchX86_64
    57  	case elf.EM_PPC:
    58  		file.Arch = bin.ArchPowerPC_32
    59  	}
    60  
    61  	// Parse entry address.
    62  	file.Entry = bin.Address(f.Entry)
    63  
    64  	// Parse sections.
    65  	for _, s := range f.Sections {
    66  		perm := parseSectFlags(s.Flags)
    67  		var data []byte
    68  		if s.Type != elf.SHT_NOBITS {
    69  			data, err = s.Data()
    70  			if err != nil {
    71  				return nil, errors.WithStack(err)
    72  			}
    73  			if len(data) == 0 {
    74  				continue
    75  			}
    76  		}
    77  		sect := &bin.Section{
    78  			Name:     s.Name,
    79  			Addr:     bin.Address(s.Addr),
    80  			Offset:   s.Offset,
    81  			FileSize: int(s.FileSize),
    82  			MemSize:  int(s.Size),
    83  			Data:     data,
    84  			Perm:     perm,
    85  		}
    86  		file.Sections = append(file.Sections, sect)
    87  	}
    88  
    89  	// Sort sections in ascending order.
    90  	less := func(i, j int) bool {
    91  		if file.Sections[i].Addr == file.Sections[j].Addr {
    92  			if len(file.Sections[i].Data) > len(file.Sections[j].Data) {
    93  				// prioritize longer sections with identical addresses.
    94  				return true
    95  			}
    96  			return file.Sections[i].Name < file.Sections[j].Name
    97  		}
    98  		return file.Sections[i].Addr < file.Sections[j].Addr
    99  	}
   100  	sort.Slice(file.Sections, less)
   101  
   102  	// Parse segments.
   103  	var segments []*bin.Section
   104  	for _, prog := range f.Progs {
   105  		if prog.Type != elf.PT_LOAD {
   106  			continue
   107  		}
   108  		r := prog.Open()
   109  		data, err := ioutil.ReadAll(r)
   110  		if err != nil {
   111  			return nil, errors.WithStack(err)
   112  		}
   113  		perm := parseProgFlags(prog.Flags)
   114  		seg := &bin.Section{
   115  			Addr:     bin.Address(prog.Vaddr),
   116  			Offset:   prog.Off,
   117  			Data:     data,
   118  			FileSize: int(prog.Filesz),
   119  			MemSize:  int(prog.Memsz),
   120  			Perm:     perm,
   121  		}
   122  		segments = append(segments, seg)
   123  	}
   124  
   125  	// Sort segments in ascending order.
   126  	sort.Slice(segments, less)
   127  
   128  	// Fix section permissions.
   129  	if len(segments) > 0 {
   130  		for _, sect := range file.Sections {
   131  			for _, seg := range segments {
   132  				end := seg.Addr + bin.Address(len(seg.Data))
   133  				if seg.Addr <= sect.Addr && sect.Addr < end {
   134  					if sect.Perm == 0 {
   135  						sect.Perm = seg.Perm
   136  					}
   137  				}
   138  			}
   139  		}
   140  	}
   141  
   142  	// Append segments as sections.
   143  	file.Sections = append(file.Sections, segments...)
   144  
   145  	// Sort sections (and segments) in ascending order.
   146  	sort.Slice(segments, less)
   147  
   148  	// TODO: Parse imports.
   149  
   150  	// Parse imports.
   151  	gotplt := f.Section(".got.plt")
   152  	// TODO: Add support for reading .got.plt from segments when section
   153  	// information is missing. Locate using DT_PLTGOT in .dynamic.
   154  	if gotplt != nil {
   155  		gotpltData, err := gotplt.Data()
   156  		if err != nil {
   157  			return nil, errors.WithStack(err)
   158  		}
   159  		dynSyms, err := f.DynamicSymbols()
   160  		if err != nil {
   161  			return nil, errors.WithStack(err)
   162  		}
   163  		// Program Linkage Table example.
   164  		//
   165  		//    plt:
   166  		//
   167  		//      ...
   168  		//
   169  		//      .printf:
   170  		//        jmp     [rel (BASE_DATA - BASE_CODE) + got_plt.printf]
   171  		//
   172  		//      .resolve_printf:
   173  		//        push    QWORD dynsym.printf_idx
   174  		//        jmp     NEAR .resolve
   175  		//        jmp     [rel (BASE_DATA - BASE_CODE) + got_plt.printf]
   176  		//
   177  		//      ...
   178  		//
   179  		// ref: https://github.com/mewrev/dissection/blob/master/elf.asm
   180  
   181  		// The length of the 32- and 64-bit JMP instruction.
   182  		//
   183  		//    jmp     [rel (BASE_DATA - BASE_CODE) + got_plt.printf]
   184  		const jmplen = 6
   185  		switch file.Arch.BitSize() {
   186  		case 32:
   187  			// skip .got.plt:dynamic            (4 bytes)
   188  			// skip .got.plt:link_map           (4 bytes)
   189  			// skip .got.plt:dl_runtime_resolve (4 bytes)
   190  			r := bytes.NewReader(gotpltData[4+4+4:])
   191  			for _, dynSym := range dynSyms {
   192  				var v uint32
   193  				if err := binary.Read(r, binary.LittleEndian, &v); err != nil {
   194  					if errors.Cause(err) == io.EOF {
   195  						break
   196  					}
   197  					return nil, errors.WithStack(err)
   198  				}
   199  				// v points to .plt:resolve_printf, and .plt:printf is at the jmp
   200  				// instruction directly preceding; thus subtract the length of the
   201  				// jmp instruction from v to arrive at .plt:printf.
   202  				addr := bin.Address(v) - jmplen
   203  				file.Imports[addr] = dynSym.Name
   204  			}
   205  		case 64:
   206  			// skip .got.plt:dynamic            (8 bytes)
   207  			// skip .got.plt:link_map           (8 bytes)
   208  			// skip .got.plt:dl_runtime_resolve (8 bytes)
   209  			r := bytes.NewReader(gotpltData[8+8+8:])
   210  			for _, dynSym := range dynSyms {
   211  				var v uint64
   212  				if err := binary.Read(r, binary.LittleEndian, &v); err != nil {
   213  					if errors.Cause(err) == io.EOF {
   214  						break
   215  					}
   216  					return nil, errors.WithStack(err)
   217  				}
   218  				// v points to .plt:resolve_printf, and .plt:printf is at the jmp
   219  				// instruction directly preceding; thus subtract the length of the
   220  				// jmp instruction from v to arrive at .plt:printf.
   221  				addr := bin.Address(v) - jmplen
   222  				file.Imports[addr] = dynSym.Name
   223  			}
   224  		default:
   225  			panic(fmt.Errorf("support for CPU bit size %d not yet implemented", file.Arch.BitSize()))
   226  		}
   227  	}
   228  
   229  	// Parse exports.
   230  	symtab := f.Section(".symtab")
   231  	strtab := f.Section(".strtab")
   232  	if symtab != nil && strtab != nil {
   233  		symtabData, err := symtab.Data()
   234  		if err != nil {
   235  			return nil, errors.WithStack(err)
   236  		}
   237  		strtabData, err := strtab.Data()
   238  		if err != nil {
   239  			return nil, errors.WithStack(err)
   240  		}
   241  		r := bytes.NewReader(symtabData)
   242  		// undef specifies that a symbol is not associated with a specific
   243  		// section.
   244  		const undef = 0
   245  		switch file.Arch.BitSize() {
   246  		case 32:
   247  			// Sym32 represents a 32-bit symbol descriptor.
   248  			type Sym32 struct {
   249  				// Index into the symbol string table.
   250  				Name uint32
   251  				// Value of the associated symbol. Depending on the context, this can
   252  				// be an absolute value, an address, etc.
   253  				Value uint32
   254  				// Size in bytes; or 0 if the symbol has no size or an unknown size.
   255  				Size uint32
   256  				// Symbol type and binding information.
   257  				Info uint8
   258  				// Symbol visibility.
   259  				Visibility SymVisibility
   260  				// Section header table index relevant for the symbol.
   261  				SectHdrIndex uint16
   262  			}
   263  			for {
   264  				var sym Sym32
   265  				if err := binary.Read(r, binary.LittleEndian, &sym); err != nil {
   266  					if errors.Cause(err) == io.EOF {
   267  						break
   268  					}
   269  					return nil, errors.WithStack(err)
   270  				}
   271  				//pretty.Println("sym:", sym)
   272  				name := parseString(strtabData[sym.Name:])
   273  				addr := bin.Address(sym.Value)
   274  				typ := SymType(sym.Info & 0x0F)
   275  				//bind := SymBind(sym.Info >> 4)
   276  				// TODO: Remove debug output.
   277  				//fmt.Println("name:", name)
   278  				//fmt.Println("addr:", addr)
   279  				//fmt.Println("size:", sym.Size)
   280  				//fmt.Println("typ:", typ)
   281  				//fmt.Println("bind:", bind)
   282  				//fmt.Println("visibility:", sym.Visibility)
   283  				//fmt.Println()
   284  				if typ == SymTypeFunc && sym.SectHdrIndex != undef {
   285  					file.Exports[addr] = name
   286  				}
   287  			}
   288  		case 64:
   289  			// Sym64 represents a 64-bit symbol descriptor.
   290  			type Sym64 struct {
   291  				// Index into the symbol string table.
   292  				Name uint32
   293  				// Symbol type and binding information.
   294  				Info uint8
   295  				// Symbol visibility.
   296  				Visibility SymVisibility
   297  				// Section header table index relevant for the symbol.
   298  				SectHdrIndex uint16
   299  				// Value of the associated symbol. Depending on the context, this can
   300  				// be an absolute value, an address, etc.
   301  				Value uint64
   302  				// Size in bytes; or 0 if the symbol has no size or an unknown size.
   303  				Size uint64
   304  			}
   305  			for {
   306  				var sym Sym64
   307  				if err := binary.Read(r, binary.LittleEndian, &sym); err != nil {
   308  					if errors.Cause(err) == io.EOF {
   309  						break
   310  					}
   311  					return nil, errors.WithStack(err)
   312  				}
   313  				//pretty.Println("sym:", sym)
   314  				name := parseString(strtabData[sym.Name:])
   315  				addr := bin.Address(sym.Value)
   316  				typ := SymType(sym.Info & 0x0F)
   317  				//bind := SymBind(sym.Info >> 4)
   318  				// TODO: Remove debug output.
   319  				//fmt.Println("name:", name)
   320  				//fmt.Println("addr:", addr)
   321  				//fmt.Println("size:", sym.Size)
   322  				//fmt.Println("typ:", typ)
   323  				//fmt.Println("bind:", bind)
   324  				//fmt.Println("visibility:", sym.Visibility)
   325  				//fmt.Println()
   326  				if typ == SymTypeFunc && sym.SectHdrIndex != undef {
   327  					file.Exports[addr] = name
   328  				}
   329  			}
   330  		default:
   331  			panic(fmt.Errorf("support for CPU bit size %d not yet implemented", file.Arch.BitSize()))
   332  		}
   333  	}
   334  
   335  	return file, nil
   336  }
   337  
   338  // SymType specifies a symbol type.
   339  type SymType uint8
   340  
   341  // String returns the string representation of the symbol type.
   342  func (typ SymType) String() string {
   343  	m := map[SymType]string{
   344  		SymTypeNone:    "none",
   345  		SymTypeObject:  "object",
   346  		SymTypeFunc:    "function",
   347  		SymTypeSection: "section",
   348  		SymTypeFile:    "file",
   349  		SymTypeCommon:  "common",
   350  		SymTypeOS0:     "OS 0",
   351  		SymTypeOS1:     "OS 1",
   352  		SymTypeOS2:     "OS 2",
   353  		SymTypeProc0:   "processor 0",
   354  		SymTypeProc1:   "processor 1",
   355  		SymTypeProc2:   "processor 2",
   356  	}
   357  	if s, ok := m[typ]; ok {
   358  		return s
   359  	}
   360  	panic(fmt.Errorf("support for symbol type %v not yet implemented", uint8(typ)))
   361  }
   362  
   363  // Symbol types.
   364  const (
   365  	// The symbol type is not specified.
   366  	SymTypeNone SymType = 0
   367  	// This symbol is associated with a data object, such as a variable, an
   368  	// array, and so forth.
   369  	SymTypeObject SymType = 1
   370  	// This symbol is associated with a function or other executable code.
   371  	SymTypeFunc SymType = 2
   372  	// This symbol is associated with a section.
   373  	SymTypeSection SymType = 3
   374  	// Name of the source file associated with the object file
   375  	SymTypeFile SymType = 4
   376  	// This symbol labels an uninitialized common block.
   377  	SymTypeCommon SymType = 5
   378  	// Reserved for operating system-specific semantics.
   379  	SymTypeOS0 SymType = 10
   380  	// Reserved for operating system-specific semantics.
   381  	SymTypeOS1 SymType = 11
   382  	// Reserved for operating system-specific semantics.
   383  	SymTypeOS2 SymType = 12
   384  	// Reserved for processor-specific semantics.
   385  	SymTypeProc0 SymType = 13
   386  	// Reserved for processor-specific semantics.
   387  	SymTypeProc1 SymType = 14
   388  	// Reserved for processor-specific semantics.
   389  	SymTypeProc2 SymType = 15
   390  )
   391  
   392  // SymBind specifies a symbol binding.
   393  type SymBind uint8
   394  
   395  // String returns the string representation of the symbol binding.
   396  func (bind SymBind) String() string {
   397  	m := map[SymBind]string{
   398  		SymBindLocal:  "local",
   399  		SymBindGlobal: "global",
   400  		SymBindWeak:   "weak",
   401  		SymBindOS0:    "OS 0",
   402  		SymBindOS1:    "OS 1",
   403  		SymBindOS2:    "OS 2",
   404  		SymBindProc0:  "processor 0",
   405  		SymBindProc1:  "processor 1",
   406  		SymBindProc2:  "processor 2",
   407  	}
   408  	if s, ok := m[bind]; ok {
   409  		return s
   410  	}
   411  	panic(fmt.Errorf("support for symbol binding %v not yet implemented", uint8(bind)))
   412  }
   413  
   414  // Symbol bindings.
   415  const (
   416  	// Local symbol.
   417  	SymBindLocal SymBind = 0
   418  	// Global symbol.
   419  	SymBindGlobal SymBind = 1
   420  	// Weak symbol.
   421  	SymBindWeak SymBind = 2
   422  	// Reserved for operating system-specific semantics.
   423  	SymBindOS0 SymBind = 10
   424  	// Reserved for operating system-specific semantics.
   425  	SymBindOS1 SymBind = 11
   426  	// Reserved for operating system-specific semantics.
   427  	SymBindOS2 SymBind = 12
   428  	// Reserved for processor-specific semantics.
   429  	SymBindProc0 SymBind = 13
   430  	// Reserved for processor-specific semantics.
   431  	SymBindProc1 SymBind = 14
   432  	// Reserved for processor-specific semantics.
   433  	SymBindProc2 SymBind = 15
   434  )
   435  
   436  // SymVisibility specifies a symbol visibility.
   437  type SymVisibility uint8
   438  
   439  // String returns the string representation of the symbol binding.
   440  func (v SymVisibility) String() string {
   441  	m := map[SymVisibility]string{
   442  		SymVisibilityDefault:   "default",
   443  		SymVisibilityInternal:  "internal",
   444  		SymVisibilityHidden:    "hidden",
   445  		SymVisibilityProtected: "protected",
   446  	}
   447  	if s, ok := m[v]; ok {
   448  		return s
   449  	}
   450  	panic(fmt.Errorf("support for symbol visibility %v not yet implemented", uint8(v)))
   451  }
   452  
   453  // Symbol visibility.
   454  const (
   455  	// Default symbol visiblity as specified by the symbol binding.
   456  	SymVisibilityDefault SymVisibility = 0
   457  	// Internal symbol visibility.
   458  	SymVisibilityInternal SymVisibility = 1
   459  	// Hidden symbol visibility.
   460  	SymVisibilityHidden SymVisibility = 2
   461  	// Protected symbol visibility.
   462  	SymVisibilityProtected SymVisibility = 3
   463  )
   464  
   465  // parseSectFlags returns the memory access permissions represented by the given
   466  // section header flags.
   467  func parseSectFlags(flags elf.SectionFlag) bin.Perm {
   468  	var perm bin.Perm
   469  	if flags&elf.SHF_WRITE != 0 {
   470  		perm |= bin.PermW
   471  	}
   472  	if flags&elf.SHF_EXECINSTR != 0 {
   473  		perm |= bin.PermX
   474  	}
   475  	return perm
   476  }
   477  
   478  // parseProgFlags returns the memory access permissions represented by the given
   479  // program header flags.
   480  func parseProgFlags(flags elf.ProgFlag) bin.Perm {
   481  	var perm bin.Perm
   482  	if flags&elf.PF_R != 0 {
   483  		perm |= bin.PermR
   484  	}
   485  	if flags&elf.PF_W != 0 {
   486  		perm |= bin.PermW
   487  	}
   488  	if flags&elf.PF_X != 0 {
   489  		perm |= bin.PermX
   490  	}
   491  	return perm
   492  }
   493  
   494  // ### [ Helper functions ] ####################################################
   495  
   496  // parseString parses the NULL-terminated string in the given data.
   497  func parseString(data []byte) string {
   498  	pos := bytes.IndexByte(data, '\x00')
   499  	if pos == -1 {
   500  		panic(fmt.Errorf("unable to locate NULL-terminated string in % 02X", data))
   501  	}
   502  	return string(data[:pos])
   503  }