github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/bin/pe/pe.go (about)

     1  // Package pe provides access to PE (Portable Executable) files.
     2  package pe
     3  
     4  import (
     5  	"bytes"
     6  	"debug/pe"
     7  	"encoding/binary"
     8  	"encoding/hex"
     9  	"fmt"
    10  	"io"
    11  	"io/ioutil"
    12  	"log"
    13  	"os"
    14  	"sort"
    15  
    16  	"github.com/decomp/exp/bin"
    17  	//"github.com/kr/pretty"
    18  	"github.com/mewkiz/pkg/pathutil"
    19  	"github.com/mewkiz/pkg/term"
    20  	"github.com/pkg/errors"
    21  )
    22  
    23  var (
    24  	// dbg is a logger with the "pe:" prefix which logs debug messages to
    25  	// standard error.
    26  	dbg = log.New(ioutil.Discard, term.MagentaBold("pe:")+" ", 0)
    27  	// warn is a logger with the "pe:" prefix which logs warning messages to
    28  	// standard error.
    29  	warn = log.New(os.Stderr, term.RedBold("pe:")+" ", 0)
    30  )
    31  
    32  // Register PE format.
    33  func init() {
    34  	// Portable Executable (PE) format.
    35  	//
    36  	//    4D 5A  |MZ|
    37  	const magic = "MZ"
    38  	bin.RegisterFormat("pe", magic, Parse)
    39  }
    40  
    41  // ParseFile parses the given PE binary executable, reading from path.
    42  func ParseFile(path string) (*bin.File, error) {
    43  	f, err := os.Open(path)
    44  	if err != nil {
    45  		return nil, errors.WithStack(err)
    46  	}
    47  	defer f.Close()
    48  	return Parse(f)
    49  }
    50  
    51  // Parse parses the given PE binary executable, reading from r.
    52  //
    53  // Users are responsible for closing r.
    54  func Parse(r io.ReaderAt) (*bin.File, error) {
    55  	// Open PE file.
    56  	f, err := pe.NewFile(r)
    57  	if err != nil {
    58  		return nil, errors.WithStack(err)
    59  	}
    60  
    61  	// Parse machine architecture.
    62  	file := &bin.File{
    63  		Imports: make(map[bin.Address]string),
    64  		// TODO: implement exports
    65  	}
    66  	switch f.FileHeader.Machine {
    67  	case pe.IMAGE_FILE_MACHINE_I386:
    68  		file.Arch = bin.ArchX86_32
    69  	case pe.IMAGE_FILE_MACHINE_AMD64:
    70  		file.Arch = bin.ArchX86_64
    71  	case pe.IMAGE_FILE_MACHINE_POWERPC:
    72  		file.Arch = bin.ArchPowerPC_32
    73  	default:
    74  		panic(fmt.Errorf("support for machine architecture %v not yet implemented", f.FileHeader.Machine))
    75  	}
    76  
    77  	// Parse entry address.
    78  	var (
    79  		// Image base address.
    80  		imageBase uint64
    81  		// Import table RVA and size.
    82  		itRVA  uint64
    83  		itSize uint64
    84  		// Import address table (IAT) RVA and size.
    85  		iatRVA  uint64
    86  		iatSize uint64
    87  	)
    88  	// Data directory indices.
    89  	const (
    90  		ImportTableIndex        = 1
    91  		ImportAddressTableIndex = 12
    92  	)
    93  	switch opt := f.OptionalHeader.(type) {
    94  	case *pe.OptionalHeader32:
    95  		file.Entry = bin.Address(opt.ImageBase + opt.AddressOfEntryPoint)
    96  		imageBase = uint64(opt.ImageBase)
    97  		itRVA = uint64(opt.DataDirectory[ImportTableIndex].VirtualAddress)
    98  		itSize = uint64(opt.DataDirectory[ImportTableIndex].Size)
    99  		iatRVA = uint64(opt.DataDirectory[ImportAddressTableIndex].VirtualAddress)
   100  		iatSize = uint64(opt.DataDirectory[ImportAddressTableIndex].Size)
   101  	case *pe.OptionalHeader64:
   102  		file.Entry = bin.Address(opt.ImageBase) + bin.Address(opt.AddressOfEntryPoint)
   103  		imageBase = uint64(opt.ImageBase)
   104  		itRVA = uint64(opt.DataDirectory[ImportTableIndex].VirtualAddress)
   105  		itSize = uint64(opt.DataDirectory[ImportTableIndex].Size)
   106  		iatRVA = uint64(opt.DataDirectory[ImportAddressTableIndex].VirtualAddress)
   107  		iatSize = uint64(opt.DataDirectory[ImportAddressTableIndex].Size)
   108  	default:
   109  		panic(fmt.Errorf("support for optional header type %T not yet implemented", opt))
   110  	}
   111  
   112  	// Parse sections.
   113  	for _, s := range f.Sections {
   114  		addr := bin.Address(imageBase) + bin.Address(s.VirtualAddress)
   115  		raw, err := s.Data()
   116  		if err != nil {
   117  			return nil, errors.WithStack(err)
   118  		}
   119  		data := raw
   120  		fileSize := len(raw)
   121  		memSize := int(s.VirtualSize)
   122  		if fileSize > memSize {
   123  			// Ignore section alignment padding.
   124  			data = raw[:memSize]
   125  		}
   126  		perm := parsePerm(s.Characteristics)
   127  		sect := &bin.Section{
   128  			Name:     s.Name,
   129  			Addr:     addr,
   130  			Offset:   uint64(s.Offset),
   131  			Data:     data,
   132  			FileSize: fileSize,
   133  			MemSize:  memSize,
   134  			Perm:     perm,
   135  		}
   136  		file.Sections = append(file.Sections, sect)
   137  	}
   138  	less := func(i, j int) bool {
   139  		if file.Sections[i].Addr == file.Sections[j].Addr {
   140  			if len(file.Sections[i].Data) > len(file.Sections[j].Data) {
   141  				// prioritize longer sections with identical addresses.
   142  				return true
   143  			}
   144  			return file.Sections[i].Name < file.Sections[j].Name
   145  		}
   146  		return file.Sections[i].Addr < file.Sections[j].Addr
   147  	}
   148  	sort.Slice(file.Sections, less)
   149  
   150  	// Parse import address table (IAT).
   151  	dbg.Println("iat")
   152  	if iatSize != 0 {
   153  		iatAddr := bin.Address(imageBase + iatRVA)
   154  		dbg.Println("iat addr:", iatAddr)
   155  		data := file.Data(iatAddr)
   156  		data = data[:iatSize]
   157  		dbg.Println(hex.Dump(data))
   158  	}
   159  
   160  	// Early return if import table not present.
   161  	if itSize == 0 {
   162  		return file, nil
   163  	}
   164  
   165  	// Parse import table.
   166  	dbg.Println("it")
   167  	itAddr := bin.Address(imageBase + itRVA)
   168  	dbg.Println("it addr:", itAddr)
   169  	data := file.Data(itAddr)
   170  	data = data[:itSize]
   171  	dbg.Println(hex.Dump(data))
   172  	br := bytes.NewReader(data)
   173  	zero := importDesc{}
   174  	var impDescs []importDesc
   175  	for {
   176  		var impDesc importDesc
   177  		if err := binary.Read(br, binary.LittleEndian, &impDesc); err != nil {
   178  			return nil, errors.WithStack(err)
   179  		}
   180  		if impDesc == zero {
   181  			break
   182  		}
   183  		impDescs = append(impDescs, impDesc)
   184  	}
   185  	for _, impDesc := range impDescs {
   186  		//dbg.Printf("impDesc: %#v\n", pretty.Formatter(impDesc))
   187  		dllNameAddr := bin.Address(imageBase) + bin.Address(impDesc.DLLNameRVA)
   188  		data := file.Data(dllNameAddr)
   189  		dllName := parseString(data)
   190  		dbg.Println("dll name:", dllName)
   191  		// Parse import name table and import address table.
   192  		impNameTableAddr := bin.Address(imageBase) + bin.Address(impDesc.ImportNameTableRVA)
   193  		impAddrTableAddr := bin.Address(imageBase) + bin.Address(impDesc.ImportAddressTableRVA)
   194  		inAddr := impNameTableAddr
   195  		iaAddr := impAddrTableAddr
   196  		for {
   197  			impNameRVA, n := readUintptr(file, inAddr)
   198  			if impNameRVA == 0 {
   199  				break
   200  			}
   201  			impAddr := iaAddr
   202  			inAddr += bin.Address(n)
   203  			iaAddr += bin.Address(n)
   204  			dbg.Println("impAddr:", impAddr)
   205  			if impNameRVA&0x80000000 != 0 {
   206  				// ordinal
   207  				ordinal := impNameRVA &^ 0x80000000
   208  				dbg.Println("===> ordinal", ordinal)
   209  				impName := fmt.Sprintf("%s_ordinal_%d", pathutil.TrimExt(dllName), ordinal)
   210  				file.Imports[impAddr] = impName
   211  				continue
   212  			}
   213  			impNameAddr := bin.Address(imageBase + impNameRVA)
   214  			data := file.Data(impNameAddr)
   215  			ordinal := binary.LittleEndian.Uint16(data)
   216  			data = data[2:]
   217  			impName := parseString(data)
   218  			dbg.Println("ordinal:", ordinal)
   219  			dbg.Println("impName:", impName)
   220  			file.Imports[impAddr] = impName
   221  		}
   222  		dbg.Println()
   223  	}
   224  
   225  	return file, nil
   226  }
   227  
   228  // ref: https://msdn.microsoft.com/en-us/library/ms809762.aspx
   229  
   230  // An importDesc is an import descriptor.
   231  type importDesc struct {
   232  	// Import name table RVA.
   233  	ImportNameTableRVA uint32
   234  	// Time stamp.
   235  	Date uint32
   236  	// Forward chain; index into importAddressTableRVA for forwarding a function
   237  	// to another DLL.
   238  	ForwardChain uint32
   239  	// DLL name RVA.
   240  	DLLNameRVA uint32
   241  	// Import address table RVA.
   242  	ImportAddressTableRVA uint32
   243  }
   244  
   245  // An importName specifies the name of an import.
   246  type importName struct {
   247  	// Approximate ordinal number (used by loader to initiate binary search).
   248  	Ordinal uint16
   249  	// Name of the import.
   250  	Name string
   251  }
   252  
   253  // parsePerm returns the memory access permissions represented by the given PE
   254  // image characteristics.
   255  func parsePerm(char uint32) bin.Perm {
   256  	// Characteristics.
   257  	const (
   258  		// permR specifies that the memory is readable.
   259  		permR = 0x40000000
   260  		// permW specifies that the memory is writeable.
   261  		permW = 0x80000000
   262  		// permX specifies that the memory is executable.
   263  		permX = 0x20000000
   264  	)
   265  	var perm bin.Perm
   266  	if char&permR != 0 {
   267  		perm |= bin.PermR
   268  	}
   269  	if char&permW != 0 {
   270  		perm |= bin.PermW
   271  	}
   272  	if char&permX != 0 {
   273  		perm |= bin.PermX
   274  	}
   275  	return perm
   276  }
   277  
   278  // ### [ Helper functions ] ####################################################
   279  
   280  // parseString parses the NULL-terminated string in the given data.
   281  func parseString(data []byte) string {
   282  	pos := bytes.IndexByte(data, '\x00')
   283  	if pos == -1 {
   284  		panic(fmt.Errorf("unable to locate NULL-terminated string in % 02X", data))
   285  	}
   286  	return string(data[:pos])
   287  }
   288  
   289  // readUintptr reads a little-endian encoded value of pointer size based on the
   290  // CPU architecture, and returns the number of bytes read.
   291  func readUintptr(file *bin.File, addr bin.Address) (uint64, int) {
   292  	bits := file.Arch.BitSize()
   293  	data := file.Data(addr)
   294  	switch bits {
   295  	case 32:
   296  		if len(data) < 4 {
   297  			panic(fmt.Errorf("data length too short; expected >= 4 bytes, got %d", len(data)))
   298  		}
   299  		return uint64(binary.LittleEndian.Uint32(data)), 4
   300  	case 64:
   301  		if len(data) < 8 {
   302  			panic(fmt.Errorf("data length too short; expected >= 8 bytes, got %d", len(data)))
   303  		}
   304  		return binary.LittleEndian.Uint64(data), 8
   305  	default:
   306  		panic(fmt.Errorf("support for machine architecture with bit size %d not yet implemented", bits))
   307  	}
   308  }