github.com/decomp/exp@v0.0.0-20210624183419-6d058f5e1da6/bin/pe/pe.go (about) 1 // Package pe provides access to PE (Portable Executable) files. 2 package pe 3 4 import ( 5 "bytes" 6 "debug/pe" 7 "encoding/binary" 8 "encoding/hex" 9 "fmt" 10 "io" 11 "io/ioutil" 12 "log" 13 "os" 14 "sort" 15 16 "github.com/decomp/exp/bin" 17 //"github.com/kr/pretty" 18 "github.com/mewkiz/pkg/pathutil" 19 "github.com/mewkiz/pkg/term" 20 "github.com/pkg/errors" 21 ) 22 23 var ( 24 // dbg is a logger with the "pe:" prefix which logs debug messages to 25 // standard error. 26 dbg = log.New(ioutil.Discard, term.MagentaBold("pe:")+" ", 0) 27 // warn is a logger with the "pe:" prefix which logs warning messages to 28 // standard error. 29 warn = log.New(os.Stderr, term.RedBold("pe:")+" ", 0) 30 ) 31 32 // Register PE format. 33 func init() { 34 // Portable Executable (PE) format. 35 // 36 // 4D 5A |MZ| 37 const magic = "MZ" 38 bin.RegisterFormat("pe", magic, Parse) 39 } 40 41 // ParseFile parses the given PE binary executable, reading from path. 42 func ParseFile(path string) (*bin.File, error) { 43 f, err := os.Open(path) 44 if err != nil { 45 return nil, errors.WithStack(err) 46 } 47 defer f.Close() 48 return Parse(f) 49 } 50 51 // Parse parses the given PE binary executable, reading from r. 52 // 53 // Users are responsible for closing r. 54 func Parse(r io.ReaderAt) (*bin.File, error) { 55 // Open PE file. 56 f, err := pe.NewFile(r) 57 if err != nil { 58 return nil, errors.WithStack(err) 59 } 60 61 // Parse machine architecture. 62 file := &bin.File{ 63 Imports: make(map[bin.Address]string), 64 // TODO: implement exports 65 } 66 switch f.FileHeader.Machine { 67 case pe.IMAGE_FILE_MACHINE_I386: 68 file.Arch = bin.ArchX86_32 69 case pe.IMAGE_FILE_MACHINE_AMD64: 70 file.Arch = bin.ArchX86_64 71 case pe.IMAGE_FILE_MACHINE_POWERPC: 72 file.Arch = bin.ArchPowerPC_32 73 default: 74 panic(fmt.Errorf("support for machine architecture %v not yet implemented", f.FileHeader.Machine)) 75 } 76 77 // Parse entry address. 78 var ( 79 // Image base address. 80 imageBase uint64 81 // Import table RVA and size. 82 itRVA uint64 83 itSize uint64 84 // Import address table (IAT) RVA and size. 85 iatRVA uint64 86 iatSize uint64 87 ) 88 // Data directory indices. 89 const ( 90 ImportTableIndex = 1 91 ImportAddressTableIndex = 12 92 ) 93 switch opt := f.OptionalHeader.(type) { 94 case *pe.OptionalHeader32: 95 file.Entry = bin.Address(opt.ImageBase + opt.AddressOfEntryPoint) 96 imageBase = uint64(opt.ImageBase) 97 itRVA = uint64(opt.DataDirectory[ImportTableIndex].VirtualAddress) 98 itSize = uint64(opt.DataDirectory[ImportTableIndex].Size) 99 iatRVA = uint64(opt.DataDirectory[ImportAddressTableIndex].VirtualAddress) 100 iatSize = uint64(opt.DataDirectory[ImportAddressTableIndex].Size) 101 case *pe.OptionalHeader64: 102 file.Entry = bin.Address(opt.ImageBase) + bin.Address(opt.AddressOfEntryPoint) 103 imageBase = uint64(opt.ImageBase) 104 itRVA = uint64(opt.DataDirectory[ImportTableIndex].VirtualAddress) 105 itSize = uint64(opt.DataDirectory[ImportTableIndex].Size) 106 iatRVA = uint64(opt.DataDirectory[ImportAddressTableIndex].VirtualAddress) 107 iatSize = uint64(opt.DataDirectory[ImportAddressTableIndex].Size) 108 default: 109 panic(fmt.Errorf("support for optional header type %T not yet implemented", opt)) 110 } 111 112 // Parse sections. 113 for _, s := range f.Sections { 114 addr := bin.Address(imageBase) + bin.Address(s.VirtualAddress) 115 raw, err := s.Data() 116 if err != nil { 117 return nil, errors.WithStack(err) 118 } 119 data := raw 120 fileSize := len(raw) 121 memSize := int(s.VirtualSize) 122 if fileSize > memSize { 123 // Ignore section alignment padding. 124 data = raw[:memSize] 125 } 126 perm := parsePerm(s.Characteristics) 127 sect := &bin.Section{ 128 Name: s.Name, 129 Addr: addr, 130 Offset: uint64(s.Offset), 131 Data: data, 132 FileSize: fileSize, 133 MemSize: memSize, 134 Perm: perm, 135 } 136 file.Sections = append(file.Sections, sect) 137 } 138 less := func(i, j int) bool { 139 if file.Sections[i].Addr == file.Sections[j].Addr { 140 if len(file.Sections[i].Data) > len(file.Sections[j].Data) { 141 // prioritize longer sections with identical addresses. 142 return true 143 } 144 return file.Sections[i].Name < file.Sections[j].Name 145 } 146 return file.Sections[i].Addr < file.Sections[j].Addr 147 } 148 sort.Slice(file.Sections, less) 149 150 // Parse import address table (IAT). 151 dbg.Println("iat") 152 if iatSize != 0 { 153 iatAddr := bin.Address(imageBase + iatRVA) 154 dbg.Println("iat addr:", iatAddr) 155 data := file.Data(iatAddr) 156 data = data[:iatSize] 157 dbg.Println(hex.Dump(data)) 158 } 159 160 // Early return if import table not present. 161 if itSize == 0 { 162 return file, nil 163 } 164 165 // Parse import table. 166 dbg.Println("it") 167 itAddr := bin.Address(imageBase + itRVA) 168 dbg.Println("it addr:", itAddr) 169 data := file.Data(itAddr) 170 data = data[:itSize] 171 dbg.Println(hex.Dump(data)) 172 br := bytes.NewReader(data) 173 zero := importDesc{} 174 var impDescs []importDesc 175 for { 176 var impDesc importDesc 177 if err := binary.Read(br, binary.LittleEndian, &impDesc); err != nil { 178 return nil, errors.WithStack(err) 179 } 180 if impDesc == zero { 181 break 182 } 183 impDescs = append(impDescs, impDesc) 184 } 185 for _, impDesc := range impDescs { 186 //dbg.Printf("impDesc: %#v\n", pretty.Formatter(impDesc)) 187 dllNameAddr := bin.Address(imageBase) + bin.Address(impDesc.DLLNameRVA) 188 data := file.Data(dllNameAddr) 189 dllName := parseString(data) 190 dbg.Println("dll name:", dllName) 191 // Parse import name table and import address table. 192 impNameTableAddr := bin.Address(imageBase) + bin.Address(impDesc.ImportNameTableRVA) 193 impAddrTableAddr := bin.Address(imageBase) + bin.Address(impDesc.ImportAddressTableRVA) 194 inAddr := impNameTableAddr 195 iaAddr := impAddrTableAddr 196 for { 197 impNameRVA, n := readUintptr(file, inAddr) 198 if impNameRVA == 0 { 199 break 200 } 201 impAddr := iaAddr 202 inAddr += bin.Address(n) 203 iaAddr += bin.Address(n) 204 dbg.Println("impAddr:", impAddr) 205 if impNameRVA&0x80000000 != 0 { 206 // ordinal 207 ordinal := impNameRVA &^ 0x80000000 208 dbg.Println("===> ordinal", ordinal) 209 impName := fmt.Sprintf("%s_ordinal_%d", pathutil.TrimExt(dllName), ordinal) 210 file.Imports[impAddr] = impName 211 continue 212 } 213 impNameAddr := bin.Address(imageBase + impNameRVA) 214 data := file.Data(impNameAddr) 215 ordinal := binary.LittleEndian.Uint16(data) 216 data = data[2:] 217 impName := parseString(data) 218 dbg.Println("ordinal:", ordinal) 219 dbg.Println("impName:", impName) 220 file.Imports[impAddr] = impName 221 } 222 dbg.Println() 223 } 224 225 return file, nil 226 } 227 228 // ref: https://msdn.microsoft.com/en-us/library/ms809762.aspx 229 230 // An importDesc is an import descriptor. 231 type importDesc struct { 232 // Import name table RVA. 233 ImportNameTableRVA uint32 234 // Time stamp. 235 Date uint32 236 // Forward chain; index into importAddressTableRVA for forwarding a function 237 // to another DLL. 238 ForwardChain uint32 239 // DLL name RVA. 240 DLLNameRVA uint32 241 // Import address table RVA. 242 ImportAddressTableRVA uint32 243 } 244 245 // An importName specifies the name of an import. 246 type importName struct { 247 // Approximate ordinal number (used by loader to initiate binary search). 248 Ordinal uint16 249 // Name of the import. 250 Name string 251 } 252 253 // parsePerm returns the memory access permissions represented by the given PE 254 // image characteristics. 255 func parsePerm(char uint32) bin.Perm { 256 // Characteristics. 257 const ( 258 // permR specifies that the memory is readable. 259 permR = 0x40000000 260 // permW specifies that the memory is writeable. 261 permW = 0x80000000 262 // permX specifies that the memory is executable. 263 permX = 0x20000000 264 ) 265 var perm bin.Perm 266 if char&permR != 0 { 267 perm |= bin.PermR 268 } 269 if char&permW != 0 { 270 perm |= bin.PermW 271 } 272 if char&permX != 0 { 273 perm |= bin.PermX 274 } 275 return perm 276 } 277 278 // ### [ Helper functions ] #################################################### 279 280 // parseString parses the NULL-terminated string in the given data. 281 func parseString(data []byte) string { 282 pos := bytes.IndexByte(data, '\x00') 283 if pos == -1 { 284 panic(fmt.Errorf("unable to locate NULL-terminated string in % 02X", data)) 285 } 286 return string(data[:pos]) 287 } 288 289 // readUintptr reads a little-endian encoded value of pointer size based on the 290 // CPU architecture, and returns the number of bytes read. 291 func readUintptr(file *bin.File, addr bin.Address) (uint64, int) { 292 bits := file.Arch.BitSize() 293 data := file.Data(addr) 294 switch bits { 295 case 32: 296 if len(data) < 4 { 297 panic(fmt.Errorf("data length too short; expected >= 4 bytes, got %d", len(data))) 298 } 299 return uint64(binary.LittleEndian.Uint32(data)), 4 300 case 64: 301 if len(data) < 8 { 302 panic(fmt.Errorf("data length too short; expected >= 8 bytes, got %d", len(data))) 303 } 304 return binary.LittleEndian.Uint64(data), 8 305 default: 306 panic(fmt.Errorf("support for machine architecture with bit size %d not yet implemented", bits)) 307 } 308 }