github.com/saferwall/pe@v1.5.2/symbol.go (about) 1 // Copyright 2018 Saferwall. All rights reserved. 2 // Use of this source code is governed by Apache v2 license 3 // license that can be found in the LICENSE file. 4 5 package pe 6 7 import ( 8 "bytes" 9 "encoding/binary" 10 "errors" 11 "strings" 12 ) 13 14 const ( 15 16 // MaxDefaultSymbolsCount represents the default maximum number of COFF 17 // symbols to parse. Some malware uses a fake huge NumberOfSymbols that 18 // can cause an OOM exception. 19 // Example: 0000e876c5b712b6b7b3ce97f757ddd918fb3dbdc5a3938e850716fbd841309f 20 MaxDefaultCOFFSymbolsCount = 0x10000 21 22 // MaxCOFFSymStrLength represents the maximum string length of a COFF symbol 23 // to read. 24 MaxCOFFSymStrLength = 0x50 25 26 // 27 // Type Representation 28 // 29 30 // ImageSymTypeNull indicates no type information or unknown base type. 31 // Microsoft tools use this setting. 32 ImageSymTypeNull = 0 33 34 // ImageSymTypeVoid indicates no type no valid type; used with void pointers and functions. 35 ImageSymTypeVoid = 1 36 37 // ImageSymTypeChar indicates a character (signed byte). 38 ImageSymTypeChar = 2 39 40 // ImageSymTypeShort indicates a 2-byte signed integer. 41 ImageSymTypeShort = 3 42 43 // ImageSymTypeInt indicates a natural integer type (normally 4 bytes in 44 // Windows). 45 ImageSymTypeInt = 4 46 47 // ImageSymTypeLong indicates a 4-byte signed integer. 48 ImageSymTypeLong = 5 49 50 // ImageSymTypeFloat indicates a 4-byte floating-point number. 51 ImageSymTypeFloat = 6 52 53 // ImageSymTypeDouble indicates an 8-byte floating-point number. 54 ImageSymTypeDouble = 7 55 56 // ImageSymTypeStruct indicates a structure. 57 ImageSymTypeStruct = 8 58 59 // ImageSymTypeUnion indicates a union. 60 ImageSymTypeUnion = 9 61 62 // ImageSymTypeEnum indicates an enumerated type. 63 ImageSymTypeEnum = 10 64 65 // ImageSymTypeMoe A member of enumeration (a specific value). 66 ImageSymTypeMoe = 11 67 68 // ImageSymTypeByte indicates a byte; unsigned 1-byte integer. 69 ImageSymTypeByte = 12 70 71 // ImageSymTypeWord indicates a word; unsigned 2-byte integer. 72 ImageSymTypeWord = 13 73 74 // ImageSymTypeUint indicates an unsigned integer of natural size 75 // (normally, 4 bytes). 76 ImageSymTypeUint = 14 77 78 // ImageSymTypeDword indicates an unsigned 4-byte integer. 79 ImageSymTypeDword = 15 80 81 // 82 // Storage Class 83 // 84 85 // ImageSymClassEndOfFunction indicates a special symbol that represents 86 // the end of function, for debugging purposes. 87 ImageSymClassEndOfFunction = 0xff 88 89 // ImageSymClassNull indicates no assigned storage class. 90 ImageSymClassNull = 0 91 92 // ImageSymClassAutomatic indicates automatic (stack) variable. The Value 93 // field specifies the stack frame offset. 94 ImageSymClassAutomatic = 1 95 96 // ImageSymClassExternal indicates a value that Microsoft tools use for 97 // external symbols. The Value field indicates the size if the section 98 // number is IMAGE_SYM_UNDEFINED (0). If the section number is not zero, 99 // then the Value field specifies the offset within the section. 100 ImageSymClassExternal = 2 101 102 // ImageSymClassStatic indicates the offset of the symbol within the 103 // section. If the Value field is zero, then the symbol represents a 104 // section name. 105 ImageSymClassStatic = 3 106 107 // ImageSymClassRegister indicates a register variable. The Value field 108 // specifies the register number. 109 ImageSymClassRegister = 4 110 111 // ImageSymClassExternalDef indicates a symbol that is defined externally. 112 ImageSymClassExternalDef = 5 113 114 // ImageSymClassLabel indicates a code label that is defined within the 115 // module. The Value field specifies the offset of the symbol within the 116 // section. 117 ImageSymClassLabel = 6 118 119 // ImageSymClassUndefinedLabel indicates a reference to a code label that 120 // is not defined. 121 ImageSymClassUndefinedLabel = 7 122 123 // ImageSymClassMemberOfStruct indicates the structure member. The Value 124 // field specifies the n th member. 125 ImageSymClassMemberOfStruct = 8 126 127 // ImageSymClassArgument indicates a formal argument (parameter) of a 128 // function. The Value field specifies the n th argument. 129 ImageSymClassArgument = 9 130 131 // ImageSymClassStructTag indicates the structure tag-name entry. 132 ImageSymClassStructTag = 10 133 134 // ImageSymClassMemberOfUnion indicates a union member. The Value field 135 // specifies the n th member. 136 ImageSymClassMemberOfUnion = 11 137 138 // ImageSymClassUnionTag indicates the structure tag-name entry. 139 ImageSymClassUnionTag = 12 140 141 // ImageSymClassTypeDefinition indicates a typedef entry. 142 ImageSymClassTypeDefinition = 13 143 144 // ImageSymClassUndefinedStatic indicates a static data declaration. 145 ImageSymClassUndefinedStatic = 14 146 147 // ImageSymClassEnumTag indicates an enumerated type tagname entry. 148 ImageSymClassEnumTag = 15 149 150 // ImageSymClassMemberOfEnum indicates a member of an enumeration. The 151 // Value field specifies the n th member. 152 ImageSymClassMemberOfEnum = 16 153 154 // ImageSymClassRegisterParam indicates a register parameter. 155 ImageSymClassRegisterParam = 17 156 157 // ImageSymClassBitField indicates a bit-field reference. The Value field 158 // specifies the n th bit in the bit field. 159 ImageSymClassBitField = 18 160 161 // ImageSymClassBlock indicates a .bb (beginning of block) or .eb (end of 162 // block) record. The Value field is the relocatable address of the code 163 // location. 164 ImageSymClassBlock = 100 165 166 // ImageSymClassFunction indicates a value that Microsoft tools use for 167 // symbol records that define the extent of a function: begin function (.bf 168 // ), end function ( .ef ), and lines in function ( .lf ). For .lf 169 // records, the Value field gives the number of source lines in the 170 // function. For .ef records, the Value field gives the size of the 171 // function code. 172 ImageSymClassFunction = 101 173 174 // ImageSymClassEndOfStruct indicates an end-of-structure entry. 175 ImageSymClassEndOfStruct = 102 176 177 // ImageSymClassFile indicates a value that Microsoft tools, as well as 178 // traditional COFF format, use for the source-file symbol record. The 179 // symbol is followed by auxiliary records that name the file. 180 ImageSymClassFile = 103 181 182 // ImageSymClassSsection indicates a definition of a section (Microsoft 183 // tools use STATIC storage class instead). 184 ImageSymClassSsection = 104 185 186 // ImageSymClassWeakExternal indicates a weak external. For more 187 // information, see Auxiliary Format 3: Weak Externals. 188 ImageSymClassWeakExternal = 24 189 190 // ImageSymClassClrToken indicates a CLR token symbol. The name is an ASCII 191 // string that consists of the hexadecimal value of the token. For more 192 // information, see CLR Token Definition (Object Only). 193 ImageSymClassClrToken = 25 194 195 // 196 // Section Number Values. 197 // 198 199 // ImageSymUndefined indicates that the symbol record is not yet assigned a 200 // section. A value of zero indicates that a reference to an external 201 // symbol is defined elsewhere. A value of non-zero is a common symbol with 202 // a size that is specified by the value. 203 ImageSymUndefined = 0 204 205 // ImageSymAbsolute indicates that the symbol has an absolute 206 // (non-relocatable) value and is not an address. 207 ImageSymAbsolute = -1 208 209 // ImageSymDebug indicates that the symbol provides general type or 210 // debugging information but does not correspond to a section. Microsoft 211 // tools use this setting along with .file records (storage class FILE). 212 ImageSymDebug = -2 213 ) 214 215 var ( 216 errCOFFTableNotPresent = errors.New( 217 "PE image does not contains a COFF symbol table") 218 errNoCOFFStringInTable = errors.New( 219 "PE image got a PointerToSymbolTable but no string in the COFF string table") 220 errCOFFSymbolOutOfBounds = errors.New( 221 "COFF symbol offset out of bounds") 222 errCOFFSymbolsTooHigh = errors.New( 223 "COFF symbols count is absurdly high") 224 ) 225 226 // COFFSymbol represents an entry in the COFF symbol table, which it is an 227 // array of records, each 18 bytes long. Each record is either a standard or 228 // auxiliary symbol-table record. A standard record defines a symbol or name 229 // and has the following format. 230 type COFFSymbol struct { 231 // The name of the symbol, represented by a union of three structures. An 232 // array of 8 bytes is used if the name is not more than 8 bytes long. 233 // union { 234 // BYTE ShortName[8]; 235 // struct { 236 // DWORD Short; // if 0, use LongName 237 // DWORD Long; // offset into string table 238 // } Name; 239 // DWORD LongName[2]; // PBYTE [2] 240 // } N; 241 Name [8]byte `json:"name"` 242 243 // The value that is associated with the symbol. The interpretation of this 244 // field depends on SectionNumber and StorageClass. A typical meaning is 245 // the relocatable address. 246 Value uint32 `json:"value"` 247 248 // The signed integer that identifies the section, using a one-based index 249 // into the section table. Some values have special meaning. 250 // See "Section Number Values." 251 SectionNumber int16 `json:"section_number"` 252 253 // A number that represents type. Microsoft tools set this field to 254 // 0x20 (function) or 0x0 (not a function). For more information, 255 // see Type Representation. 256 Type uint16 `json:"type"` 257 258 // An enumerated value that represents storage class. 259 // For more information, see Storage Class. 260 StorageClass uint8 `json:"storage_class"` 261 262 // The number of auxiliary symbol table entries that follow this record. 263 NumberOfAuxSymbols uint8 `json:"number_of_aux_symbols"` 264 } 265 266 // COFF holds properties related to the COFF format. 267 type COFF struct { 268 SymbolTable []COFFSymbol `json:"symbol_table"` 269 StringTable []string `json:"string_table"` 270 StringTableOffset uint32 `json:"string_table_offset"` 271 // Map the symbol offset => symbol name. 272 StringTableM map[uint32]string `json:"-"` 273 } 274 275 // ParseCOFFSymbolTable parses the COFF symbol table. The symbol table is 276 // inherited from the traditional COFF format. It is distinct from Microsoft 277 // Visual C++ debug information. A file can contain both a COFF symbol table 278 // and Visual C++ debug information, and the two are kept separate. Some 279 // Microsoft tools use the symbol table for limited but important purposes, 280 // such as communicating COMDAT information to the linker. Section names and 281 // file names, as well as code and data symbols, are listed in the symbol table. 282 func (pe *File) ParseCOFFSymbolTable() error { 283 pointerToSymbolTable := pe.NtHeader.FileHeader.PointerToSymbolTable 284 if pointerToSymbolTable == 0 { 285 return errCOFFTableNotPresent 286 } 287 288 symCount := pe.NtHeader.FileHeader.NumberOfSymbols 289 if symCount == 0 { 290 return nil 291 } 292 if symCount > pe.opts.MaxCOFFSymbolsCount { 293 pe.addAnomaly(AnoCOFFSymbolsCount) 294 return errCOFFSymbolsTooHigh 295 } 296 297 // The location of the symbol table is indicated in the COFF header. 298 offset := pe.NtHeader.FileHeader.PointerToSymbolTable 299 300 // The symbol table is an array of records, each 18 bytes long. 301 size := uint32(binary.Size(COFFSymbol{})) 302 symbols := make([]COFFSymbol, symCount) 303 304 // Each record is either a standard or auxiliary symbol-table record. 305 // A standard record defines a symbol or name and has the COFFSymbol STRUCT format. 306 for i := uint32(0); i < symCount; i++ { 307 err := pe.structUnpack(&symbols[i], offset, size) 308 if err != nil { 309 return err 310 } 311 offset += size 312 } 313 314 pe.COFF.SymbolTable = symbols 315 316 // Get the COFF string table. 317 pe.COFFStringTable() 318 319 pe.HasCOFF = true 320 return nil 321 } 322 323 // COFFStringTable retrieves the list of strings in the COFF string table if 324 // any. 325 func (pe *File) COFFStringTable() error { 326 m := make(map[uint32]string) 327 pointerToSymbolTable := pe.NtHeader.FileHeader.PointerToSymbolTable 328 if pointerToSymbolTable == 0 { 329 return errCOFFTableNotPresent 330 } 331 332 symCount := pe.NtHeader.FileHeader.NumberOfSymbols 333 if symCount == 0 { 334 return nil 335 } 336 if symCount > pe.opts.MaxCOFFSymbolsCount { 337 pe.addAnomaly(AnoCOFFSymbolsCount) 338 return errCOFFSymbolsTooHigh 339 } 340 341 // COFF String Table immediately following the COFF symbol table. The 342 // position of this table is found by taking the symbol table address in 343 // the COFF header and adding the number of symbols multiplied by the size 344 // of a symbol. 345 size := uint32(binary.Size(COFFSymbol{})) 346 offset := pointerToSymbolTable + (size * symCount) 347 348 // At the beginning of the COFF string table are 4 bytes that contain the 349 // total size (in bytes) of the rest of the string table. This size 350 // includes the size field itself, so that the value in this location would 351 // be 4 if no strings were present. 352 pe.COFF.StringTableOffset = offset 353 strTableSize, err := pe.ReadUint32(offset) 354 if err != nil { 355 return err 356 } 357 if strTableSize <= 4 { 358 return errNoCOFFStringInTable 359 } 360 offset += 4 361 362 // Following the size are null-terminated strings that are pointed to by 363 // symbols in the COFF symbol table. We create a map to map offset to 364 // string. 365 end := offset + strTableSize - 4 366 for offset < end { 367 len, str := pe.readASCIIStringAtOffset(offset, MaxCOFFSymStrLength) 368 if len == 0 { 369 break 370 } 371 m[offset] = str 372 offset += len + 1 373 pe.COFF.StringTable = append(pe.COFF.StringTable, str) 374 } 375 376 pe.COFF.StringTableM = m 377 return nil 378 } 379 380 // String returns the representation of the symbol name. 381 func (symbol *COFFSymbol) String(pe *File) (string, error) { 382 var short, long uint32 383 384 // The ShortName field in a symbol table consists of 8 bytes 385 // that contain the name itself, if it is not more than 8 386 // bytes long, or the ShortName field gives an offset into 387 // the string table. 388 highDw := bytes.NewBuffer(symbol.Name[4:]) 389 lowDw := bytes.NewBuffer(symbol.Name[:4]) 390 errl := binary.Read(lowDw, binary.LittleEndian, &short) 391 errh := binary.Read(highDw, binary.LittleEndian, &long) 392 if errl != nil || errh != nil { 393 return "", errCOFFSymbolOutOfBounds 394 } 395 396 // To determine whether the name itself or an offset is given, 397 // test the first 4 bytes for equality to zero. 398 if short != 0 { 399 name := strings.Replace(string(symbol.Name[:]), "\x00", "", -1) 400 return name, nil 401 } 402 403 // Long name offset to the string table. 404 strOff := pe.COFF.StringTableOffset + long 405 name := pe.COFF.StringTableM[strOff] 406 return name, nil 407 } 408 409 // SectionNumberName returns the name of the section corresponding to a section 410 // symbol number if any. 411 func (symbol *COFFSymbol) SectionNumberName(pe *File) string { 412 413 // Normally, the Section Value field in a symbol table entry is a one-based 414 // index into the section table. However, this field is a signed integer 415 // and can take negative values. The following values, less than one, have 416 // special meanings. 417 if symbol.SectionNumber > 0 && symbol.SectionNumber < int16(len(pe.Sections)) { 418 return pe.Sections[symbol.SectionNumber-1].String() 419 } 420 421 switch symbol.SectionNumber { 422 case ImageSymUndefined: 423 return "Undefined" 424 case ImageSymAbsolute: 425 return "Absolute" 426 case ImageSymDebug: 427 return "Debug" 428 } 429 430 return "?" 431 } 432 433 // PrettyCOFFTypeRepresentation returns the string representation of the `Type` 434 // field of a COFF table entry. 435 func (pe *File) PrettyCOFFTypeRepresentation(k uint16) string { 436 coffSymTypeMap := map[uint16]string{ 437 ImageSymTypeNull: "Null", 438 ImageSymTypeVoid: "Void", 439 ImageSymTypeChar: "Char", 440 ImageSymTypeShort: "Short", 441 ImageSymTypeInt: "Int", 442 ImageSymTypeLong: "Long", 443 ImageSymTypeFloat: "Float", 444 ImageSymTypeDouble: "Double", 445 ImageSymTypeStruct: "Struct", 446 ImageSymTypeUnion: "Union", 447 ImageSymTypeEnum: "Enum", 448 ImageSymTypeMoe: "Moe", 449 ImageSymTypeByte: "Byte", 450 ImageSymTypeWord: "Word", 451 ImageSymTypeUint: "Uint", 452 ImageSymTypeDword: "Dword", 453 } 454 455 if value, ok := coffSymTypeMap[k]; ok { 456 return value 457 } 458 return "" 459 }