github.com/saferwall/pe@v1.5.2/symbol.go (about)

     1  // Copyright 2018 Saferwall. All rights reserved.
     2  // Use of this source code is governed by Apache v2 license
     3  // license that can be found in the LICENSE file.
     4  
     5  package pe
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"errors"
    11  	"strings"
    12  )
    13  
    14  const (
    15  
    16  	// MaxDefaultSymbolsCount represents the default maximum number of COFF
    17  	// symbols to parse. Some malware uses a fake huge NumberOfSymbols that
    18  	// can cause an OOM exception.
    19  	// Example: 0000e876c5b712b6b7b3ce97f757ddd918fb3dbdc5a3938e850716fbd841309f
    20  	MaxDefaultCOFFSymbolsCount = 0x10000
    21  
    22  	// MaxCOFFSymStrLength represents the maximum string length of a COFF symbol
    23  	// to read.
    24  	MaxCOFFSymStrLength = 0x50
    25  
    26  	//
    27  	// Type Representation
    28  	//
    29  
    30  	// ImageSymTypeNull indicates no type information or unknown base type.
    31  	// Microsoft tools use this setting.
    32  	ImageSymTypeNull = 0
    33  
    34  	// ImageSymTypeVoid indicates no type no valid type; used with void pointers and functions.
    35  	ImageSymTypeVoid = 1
    36  
    37  	// ImageSymTypeChar indicates a character (signed byte).
    38  	ImageSymTypeChar = 2
    39  
    40  	// ImageSymTypeShort indicates a 2-byte signed integer.
    41  	ImageSymTypeShort = 3
    42  
    43  	// ImageSymTypeInt indicates a natural integer type (normally 4 bytes in
    44  	// Windows).
    45  	ImageSymTypeInt = 4
    46  
    47  	// ImageSymTypeLong indicates a 4-byte signed integer.
    48  	ImageSymTypeLong = 5
    49  
    50  	// ImageSymTypeFloat indicates a 4-byte floating-point number.
    51  	ImageSymTypeFloat = 6
    52  
    53  	// ImageSymTypeDouble indicates an 8-byte floating-point number.
    54  	ImageSymTypeDouble = 7
    55  
    56  	// ImageSymTypeStruct indicates a structure.
    57  	ImageSymTypeStruct = 8
    58  
    59  	// ImageSymTypeUnion indicates a union.
    60  	ImageSymTypeUnion = 9
    61  
    62  	// ImageSymTypeEnum indicates an enumerated type.
    63  	ImageSymTypeEnum = 10
    64  
    65  	// ImageSymTypeMoe A member of enumeration (a specific value).
    66  	ImageSymTypeMoe = 11
    67  
    68  	// ImageSymTypeByte indicates a byte; unsigned 1-byte integer.
    69  	ImageSymTypeByte = 12
    70  
    71  	// ImageSymTypeWord indicates a word; unsigned 2-byte integer.
    72  	ImageSymTypeWord = 13
    73  
    74  	// ImageSymTypeUint indicates an unsigned integer of natural size
    75  	// (normally, 4 bytes).
    76  	ImageSymTypeUint = 14
    77  
    78  	// ImageSymTypeDword indicates an unsigned 4-byte integer.
    79  	ImageSymTypeDword = 15
    80  
    81  	//
    82  	// Storage Class
    83  	//
    84  
    85  	// ImageSymClassEndOfFunction indicates a special symbol that represents
    86  	// the end of function, for debugging purposes.
    87  	ImageSymClassEndOfFunction = 0xff
    88  
    89  	// ImageSymClassNull indicates no assigned storage class.
    90  	ImageSymClassNull = 0
    91  
    92  	// ImageSymClassAutomatic indicates automatic (stack) variable. The Value
    93  	// field specifies the stack frame offset.
    94  	ImageSymClassAutomatic = 1
    95  
    96  	// ImageSymClassExternal indicates a value that Microsoft tools use for
    97  	// external symbols. The Value field indicates the size if the section
    98  	// number is IMAGE_SYM_UNDEFINED (0). If the section number is not zero,
    99  	// then the Value field specifies the offset within the section.
   100  	ImageSymClassExternal = 2
   101  
   102  	// ImageSymClassStatic indicates the offset of the symbol within the
   103  	// section. If the Value field is zero, then the symbol represents a
   104  	// section name.
   105  	ImageSymClassStatic = 3
   106  
   107  	// ImageSymClassRegister indicates a register variable. The Value field
   108  	// specifies the register number.
   109  	ImageSymClassRegister = 4
   110  
   111  	// ImageSymClassExternalDef indicates a symbol that is defined externally.
   112  	ImageSymClassExternalDef = 5
   113  
   114  	// ImageSymClassLabel indicates a code label that is defined within the
   115  	// module. The Value field specifies the offset of the symbol within the
   116  	// section.
   117  	ImageSymClassLabel = 6
   118  
   119  	// ImageSymClassUndefinedLabel indicates a reference to a code label that
   120  	// is not defined.
   121  	ImageSymClassUndefinedLabel = 7
   122  
   123  	// ImageSymClassMemberOfStruct indicates the structure member. The Value
   124  	// field specifies the n th member.
   125  	ImageSymClassMemberOfStruct = 8
   126  
   127  	// ImageSymClassArgument indicates a formal argument (parameter) of a
   128  	// function. The Value field specifies the n th argument.
   129  	ImageSymClassArgument = 9
   130  
   131  	// ImageSymClassStructTag indicates the structure tag-name entry.
   132  	ImageSymClassStructTag = 10
   133  
   134  	// ImageSymClassMemberOfUnion indicates a union member. The Value field
   135  	// specifies the n th member.
   136  	ImageSymClassMemberOfUnion = 11
   137  
   138  	// ImageSymClassUnionTag indicates the structure tag-name entry.
   139  	ImageSymClassUnionTag = 12
   140  
   141  	// ImageSymClassTypeDefinition indicates a typedef entry.
   142  	ImageSymClassTypeDefinition = 13
   143  
   144  	// ImageSymClassUndefinedStatic indicates a static data declaration.
   145  	ImageSymClassUndefinedStatic = 14
   146  
   147  	// ImageSymClassEnumTag indicates an enumerated type tagname entry.
   148  	ImageSymClassEnumTag = 15
   149  
   150  	// ImageSymClassMemberOfEnum indicates a member of an enumeration. The
   151  	// Value field specifies the n th member.
   152  	ImageSymClassMemberOfEnum = 16
   153  
   154  	// ImageSymClassRegisterParam indicates a register parameter.
   155  	ImageSymClassRegisterParam = 17
   156  
   157  	// ImageSymClassBitField indicates a bit-field reference. The Value field
   158  	// specifies the n th bit in the bit field.
   159  	ImageSymClassBitField = 18
   160  
   161  	// ImageSymClassBlock indicates a .bb (beginning of block) or .eb (end of
   162  	// block) record. The Value field is the relocatable address of the code
   163  	// location.
   164  	ImageSymClassBlock = 100
   165  
   166  	// ImageSymClassFunction indicates a value that Microsoft tools use for
   167  	// symbol records that define the extent of a function: begin function (.bf
   168  	// ), end function ( .ef ), and lines in function ( .lf ). For .lf
   169  	// records, the Value field gives the number of source lines in the
   170  	// function. For .ef records, the Value field gives the size of the
   171  	// function code.
   172  	ImageSymClassFunction = 101
   173  
   174  	// ImageSymClassEndOfStruct indicates an end-of-structure entry.
   175  	ImageSymClassEndOfStruct = 102
   176  
   177  	// ImageSymClassFile indicates a value that Microsoft tools, as well as
   178  	// traditional COFF format, use for the source-file symbol record. The
   179  	// symbol is followed by auxiliary records that name the file.
   180  	ImageSymClassFile = 103
   181  
   182  	// ImageSymClassSsection indicates a definition of a section (Microsoft
   183  	// tools use STATIC storage class instead).
   184  	ImageSymClassSsection = 104
   185  
   186  	// ImageSymClassWeakExternal indicates a weak external. For more
   187  	// information, see Auxiliary Format 3: Weak Externals.
   188  	ImageSymClassWeakExternal = 24
   189  
   190  	// ImageSymClassClrToken indicates a CLR token symbol. The name is an ASCII
   191  	// string that consists of the hexadecimal value of the token. For more
   192  	// information, see CLR Token Definition (Object Only).
   193  	ImageSymClassClrToken = 25
   194  
   195  	//
   196  	// Section Number Values.
   197  	//
   198  
   199  	// ImageSymUndefined indicates that the symbol record is not yet assigned a
   200  	// section. A value of zero indicates that a reference to an external
   201  	// symbol is defined elsewhere. A value of non-zero is a common symbol with
   202  	// a size that is specified by the value.
   203  	ImageSymUndefined = 0
   204  
   205  	// ImageSymAbsolute indicates that the symbol has an absolute
   206  	// (non-relocatable) value and is not an address.
   207  	ImageSymAbsolute = -1
   208  
   209  	// ImageSymDebug indicates that the symbol provides general type or
   210  	// debugging information but does not correspond to a section. Microsoft
   211  	// tools use this setting along with .file records (storage class FILE).
   212  	ImageSymDebug = -2
   213  )
   214  
   215  var (
   216  	errCOFFTableNotPresent = errors.New(
   217  		"PE image does not contains a COFF symbol table")
   218  	errNoCOFFStringInTable = errors.New(
   219  		"PE image got a PointerToSymbolTable but no string in the COFF string table")
   220  	errCOFFSymbolOutOfBounds = errors.New(
   221  		"COFF symbol offset out of bounds")
   222  	errCOFFSymbolsTooHigh = errors.New(
   223  		"COFF symbols count is absurdly high")
   224  )
   225  
   226  // COFFSymbol represents an entry in the COFF symbol table, which it is an
   227  // array of records, each 18 bytes long. Each record is either a standard or
   228  // auxiliary symbol-table record. A standard record defines a symbol or name
   229  // and has the following format.
   230  type COFFSymbol struct {
   231  	// The name of the symbol, represented by a union of three structures. An
   232  	// array of 8 bytes is used if the name is not more than 8 bytes long.
   233  	// union {
   234  	//    BYTE     ShortName[8];
   235  	//    struct {
   236  	//        DWORD   Short;     // if 0, use LongName
   237  	//        DWORD   Long;      // offset into string table
   238  	//    } Name;
   239  	//    DWORD   LongName[2];    // PBYTE  [2]
   240  	// } N;
   241  	Name [8]byte `json:"name"`
   242  
   243  	// The value that is associated with the symbol. The interpretation of this
   244  	// field depends on SectionNumber and StorageClass. A typical meaning is
   245  	// the relocatable address.
   246  	Value uint32 `json:"value"`
   247  
   248  	// The signed integer that identifies the section, using a one-based index
   249  	// into the section table. Some values have special meaning.
   250  	// See "Section Number Values."
   251  	SectionNumber int16 `json:"section_number"`
   252  
   253  	// A number that represents type. Microsoft tools set this field to
   254  	// 0x20 (function) or 0x0 (not a function). For more information,
   255  	// see Type Representation.
   256  	Type uint16 `json:"type"`
   257  
   258  	// An enumerated value that represents storage class.
   259  	// For more information, see Storage Class.
   260  	StorageClass uint8 `json:"storage_class"`
   261  
   262  	// The number of auxiliary symbol table entries that follow this record.
   263  	NumberOfAuxSymbols uint8 `json:"number_of_aux_symbols"`
   264  }
   265  
   266  // COFF holds properties related to the COFF format.
   267  type COFF struct {
   268  	SymbolTable       []COFFSymbol `json:"symbol_table"`
   269  	StringTable       []string     `json:"string_table"`
   270  	StringTableOffset uint32       `json:"string_table_offset"`
   271  	// Map the symbol offset => symbol name.
   272  	StringTableM map[uint32]string `json:"-"`
   273  }
   274  
   275  // ParseCOFFSymbolTable parses the COFF symbol table. The symbol table is
   276  // inherited from the traditional COFF format. It is distinct from Microsoft
   277  // Visual C++ debug information. A file can contain both a COFF symbol table
   278  // and Visual C++ debug information, and the two are kept separate. Some
   279  // Microsoft tools use the symbol table for limited but important purposes,
   280  // such as communicating COMDAT information to the linker. Section names and
   281  // file names, as well as code and data symbols, are listed in the symbol table.
   282  func (pe *File) ParseCOFFSymbolTable() error {
   283  	pointerToSymbolTable := pe.NtHeader.FileHeader.PointerToSymbolTable
   284  	if pointerToSymbolTable == 0 {
   285  		return errCOFFTableNotPresent
   286  	}
   287  
   288  	symCount := pe.NtHeader.FileHeader.NumberOfSymbols
   289  	if symCount == 0 {
   290  		return nil
   291  	}
   292  	if symCount > pe.opts.MaxCOFFSymbolsCount {
   293  		pe.addAnomaly(AnoCOFFSymbolsCount)
   294  		return errCOFFSymbolsTooHigh
   295  	}
   296  
   297  	// The location of the symbol table is indicated in the COFF header.
   298  	offset := pe.NtHeader.FileHeader.PointerToSymbolTable
   299  
   300  	// The symbol table is an array of records, each 18 bytes long.
   301  	size := uint32(binary.Size(COFFSymbol{}))
   302  	symbols := make([]COFFSymbol, symCount)
   303  
   304  	// Each record is either a standard or auxiliary symbol-table record.
   305  	// A standard record defines a symbol or name and has the COFFSymbol STRUCT format.
   306  	for i := uint32(0); i < symCount; i++ {
   307  		err := pe.structUnpack(&symbols[i], offset, size)
   308  		if err != nil {
   309  			return err
   310  		}
   311  		offset += size
   312  	}
   313  
   314  	pe.COFF.SymbolTable = symbols
   315  
   316  	// Get the COFF string table.
   317  	pe.COFFStringTable()
   318  
   319  	pe.HasCOFF = true
   320  	return nil
   321  }
   322  
   323  // COFFStringTable retrieves the list of strings in the COFF string table if
   324  // any.
   325  func (pe *File) COFFStringTable() error {
   326  	m := make(map[uint32]string)
   327  	pointerToSymbolTable := pe.NtHeader.FileHeader.PointerToSymbolTable
   328  	if pointerToSymbolTable == 0 {
   329  		return errCOFFTableNotPresent
   330  	}
   331  
   332  	symCount := pe.NtHeader.FileHeader.NumberOfSymbols
   333  	if symCount == 0 {
   334  		return nil
   335  	}
   336  	if symCount > pe.opts.MaxCOFFSymbolsCount {
   337  		pe.addAnomaly(AnoCOFFSymbolsCount)
   338  		return errCOFFSymbolsTooHigh
   339  	}
   340  
   341  	// COFF String Table immediately following the COFF symbol table. The
   342  	// position of this table is found by taking the symbol table address in
   343  	// the COFF header and adding the number of symbols multiplied by the size
   344  	// of a symbol.
   345  	size := uint32(binary.Size(COFFSymbol{}))
   346  	offset := pointerToSymbolTable + (size * symCount)
   347  
   348  	// At the beginning of the COFF string table are 4 bytes that contain the
   349  	// total size (in bytes) of the rest of the string table. This size
   350  	// includes the size field itself, so that the value in this location would
   351  	// be 4 if no strings were present.
   352  	pe.COFF.StringTableOffset = offset
   353  	strTableSize, err := pe.ReadUint32(offset)
   354  	if err != nil {
   355  		return err
   356  	}
   357  	if strTableSize <= 4 {
   358  		return errNoCOFFStringInTable
   359  	}
   360  	offset += 4
   361  
   362  	// Following the size are null-terminated strings that are pointed to by
   363  	// symbols in the COFF symbol table. We create a map to map offset to
   364  	// string.
   365  	end := offset + strTableSize - 4
   366  	for offset < end {
   367  		len, str := pe.readASCIIStringAtOffset(offset, MaxCOFFSymStrLength)
   368  		if len == 0 {
   369  			break
   370  		}
   371  		m[offset] = str
   372  		offset += len + 1
   373  		pe.COFF.StringTable = append(pe.COFF.StringTable, str)
   374  	}
   375  
   376  	pe.COFF.StringTableM = m
   377  	return nil
   378  }
   379  
   380  // String returns the representation of the symbol name.
   381  func (symbol *COFFSymbol) String(pe *File) (string, error) {
   382  	var short, long uint32
   383  
   384  	// The ShortName field in a symbol table consists of 8 bytes
   385  	// that contain the name itself, if it is not more than 8
   386  	// bytes long, or the ShortName field gives an offset into
   387  	// the string table.
   388  	highDw := bytes.NewBuffer(symbol.Name[4:])
   389  	lowDw := bytes.NewBuffer(symbol.Name[:4])
   390  	errl := binary.Read(lowDw, binary.LittleEndian, &short)
   391  	errh := binary.Read(highDw, binary.LittleEndian, &long)
   392  	if errl != nil || errh != nil {
   393  		return "", errCOFFSymbolOutOfBounds
   394  	}
   395  
   396  	// To determine whether the name itself or an offset is given,
   397  	// test the first 4 bytes for equality to zero.
   398  	if short != 0 {
   399  		name := strings.Replace(string(symbol.Name[:]), "\x00", "", -1)
   400  		return name, nil
   401  	}
   402  
   403  	// Long name offset to the string table.
   404  	strOff := pe.COFF.StringTableOffset + long
   405  	name := pe.COFF.StringTableM[strOff]
   406  	return name, nil
   407  }
   408  
   409  // SectionNumberName returns the name of the section corresponding to a section
   410  // symbol number if any.
   411  func (symbol *COFFSymbol) SectionNumberName(pe *File) string {
   412  
   413  	// Normally, the Section Value field in a symbol table entry is a one-based
   414  	// index into the section table. However, this field is a signed integer
   415  	// and can take negative values. The following values, less than one, have
   416  	// special meanings.
   417  	if symbol.SectionNumber > 0 && symbol.SectionNumber < int16(len(pe.Sections)) {
   418  		return pe.Sections[symbol.SectionNumber-1].String()
   419  	}
   420  
   421  	switch symbol.SectionNumber {
   422  	case ImageSymUndefined:
   423  		return "Undefined"
   424  	case ImageSymAbsolute:
   425  		return "Absolute"
   426  	case ImageSymDebug:
   427  		return "Debug"
   428  	}
   429  
   430  	return "?"
   431  }
   432  
   433  // PrettyCOFFTypeRepresentation returns the string representation of the `Type`
   434  // field of a COFF table entry.
   435  func (pe *File) PrettyCOFFTypeRepresentation(k uint16) string {
   436  	coffSymTypeMap := map[uint16]string{
   437  		ImageSymTypeNull:   "Null",
   438  		ImageSymTypeVoid:   "Void",
   439  		ImageSymTypeChar:   "Char",
   440  		ImageSymTypeShort:  "Short",
   441  		ImageSymTypeInt:    "Int",
   442  		ImageSymTypeLong:   "Long",
   443  		ImageSymTypeFloat:  "Float",
   444  		ImageSymTypeDouble: "Double",
   445  		ImageSymTypeStruct: "Struct",
   446  		ImageSymTypeUnion:  "Union",
   447  		ImageSymTypeEnum:   "Enum",
   448  		ImageSymTypeMoe:    "Moe",
   449  		ImageSymTypeByte:   "Byte",
   450  		ImageSymTypeWord:   "Word",
   451  		ImageSymTypeUint:   "Uint",
   452  		ImageSymTypeDword:  "Dword",
   453  	}
   454  
   455  	if value, ok := coffSymTypeMap[k]; ok {
   456  		return value
   457  	}
   458  	return ""
   459  }