github.com/anchore/syft@v1.38.2/syft/pkg/cataloger/internal/pe/pe.go (about)

     1  package pe
     2  
     3  import (
     4  	"bytes"
     5  	"debug/pe"
     6  	"encoding/binary"
     7  	"errors"
     8  	"fmt"
     9  	"io"
    10  	"unicode/utf16"
    11  
    12  	"github.com/scylladb/go-set/strset"
    13  	"github.com/scylladb/go-set/u32set"
    14  
    15  	"github.com/anchore/syft/internal/log"
    16  	"github.com/anchore/syft/syft/file"
    17  	"github.com/anchore/syft/syft/internal/unionreader"
    18  )
    19  
// peMaxAllowedDirectoryEntries is the upper bound on the number of entries (named + ID) accepted in a single
// resource directory. Directories claiming more entries than this are rejected rather than walked.
const peMaxAllowedDirectoryEntries = 0x1000

// imageDirectoryEntryIndexes lists the optional-header data directory slots that are extracted for parsing.
var imageDirectoryEntryIndexes = []int{
	pe.IMAGE_DIRECTORY_ENTRY_RESOURCE,       // where version resources are stored
	pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR, // where info about the CLR is stored
}
    26  
// File does not directly represent a binary shape to be parsed, instead it represents the
// information of interest extracted from a PE file. It is the result type returned by Read.
type File struct {
	// Location is where the PE file was found (copied from the reader handed to Read).
	Location file.Location

	// CLR is the information about the CLR (common language runtime) version found in the PE file which helps
	// understand if this executable is even a .NET application.
	CLR *CLREvidence

	// VersionResources is a map of version resource keys to their values found in the VERSIONINFO resource directory
	// (for example "FileVersion"). The map is empty when no version resource was discovered.
	VersionResources map[string]string
}
    40  
    41  // CLREvidence is basic info about the CLR (common language runtime) version from the COM descriptor.
    42  // This is not a complete representation of the CLR version, but rather a subset of the information that is
    43  // useful to us.
    44  type CLREvidence struct {
    45  	// HasClrResourceNames is true if there are CLR resource names found in the PE file (e.g. "CLRDEBUGINFO").
    46  	HasClrResourceNames bool
    47  
    48  	// MajorVersion is the minimum supported major version of the CLR.
    49  	MajorVersion uint16
    50  
    51  	// MinorVersion is the minimum supported minor version of the CLR.
    52  	MinorVersion uint16
    53  }
    54  
    55  // HasEvidenceOfCLR returns true if the PE file has evidence of a CLR (common language runtime) version.
    56  func (c *CLREvidence) HasEvidenceOfCLR() bool {
    57  	return c != nil && (c.MajorVersion != 0 && c.MinorVersion != 0 || c.HasClrResourceNames)
    58  }
    59  
// peDosHeader is the 64-byte DOS header that parsePEHeader decodes (little endian) before anything else.
// Only the magic and the offset of the PE signature are interpreted; the bytes in between are skipped over.
type peDosHeader struct {
	Magic                 [2]byte // "MZ"
	Unused                [58]byte
	AddressOfNewEXEHeader uint32 // offset to PE header
}
    65  
// peImageCore20 represents the .NET Core 2.0 header structure.
// Only the leading fields are declared here; parseCLR decodes them from the start of the COM descriptor data.
// Source: https://github.com/dotnet/msbuild/blob/9fa9d800dabce3bfcf8365f651f3a713e01f8a85/src/Tasks/NativeMethods.cs#L761-L775
type peImageCore20 struct {
	Cb                  uint32 // presumably the size of the header in bytes (not used by this package) — confirm against the linked source
	MajorRuntimeVersion uint16 // surfaced as CLREvidence.MajorVersion
	MinorRuntimeVersion uint16 // surfaced as CLREvidence.MinorVersion
}
    73  
// peImageResourceDirectory represents the resource directory structure.
// It is the fixed-size header at the start of each resource directory; the directory entries follow it
// immediately, and their count is NumberOfNamedEntries + NumberOfIDEntries.
type peImageResourceDirectory struct {
	Characteristics      uint32
	TimeDateStamp        uint32
	MajorVersion         uint16
	MinorVersion         uint16
	NumberOfNamedEntries uint16 // number of entries identified by a string name
	NumberOfIDEntries    uint16 // number of entries identified by a numeric ID
}
    83  
// peImageResourceDirectoryEntry represents an entry in the resource directory entries.
// Both fields carry a flag in their high bit and a 31-bit value in the remaining bits (see processResourceEntry).
type peImageResourceDirectoryEntry struct {
	Name         uint32 // high bit set: low 31 bits are the offset (within the resource data) of a length-prefixed UTF-16 name
	OffsetToData uint32 // high bit set: low 31 bits locate a subdirectory; otherwise they locate a data entry
}
    89  
// peImageResourceDataEntry is the unit of raw data in the Resource Data area.
// It describes where a resource's payload lives and how large it is.
type peImageResourceDataEntry struct {
	OffsetToData uint32 // treated as an RVA: the resource base RVA is subtracted to find the payload (see parseResourceDataEntry)
	Size         uint32 // payload size in bytes
	CodePage     uint32
	Reserved     uint32
}
    97  
// peVsFixedFileInfo represents the fixed file information structure.
// It is decoded right after the (DWORD aligned) VS_VERSION_INFO key. Of its fields only FileVersionMS and
// FileVersionLS are consumed by this package, as a fallback source for the "FileVersion" value.
type peVsFixedFileInfo struct {
	Signature        uint32
	StructVersion    uint32
	FileVersionMS    uint32 // high 16 bits: first version component, low 16 bits: second component
	FileVersionLS    uint32 // high 16 bits: third version component, low 16 bits: fourth component
	ProductVersionMS uint32
	ProductVersionLS uint32
	FileFlagsMask    uint32
	FileFlags        uint32
	FileOS           uint32
	FileType         uint32
	FileSubtype      uint32
	FileDateMS       uint32
	FileDateLS       uint32
}
   114  
// peVsVersionInfo is the header preceding the "VS_VERSION_INFO" key of a version resource.
type peVsVersionInfo peLenValLenType

// peStringFileInfo is the header preceding a child block of the version resource (e.g. "StringFileInfo").
type peStringFileInfo peLenValLenType

// peStringTable is the header of a string table; its key is the language identifier.
type peStringTable peLenValLenType

// peString is the header of a single key/value string inside a string table.
type peString peLenValLenType

// peLenValLenType is the common 6-byte header shared by all version resource blocks. It is always followed
// by a null-terminated UTF-16 key.
type peLenValLenType struct {
	Length      uint16 // wLength in the version resource layout
	ValueLength uint16 // wValueLength in the version resource layout
	Type        uint16 // wType in the version resource layout
}
   128  
   129  type extractedSection struct {
   130  	RVA     uint32
   131  	BaseRVA uint32
   132  	Size    uint32
   133  	Reader  *bytes.Reader
   134  }
   135  
   136  func (s extractedSection) exists() bool {
   137  	return s.RVA != 0 && s.Size != 0
   138  }
   139  
   140  func directoryName(i int) string {
   141  	switch i {
   142  	case pe.IMAGE_DIRECTORY_ENTRY_RESOURCE:
   143  		return "Resource"
   144  	case pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR:
   145  		return "COM Descriptor"
   146  	}
   147  	return fmt.Sprintf("Unknown (%d)", i)
   148  }
   149  
   150  func Read(f file.LocationReadCloser) (*File, error) {
   151  	r, err := unionreader.GetUnionReader(f)
   152  	if err != nil {
   153  		return nil, err
   154  	}
   155  
   156  	sections, _, err := parsePEFile(r)
   157  	if err != nil {
   158  		return nil, fmt.Errorf("unable to parse PE sections: %w", err)
   159  	}
   160  
   161  	dirs := u32set.New()                        // keep track of the RVAs we have already parsed (prevent infinite recursion edge cases)
   162  	versionResources := make(map[string]string) // map of version resource keys to their values
   163  	resourceNames := strset.New()               // set of resource names found in the PE file
   164  	err = parseResourceDirectory(sections[pe.IMAGE_DIRECTORY_ENTRY_RESOURCE], dirs, versionResources, resourceNames)
   165  	if err != nil {
   166  		return nil, err
   167  	}
   168  
   169  	c, err := parseCLR(sections[pe.IMAGE_DIRECTORY_ENTRY_COM_DESCRIPTOR], resourceNames)
   170  	if err != nil {
   171  		return nil, fmt.Errorf("unable to parse PE CLR directory: %w", err)
   172  	}
   173  
   174  	return &File{
   175  		Location:         f.Location,
   176  		CLR:              c,
   177  		VersionResources: versionResources,
   178  	}, nil
   179  }
   180  
   181  // parsePEFile creates readers for targeted sections of the binary used by downstream processing.
   182  func parsePEFile(file unionreader.UnionReader) (map[int]*extractedSection, []pe.SectionHeader32, error) {
   183  	fileHeader, magic, err := parsePEHeader(file)
   184  	if err != nil {
   185  		return nil, nil, fmt.Errorf("error parsing PE header: %w", err)
   186  	}
   187  
   188  	soi, headers, err := parseSectionHeaders(file, magic, fileHeader.NumberOfSections)
   189  	if err != nil {
   190  		return nil, nil, fmt.Errorf("error parsing section headers: %w", err)
   191  	}
   192  
   193  	for i, sec := range soi {
   194  		if !sec.exists() {
   195  			continue
   196  		}
   197  		data, err := readDataFromRVA(file, sec.RVA, sec.Size, headers)
   198  		if err != nil {
   199  			return nil, nil, fmt.Errorf("error reading %q section data: %w", directoryName(i), err)
   200  		}
   201  		sec.Reader = data
   202  	}
   203  
   204  	return soi, headers, nil
   205  }
   206  
   207  // parsePEHeader reads the beginning of a PE formatted file, returning the file header and "magic" indicator
   208  // for downstream logic to determine 32/64 bit parsing.
   209  func parsePEHeader(file unionreader.UnionReader) (*pe.FileHeader, uint16, error) {
   210  	var dosHeader peDosHeader
   211  	if err := binary.Read(file, binary.LittleEndian, &dosHeader); err != nil {
   212  		return nil, 0, fmt.Errorf("error reading DOS header: %w", err)
   213  	}
   214  	if string(dosHeader.Magic[:]) != "MZ" {
   215  		return nil, 0, fmt.Errorf("invalid DOS header magic")
   216  	}
   217  
   218  	peOffset := int64(dosHeader.AddressOfNewEXEHeader)
   219  	if _, err := file.Seek(peOffset, io.SeekStart); err != nil {
   220  		return nil, 0, fmt.Errorf("error seeking to PE header: %w", err)
   221  	}
   222  
   223  	var signature [4]byte
   224  	if err := binary.Read(file, binary.LittleEndian, &signature); err != nil {
   225  		return nil, 0, fmt.Errorf("error reading PE signature: %w", err)
   226  	}
   227  	if !bytes.Equal(signature[:], []byte("PE\x00\x00")) {
   228  		return nil, 0, fmt.Errorf("invalid PE signature")
   229  	}
   230  
   231  	var fileHeader pe.FileHeader
   232  	if err := binary.Read(file, binary.LittleEndian, &fileHeader); err != nil {
   233  		return nil, 0, fmt.Errorf("error reading file header: %w", err)
   234  	}
   235  
   236  	var magic uint16
   237  	if err := binary.Read(file, binary.LittleEndian, &magic); err != nil {
   238  		return nil, 0, fmt.Errorf("error reading optional header magic: %w", err)
   239  	}
   240  
   241  	// seek back to before reading magic (since that value is in the header)
   242  	if _, err := file.Seek(-2, io.SeekCurrent); err != nil {
   243  		return nil, 0, fmt.Errorf("error seeking back to before reading magic: %w", err)
   244  	}
   245  
   246  	return &fileHeader, magic, nil
   247  }
   248  
   249  // parseSectionHeaders reads the section headers from the PE file and extracts the virtual addresses + section size
   250  // information for the sections of interest. Additionally, all section headers are returned to aid in downstream processing.
   251  func parseSectionHeaders(file unionreader.UnionReader, magic uint16, numberOfSections uint16) (map[int]*extractedSection, []pe.SectionHeader32, error) {
   252  	soi := make(map[int]*extractedSection)
   253  	switch magic {
   254  	case 0x10B: // PE32
   255  		var optHeader pe.OptionalHeader32
   256  		if err := binary.Read(file, binary.LittleEndian, &optHeader); err != nil {
   257  			return nil, nil, fmt.Errorf("error reading optional header (PE32): %w", err)
   258  		}
   259  
   260  		for _, i := range imageDirectoryEntryIndexes {
   261  			sectionHeader := optHeader.DataDirectory[i]
   262  			if sectionHeader.Size == 0 {
   263  				continue
   264  			}
   265  			soi[i] = &extractedSection{
   266  				RVA:  sectionHeader.VirtualAddress,
   267  				Size: sectionHeader.Size,
   268  			}
   269  		}
   270  	case 0x20B: // PE32+ (64 bit)
   271  		var optHeader pe.OptionalHeader64
   272  		if err := binary.Read(file, binary.LittleEndian, &optHeader); err != nil {
   273  			return nil, nil, fmt.Errorf("error reading optional header (PE32+): %w", err)
   274  		}
   275  
   276  		for _, i := range imageDirectoryEntryIndexes {
   277  			sectionHeader := optHeader.DataDirectory[i]
   278  			if sectionHeader.Size == 0 {
   279  				continue
   280  			}
   281  			soi[i] = &extractedSection{
   282  				RVA:  sectionHeader.VirtualAddress,
   283  				Size: sectionHeader.Size,
   284  			}
   285  		}
   286  	default:
   287  		return nil, nil, fmt.Errorf("unknown optional header magic: 0x%x", magic)
   288  	}
   289  
   290  	// read section headers
   291  	headers := make([]pe.SectionHeader32, numberOfSections)
   292  	for i := 0; i < int(numberOfSections); i++ {
   293  		if err := binary.Read(file, binary.LittleEndian, &headers[i]); err != nil {
   294  			return nil, nil, fmt.Errorf("error reading section header: %w", err)
   295  		}
   296  	}
   297  
   298  	return soi, headers, nil
   299  }
   300  
   301  // parseCLR extracts the CLR (common language runtime) version information from the COM descriptor and makes
   302  // present/not-present determination based on the presence of CLR resource names.
   303  func parseCLR(sec *extractedSection, resourceNames *strset.Set) (*CLREvidence, error) {
   304  	hasCLRDebugResourceNames := resourceNames.HasAny("CLRDEBUGINFO")
   305  	if sec == nil || sec.Reader == nil {
   306  		return &CLREvidence{
   307  			HasClrResourceNames: hasCLRDebugResourceNames,
   308  		}, nil
   309  	}
   310  
   311  	reader := sec.Reader
   312  	var c peImageCore20
   313  	if err := binary.Read(reader, binary.LittleEndian, &c); err != nil {
   314  		return nil, fmt.Errorf("error reading CLR header: %w", err)
   315  	}
   316  
   317  	return &CLREvidence{
   318  		HasClrResourceNames: hasCLRDebugResourceNames,
   319  		MajorVersion:        c.MajorRuntimeVersion,
   320  		MinorVersion:        c.MinorRuntimeVersion,
   321  	}, nil
   322  }
   323  
   324  // rvaToFileOffset is a helper function to convert RVA to file offset using section headers
   325  func rvaToFileOffset(rva uint32, sections []pe.SectionHeader32) (uint32, error) {
   326  	for _, section := range sections {
   327  		if rva >= section.VirtualAddress && rva < section.VirtualAddress+section.VirtualSize {
   328  			return section.PointerToRawData + (rva - section.VirtualAddress), nil
   329  		}
   330  	}
   331  	return 0, fmt.Errorf("RVA 0x%x not found in any section", rva)
   332  }
   333  
   334  // readDataFromRVA will read data from a specific RVA in the PE file
   335  func readDataFromRVA(file io.ReadSeeker, rva, size uint32, sections []pe.SectionHeader32) (*bytes.Reader, error) {
   336  	if size == 0 {
   337  		return nil, fmt.Errorf("zero size specified")
   338  	}
   339  
   340  	offset, err := rvaToFileOffset(rva, sections)
   341  	if err != nil {
   342  		return nil, err
   343  	}
   344  
   345  	if _, err := file.Seek(int64(offset), io.SeekStart); err != nil {
   346  		return nil, fmt.Errorf("error seeking to data: %w", err)
   347  	}
   348  
   349  	data := make([]byte, size)
   350  	if _, err := io.ReadFull(file, data); err != nil {
   351  		return nil, fmt.Errorf("error reading data: %w", err)
   352  	}
   353  
   354  	return bytes.NewReader(data), nil
   355  }
   356  
   357  // parseResourceDirectory recursively parses a PE resource directory. This takes a relative virtual address (offset of
   358  // a piece of data or code relative to the base address), the size of the resource directory, the set of RVAs already
   359  // parsed, and the map to populate discovered version resource values.
   360  //
   361  // .rsrc Section
   362  // +------------------------------+
   363  // | Resource Directory Table     |
   364  // +------------------------------+
   365  // | Resource Directory Entries   |
   366  // |  +------------------------+  |
   367  // |  | Subdirectory or Data   |  |
   368  // |  +------------------------+  |
   369  // +------------------------------+
   370  // | Resource Data Entries        |
   371  // |  +------------------------+  |
   372  // |  | Resource Data          |  |
   373  // |  +------------------------+  |
   374  // +------------------------------+
   375  // | Actual Resource Data         |
   376  // +------------------------------+
   377  //
   378  // sources:
   379  // - https://learn.microsoft.com/en-us/windows/win32/debug/pe-format#the-rsrc-section
   380  // - https://learn.microsoft.com/en-us/previous-versions/ms809762(v=msdn.10)#pe-file-resources
   381  func parseResourceDirectory(sec *extractedSection, dirs *u32set.Set, fields map[string]string, names *strset.Set) error {
   382  	if sec == nil || sec.Size <= 0 {
   383  		return nil
   384  	}
   385  
   386  	if sec.Reader == nil {
   387  		return errors.New("resource section not found")
   388  	}
   389  
   390  	baseRVA := sec.BaseRVA
   391  	if baseRVA == 0 {
   392  		baseRVA = sec.RVA
   393  	}
   394  
   395  	offset := int64(sec.RVA - baseRVA)
   396  	if _, err := sec.Reader.Seek(offset, io.SeekStart); err != nil {
   397  		return fmt.Errorf("error seeking to directory offset: %w", err)
   398  	}
   399  
   400  	var directoryHeader peImageResourceDirectory
   401  	if err := readIntoStruct(sec.Reader, &directoryHeader); err != nil {
   402  		return fmt.Errorf("error reading directory header: %w", err)
   403  	}
   404  
   405  	numEntries := int(directoryHeader.NumberOfNamedEntries + directoryHeader.NumberOfIDEntries)
   406  	switch {
   407  	case numEntries > peMaxAllowedDirectoryEntries:
   408  		return fmt.Errorf("too many entries in resource directory: %d", numEntries)
   409  	case numEntries == 0:
   410  		return fmt.Errorf("no entries in resource directory")
   411  	case numEntries < 0:
   412  		return fmt.Errorf("invalid number of entries in resource directory: %d", numEntries)
   413  	}
   414  
   415  	for i := 0; i < numEntries; i++ {
   416  		var entry peImageResourceDirectoryEntry
   417  
   418  		entryOffset := offset + int64(binary.Size(directoryHeader)) + int64(i*binary.Size(entry))
   419  		if _, err := sec.Reader.Seek(entryOffset, io.SeekStart); err != nil {
   420  			log.Tracef("error seeking to PE entry offset: %v", err)
   421  			continue
   422  		}
   423  
   424  		if err := readIntoStruct(sec.Reader, &entry); err != nil {
   425  			continue
   426  		}
   427  
   428  		if err := processResourceEntry(entry, baseRVA, sec, dirs, fields, names); err != nil {
   429  			log.Tracef("error processing resource entry: %v", err)
   430  			continue
   431  		}
   432  	}
   433  
   434  	return nil
   435  }
   436  
   437  func processResourceEntry(entry peImageResourceDirectoryEntry, baseRVA uint32, sec *extractedSection, dirs *u32set.Set, fields map[string]string, names *strset.Set) error {
   438  	// if the high bit is set, this is a directory entry, otherwise it is a data entry
   439  	isDirectory := entry.OffsetToData&0x80000000 != 0
   440  
   441  	// note: the offset is relative to the beginning of the resource section, not an RVA
   442  	entryOffsetToData := entry.OffsetToData & 0x7FFFFFFF
   443  
   444  	nameIsString := entry.Name&0x80000000 != 0
   445  	nameOffset := entry.Name & 0x7FFFFFFF
   446  
   447  	// read the string name of the resource directory
   448  	if nameIsString {
   449  		currentPos, err := sec.Reader.Seek(0, io.SeekCurrent)
   450  		if err != nil {
   451  			return fmt.Errorf("error getting current reader position: %w", err)
   452  		}
   453  
   454  		if _, err := sec.Reader.Seek(int64(nameOffset), io.SeekStart); err != nil {
   455  			return fmt.Errorf("error restoring reader position: %w", err)
   456  		}
   457  
   458  		name, err := readUTF16WithLength(sec.Reader)
   459  		if err == nil {
   460  			names.Add(name)
   461  		}
   462  
   463  		if _, err := sec.Reader.Seek(currentPos, io.SeekStart); err != nil {
   464  			return fmt.Errorf("error restoring reader position: %w", err)
   465  		}
   466  	}
   467  
   468  	if isDirectory {
   469  		subRVA := baseRVA + entryOffsetToData
   470  		if dirs.Has(subRVA) {
   471  			// some malware uses recursive PE references to evade analysis
   472  			return fmt.Errorf("recursive PE reference detected; skipping directory at baseRVA=0x%x subRVA=0x%x", baseRVA, subRVA)
   473  		}
   474  
   475  		dirs.Add(subRVA)
   476  		err := parseResourceDirectory(
   477  			&extractedSection{
   478  				RVA:     subRVA,
   479  				BaseRVA: baseRVA,
   480  				Size:    sec.Size - (sec.RVA - baseRVA),
   481  				Reader:  sec.Reader,
   482  			},
   483  			dirs, fields, names)
   484  		if err != nil {
   485  			return err
   486  		}
   487  		return nil
   488  	}
   489  	return parseResourceDataEntry(sec.Reader, baseRVA, baseRVA+entryOffsetToData, sec.Size, fields)
   490  }
   491  
   492  func parseResourceDataEntry(reader *bytes.Reader, baseRVA, rva, remainingSize uint32, fields map[string]string) error {
   493  	var dataEntry peImageResourceDataEntry
   494  	offset := int64(rva - baseRVA)
   495  
   496  	if _, err := reader.Seek(offset, io.SeekStart); err != nil {
   497  		return fmt.Errorf("error seeking to data entry offset: %w", err)
   498  	}
   499  
   500  	if err := readIntoStruct(reader, &dataEntry); err != nil {
   501  		return fmt.Errorf("error reading resource data entry: %w", err)
   502  	}
   503  
   504  	if remainingSize < dataEntry.Size {
   505  		return fmt.Errorf("resource data entry size exceeds remaining size")
   506  	}
   507  
   508  	data := make([]byte, dataEntry.Size)
   509  	if _, err := reader.Seek(int64(dataEntry.OffsetToData-baseRVA), io.SeekStart); err != nil {
   510  		return fmt.Errorf("error seeking to resource data: %w", err)
   511  	}
   512  
   513  	if _, err := reader.Read(data); err != nil {
   514  		return fmt.Errorf("error reading resource data: %w", err)
   515  	}
   516  
   517  	return parseVersionResourceSection(bytes.NewReader(data), fields)
   518  }
   519  
   520  // parseVersionResourceSection parses a PE version resource section from within a resource directory.
   521  //
   522  //	"The main structure in a version resource is the VS_FIXEDFILEINFO structure. Additional structures include the
   523  //	VarFileInfo structure to store language information data, and StringFileInfo for user-defined string information.
   524  //	All strings in a version resource are in Unicode format. Each block of information is aligned on a DWORD boundary."
   525  //
   526  //	"VS_VERSIONINFO" (utf16)
   527  //	+---------------------------------------------------+
   528  //	| wLength (2 bytes)                                 |
   529  //	| wValueLength (2 bytes)                            |
   530  //	| wType (2 bytes)                                   |
   531  //	| szKey ("VS_VERSION_INFO") (utf16)                 |
   532  //	| Padding (to DWORD)                                |
   533  //	+---------------------------------------------------+
   534  //	| VS_FIXEDFILEINFO (52 bytes)                       |
   535  //	+---------------------------------------------------+
   536  //	| "StringFileInfo" (utf16)                          |
   537  //	+---------------------------------------------------+
   538  //	| wLength (2 bytes)                                 |
   539  //	| wValueLength (2 bytes)                            |
   540  //	| wType (2 bytes)                                   |
   541  //	| szKey ("StringFileInfo") (utf16)                  |
   542  //	| Padding (to DWORD)                                |
   543  //	| StringTable                                       |
   544  //	|   +--------------------------------------------+  |
   545  //	|   | wLength (2 bytes)                          |  |
   546  //	|   | wValueLength (2 bytes)                     |  |
   547  //	|   | wType (2 bytes)                            |  |
   548  //	|   | szKey ("040904b0")                         |  |
   549  //	|   | Padding (to DWORD)                         |  |
   550  //	|   | String                                     |  |
   551  //	|   | +--------------------------------------+   |  |
   552  //	|   | | wLength (2 bytes)                    |   |  |
   553  //	|   | | wValueLength (2 bytes)               |   |  |
   554  //	|   | | wType (2 bytes)                      |   |  |
   555  //	|   | | szKey ("FileVersion")                |   |  |
   556  //	|   | | Padding (to DWORD)                   |   |  |
   557  //	|   | | szValue ("15.00.0913.015")           |   |  |
   558  //	|   | | Padding (to DWORD)                   |   |  |
   559  //	|   +--------------------------------------------+  |
   560  //	+---------------------------------------------------+
   561  //	| VarFileInfo  (utf16)                              |
   562  //	+---------------------------------------------------+
   563  //	| (skip!)                                           |
   564  //	+---------------------------------------------------+
   565  //
   566  // sources:
   567  //   - https://learn.microsoft.com/en-us/windows/win32/menurc/resource-file-formats
   568  //   - https://learn.microsoft.com/en-us/windows/win32/menurc/vs-versioninfo
   569  //   - https://learn.microsoft.com/en-us/windows/win32/api/verrsrc/ns-verrsrc-vs_fixedfileinfo
   570  //   - https://learn.microsoft.com/en-us/windows/win32/menurc/varfileinfo
   571  //   - https://learn.microsoft.com/en-us/windows/win32/menurc/stringfileinfo
   572  //   - https://learn.microsoft.com/en-us/windows/win32/menurc/stringtable
   573  func parseVersionResourceSection(reader *bytes.Reader, fields map[string]string) error {
   574  	offset := 0
   575  
   576  	var info peVsVersionInfo
   577  	if szKey, err := readIntoStructAndSzKey(reader, &info, &offset); err != nil {
   578  		return fmt.Errorf("error reading PE version info: %v", err)
   579  	} else if szKey != "VS_VERSION_INFO" {
   580  		// this is a resource section, but not the version resources
   581  		return nil
   582  	}
   583  
   584  	if err := alignAndSeek(reader, &offset); err != nil {
   585  		return fmt.Errorf("error aligning past PE version info: %w", err)
   586  	}
   587  
   588  	var fixedFileInfo peVsFixedFileInfo
   589  	if err := readIntoStruct(reader, &fixedFileInfo, &offset); err != nil {
   590  		return fmt.Errorf("error reading PE FixedFileInfo: %v", err)
   591  	}
   592  
   593  	for reader.Len() > 0 {
   594  		if err := alignAndSeek(reader, &offset); err != nil {
   595  			return fmt.Errorf("error seeking to PE StringFileInfo: %w", err)
   596  		}
   597  
   598  		var sfiHeader peStringFileInfo
   599  		if szKey, err := readIntoStructAndSzKey(reader, &sfiHeader, &offset); err != nil {
   600  			return fmt.Errorf("error reading PE string file info header: %v", err)
   601  		} else if szKey != "StringFileInfo" {
   602  			// we only care about extracting strings from any string tables, skip this
   603  			offset += int(sfiHeader.ValueLength)
   604  			continue
   605  		}
   606  
   607  		var stOffset int
   608  
   609  		// note: the szKey for the prStringTable is the language
   610  		var stHeader peStringTable
   611  		if _, err := readIntoStructAndSzKey(reader, &stHeader, &offset, &stOffset); err != nil {
   612  			return fmt.Errorf("error reading PE string table header: %v", err)
   613  		}
   614  
   615  		for stOffset < int(stHeader.Length) {
   616  			var stringHeader peString
   617  			if err := readIntoStruct(reader, &stringHeader, &offset, &stOffset); err != nil {
   618  				break
   619  			}
   620  
   621  			key := readUTF16(reader, &offset, &stOffset)
   622  
   623  			if err := alignAndSeek(reader, &offset, &stOffset); err != nil {
   624  				return fmt.Errorf("error aligning to next PE string table value: %w", err)
   625  			}
   626  
   627  			var value string
   628  			if stringHeader.ValueLength > 0 {
   629  				value = readUTF16(reader, &offset, &stOffset)
   630  			}
   631  
   632  			fields[key] = value
   633  
   634  			if err := alignAndSeek(reader, &offset, &stOffset); err != nil {
   635  				return fmt.Errorf("error aligning to next PE string table key: %w", err)
   636  			}
   637  		}
   638  	}
   639  
   640  	if fields["FileVersion"] == "" {
   641  		// we can derive the file version from the fixed file info if it is not already specified as a string entry... neat!
   642  		fields["FileVersion"] = fmt.Sprintf("%d.%d.%d.%d",
   643  			fixedFileInfo.FileVersionMS>>16, fixedFileInfo.FileVersionMS&0xFFFF,
   644  			fixedFileInfo.FileVersionLS>>16, fixedFileInfo.FileVersionLS&0xFFFF)
   645  	}
   646  
   647  	return nil
   648  }
   649  
   650  // readIntoStructAndSzKey reads a struct from the reader and updates the offsets if provided, returning the szKey value.
   651  // This is only useful in the context of the resource directory parsing in narrow cases (this is invalid to use outside of that context).
   652  func readIntoStructAndSzKey[T any](reader *bytes.Reader, data *T, offsets ...*int) (string, error) {
   653  	if err := readIntoStruct(reader, data, offsets...); err != nil {
   654  		return "", err
   655  	}
   656  	return readUTF16(reader, offsets...), nil
   657  }
   658  
   659  // readIntoStruct reads a struct from the reader and updates the offsets if provided.
   660  func readIntoStruct[T any](reader io.Reader, data *T, offsets ...*int) error {
   661  	if err := binary.Read(reader, binary.LittleEndian, data); err != nil {
   662  		if errors.Is(err, io.EOF) {
   663  			return nil
   664  		}
   665  		return err
   666  	}
   667  
   668  	for i := range offsets {
   669  		*offsets[i] += binary.Size(*data)
   670  	}
   671  	return nil
   672  }
   673  
   674  // alignAndSeek aligns the reader to the next DWORD boundary and seeks to the new offset (updating any provided trackOffsets).
   675  func alignAndSeek(reader io.Seeker, offset *int, trackOffsets ...*int) error {
   676  	ogOffset := *offset
   677  	*offset = alignToDWORD(*offset)
   678  	diff := *offset - ogOffset
   679  	for i := range trackOffsets {
   680  		*trackOffsets[i] += diff
   681  	}
   682  	_, err := reader.Seek(int64(*offset), io.SeekStart)
   683  	return err
   684  }
   685  
   686  // alignToDWORD aligns the offset to the next DWORD boundary (4 byte boundary)
   687  func alignToDWORD(offset int) int {
   688  	return (offset + 3) & ^3
   689  }
   690  
   691  // readUTF16 is a helper function to read a null-terminated UTF16 string
   692  func readUTF16(reader *bytes.Reader, offsets ...*int) string {
   693  	startPos, err := reader.Seek(0, io.SeekCurrent)
   694  	if err != nil {
   695  		return ""
   696  	}
   697  
   698  	var result []rune
   699  	for {
   700  		var char uint16
   701  		err := binary.Read(reader, binary.LittleEndian, &char)
   702  		if err != nil || char == 0 {
   703  			break
   704  		}
   705  		result = append(result, rune(char))
   706  	}
   707  
   708  	// calculate how many bytes we've actually read (including null terminator)
   709  	endPos, _ := reader.Seek(0, io.SeekCurrent)
   710  	bytesRead := int(endPos - startPos)
   711  
   712  	for i := range offsets {
   713  		*offsets[i] += bytesRead
   714  	}
   715  
   716  	return string(result)
   717  }
   718  
   719  // readUTF16WithLength reads a length-prefixed UTF-16 string from reader.
   720  // The first 2 bytes represent the number of UTF-16 code units.
   721  func readUTF16WithLength(reader *bytes.Reader) (string, error) {
   722  	var length uint16
   723  	if err := binary.Read(reader, binary.LittleEndian, &length); err != nil {
   724  		return "", err
   725  	}
   726  	if length == 0 {
   727  		return "", nil
   728  	}
   729  
   730  	// read length UTF-16 code units.
   731  	codes := make([]uint16, length)
   732  	if err := binary.Read(reader, binary.LittleEndian, &codes); err != nil {
   733  		return "", err
   734  	}
   735  	return string(utf16.Decode(codes)), nil
   736  }