github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/dotnet/dotnetpe/dotnetpe.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  // Package dotnetpe extracts packages from .NET PE files.
    16  package dotnetpe
    17  
    18  import (
    19  	"context"
    20  	"encoding/binary"
    21  	"fmt"
    22  	"os"
    23  	"path/filepath"
    24  	"slices"
    25  	"strings"
    26  
    27  	"github.com/google/osv-scalibr/extractor"
    28  	"github.com/google/osv-scalibr/extractor/filesystem"
    29  	"github.com/google/osv-scalibr/inventory"
    30  	"github.com/google/osv-scalibr/log"
    31  	"github.com/google/osv-scalibr/plugin"
    32  	"github.com/google/osv-scalibr/purl"
    33  	"github.com/google/osv-scalibr/stats"
    34  	"github.com/saferwall/pe"
    35  )
    36  
    37  const (
    38  	// Name is the unique Name of this extractor.
    39  	Name = "dotnet/pe"
    40  )
    41  
    42  // Supported extensions for Portable Executable (PE) files.
    43  // This list may not be exhaustive, as the PE standard does not mandate specific extensions.
    44  // The empty string is intentionally included to handle files without extensions.
    45  var peExtensions = []string{
    46  	".acm", ".ax", ".cpl", ".dll", ".drv", ".efi", ".exe", ".mui", ".ocx",
    47  	".scr", ".sys", ".tsp", ".mun", ".msstyles", "",
    48  }
    49  
    50  // Extractor extracts dotnet dependencies from a PE file
    51  type Extractor struct {
    52  	cfg Config
    53  }
    54  
    55  // Config is the configuration for the .NET PE extractor.
    56  type Config struct {
    57  	// Stats is a stats collector for reporting metrics.
    58  	Stats stats.Collector
    59  	// MaxFileSizeBytes is the maximum file size this extractor will parse. If
    60  	// `FileRequired` gets a bigger file, it will return false.
    61  	// Use 0 to accept all file sizes
    62  	MaxFileSizeBytes int64
    63  }
    64  
    65  // DefaultConfig returns the default configuration of the extractor.
    66  func DefaultConfig() Config {
    67  	return Config{}
    68  }
    69  
    70  // New returns an .NET PE extractor.
    71  //
    72  // For most use cases, initialize with:
    73  // ```
    74  // e := New(DefaultConfig())
    75  // ```
    76  func New(cfg Config) *Extractor {
    77  	return &Extractor{
    78  		cfg: cfg,
    79  	}
    80  }
    81  
    82  // NewDefault returns the extractor with its default configuration.
    83  func NewDefault() filesystem.Extractor { return New(DefaultConfig()) }
    84  
    85  // Name of the extractor.
    86  func (e Extractor) Name() string { return Name }
    87  
    88  // Version of the extractor.
    89  func (e Extractor) Version() int { return 0 }
    90  
    91  // Requirements of the extractor.
    92  func (e Extractor) Requirements() *plugin.Capabilities {
    93  	return &plugin.Capabilities{
    94  		OS: plugin.OSWindows,
    95  	}
    96  }
    97  
    98  // FileRequired returns true if the specified file matches the .NET PE file structure.
    99  func (e Extractor) FileRequired(api filesystem.FileAPI) bool {
   100  	path := api.Path()
   101  
   102  	// check if the file extension matches one of the known PE extensions
   103  	ext := strings.ToLower(filepath.Ext(path))
   104  	if !slices.Contains(peExtensions, ext) {
   105  		return false
   106  	}
   107  
   108  	fileinfo, err := api.Stat()
   109  	if err != nil || (e.cfg.MaxFileSizeBytes > 0 && fileinfo.Size() > e.cfg.MaxFileSizeBytes) {
   110  		e.reportFileRequired(path, stats.FileRequiredResultSizeLimitExceeded)
   111  		return false
   112  	}
   113  
   114  	e.reportFileRequired(path, stats.FileRequiredResultOK)
   115  	return true
   116  }
   117  
   118  // Extract parses the PE files to extract .NET package dependencies.
   119  func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) {
   120  	inventory, err := e.extractFromInput(input)
   121  	if e.cfg.Stats != nil {
   122  		var fileSizeBytes int64
   123  		if input.Info != nil {
   124  			fileSizeBytes = input.Info.Size()
   125  		}
   126  		e.cfg.Stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{
   127  			Path:          input.Path,
   128  			Result:        filesystem.ExtractorErrorToFileExtractedResult(err),
   129  			FileSizeBytes: fileSizeBytes,
   130  		})
   131  	}
   132  	return inventory, err
   133  }
   134  
   135  func (e Extractor) extractFromInput(input *filesystem.ScanInput) (inventory.Inventory, error) {
   136  	// check if the file has the needed magic bytes before doing the heavy parsing
   137  	if ok, err := hasPEMagicBytes(input); !ok {
   138  		return inventory.Inventory{}, fmt.Errorf("the file header does not contain magic bytes %w", err)
   139  	}
   140  
   141  	// Retrieve the real path of the file
   142  	absPath, err := input.GetRealPath()
   143  	if err != nil {
   144  		return inventory.Inventory{}, err
   145  	}
   146  
   147  	if input.Root == "" {
   148  		// The file got copied to a temporary dir, remove it at the end.
   149  		defer func() {
   150  			dir := filepath.Base(absPath)
   151  			if err := os.RemoveAll(dir); err != nil {
   152  				log.Errorf("os.RemoveAll(%q): %v", dir, err)
   153  			}
   154  		}()
   155  	}
   156  
   157  	// Open the PE file
   158  	f, err := pe.New(absPath, &pe.Options{})
   159  	if err != nil {
   160  		return inventory.Inventory{}, err
   161  	}
   162  
   163  	// Parse the PE file
   164  	if err := f.Parse(); err != nil {
   165  		return inventory.Inventory{}, err
   166  	}
   167  
   168  	// Initialize inventory slice to store the dependencies
   169  	var pkgs []*extractor.Package
   170  
   171  	// Iterate over the CLR Metadata Tables to extract assembly information
   172  	for _, table := range f.CLR.MetadataTables {
   173  		pkgs = append(pkgs, tableContentToPackages(f, table.Content)...)
   174  	}
   175  
   176  	// if at least an inventory was found inside the CLR.MetadataTables there is no need to check the VersionResources
   177  	if len(pkgs) > 0 {
   178  		return inventory.Inventory{Packages: pkgs}, nil
   179  	}
   180  
   181  	// If no inventory entries were found in CLR.MetadataTables check the VersionResources as a fallback
   182  	// this is mostly required on .exe files
   183  	versionResources, err := f.ParseVersionResources()
   184  	if err != nil {
   185  		return inventory.Inventory{}, err
   186  	}
   187  
   188  	name, version := versionResources["InternalName"], versionResources["Assembly Version"]
   189  	if name != "" && version != "" {
   190  		pkgs = append(pkgs, &extractor.Package{
   191  			Name:     name,
   192  			Version:  version,
   193  			PURLType: purl.TypeNuget,
   194  		})
   195  	}
   196  
   197  	return inventory.Inventory{Packages: pkgs}, nil
   198  }
   199  
   200  func tableContentToPackages(f *pe.File, content any) []*extractor.Package {
   201  	var pkgs []*extractor.Package
   202  
   203  	switch content := content.(type) {
   204  	case []pe.AssemblyTableRow:
   205  		for _, row := range content {
   206  			name := string(f.GetStringFromData(row.Name, f.CLR.MetadataStreams["#Strings"])) + ".dll"
   207  			version := fmt.Sprintf("%d.%d.%d.%d", row.MajorVersion, row.MinorVersion, row.BuildNumber, row.RevisionNumber)
   208  			pkgs = append(pkgs, &extractor.Package{
   209  				Name:     name,
   210  				Version:  version,
   211  				PURLType: purl.TypeNuget,
   212  			})
   213  		}
   214  	case []pe.AssemblyRefTableRow:
   215  		for _, row := range content {
   216  			name := string(f.GetStringFromData(row.Name, f.CLR.MetadataStreams["#Strings"])) + ".dll"
   217  			version := fmt.Sprintf("%d.%d.%d.%d", row.MajorVersion, row.MinorVersion, row.BuildNumber, row.RevisionNumber)
   218  			pkgs = append(pkgs, &extractor.Package{
   219  				Name:     name,
   220  				Version:  version,
   221  				PURLType: purl.TypeNuget,
   222  			})
   223  		}
   224  	}
   225  
   226  	return pkgs
   227  }
   228  
   229  // hasPEMagicBytes checks if a given file has the PE magic bytes in the header
   230  func hasPEMagicBytes(input *filesystem.ScanInput) (bool, error) {
   231  	// check for the smallest PE size.
   232  	if input.Info.Size() < pe.TinyPESize {
   233  		return false, nil
   234  	}
   235  
   236  	var magic uint16
   237  	if err := binary.Read(input.Reader, binary.LittleEndian, &magic); err != nil {
   238  		return false, err
   239  	}
   240  
   241  	// Validate if the magic bytes match any of the expected PE signatures
   242  	hasPESignature := magic == pe.ImageDOSSignature || magic == pe.ImageDOSZMSignature
   243  	return hasPESignature, nil
   244  }
   245  
   246  func (e Extractor) reportFileRequired(path string, result stats.FileRequiredResult) {
   247  	if e.cfg.Stats == nil {
   248  		return
   249  	}
   250  	e.cfg.Stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{
   251  		Path:   path,
   252  		Result: result,
   253  	})
   254  }
   255  
   256  var _ filesystem.Extractor = Extractor{}