github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/dotnet/dotnetpe/dotnetpe.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 // Package dotnetpe extracts packages from .NET PE files. 16 package dotnetpe 17 18 import ( 19 "context" 20 "encoding/binary" 21 "fmt" 22 "os" 23 "path/filepath" 24 "slices" 25 "strings" 26 27 "github.com/google/osv-scalibr/extractor" 28 "github.com/google/osv-scalibr/extractor/filesystem" 29 "github.com/google/osv-scalibr/inventory" 30 "github.com/google/osv-scalibr/log" 31 "github.com/google/osv-scalibr/plugin" 32 "github.com/google/osv-scalibr/purl" 33 "github.com/google/osv-scalibr/stats" 34 "github.com/saferwall/pe" 35 ) 36 37 const ( 38 // Name is the unique Name of this extractor. 39 Name = "dotnet/pe" 40 ) 41 42 // Supported extensions for Portable Executable (PE) files. 43 // This list may not be exhaustive, as the PE standard does not mandate specific extensions. 44 // The empty string is intentionally included to handle files without extensions. 45 var peExtensions = []string{ 46 ".acm", ".ax", ".cpl", ".dll", ".drv", ".efi", ".exe", ".mui", ".ocx", 47 ".scr", ".sys", ".tsp", ".mun", ".msstyles", "", 48 } 49 50 // Extractor extracts dotnet dependencies from a PE file 51 type Extractor struct { 52 cfg Config 53 } 54 55 // Config is the configuration for the .NET PE extractor. 56 type Config struct { 57 // Stats is a stats collector for reporting metrics. 58 Stats stats.Collector 59 // MaxFileSizeBytes is the maximum file size this extractor will parse. If 60 // `FileRequired` gets a bigger file, it will return false. 61 // Use 0 to accept all file sizes 62 MaxFileSizeBytes int64 63 } 64 65 // DefaultConfig returns the default configuration of the extractor. 66 func DefaultConfig() Config { 67 return Config{} 68 } 69 70 // New returns an .NET PE extractor. 71 // 72 // For most use cases, initialize with: 73 // ``` 74 // e := New(DefaultConfig()) 75 // ``` 76 func New(cfg Config) *Extractor { 77 return &Extractor{ 78 cfg: cfg, 79 } 80 } 81 82 // NewDefault returns the extractor with its default configuration. 83 func NewDefault() filesystem.Extractor { return New(DefaultConfig()) } 84 85 // Name of the extractor. 86 func (e Extractor) Name() string { return Name } 87 88 // Version of the extractor. 89 func (e Extractor) Version() int { return 0 } 90 91 // Requirements of the extractor. 92 func (e Extractor) Requirements() *plugin.Capabilities { 93 return &plugin.Capabilities{ 94 OS: plugin.OSWindows, 95 } 96 } 97 98 // FileRequired returns true if the specified file matches the .NET PE file structure. 99 func (e Extractor) FileRequired(api filesystem.FileAPI) bool { 100 path := api.Path() 101 102 // check if the file extension matches one of the known PE extensions 103 ext := strings.ToLower(filepath.Ext(path)) 104 if !slices.Contains(peExtensions, ext) { 105 return false 106 } 107 108 fileinfo, err := api.Stat() 109 if err != nil || (e.cfg.MaxFileSizeBytes > 0 && fileinfo.Size() > e.cfg.MaxFileSizeBytes) { 110 e.reportFileRequired(path, stats.FileRequiredResultSizeLimitExceeded) 111 return false 112 } 113 114 e.reportFileRequired(path, stats.FileRequiredResultOK) 115 return true 116 } 117 118 // Extract parses the PE files to extract .NET package dependencies. 119 func (e Extractor) Extract(ctx context.Context, input *filesystem.ScanInput) (inventory.Inventory, error) { 120 inventory, err := e.extractFromInput(input) 121 if e.cfg.Stats != nil { 122 var fileSizeBytes int64 123 if input.Info != nil { 124 fileSizeBytes = input.Info.Size() 125 } 126 e.cfg.Stats.AfterFileExtracted(e.Name(), &stats.FileExtractedStats{ 127 Path: input.Path, 128 Result: filesystem.ExtractorErrorToFileExtractedResult(err), 129 FileSizeBytes: fileSizeBytes, 130 }) 131 } 132 return inventory, err 133 } 134 135 func (e Extractor) extractFromInput(input *filesystem.ScanInput) (inventory.Inventory, error) { 136 // check if the file has the needed magic bytes before doing the heavy parsing 137 if ok, err := hasPEMagicBytes(input); !ok { 138 return inventory.Inventory{}, fmt.Errorf("the file header does not contain magic bytes %w", err) 139 } 140 141 // Retrieve the real path of the file 142 absPath, err := input.GetRealPath() 143 if err != nil { 144 return inventory.Inventory{}, err 145 } 146 147 if input.Root == "" { 148 // The file got copied to a temporary dir, remove it at the end. 149 defer func() { 150 dir := filepath.Base(absPath) 151 if err := os.RemoveAll(dir); err != nil { 152 log.Errorf("os.RemoveAll(%q): %v", dir, err) 153 } 154 }() 155 } 156 157 // Open the PE file 158 f, err := pe.New(absPath, &pe.Options{}) 159 if err != nil { 160 return inventory.Inventory{}, err 161 } 162 163 // Parse the PE file 164 if err := f.Parse(); err != nil { 165 return inventory.Inventory{}, err 166 } 167 168 // Initialize inventory slice to store the dependencies 169 var pkgs []*extractor.Package 170 171 // Iterate over the CLR Metadata Tables to extract assembly information 172 for _, table := range f.CLR.MetadataTables { 173 pkgs = append(pkgs, tableContentToPackages(f, table.Content)...) 174 } 175 176 // if at least an inventory was found inside the CLR.MetadataTables there is no need to check the VersionResources 177 if len(pkgs) > 0 { 178 return inventory.Inventory{Packages: pkgs}, nil 179 } 180 181 // If no inventory entries were found in CLR.MetadataTables check the VersionResources as a fallback 182 // this is mostly required on .exe files 183 versionResources, err := f.ParseVersionResources() 184 if err != nil { 185 return inventory.Inventory{}, err 186 } 187 188 name, version := versionResources["InternalName"], versionResources["Assembly Version"] 189 if name != "" && version != "" { 190 pkgs = append(pkgs, &extractor.Package{ 191 Name: name, 192 Version: version, 193 PURLType: purl.TypeNuget, 194 }) 195 } 196 197 return inventory.Inventory{Packages: pkgs}, nil 198 } 199 200 func tableContentToPackages(f *pe.File, content any) []*extractor.Package { 201 var pkgs []*extractor.Package 202 203 switch content := content.(type) { 204 case []pe.AssemblyTableRow: 205 for _, row := range content { 206 name := string(f.GetStringFromData(row.Name, f.CLR.MetadataStreams["#Strings"])) + ".dll" 207 version := fmt.Sprintf("%d.%d.%d.%d", row.MajorVersion, row.MinorVersion, row.BuildNumber, row.RevisionNumber) 208 pkgs = append(pkgs, &extractor.Package{ 209 Name: name, 210 Version: version, 211 PURLType: purl.TypeNuget, 212 }) 213 } 214 case []pe.AssemblyRefTableRow: 215 for _, row := range content { 216 name := string(f.GetStringFromData(row.Name, f.CLR.MetadataStreams["#Strings"])) + ".dll" 217 version := fmt.Sprintf("%d.%d.%d.%d", row.MajorVersion, row.MinorVersion, row.BuildNumber, row.RevisionNumber) 218 pkgs = append(pkgs, &extractor.Package{ 219 Name: name, 220 Version: version, 221 PURLType: purl.TypeNuget, 222 }) 223 } 224 } 225 226 return pkgs 227 } 228 229 // hasPEMagicBytes checks if a given file has the PE magic bytes in the header 230 func hasPEMagicBytes(input *filesystem.ScanInput) (bool, error) { 231 // check for the smallest PE size. 232 if input.Info.Size() < pe.TinyPESize { 233 return false, nil 234 } 235 236 var magic uint16 237 if err := binary.Read(input.Reader, binary.LittleEndian, &magic); err != nil { 238 return false, err 239 } 240 241 // Validate if the magic bytes match any of the expected PE signatures 242 hasPESignature := magic == pe.ImageDOSSignature || magic == pe.ImageDOSZMSignature 243 return hasPESignature, nil 244 } 245 246 func (e Extractor) reportFileRequired(path string, result stats.FileRequiredResult) { 247 if e.cfg.Stats == nil { 248 return 249 } 250 e.cfg.Stats.AfterFileRequired(e.Name(), &stats.FileRequiredStats{ 251 Path: path, 252 Result: result, 253 }) 254 } 255 256 var _ filesystem.Extractor = Extractor{}