github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/wheelegg/wheelegg_test.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package wheelegg_test 16 17 import ( 18 "bytes" 19 "errors" 20 "io/fs" 21 "os" 22 "path/filepath" 23 "testing" 24 25 "github.com/google/go-cmp/cmp" 26 "github.com/google/go-cmp/cmp/cmpopts" 27 "github.com/google/osv-scalibr/extractor" 28 "github.com/google/osv-scalibr/extractor/filesystem" 29 "github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg" 30 "github.com/google/osv-scalibr/extractor/filesystem/simplefileapi" 31 scalibrfs "github.com/google/osv-scalibr/fs" 32 "github.com/google/osv-scalibr/inventory" 33 "github.com/google/osv-scalibr/purl" 34 "github.com/google/osv-scalibr/stats" 35 "github.com/google/osv-scalibr/testing/fakefs" 36 "github.com/google/osv-scalibr/testing/testcollector" 37 ) 38 39 func TestFileRequired(t *testing.T) { 40 tests := []struct { 41 name string 42 path string 43 fileSizeBytes int64 44 maxFileSizeBytes int64 45 wantRequired bool 46 wantResultMetric stats.FileRequiredResult 47 }{ 48 { 49 name: ".dist-info/METADATA", 50 path: "testdata/pip-22.2.2.dist-info/METADATA", 51 wantRequired: true, 52 wantResultMetric: stats.FileRequiredResultOK, 53 }, 54 { 55 name: ".egg/EGG-INFO/PKG-INFO", 56 path: "testdata/setuptools-57.4.0-py3.9.egg/EGG-INFO/PKG-INFO", 57 wantRequired: true, 58 wantResultMetric: stats.FileRequiredResultOK, 59 }, 60 { 61 name: ".egg-info", 62 path: "testdata/pycups-2.0.1.egg-info", 63 wantRequired: true, 64 wantResultMetric: stats.FileRequiredResultOK, 65 }, 66 { 67 name: ".egg-info/PKG-INFO", 68 path: "testdata/httplib2-0.20.4.egg-info/PKG-INFO", 69 wantRequired: true, 70 wantResultMetric: stats.FileRequiredResultOK, 71 }, 72 { 73 name: ".dist-info/TEST", 74 path: "testdata/pip-22.2.2.dist-info/TEST", 75 wantRequired: false, 76 }, 77 { 78 name: ".egg", 79 path: "python3.10/site-packages/monotonic-1.6-py3.10.egg", 80 wantRequired: true, 81 wantResultMetric: stats.FileRequiredResultOK, 82 }, 83 { 84 name: ".whl", 85 path: "python3.10/site-packages/monotonic-1.6-py3.10.whl", 86 wantRequired: true, 87 wantResultMetric: stats.FileRequiredResultOK, 88 }, 89 { 90 name: ".egg-info required if size less than maxFileSizeBytes", 91 path: "testdata/pycups-2.0.1.egg-info", 92 maxFileSizeBytes: 1000, 93 fileSizeBytes: 100, 94 wantRequired: true, 95 wantResultMetric: stats.FileRequiredResultOK, 96 }, 97 { 98 name: ".egg required if size equal to maxFileSizeBytes", 99 path: "python3.10/site-packages/monotonic-1.6-py3.10.egg", 100 maxFileSizeBytes: 1000, 101 fileSizeBytes: 1000, 102 wantRequired: true, 103 wantResultMetric: stats.FileRequiredResultOK, 104 }, 105 { 106 name: ".egg not required if size greater than maxFileSizeBytes", 107 path: "python3.10/site-packages/monotonic-1.6-py3.10.egg", 108 maxFileSizeBytes: 100, 109 fileSizeBytes: 1000, 110 wantRequired: false, 111 wantResultMetric: stats.FileRequiredResultSizeLimitExceeded, 112 }, 113 { 114 name: ".egg required if maxFileSizeBytes explicitly set to 0", 115 path: "python3.10/site-packages/monotonic-1.6-py3.10.egg", 116 maxFileSizeBytes: 0, 117 fileSizeBytes: 1000, 118 wantRequired: true, 119 wantResultMetric: stats.FileRequiredResultOK, 120 }, 121 } 122 123 for _, tt := range tests { 124 t.Run(tt.name, func(t *testing.T) { 125 collector := testcollector.New() 126 e := wheelegg.New(wheelegg.Config{ 127 MaxFileSizeBytes: tt.maxFileSizeBytes, 128 Stats: collector, 129 }) 130 131 // Set a default file size if not specified. 132 fileSizeBytes := tt.fileSizeBytes 133 if fileSizeBytes == 0 { 134 fileSizeBytes = 1000 135 } 136 137 if got := e.FileRequired(simplefileapi.New(tt.path, fakefs.FakeFileInfo{ 138 FileName: filepath.Base(tt.path), 139 FileMode: fs.ModePerm, 140 FileSize: fileSizeBytes, 141 })); got != tt.wantRequired { 142 t.Fatalf("FileRequired(%s): got %v, want %v", tt.path, got, tt.wantRequired) 143 } 144 145 gotResultMetric := collector.FileRequiredResult(tt.path) 146 if tt.wantResultMetric != "" && gotResultMetric != tt.wantResultMetric { 147 t.Errorf("FileRequired(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric) 148 } 149 }) 150 } 151 } 152 153 func TestExtract(t *testing.T) { 154 tests := []struct { 155 name string 156 path string 157 cfg wheelegg.Config 158 wantPackages []*extractor.Package 159 wantErr error 160 wantResultMetric stats.FileExtractedResult 161 }{ 162 { 163 name: ".dist-info/METADATA", 164 path: "testdata/distinfo_meta", 165 wantPackages: []*extractor.Package{{ 166 Name: "pip", 167 Version: "22.2.2", 168 PURLType: purl.TypePyPi, 169 Locations: []string{"testdata/distinfo_meta"}, 170 Metadata: &wheelegg.PythonPackageMetadata{ 171 Author: "The pip developers", 172 AuthorEmail: "distutils-sig@python.org", 173 }, 174 }}, 175 }, 176 { 177 name: ".egg/EGG-INFO/PKG-INFO", 178 path: "testdata/egginfo_pkginfo", 179 wantPackages: []*extractor.Package{{ 180 Name: "setuptools", 181 Version: "57.4.0", 182 PURLType: purl.TypePyPi, 183 Locations: []string{"testdata/egginfo_pkginfo"}, 184 Metadata: &wheelegg.PythonPackageMetadata{ 185 Author: "Python Packaging Authority", 186 AuthorEmail: "distutils-sig@python.org", 187 }, 188 }}, 189 }, 190 { 191 name: ".egg-info", 192 path: "testdata/egginfo", 193 wantPackages: []*extractor.Package{{ 194 Name: "pycups", 195 Version: "2.0.1", 196 PURLType: purl.TypePyPi, 197 Locations: []string{"testdata/egginfo"}, 198 Metadata: &wheelegg.PythonPackageMetadata{ 199 Author: "Zdenek Dohnal", 200 AuthorEmail: "zdohnal@redhat.com", 201 }, 202 }}, 203 }, 204 { 205 name: ".egg-info/PKG-INFO", 206 path: "testdata/pkginfo", 207 wantPackages: []*extractor.Package{{ 208 Name: "httplib2", 209 Version: "0.20.4", 210 PURLType: purl.TypePyPi, 211 Locations: []string{"testdata/pkginfo"}, 212 Metadata: &wheelegg.PythonPackageMetadata{ 213 Author: "Joe Gregorio", 214 AuthorEmail: "joe@bitworking.org", 215 }, 216 }, 217 }, 218 }, 219 { 220 name: "malformed_PKG-INFO", 221 path: "testdata/malformed_pkginfo", 222 wantPackages: []*extractor.Package{{ 223 Name: "passlib", 224 Version: "1.7.4", 225 PURLType: purl.TypePyPi, 226 Locations: []string{"testdata/malformed_pkginfo"}, 227 Metadata: &wheelegg.PythonPackageMetadata{ 228 Author: "Eli Collins", 229 AuthorEmail: "elic@assurancetechnologies.com", 230 }, 231 }}, 232 }, 233 { 234 name: ".egg", 235 path: "testdata/monotonic-1.6-py3.10.egg", 236 wantPackages: []*extractor.Package{{ 237 Name: "monotonic", 238 Version: "1.6", 239 PURLType: purl.TypePyPi, 240 Locations: []string{"testdata/monotonic-1.6-py3.10.egg"}, 241 Metadata: &wheelegg.PythonPackageMetadata{ 242 Author: "Ori Livneh", 243 AuthorEmail: "ori@wikimedia.org", 244 }, 245 }}, 246 }, 247 { 248 name: ".whl", 249 path: "testdata/monotonic-1.6-py2.py3-none-any.whl", 250 wantPackages: []*extractor.Package{{ 251 Name: "monotonic", 252 Version: "1.6", 253 PURLType: purl.TypePyPi, 254 Locations: []string{"testdata/monotonic-1.6-py2.py3-none-any.whl"}, 255 Metadata: &wheelegg.PythonPackageMetadata{ 256 Author: "Ori Livneh", 257 AuthorEmail: "ori@wikimedia.org", 258 }, 259 }}, 260 }, 261 { 262 name: ".egg without PKG-INFO", 263 path: "testdata/monotonic_no_pkginfo-1.6-py3.10.egg", 264 wantPackages: []*extractor.Package{}, 265 }, 266 } 267 268 for _, tt := range tests { 269 // Note the subtest here 270 t.Run(tt.name, func(t *testing.T) { 271 fsys := scalibrfs.DirFS(".") 272 273 r, err := fsys.Open(tt.path) 274 defer func() { 275 if err = r.Close(); err != nil { 276 t.Errorf("Close(): %v", err) 277 } 278 }() 279 if err != nil { 280 t.Fatal(err) 281 } 282 283 info, err := r.Stat() 284 if err != nil { 285 t.Fatalf("Stat(): %v", err) 286 } 287 288 collector := testcollector.New() 289 tt.cfg.Stats = collector 290 291 input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: r} 292 e := wheelegg.New(defaultConfigWith(tt.cfg)) 293 got, err := e.Extract(t.Context(), input) 294 if !cmp.Equal(err, tt.wantErr, cmpopts.EquateErrors()) { 295 t.Fatalf("Extract(%+v) error: got %v, want %v\n", tt.name, err, tt.wantErr) 296 } 297 298 want := inventory.Inventory{Packages: tt.wantPackages} 299 if diff := cmp.Diff(want, got); diff != "" { 300 t.Errorf("Extract(%s) (-want +got):\n%s", tt.path, diff) 301 } 302 303 wantResultMetric := tt.wantResultMetric 304 if wantResultMetric == "" && tt.wantErr == nil { 305 wantResultMetric = stats.FileExtractedResultSuccess 306 } 307 gotResultMetric := collector.FileExtractedResult(tt.path) 308 if gotResultMetric != wantResultMetric { 309 t.Errorf("Extract(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, wantResultMetric) 310 } 311 312 gotFileSizeMetric := collector.FileExtractedFileSize(tt.path) 313 if gotFileSizeMetric != info.Size() { 314 t.Errorf("Extract(%s) recorded file size %v, want file size %v", tt.path, gotFileSizeMetric, info.Size()) 315 } 316 }) 317 } 318 } 319 320 // defaultConfigWith combines any non-zero fields of cfg with wheelegg.DefaultConfig(). 321 func defaultConfigWith(cfg wheelegg.Config) wheelegg.Config { 322 newCfg := wheelegg.DefaultConfig() 323 324 if cfg.MaxFileSizeBytes > 0 { 325 newCfg.MaxFileSizeBytes = cfg.MaxFileSizeBytes 326 } 327 if cfg.Stats != nil { 328 newCfg.Stats = cfg.Stats 329 } 330 return newCfg 331 } 332 333 func TestExtractWithoutReadAt(t *testing.T) { 334 var e filesystem.Extractor = wheelegg.New(wheelegg.DefaultConfig()) 335 336 tests := []struct { 337 name string 338 path string 339 wantPackages *extractor.Package 340 }{ 341 { 342 name: ".egg", 343 path: "testdata/monotonic-1.6-py3.10.egg", 344 wantPackages: &extractor.Package{ 345 Name: "monotonic", 346 Version: "1.6", 347 PURLType: purl.TypePyPi, 348 Locations: []string{"testdata/monotonic-1.6-py3.10.egg"}, 349 Metadata: &wheelegg.PythonPackageMetadata{ 350 Author: "Ori Livneh", 351 AuthorEmail: "ori@wikimedia.org", 352 }, 353 }, 354 }, 355 { 356 name: ".whl", 357 path: "testdata/monotonic-1.6-py2.py3-none-any.whl", 358 wantPackages: &extractor.Package{ 359 Name: "monotonic", 360 Version: "1.6", 361 PURLType: purl.TypePyPi, 362 Locations: []string{"testdata/monotonic-1.6-py2.py3-none-any.whl"}, 363 Metadata: &wheelegg.PythonPackageMetadata{ 364 Author: "Ori Livneh", 365 AuthorEmail: "ori@wikimedia.org", 366 }, 367 }, 368 }, 369 } 370 371 for _, tt := range tests { 372 // Note the subtest here 373 t.Run(tt.name, func(t *testing.T) { 374 r, err := os.Open(tt.path) 375 defer func() { 376 if err = r.Close(); err != nil { 377 t.Errorf("Close(): %v", err) 378 } 379 }() 380 if err != nil { 381 t.Fatal(err) 382 } 383 384 noReadAt := reader{r} 385 386 info, err := noReadAt.Stat() 387 if err != nil { 388 t.Fatalf("Stat(): %v", err) 389 } 390 391 input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: noReadAt} 392 got, err := e.Extract(t.Context(), input) 393 if err != nil { 394 t.Fatalf("Extract(%s): %v", tt.path, err) 395 } 396 397 want := inventory.Inventory{Packages: []*extractor.Package{tt.wantPackages}} 398 if diff := cmp.Diff(want, got); diff != "" { 399 t.Errorf("Extract(%s) (-want +got):\n%s", tt.path, diff) 400 } 401 }) 402 } 403 } 404 405 func TestExtractErrorsWithFakeFiles(t *testing.T) { 406 tests := []struct { 407 name string 408 path string 409 fakeFileInfo fs.FileInfo 410 fakeFileBytes []byte 411 wantErr error 412 wantResultMetric stats.FileExtractedResult 413 }{ 414 { 415 name: "invalid_zip_file", 416 path: "testdata/does_not_exist.egg", 417 fakeFileInfo: fakefs.FakeFileInfo{ 418 FileName: "does_not_exist.egg", 419 FileMode: fs.ModePerm, 420 FileSize: 1000, 421 }, 422 fakeFileBytes: []byte("invalid zip file"), 423 wantErr: cmpopts.AnyError, 424 wantResultMetric: stats.FileExtractedResultErrorUnknown, 425 }, 426 } 427 428 for _, tt := range tests { 429 t.Run(tt.name, func(t *testing.T) { 430 info := tt.fakeFileInfo 431 r := bytes.NewReader(tt.fakeFileBytes) 432 433 collector := testcollector.New() 434 cfg := wheelegg.Config{Stats: collector} 435 436 input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: r} 437 e := wheelegg.New(defaultConfigWith(cfg)) 438 _, err := e.Extract(t.Context(), input) 439 if err == nil { 440 t.Fatalf("Extract(%+v) succeeded, want error: %v", tt.name, tt.wantErr) 441 } 442 if !cmp.Equal(err, tt.wantErr, cmpopts.EquateErrors()) { 443 t.Fatalf("Extract(%+v) error: got %v, want %v", tt.name, err, tt.wantErr) 444 } 445 446 wantResultMetric := tt.wantResultMetric 447 if wantResultMetric == "" && tt.wantErr == nil { 448 wantResultMetric = stats.FileExtractedResultSuccess 449 } 450 gotResultMetric := collector.FileExtractedResult(tt.path) 451 if gotResultMetric != wantResultMetric { 452 t.Errorf("Extract(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, wantResultMetric) 453 } 454 455 gotFileSizeMetric := collector.FileExtractedFileSize(tt.path) 456 if gotFileSizeMetric != info.Size() { 457 t.Errorf("Extract(%s) recorded file size %v, want file size %v", tt.path, gotFileSizeMetric, info.Size()) 458 } 459 }) 460 } 461 } 462 463 type reader struct { 464 f fs.File 465 } 466 467 func (r reader) Read(p []byte) (n int, err error) { 468 return r.f.Read(p) 469 } 470 471 func (r reader) Stat() (fs.FileInfo, error) { 472 return r.f.Stat() 473 } 474 475 func TestExtractEggWithoutSize(t *testing.T) { 476 fsys := scalibrfs.DirFS(".") 477 path := "testdata/monotonic-1.6-py3.10.egg" 478 479 r, err := fsys.Open(path) 480 defer func() { 481 if err = r.Close(); err != nil { 482 t.Errorf("Close(): %v", err) 483 } 484 }() 485 if err != nil { 486 t.Fatal(err) 487 } 488 489 // Set FileInfo to nil, which does not allow input.info.Size(). This is required for unzipping the 490 // egg file. 491 var info fs.FileInfo 492 493 input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: path, Info: info, Reader: r} 494 e := wheelegg.Extractor{} 495 _, gotErr := e.Extract(t.Context(), input) 496 wantErr := wheelegg.ErrSizeNotSet 497 if !errors.Is(gotErr, wantErr) { 498 t.Fatalf("Extract(%s) got err: '%v', want err: '%v'", path, gotErr, wantErr) 499 } 500 }