github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/requirements/requirements_test.go (about) 1 // Copyright 2025 Google LLC 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package requirements_test 16 17 import ( 18 "io/fs" 19 "path/filepath" 20 "testing" 21 22 "github.com/google/go-cmp/cmp" 23 "github.com/google/osv-scalibr/extractor" 24 "github.com/google/osv-scalibr/extractor/filesystem" 25 "github.com/google/osv-scalibr/extractor/filesystem/internal/units" 26 "github.com/google/osv-scalibr/extractor/filesystem/language/python/requirements" 27 "github.com/google/osv-scalibr/extractor/filesystem/simplefileapi" 28 scalibrfs "github.com/google/osv-scalibr/fs" 29 "github.com/google/osv-scalibr/inventory" 30 "github.com/google/osv-scalibr/purl" 31 "github.com/google/osv-scalibr/stats" 32 "github.com/google/osv-scalibr/testing/fakefs" 33 "github.com/google/osv-scalibr/testing/testcollector" 34 ) 35 36 func TestFileRequired(t *testing.T) { 37 tests := []struct { 38 name string 39 path string 40 fileSizeBytes int64 41 maxFileSizeBytes int64 42 wantRequired bool 43 wantResultMetric stats.FileRequiredResult 44 }{ 45 { 46 name: "requirements.txt", 47 path: "RsaCtfTool/requirements.txt", 48 wantRequired: true, 49 wantResultMetric: stats.FileRequiredResultOK, 50 }, 51 { 52 name: "optional-requirements.txt", 53 path: "RsaCtfTool/optional-requirements.txt", 54 wantRequired: true, 55 wantResultMetric: stats.FileRequiredResultOK, 56 }, 57 { 58 name: "non requirements.txt txt file", 59 path: "requirements-asdf/test.txt", 60 wantRequired: false, 61 }, 62 { 63 name: "wrong extension", 64 path: "yolo-txt/requirements.md", 65 wantRequired: false, 66 }, 67 { 68 name: "requirements.txt required if file size < max file size", 69 path: "RsaCtfTool/requirements.txt", 70 fileSizeBytes: 100 * units.KiB, 71 maxFileSizeBytes: 1000 * units.KiB, 72 wantRequired: true, 73 wantResultMetric: stats.FileRequiredResultOK, 74 }, 75 { 76 name: "requirements.txt required if file size == max file size", 77 path: "RsaCtfTool/requirements.txt", 78 fileSizeBytes: 1000 * units.KiB, 79 maxFileSizeBytes: 1000 * units.KiB, 80 wantRequired: true, 81 wantResultMetric: stats.FileRequiredResultOK, 82 }, 83 { 84 name: "requirements.txt not required if file size > max file size", 85 path: "RsaCtfTool/requirements.txt", 86 fileSizeBytes: 1000 * units.KiB, 87 maxFileSizeBytes: 100 * units.KiB, 88 wantRequired: false, 89 wantResultMetric: stats.FileRequiredResultSizeLimitExceeded, 90 }, 91 { 92 name: "requirements.txt required if max file size is 0", 93 path: "RsaCtfTool/requirements.txt", 94 fileSizeBytes: 1000 * units.KiB, 95 maxFileSizeBytes: 0, 96 wantRequired: true, 97 wantResultMetric: stats.FileRequiredResultOK, 98 }, 99 } 100 101 for _, tt := range tests { 102 t.Run(tt.name, func(t *testing.T) { 103 collector := testcollector.New() 104 var e filesystem.Extractor = requirements.New( 105 requirements.Config{ 106 Stats: collector, 107 MaxFileSizeBytes: tt.maxFileSizeBytes, 108 }, 109 ) 110 111 // Set default size if not provided. 112 fileSizeBytes := tt.fileSizeBytes 113 if fileSizeBytes == 0 { 114 fileSizeBytes = 100 * units.KiB 115 } 116 117 if got := e.FileRequired(simplefileapi.New(tt.path, fakefs.FakeFileInfo{ 118 FileName: filepath.Base(tt.path), 119 FileMode: fs.ModePerm, 120 FileSize: fileSizeBytes, 121 })); got != tt.wantRequired { 122 t.Fatalf("FileRequired(%s): got %v, want %v", tt.path, got, tt.wantRequired) 123 } 124 125 gotResultMetric := collector.FileRequiredResult(tt.path) 126 if gotResultMetric != tt.wantResultMetric { 127 t.Errorf("FileRequired(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric) 128 } 129 }) 130 } 131 } 132 133 func TestExtract(t *testing.T) { 134 tests := []struct { 135 name string 136 path string 137 wantPackages []*extractor.Package 138 wantResultMetric stats.FileExtractedResult 139 }{ 140 { 141 name: "no_version", 142 path: "testdata/no_version.txt", 143 wantPackages: []*extractor.Package{ 144 { 145 Name: "PyCrypto", 146 PURLType: purl.TypePyPi, 147 Metadata: &requirements.Metadata{Requirement: "PyCrypto"}, 148 }, 149 { 150 Name: "GMPY2", 151 PURLType: purl.TypePyPi, 152 Metadata: &requirements.Metadata{Requirement: "GMPY2"}}, 153 { 154 Name: "SymPy", 155 PURLType: purl.TypePyPi, 156 Metadata: &requirements.Metadata{Requirement: "SymPy"}}, 157 }, 158 wantResultMetric: stats.FileExtractedResultSuccess, 159 }, 160 { 161 name: "infinite_loop", 162 path: "testdata/loop.txt", 163 // Makes sure we don't get stuck in an infinite loop. 164 wantResultMetric: stats.FileExtractedResultSuccess, 165 }, 166 { 167 name: "with_version", 168 path: "testdata/with_versions.txt", 169 wantPackages: []*extractor.Package{ 170 { 171 Name: "nltk", 172 Version: "3.2.2", 173 PURLType: purl.TypePyPi, 174 Metadata: &requirements.Metadata{Requirement: "nltk==3.2.2"}, 175 }, 176 { 177 Name: "tabulate", 178 Version: "0.7.7", 179 PURLType: purl.TypePyPi, 180 Metadata: &requirements.Metadata{Requirement: "tabulate==0.7.7"}, 181 }, 182 { 183 Name: "newspaper3k", 184 Version: "0.2.2", 185 PURLType: purl.TypePyPi, 186 Metadata: &requirements.Metadata{VersionComparator: ">=", Requirement: "newspaper3k>=0.2.2"}, 187 }, 188 { 189 Name: "asdf", 190 PURLType: purl.TypePyPi, 191 Metadata: &requirements.Metadata{Requirement: "asdf==0.7.*"}, 192 }, 193 { 194 Name: "qwerty", 195 Version: "0.1", 196 PURLType: purl.TypePyPi, 197 Metadata: &requirements.Metadata{Requirement: "qwerty == 0.1"}, 198 }, 199 { 200 Name: "hy-phen", 201 Version: "1.2", 202 PURLType: purl.TypePyPi, 203 Metadata: &requirements.Metadata{Requirement: "hy-phen==1.2"}, 204 }, 205 { 206 Name: "under_score", 207 Version: "1.3", 208 PURLType: purl.TypePyPi, 209 Metadata: &requirements.Metadata{Requirement: "under_score==1.3"}, 210 }, 211 { 212 Name: "yolo", 213 Version: "1.0", 214 PURLType: purl.TypePyPi, 215 Metadata: &requirements.Metadata{VersionComparator: "===", Requirement: "yolo===1.0"}, 216 }, 217 { 218 Name: "pkg", 219 Version: "1.2.3", 220 PURLType: purl.TypePyPi, 221 Metadata: &requirements.Metadata{VersionComparator: "<=", Requirement: "pkg<=1.2.3"}, 222 }, 223 }, 224 wantResultMetric: stats.FileExtractedResultSuccess, 225 }, 226 { 227 name: "comments", 228 path: "testdata/comments.txt", 229 wantPackages: []*extractor.Package{ 230 { 231 Name: "PyCrypto", 232 Version: "1.2-alpha", 233 PURLType: purl.TypePyPi, 234 Metadata: &requirements.Metadata{Requirement: "PyCrypto==1.2-alpha"}, 235 }, 236 { 237 Name: "GMPY2", 238 Version: "1", 239 PURLType: purl.TypePyPi, 240 Metadata: &requirements.Metadata{Requirement: "GMPY2==1"}, 241 }, 242 { 243 Name: "SymPy", 244 Version: "1.2", 245 PURLType: purl.TypePyPi, 246 Metadata: &requirements.Metadata{Requirement: "SymPy==1.2"}, 247 }, 248 { 249 Name: "requests", 250 Version: "1.0", 251 PURLType: purl.TypePyPi, 252 Metadata: &requirements.Metadata{Requirement: "requests ==1.0"}, 253 }, 254 { 255 Name: "six", 256 Version: "1.2", 257 PURLType: purl.TypePyPi, 258 Metadata: &requirements.Metadata{Requirement: "six==1.2"}, 259 }, 260 }, 261 wantResultMetric: stats.FileExtractedResultSuccess, 262 }, 263 { 264 name: "pip_example", 265 path: "testdata/example.txt", 266 wantPackages: []*extractor.Package{ 267 { 268 Name: "pytest", 269 PURLType: purl.TypePyPi, 270 Metadata: &requirements.Metadata{Requirement: "pytest"}, 271 }, 272 { 273 Name: "pytest-cov", 274 PURLType: purl.TypePyPi, 275 Metadata: &requirements.Metadata{Requirement: "pytest-cov"}, 276 }, 277 { 278 Name: "beautifulsoup4", 279 PURLType: purl.TypePyPi, 280 Metadata: &requirements.Metadata{Requirement: "beautifulsoup4"}, 281 }, 282 { 283 Name: "docopt", 284 Version: "0.6.1", 285 PURLType: purl.TypePyPi, 286 Metadata: &requirements.Metadata{Requirement: "docopt == 0.6.1"}, 287 }, 288 { 289 Name: "requests", 290 PURLType: purl.TypePyPi, 291 Metadata: &requirements.Metadata{Requirement: "requests [security] >= 2.8.1, == 2.8.* ; python_version < \"2.7\""}, 292 }, 293 // not urllib3, because it's pinned to a zip file 294 { 295 Name: "keyring", 296 Version: "4.1.1", 297 PURLType: purl.TypePyPi, 298 Metadata: &requirements.Metadata{VersionComparator: ">=", Requirement: "keyring >= 4.1.1"}, 299 }, 300 { 301 Name: "coverage", 302 PURLType: purl.TypePyPi, 303 Metadata: &requirements.Metadata{Requirement: "coverage != 3.5"}, 304 }, 305 { 306 Name: "Mopidy-Dirble", 307 Version: "1.1", 308 PURLType: purl.TypePyPi, 309 Metadata: &requirements.Metadata{VersionComparator: "~=", Requirement: "Mopidy-Dirble ~= 1.1"}, 310 }, 311 { 312 Name: "transitive-req", 313 Version: "1", 314 PURLType: purl.TypePyPi, 315 Locations: []string{"testdata/example.txt", "testdata/other-requirements.txt"}, 316 Metadata: &requirements.Metadata{Requirement: "transitive-req==1"}, 317 }, 318 }, 319 wantResultMetric: stats.FileExtractedResultSuccess, 320 }, 321 { 322 name: "extras", 323 path: "testdata/extras.txt", 324 wantPackages: []*extractor.Package{ 325 { 326 Name: "pyjwt", 327 Version: "2.1.0", 328 PURLType: purl.TypePyPi, 329 Metadata: &requirements.Metadata{Requirement: "pyjwt [crypto] == 2.1.0"}, 330 }, 331 { 332 Name: "celery", 333 Version: "4.4.7", 334 PURLType: purl.TypePyPi, 335 Metadata: &requirements.Metadata{Requirement: "celery [redis, pytest] == 4.4.7"}, 336 }, 337 }, 338 wantResultMetric: stats.FileExtractedResultSuccess, 339 }, 340 { 341 name: "env_variable", 342 path: "testdata/env_var.txt", 343 wantPackages: []*extractor.Package{ 344 { 345 Name: "asdf", 346 Version: "1.2", 347 PURLType: purl.TypePyPi, 348 Metadata: &requirements.Metadata{Requirement: "asdf==1.2"}}, 349 { 350 Name: "another", 351 Version: "1.0", 352 PURLType: purl.TypePyPi, 353 Metadata: &requirements.Metadata{Requirement: "another==1.0"}, 354 }, 355 }, 356 wantResultMetric: stats.FileExtractedResultSuccess, 357 }, 358 { 359 name: "invalid", 360 path: "testdata/invalid.txt", 361 wantResultMetric: stats.FileExtractedResultSuccess, 362 }, 363 { 364 name: "per_requirement_options", 365 path: "testdata/per_req_options.txt", 366 wantPackages: []*extractor.Package{ 367 { 368 // foo1==1.0 --hash=sha256: 369 Name: "foo1", 370 Version: "1.0", 371 PURLType: purl.TypePyPi, 372 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo1==1.0"}, 373 }, 374 { 375 // foo2==1.0 --hash=sha256:123 --global-option=foo --config-settings=bar 376 Name: "foo2", 377 Version: "1.0", 378 PURLType: purl.TypePyPi, 379 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo2==1.0"}, 380 }, 381 { 382 // foo3==1.0 --config-settings=bar --global-option=foo --hash=sha256:123 383 Name: "foo3", 384 Version: "1.0", 385 PURLType: purl.TypePyPi, 386 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo3==1.0"}, 387 }, 388 { 389 // foo4==1.0 --hash=wrongformatbutok 390 Name: "foo4", 391 Version: "1.0", 392 PURLType: purl.TypePyPi, 393 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"wrongformatbutok"}, Requirement: "foo4==1.0"}, 394 }, 395 { 396 // foo5==1.0; python_version < "2.7" --hash=sha256:123 397 Name: "foo5", 398 Version: "1.0", 399 PURLType: purl.TypePyPi, 400 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo5==1.0; python_version < \"2.7\""}, 401 }, 402 { 403 // foo6==1.0 --hash=sha256:123 unexpected_text_after_first_option_does_not_stay_around --global-option=foo 404 Name: "foo6", 405 Version: "1.0", 406 PURLType: purl.TypePyPi, 407 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo6==1.0"}, 408 }, 409 { 410 // foo7==1.0 unexpected_text_before_options_stays_around --hash=sha256:123 411 Name: "foo7", 412 Version: "1.0unexpected_text_before_options_stays_around", 413 PURLType: purl.TypePyPi, 414 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo7==1.0 unexpected_text_before_options_stays_around"}, 415 }, 416 { 417 // foo8==1.0 --hash=sha256:123 --hash=sha256:456 418 Name: "foo8", 419 Version: "1.0", 420 PURLType: purl.TypePyPi, 421 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123", "sha256:456"}, Requirement: "foo8==1.0"}, 422 }, 423 { 424 // foo9==1.0 --hash=sha256:123 \ 425 // --hash=sha256:456 426 Name: "foo9", 427 Version: "1.0", 428 PURLType: purl.TypePyPi, 429 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123", "sha256:456"}, Requirement: "foo9==1.0"}, 430 }, 431 432 // missing a version 433 // foo10== --hash=sha256:123 --hash=sha256:123 434 435 { 436 // foo11==1.0 --hash=sha256:not_base16_encoded_is_ok_;# 437 Name: "foo11", 438 Version: "1.0", 439 PURLType: purl.TypePyPi, 440 Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:not_base16_encoded_is_ok_;#"}, Requirement: "foo11==1.0"}, 441 }, 442 { 443 // foo12==1.0 --hash= 444 Name: "foo12", 445 Version: "1.0", 446 PURLType: purl.TypePyPi, 447 Metadata: &requirements.Metadata{Requirement: "foo12==1.0"}, 448 }, 449 { 450 // foo13==1.0 --hash sha256:123 451 // The hash in this case is not recognized because it does not use an "=" separator 452 // as specified by https://pip.pypa.io/en/stable/topics/secure-installs/#hash-checking-mode, 453 // but it is dropped from the version. 454 Name: "foo13", 455 Version: "1.0", 456 PURLType: purl.TypePyPi, 457 Metadata: &requirements.Metadata{Requirement: "foo13==1.0"}, 458 }, 459 { 460 // foo14=1.0 -C bar 461 // short form for --config-settings flag, see https://pip.pypa.io/en/stable/cli/pip_install/#install-config-settings 462 Name: "foo14", 463 Version: "1.0", 464 PURLType: purl.TypePyPi, 465 Metadata: &requirements.Metadata{Requirement: "foo14==1.0"}, 466 }, 467 468 // Per the grammar in https://peps.python.org/pep-0508/#grammar, "--config-settings" may be 469 // a valid version component, but such a string is not allowed as a version by 470 // https://packaging.python.org/en/latest/specifications/version-specifiers/#version-specifiers. 471 // 472 // foo15== --config-settings --hash=sha256:123 473 }, 474 wantResultMetric: stats.FileExtractedResultSuccess, 475 }, 476 } 477 478 // fill Location and Extractor 479 for _, t := range tests { 480 for _, p := range t.wantPackages { 481 if p.Locations == nil { 482 p.Locations = []string{t.path} 483 } 484 if p.Metadata == nil { 485 p.Metadata = &requirements.Metadata{} 486 } 487 if p.Metadata.(*requirements.Metadata).HashCheckingModeValues == nil { 488 p.Metadata.(*requirements.Metadata).HashCheckingModeValues = []string{} 489 } 490 if p.Version != "" && p.Metadata.(*requirements.Metadata).VersionComparator == "" { 491 p.Metadata.(*requirements.Metadata).VersionComparator = "==" 492 } 493 } 494 } 495 496 for _, tt := range tests { 497 // Note the subtest here 498 t.Run(tt.name, func(t *testing.T) { 499 collector := testcollector.New() 500 var e filesystem.Extractor = requirements.New(requirements.Config{Stats: collector}) 501 502 fsys := scalibrfs.DirFS(".") 503 504 r, err := fsys.Open(tt.path) 505 defer func() { 506 if err = r.Close(); err != nil { 507 t.Errorf("Close(): %v", err) 508 } 509 }() 510 if err != nil { 511 t.Fatal(err) 512 } 513 514 info, err := r.Stat() 515 if err != nil { 516 t.Fatalf("Stat(): %v", err) 517 } 518 519 input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: r} 520 got, err := e.Extract(t.Context(), input) 521 if err != nil { 522 t.Fatalf("Extract(%s): %v", tt.path, err) 523 } 524 525 want := inventory.Inventory{Packages: tt.wantPackages} 526 if diff := cmp.Diff(want, got); diff != "" { 527 t.Errorf("Extract(%s) (-want +got):\n%s", tt.path, diff) 528 } 529 530 gotResultMetric := collector.FileExtractedResult(tt.path) 531 if gotResultMetric != tt.wantResultMetric { 532 t.Errorf("Extract(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric) 533 } 534 535 gotFileSizeMetric := collector.FileExtractedFileSize(tt.path) 536 if gotFileSizeMetric != info.Size() { 537 t.Errorf("Extract(%s) recorded file size %v, want file size %v", tt.path, gotFileSizeMetric, info.Size()) 538 } 539 }) 540 } 541 }