github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/requirements/requirements_test.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package requirements_test
    16  
    17  import (
    18  	"io/fs"
    19  	"path/filepath"
    20  	"testing"
    21  
    22  	"github.com/google/go-cmp/cmp"
    23  	"github.com/google/osv-scalibr/extractor"
    24  	"github.com/google/osv-scalibr/extractor/filesystem"
    25  	"github.com/google/osv-scalibr/extractor/filesystem/internal/units"
    26  	"github.com/google/osv-scalibr/extractor/filesystem/language/python/requirements"
    27  	"github.com/google/osv-scalibr/extractor/filesystem/simplefileapi"
    28  	scalibrfs "github.com/google/osv-scalibr/fs"
    29  	"github.com/google/osv-scalibr/inventory"
    30  	"github.com/google/osv-scalibr/purl"
    31  	"github.com/google/osv-scalibr/stats"
    32  	"github.com/google/osv-scalibr/testing/fakefs"
    33  	"github.com/google/osv-scalibr/testing/testcollector"
    34  )
    35  
    36  func TestFileRequired(t *testing.T) {
    37  	tests := []struct {
    38  		name             string
    39  		path             string
    40  		fileSizeBytes    int64
    41  		maxFileSizeBytes int64
    42  		wantRequired     bool
    43  		wantResultMetric stats.FileRequiredResult
    44  	}{
    45  		{
    46  			name:             "requirements.txt",
    47  			path:             "RsaCtfTool/requirements.txt",
    48  			wantRequired:     true,
    49  			wantResultMetric: stats.FileRequiredResultOK,
    50  		},
    51  		{
    52  			name:             "optional-requirements.txt",
    53  			path:             "RsaCtfTool/optional-requirements.txt",
    54  			wantRequired:     true,
    55  			wantResultMetric: stats.FileRequiredResultOK,
    56  		},
    57  		{
    58  			name:         "non requirements.txt txt file",
    59  			path:         "requirements-asdf/test.txt",
    60  			wantRequired: false,
    61  		},
    62  		{
    63  			name:         "wrong extension",
    64  			path:         "yolo-txt/requirements.md",
    65  			wantRequired: false,
    66  		},
    67  		{
    68  			name:             "requirements.txt required if file size < max file size",
    69  			path:             "RsaCtfTool/requirements.txt",
    70  			fileSizeBytes:    100 * units.KiB,
    71  			maxFileSizeBytes: 1000 * units.KiB,
    72  			wantRequired:     true,
    73  			wantResultMetric: stats.FileRequiredResultOK,
    74  		},
    75  		{
    76  			name:             "requirements.txt required if file size == max file size",
    77  			path:             "RsaCtfTool/requirements.txt",
    78  			fileSizeBytes:    1000 * units.KiB,
    79  			maxFileSizeBytes: 1000 * units.KiB,
    80  			wantRequired:     true,
    81  			wantResultMetric: stats.FileRequiredResultOK,
    82  		},
    83  		{
    84  			name:             "requirements.txt not required if file size > max file size",
    85  			path:             "RsaCtfTool/requirements.txt",
    86  			fileSizeBytes:    1000 * units.KiB,
    87  			maxFileSizeBytes: 100 * units.KiB,
    88  			wantRequired:     false,
    89  			wantResultMetric: stats.FileRequiredResultSizeLimitExceeded,
    90  		},
    91  		{
    92  			name:             "requirements.txt required if max file size is 0",
    93  			path:             "RsaCtfTool/requirements.txt",
    94  			fileSizeBytes:    1000 * units.KiB,
    95  			maxFileSizeBytes: 0,
    96  			wantRequired:     true,
    97  			wantResultMetric: stats.FileRequiredResultOK,
    98  		},
    99  	}
   100  
   101  	for _, tt := range tests {
   102  		t.Run(tt.name, func(t *testing.T) {
   103  			collector := testcollector.New()
   104  			var e filesystem.Extractor = requirements.New(
   105  				requirements.Config{
   106  					Stats:            collector,
   107  					MaxFileSizeBytes: tt.maxFileSizeBytes,
   108  				},
   109  			)
   110  
   111  			// Set default size if not provided.
   112  			fileSizeBytes := tt.fileSizeBytes
   113  			if fileSizeBytes == 0 {
   114  				fileSizeBytes = 100 * units.KiB
   115  			}
   116  
   117  			if got := e.FileRequired(simplefileapi.New(tt.path, fakefs.FakeFileInfo{
   118  				FileName: filepath.Base(tt.path),
   119  				FileMode: fs.ModePerm,
   120  				FileSize: fileSizeBytes,
   121  			})); got != tt.wantRequired {
   122  				t.Fatalf("FileRequired(%s): got %v, want %v", tt.path, got, tt.wantRequired)
   123  			}
   124  
   125  			gotResultMetric := collector.FileRequiredResult(tt.path)
   126  			if gotResultMetric != tt.wantResultMetric {
   127  				t.Errorf("FileRequired(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric)
   128  			}
   129  		})
   130  	}
   131  }
   132  
   133  func TestExtract(t *testing.T) {
   134  	tests := []struct {
   135  		name             string
   136  		path             string
   137  		wantPackages     []*extractor.Package
   138  		wantResultMetric stats.FileExtractedResult
   139  	}{
   140  		{
   141  			name: "no_version",
   142  			path: "testdata/no_version.txt",
   143  			wantPackages: []*extractor.Package{
   144  				{
   145  					Name:     "PyCrypto",
   146  					PURLType: purl.TypePyPi,
   147  					Metadata: &requirements.Metadata{Requirement: "PyCrypto"},
   148  				},
   149  				{
   150  					Name:     "GMPY2",
   151  					PURLType: purl.TypePyPi,
   152  					Metadata: &requirements.Metadata{Requirement: "GMPY2"}},
   153  				{
   154  					Name:     "SymPy",
   155  					PURLType: purl.TypePyPi,
   156  					Metadata: &requirements.Metadata{Requirement: "SymPy"}},
   157  			},
   158  			wantResultMetric: stats.FileExtractedResultSuccess,
   159  		},
   160  		{
   161  			name: "infinite_loop",
   162  			path: "testdata/loop.txt",
   163  			// Makes sure we don't get stuck in an infinite loop.
   164  			wantResultMetric: stats.FileExtractedResultSuccess,
   165  		},
   166  		{
   167  			name: "with_version",
   168  			path: "testdata/with_versions.txt",
   169  			wantPackages: []*extractor.Package{
   170  				{
   171  					Name:     "nltk",
   172  					Version:  "3.2.2",
   173  					PURLType: purl.TypePyPi,
   174  					Metadata: &requirements.Metadata{Requirement: "nltk==3.2.2"},
   175  				},
   176  				{
   177  					Name:     "tabulate",
   178  					Version:  "0.7.7",
   179  					PURLType: purl.TypePyPi,
   180  					Metadata: &requirements.Metadata{Requirement: "tabulate==0.7.7"},
   181  				},
   182  				{
   183  					Name:     "newspaper3k",
   184  					Version:  "0.2.2",
   185  					PURLType: purl.TypePyPi,
   186  					Metadata: &requirements.Metadata{VersionComparator: ">=", Requirement: "newspaper3k>=0.2.2"},
   187  				},
   188  				{
   189  					Name:     "asdf",
   190  					PURLType: purl.TypePyPi,
   191  					Metadata: &requirements.Metadata{Requirement: "asdf==0.7.*"},
   192  				},
   193  				{
   194  					Name:     "qwerty",
   195  					Version:  "0.1",
   196  					PURLType: purl.TypePyPi,
   197  					Metadata: &requirements.Metadata{Requirement: "qwerty   == 0.1"},
   198  				},
   199  				{
   200  					Name:     "hy-phen",
   201  					Version:  "1.2",
   202  					PURLType: purl.TypePyPi,
   203  					Metadata: &requirements.Metadata{Requirement: "hy-phen==1.2"},
   204  				},
   205  				{
   206  					Name:     "under_score",
   207  					Version:  "1.3",
   208  					PURLType: purl.TypePyPi,
   209  					Metadata: &requirements.Metadata{Requirement: "under_score==1.3"},
   210  				},
   211  				{
   212  					Name:     "yolo",
   213  					Version:  "1.0",
   214  					PURLType: purl.TypePyPi,
   215  					Metadata: &requirements.Metadata{VersionComparator: "===", Requirement: "yolo===1.0"},
   216  				},
   217  				{
   218  					Name:     "pkg",
   219  					Version:  "1.2.3",
   220  					PURLType: purl.TypePyPi,
   221  					Metadata: &requirements.Metadata{VersionComparator: "<=", Requirement: "pkg<=1.2.3"},
   222  				},
   223  			},
   224  			wantResultMetric: stats.FileExtractedResultSuccess,
   225  		},
   226  		{
   227  			name: "comments",
   228  			path: "testdata/comments.txt",
   229  			wantPackages: []*extractor.Package{
   230  				{
   231  					Name:     "PyCrypto",
   232  					Version:  "1.2-alpha",
   233  					PURLType: purl.TypePyPi,
   234  					Metadata: &requirements.Metadata{Requirement: "PyCrypto==1.2-alpha"},
   235  				},
   236  				{
   237  					Name:     "GMPY2",
   238  					Version:  "1",
   239  					PURLType: purl.TypePyPi,
   240  					Metadata: &requirements.Metadata{Requirement: "GMPY2==1"},
   241  				},
   242  				{
   243  					Name:     "SymPy",
   244  					Version:  "1.2",
   245  					PURLType: purl.TypePyPi,
   246  					Metadata: &requirements.Metadata{Requirement: "SymPy==1.2"},
   247  				},
   248  				{
   249  					Name:     "requests",
   250  					Version:  "1.0",
   251  					PURLType: purl.TypePyPi,
   252  					Metadata: &requirements.Metadata{Requirement: "requests ==1.0"},
   253  				},
   254  				{
   255  					Name:     "six",
   256  					Version:  "1.2",
   257  					PURLType: purl.TypePyPi,
   258  					Metadata: &requirements.Metadata{Requirement: "six==1.2"},
   259  				},
   260  			},
   261  			wantResultMetric: stats.FileExtractedResultSuccess,
   262  		},
   263  		{
   264  			name: "pip_example",
   265  			path: "testdata/example.txt",
   266  			wantPackages: []*extractor.Package{
   267  				{
   268  					Name:     "pytest",
   269  					PURLType: purl.TypePyPi,
   270  					Metadata: &requirements.Metadata{Requirement: "pytest"},
   271  				},
   272  				{
   273  					Name:     "pytest-cov",
   274  					PURLType: purl.TypePyPi,
   275  					Metadata: &requirements.Metadata{Requirement: "pytest-cov"},
   276  				},
   277  				{
   278  					Name:     "beautifulsoup4",
   279  					PURLType: purl.TypePyPi,
   280  					Metadata: &requirements.Metadata{Requirement: "beautifulsoup4"},
   281  				},
   282  				{
   283  					Name:     "docopt",
   284  					Version:  "0.6.1",
   285  					PURLType: purl.TypePyPi,
   286  					Metadata: &requirements.Metadata{Requirement: "docopt == 0.6.1"},
   287  				},
   288  				{
   289  					Name:     "requests",
   290  					PURLType: purl.TypePyPi,
   291  					Metadata: &requirements.Metadata{Requirement: "requests [security] >= 2.8.1, == 2.8.* ; python_version < \"2.7\""},
   292  				},
   293  				// not urllib3, because it's pinned to a zip file
   294  				{
   295  					Name:     "keyring",
   296  					Version:  "4.1.1",
   297  					PURLType: purl.TypePyPi,
   298  					Metadata: &requirements.Metadata{VersionComparator: ">=", Requirement: "keyring >= 4.1.1"},
   299  				},
   300  				{
   301  					Name:     "coverage",
   302  					PURLType: purl.TypePyPi,
   303  					Metadata: &requirements.Metadata{Requirement: "coverage != 3.5"},
   304  				},
   305  				{
   306  					Name:     "Mopidy-Dirble",
   307  					Version:  "1.1",
   308  					PURLType: purl.TypePyPi,
   309  					Metadata: &requirements.Metadata{VersionComparator: "~=", Requirement: "Mopidy-Dirble ~= 1.1"},
   310  				},
   311  				{
   312  					Name:      "transitive-req",
   313  					Version:   "1",
   314  					PURLType:  purl.TypePyPi,
   315  					Locations: []string{"testdata/example.txt", "testdata/other-requirements.txt"},
   316  					Metadata:  &requirements.Metadata{Requirement: "transitive-req==1"},
   317  				},
   318  			},
   319  			wantResultMetric: stats.FileExtractedResultSuccess,
   320  		},
   321  		{
   322  			name: "extras",
   323  			path: "testdata/extras.txt",
   324  			wantPackages: []*extractor.Package{
   325  				{
   326  					Name:     "pyjwt",
   327  					Version:  "2.1.0",
   328  					PURLType: purl.TypePyPi,
   329  					Metadata: &requirements.Metadata{Requirement: "pyjwt [crypto] == 2.1.0"},
   330  				},
   331  				{
   332  					Name:     "celery",
   333  					Version:  "4.4.7",
   334  					PURLType: purl.TypePyPi,
   335  					Metadata: &requirements.Metadata{Requirement: "celery [redis, pytest] == 4.4.7"},
   336  				},
   337  			},
   338  			wantResultMetric: stats.FileExtractedResultSuccess,
   339  		},
   340  		{
   341  			name: "env_variable",
   342  			path: "testdata/env_var.txt",
   343  			wantPackages: []*extractor.Package{
   344  				{
   345  					Name:     "asdf",
   346  					Version:  "1.2",
   347  					PURLType: purl.TypePyPi,
   348  					Metadata: &requirements.Metadata{Requirement: "asdf==1.2"}},
   349  				{
   350  					Name:     "another",
   351  					Version:  "1.0",
   352  					PURLType: purl.TypePyPi,
   353  					Metadata: &requirements.Metadata{Requirement: "another==1.0"},
   354  				},
   355  			},
   356  			wantResultMetric: stats.FileExtractedResultSuccess,
   357  		},
   358  		{
   359  			name:             "invalid",
   360  			path:             "testdata/invalid.txt",
   361  			wantResultMetric: stats.FileExtractedResultSuccess,
   362  		},
   363  		{
   364  			name: "per_requirement_options",
   365  			path: "testdata/per_req_options.txt",
   366  			wantPackages: []*extractor.Package{
   367  				{
   368  					// foo1==1.0 --hash=sha256:
   369  					Name:     "foo1",
   370  					Version:  "1.0",
   371  					PURLType: purl.TypePyPi,
   372  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo1==1.0"},
   373  				},
   374  				{
   375  					// foo2==1.0 --hash=sha256:123 --global-option=foo --config-settings=bar
   376  					Name:     "foo2",
   377  					Version:  "1.0",
   378  					PURLType: purl.TypePyPi,
   379  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo2==1.0"},
   380  				},
   381  				{
   382  					// foo3==1.0 --config-settings=bar --global-option=foo --hash=sha256:123
   383  					Name:     "foo3",
   384  					Version:  "1.0",
   385  					PURLType: purl.TypePyPi,
   386  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo3==1.0"},
   387  				},
   388  				{
   389  					// foo4==1.0 --hash=wrongformatbutok
   390  					Name:     "foo4",
   391  					Version:  "1.0",
   392  					PURLType: purl.TypePyPi,
   393  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"wrongformatbutok"}, Requirement: "foo4==1.0"},
   394  				},
   395  				{
   396  					// foo5==1.0; python_version < "2.7" --hash=sha256:123
   397  					Name:     "foo5",
   398  					Version:  "1.0",
   399  					PURLType: purl.TypePyPi,
   400  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo5==1.0; python_version < \"2.7\""},
   401  				},
   402  				{
   403  					// foo6==1.0 --hash=sha256:123 unexpected_text_after_first_option_does_not_stay_around --global-option=foo
   404  					Name:     "foo6",
   405  					Version:  "1.0",
   406  					PURLType: purl.TypePyPi,
   407  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo6==1.0"},
   408  				},
   409  				{
   410  					// foo7==1.0 unexpected_text_before_options_stays_around --hash=sha256:123
   411  					Name:     "foo7",
   412  					Version:  "1.0unexpected_text_before_options_stays_around",
   413  					PURLType: purl.TypePyPi,
   414  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123"}, Requirement: "foo7==1.0 unexpected_text_before_options_stays_around"},
   415  				},
   416  				{
   417  					// foo8==1.0 --hash=sha256:123 --hash=sha256:456
   418  					Name:     "foo8",
   419  					Version:  "1.0",
   420  					PURLType: purl.TypePyPi,
   421  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123", "sha256:456"}, Requirement: "foo8==1.0"},
   422  				},
   423  				{
   424  					// foo9==1.0 --hash=sha256:123 \
   425  					// 	--hash=sha256:456
   426  					Name:     "foo9",
   427  					Version:  "1.0",
   428  					PURLType: purl.TypePyPi,
   429  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:123", "sha256:456"}, Requirement: "foo9==1.0"},
   430  				},
   431  
   432  				// missing a version
   433  				// foo10== --hash=sha256:123 --hash=sha256:123
   434  
   435  				{
   436  					// foo11==1.0 --hash=sha256:not_base16_encoded_is_ok_;#
   437  					Name:     "foo11",
   438  					Version:  "1.0",
   439  					PURLType: purl.TypePyPi,
   440  					Metadata: &requirements.Metadata{HashCheckingModeValues: []string{"sha256:not_base16_encoded_is_ok_;#"}, Requirement: "foo11==1.0"},
   441  				},
   442  				{
   443  					// foo12==1.0 --hash=
   444  					Name:     "foo12",
   445  					Version:  "1.0",
   446  					PURLType: purl.TypePyPi,
   447  					Metadata: &requirements.Metadata{Requirement: "foo12==1.0"},
   448  				},
   449  				{
   450  					// foo13==1.0 --hash sha256:123
   451  					// The hash in this case is not recognized because it does not use an "=" separator
   452  					// as specified by https://pip.pypa.io/en/stable/topics/secure-installs/#hash-checking-mode,
   453  					// but it is dropped from the version.
   454  					Name:     "foo13",
   455  					Version:  "1.0",
   456  					PURLType: purl.TypePyPi,
   457  					Metadata: &requirements.Metadata{Requirement: "foo13==1.0"},
   458  				},
   459  				{
   460  					// foo14=1.0 -C bar
   461  					// short form for --config-settings flag, see https://pip.pypa.io/en/stable/cli/pip_install/#install-config-settings
   462  					Name:     "foo14",
   463  					Version:  "1.0",
   464  					PURLType: purl.TypePyPi,
   465  					Metadata: &requirements.Metadata{Requirement: "foo14==1.0"},
   466  				},
   467  
   468  				// Per the grammar in https://peps.python.org/pep-0508/#grammar, "--config-settings" may be
   469  				// a valid version component, but such a string is not allowed as a version by
   470  				// https://packaging.python.org/en/latest/specifications/version-specifiers/#version-specifiers.
   471  				//
   472  				// foo15== --config-settings --hash=sha256:123
   473  			},
   474  			wantResultMetric: stats.FileExtractedResultSuccess,
   475  		},
   476  	}
   477  
   478  	// fill Location and Extractor
   479  	for _, t := range tests {
   480  		for _, p := range t.wantPackages {
   481  			if p.Locations == nil {
   482  				p.Locations = []string{t.path}
   483  			}
   484  			if p.Metadata == nil {
   485  				p.Metadata = &requirements.Metadata{}
   486  			}
   487  			if p.Metadata.(*requirements.Metadata).HashCheckingModeValues == nil {
   488  				p.Metadata.(*requirements.Metadata).HashCheckingModeValues = []string{}
   489  			}
   490  			if p.Version != "" && p.Metadata.(*requirements.Metadata).VersionComparator == "" {
   491  				p.Metadata.(*requirements.Metadata).VersionComparator = "=="
   492  			}
   493  		}
   494  	}
   495  
   496  	for _, tt := range tests {
   497  		// Note the subtest here
   498  		t.Run(tt.name, func(t *testing.T) {
   499  			collector := testcollector.New()
   500  			var e filesystem.Extractor = requirements.New(requirements.Config{Stats: collector})
   501  
   502  			fsys := scalibrfs.DirFS(".")
   503  
   504  			r, err := fsys.Open(tt.path)
   505  			defer func() {
   506  				if err = r.Close(); err != nil {
   507  					t.Errorf("Close(): %v", err)
   508  				}
   509  			}()
   510  			if err != nil {
   511  				t.Fatal(err)
   512  			}
   513  
   514  			info, err := r.Stat()
   515  			if err != nil {
   516  				t.Fatalf("Stat(): %v", err)
   517  			}
   518  
   519  			input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: r}
   520  			got, err := e.Extract(t.Context(), input)
   521  			if err != nil {
   522  				t.Fatalf("Extract(%s): %v", tt.path, err)
   523  			}
   524  
   525  			want := inventory.Inventory{Packages: tt.wantPackages}
   526  			if diff := cmp.Diff(want, got); diff != "" {
   527  				t.Errorf("Extract(%s) (-want +got):\n%s", tt.path, diff)
   528  			}
   529  
   530  			gotResultMetric := collector.FileExtractedResult(tt.path)
   531  			if gotResultMetric != tt.wantResultMetric {
   532  				t.Errorf("Extract(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric)
   533  			}
   534  
   535  			gotFileSizeMetric := collector.FileExtractedFileSize(tt.path)
   536  			if gotFileSizeMetric != info.Size() {
   537  				t.Errorf("Extract(%s) recorded file size %v, want file size %v", tt.path, gotFileSizeMetric, info.Size())
   538  			}
   539  		})
   540  	}
   541  }