github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/python/wheelegg/wheelegg_test.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package wheelegg_test
    16  
    17  import (
    18  	"bytes"
    19  	"errors"
    20  	"io/fs"
    21  	"os"
    22  	"path/filepath"
    23  	"testing"
    24  
    25  	"github.com/google/go-cmp/cmp"
    26  	"github.com/google/go-cmp/cmp/cmpopts"
    27  	"github.com/google/osv-scalibr/extractor"
    28  	"github.com/google/osv-scalibr/extractor/filesystem"
    29  	"github.com/google/osv-scalibr/extractor/filesystem/language/python/wheelegg"
    30  	"github.com/google/osv-scalibr/extractor/filesystem/simplefileapi"
    31  	scalibrfs "github.com/google/osv-scalibr/fs"
    32  	"github.com/google/osv-scalibr/inventory"
    33  	"github.com/google/osv-scalibr/purl"
    34  	"github.com/google/osv-scalibr/stats"
    35  	"github.com/google/osv-scalibr/testing/fakefs"
    36  	"github.com/google/osv-scalibr/testing/testcollector"
    37  )
    38  
    39  func TestFileRequired(t *testing.T) {
    40  	tests := []struct {
    41  		name             string
    42  		path             string
    43  		fileSizeBytes    int64
    44  		maxFileSizeBytes int64
    45  		wantRequired     bool
    46  		wantResultMetric stats.FileRequiredResult
    47  	}{
    48  		{
    49  			name:             ".dist-info/METADATA",
    50  			path:             "testdata/pip-22.2.2.dist-info/METADATA",
    51  			wantRequired:     true,
    52  			wantResultMetric: stats.FileRequiredResultOK,
    53  		},
    54  		{
    55  			name:             ".egg/EGG-INFO/PKG-INFO",
    56  			path:             "testdata/setuptools-57.4.0-py3.9.egg/EGG-INFO/PKG-INFO",
    57  			wantRequired:     true,
    58  			wantResultMetric: stats.FileRequiredResultOK,
    59  		},
    60  		{
    61  			name:             ".egg-info",
    62  			path:             "testdata/pycups-2.0.1.egg-info",
    63  			wantRequired:     true,
    64  			wantResultMetric: stats.FileRequiredResultOK,
    65  		},
    66  		{
    67  			name:             ".egg-info/PKG-INFO",
    68  			path:             "testdata/httplib2-0.20.4.egg-info/PKG-INFO",
    69  			wantRequired:     true,
    70  			wantResultMetric: stats.FileRequiredResultOK,
    71  		},
    72  		{
    73  			name:         ".dist-info/TEST",
    74  			path:         "testdata/pip-22.2.2.dist-info/TEST",
    75  			wantRequired: false,
    76  		},
    77  		{
    78  			name:             ".egg",
    79  			path:             "python3.10/site-packages/monotonic-1.6-py3.10.egg",
    80  			wantRequired:     true,
    81  			wantResultMetric: stats.FileRequiredResultOK,
    82  		},
    83  		{
    84  			name:             ".whl",
    85  			path:             "python3.10/site-packages/monotonic-1.6-py3.10.whl",
    86  			wantRequired:     true,
    87  			wantResultMetric: stats.FileRequiredResultOK,
    88  		},
    89  		{
    90  			name:             ".egg-info required if size less than maxFileSizeBytes",
    91  			path:             "testdata/pycups-2.0.1.egg-info",
    92  			maxFileSizeBytes: 1000,
    93  			fileSizeBytes:    100,
    94  			wantRequired:     true,
    95  			wantResultMetric: stats.FileRequiredResultOK,
    96  		},
    97  		{
    98  			name:             ".egg required if size equal to maxFileSizeBytes",
    99  			path:             "python3.10/site-packages/monotonic-1.6-py3.10.egg",
   100  			maxFileSizeBytes: 1000,
   101  			fileSizeBytes:    1000,
   102  			wantRequired:     true,
   103  			wantResultMetric: stats.FileRequiredResultOK,
   104  		},
   105  		{
   106  			name:             ".egg not required if size greater than maxFileSizeBytes",
   107  			path:             "python3.10/site-packages/monotonic-1.6-py3.10.egg",
   108  			maxFileSizeBytes: 100,
   109  			fileSizeBytes:    1000,
   110  			wantRequired:     false,
   111  			wantResultMetric: stats.FileRequiredResultSizeLimitExceeded,
   112  		},
   113  		{
   114  			name:             ".egg required if maxFileSizeBytes explicitly set to 0",
   115  			path:             "python3.10/site-packages/monotonic-1.6-py3.10.egg",
   116  			maxFileSizeBytes: 0,
   117  			fileSizeBytes:    1000,
   118  			wantRequired:     true,
   119  			wantResultMetric: stats.FileRequiredResultOK,
   120  		},
   121  	}
   122  
   123  	for _, tt := range tests {
   124  		t.Run(tt.name, func(t *testing.T) {
   125  			collector := testcollector.New()
   126  			e := wheelegg.New(wheelegg.Config{
   127  				MaxFileSizeBytes: tt.maxFileSizeBytes,
   128  				Stats:            collector,
   129  			})
   130  
   131  			// Set a default file size if not specified.
   132  			fileSizeBytes := tt.fileSizeBytes
   133  			if fileSizeBytes == 0 {
   134  				fileSizeBytes = 1000
   135  			}
   136  
   137  			if got := e.FileRequired(simplefileapi.New(tt.path, fakefs.FakeFileInfo{
   138  				FileName: filepath.Base(tt.path),
   139  				FileMode: fs.ModePerm,
   140  				FileSize: fileSizeBytes,
   141  			})); got != tt.wantRequired {
   142  				t.Fatalf("FileRequired(%s): got %v, want %v", tt.path, got, tt.wantRequired)
   143  			}
   144  
   145  			gotResultMetric := collector.FileRequiredResult(tt.path)
   146  			if tt.wantResultMetric != "" && gotResultMetric != tt.wantResultMetric {
   147  				t.Errorf("FileRequired(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric)
   148  			}
   149  		})
   150  	}
   151  }
   152  
   153  func TestExtract(t *testing.T) {
   154  	tests := []struct {
   155  		name             string
   156  		path             string
   157  		cfg              wheelegg.Config
   158  		wantPackages     []*extractor.Package
   159  		wantErr          error
   160  		wantResultMetric stats.FileExtractedResult
   161  	}{
   162  		{
   163  			name: ".dist-info/METADATA",
   164  			path: "testdata/distinfo_meta",
   165  			wantPackages: []*extractor.Package{{
   166  				Name:      "pip",
   167  				Version:   "22.2.2",
   168  				PURLType:  purl.TypePyPi,
   169  				Locations: []string{"testdata/distinfo_meta"},
   170  				Metadata: &wheelegg.PythonPackageMetadata{
   171  					Author:      "The pip developers",
   172  					AuthorEmail: "distutils-sig@python.org",
   173  				},
   174  			}},
   175  		},
   176  		{
   177  			name: ".egg/EGG-INFO/PKG-INFO",
   178  			path: "testdata/egginfo_pkginfo",
   179  			wantPackages: []*extractor.Package{{
   180  				Name:      "setuptools",
   181  				Version:   "57.4.0",
   182  				PURLType:  purl.TypePyPi,
   183  				Locations: []string{"testdata/egginfo_pkginfo"},
   184  				Metadata: &wheelegg.PythonPackageMetadata{
   185  					Author:      "Python Packaging Authority",
   186  					AuthorEmail: "distutils-sig@python.org",
   187  				},
   188  			}},
   189  		},
   190  		{
   191  			name: ".egg-info",
   192  			path: "testdata/egginfo",
   193  			wantPackages: []*extractor.Package{{
   194  				Name:      "pycups",
   195  				Version:   "2.0.1",
   196  				PURLType:  purl.TypePyPi,
   197  				Locations: []string{"testdata/egginfo"},
   198  				Metadata: &wheelegg.PythonPackageMetadata{
   199  					Author:      "Zdenek Dohnal",
   200  					AuthorEmail: "zdohnal@redhat.com",
   201  				},
   202  			}},
   203  		},
   204  		{
   205  			name: ".egg-info/PKG-INFO",
   206  			path: "testdata/pkginfo",
   207  			wantPackages: []*extractor.Package{{
   208  				Name:      "httplib2",
   209  				Version:   "0.20.4",
   210  				PURLType:  purl.TypePyPi,
   211  				Locations: []string{"testdata/pkginfo"},
   212  				Metadata: &wheelegg.PythonPackageMetadata{
   213  					Author:      "Joe Gregorio",
   214  					AuthorEmail: "joe@bitworking.org",
   215  				},
   216  			},
   217  			},
   218  		},
   219  		{
   220  			name: "malformed_PKG-INFO",
   221  			path: "testdata/malformed_pkginfo",
   222  			wantPackages: []*extractor.Package{{
   223  				Name:      "passlib",
   224  				Version:   "1.7.4",
   225  				PURLType:  purl.TypePyPi,
   226  				Locations: []string{"testdata/malformed_pkginfo"},
   227  				Metadata: &wheelegg.PythonPackageMetadata{
   228  					Author:      "Eli Collins",
   229  					AuthorEmail: "elic@assurancetechnologies.com",
   230  				},
   231  			}},
   232  		},
   233  		{
   234  			name: ".egg",
   235  			path: "testdata/monotonic-1.6-py3.10.egg",
   236  			wantPackages: []*extractor.Package{{
   237  				Name:      "monotonic",
   238  				Version:   "1.6",
   239  				PURLType:  purl.TypePyPi,
   240  				Locations: []string{"testdata/monotonic-1.6-py3.10.egg"},
   241  				Metadata: &wheelegg.PythonPackageMetadata{
   242  					Author:      "Ori Livneh",
   243  					AuthorEmail: "ori@wikimedia.org",
   244  				},
   245  			}},
   246  		},
   247  		{
   248  			name: ".whl",
   249  			path: "testdata/monotonic-1.6-py2.py3-none-any.whl",
   250  			wantPackages: []*extractor.Package{{
   251  				Name:      "monotonic",
   252  				Version:   "1.6",
   253  				PURLType:  purl.TypePyPi,
   254  				Locations: []string{"testdata/monotonic-1.6-py2.py3-none-any.whl"},
   255  				Metadata: &wheelegg.PythonPackageMetadata{
   256  					Author:      "Ori Livneh",
   257  					AuthorEmail: "ori@wikimedia.org",
   258  				},
   259  			}},
   260  		},
   261  		{
   262  			name:         ".egg without PKG-INFO",
   263  			path:         "testdata/monotonic_no_pkginfo-1.6-py3.10.egg",
   264  			wantPackages: []*extractor.Package{},
   265  		},
   266  	}
   267  
   268  	for _, tt := range tests {
   269  		// Note the subtest here
   270  		t.Run(tt.name, func(t *testing.T) {
   271  			fsys := scalibrfs.DirFS(".")
   272  
   273  			r, err := fsys.Open(tt.path)
   274  			defer func() {
   275  				if err = r.Close(); err != nil {
   276  					t.Errorf("Close(): %v", err)
   277  				}
   278  			}()
   279  			if err != nil {
   280  				t.Fatal(err)
   281  			}
   282  
   283  			info, err := r.Stat()
   284  			if err != nil {
   285  				t.Fatalf("Stat(): %v", err)
   286  			}
   287  
   288  			collector := testcollector.New()
   289  			tt.cfg.Stats = collector
   290  
   291  			input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: r}
   292  			e := wheelegg.New(defaultConfigWith(tt.cfg))
   293  			got, err := e.Extract(t.Context(), input)
   294  			if !cmp.Equal(err, tt.wantErr, cmpopts.EquateErrors()) {
   295  				t.Fatalf("Extract(%+v) error: got %v, want %v\n", tt.name, err, tt.wantErr)
   296  			}
   297  
   298  			want := inventory.Inventory{Packages: tt.wantPackages}
   299  			if diff := cmp.Diff(want, got); diff != "" {
   300  				t.Errorf("Extract(%s) (-want +got):\n%s", tt.path, diff)
   301  			}
   302  
   303  			wantResultMetric := tt.wantResultMetric
   304  			if wantResultMetric == "" && tt.wantErr == nil {
   305  				wantResultMetric = stats.FileExtractedResultSuccess
   306  			}
   307  			gotResultMetric := collector.FileExtractedResult(tt.path)
   308  			if gotResultMetric != wantResultMetric {
   309  				t.Errorf("Extract(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, wantResultMetric)
   310  			}
   311  
   312  			gotFileSizeMetric := collector.FileExtractedFileSize(tt.path)
   313  			if gotFileSizeMetric != info.Size() {
   314  				t.Errorf("Extract(%s) recorded file size %v, want file size %v", tt.path, gotFileSizeMetric, info.Size())
   315  			}
   316  		})
   317  	}
   318  }
   319  
   320  // defaultConfigWith combines any non-zero fields of cfg with wheelegg.DefaultConfig().
   321  func defaultConfigWith(cfg wheelegg.Config) wheelegg.Config {
   322  	newCfg := wheelegg.DefaultConfig()
   323  
   324  	if cfg.MaxFileSizeBytes > 0 {
   325  		newCfg.MaxFileSizeBytes = cfg.MaxFileSizeBytes
   326  	}
   327  	if cfg.Stats != nil {
   328  		newCfg.Stats = cfg.Stats
   329  	}
   330  	return newCfg
   331  }
   332  
   333  func TestExtractWithoutReadAt(t *testing.T) {
   334  	var e filesystem.Extractor = wheelegg.New(wheelegg.DefaultConfig())
   335  
   336  	tests := []struct {
   337  		name         string
   338  		path         string
   339  		wantPackages *extractor.Package
   340  	}{
   341  		{
   342  			name: ".egg",
   343  			path: "testdata/monotonic-1.6-py3.10.egg",
   344  			wantPackages: &extractor.Package{
   345  				Name:      "monotonic",
   346  				Version:   "1.6",
   347  				PURLType:  purl.TypePyPi,
   348  				Locations: []string{"testdata/monotonic-1.6-py3.10.egg"},
   349  				Metadata: &wheelegg.PythonPackageMetadata{
   350  					Author:      "Ori Livneh",
   351  					AuthorEmail: "ori@wikimedia.org",
   352  				},
   353  			},
   354  		},
   355  		{
   356  			name: ".whl",
   357  			path: "testdata/monotonic-1.6-py2.py3-none-any.whl",
   358  			wantPackages: &extractor.Package{
   359  				Name:      "monotonic",
   360  				Version:   "1.6",
   361  				PURLType:  purl.TypePyPi,
   362  				Locations: []string{"testdata/monotonic-1.6-py2.py3-none-any.whl"},
   363  				Metadata: &wheelegg.PythonPackageMetadata{
   364  					Author:      "Ori Livneh",
   365  					AuthorEmail: "ori@wikimedia.org",
   366  				},
   367  			},
   368  		},
   369  	}
   370  
   371  	for _, tt := range tests {
   372  		// Note the subtest here
   373  		t.Run(tt.name, func(t *testing.T) {
   374  			r, err := os.Open(tt.path)
   375  			defer func() {
   376  				if err = r.Close(); err != nil {
   377  					t.Errorf("Close(): %v", err)
   378  				}
   379  			}()
   380  			if err != nil {
   381  				t.Fatal(err)
   382  			}
   383  
   384  			noReadAt := reader{r}
   385  
   386  			info, err := noReadAt.Stat()
   387  			if err != nil {
   388  				t.Fatalf("Stat(): %v", err)
   389  			}
   390  
   391  			input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: noReadAt}
   392  			got, err := e.Extract(t.Context(), input)
   393  			if err != nil {
   394  				t.Fatalf("Extract(%s): %v", tt.path, err)
   395  			}
   396  
   397  			want := inventory.Inventory{Packages: []*extractor.Package{tt.wantPackages}}
   398  			if diff := cmp.Diff(want, got); diff != "" {
   399  				t.Errorf("Extract(%s) (-want +got):\n%s", tt.path, diff)
   400  			}
   401  		})
   402  	}
   403  }
   404  
   405  func TestExtractErrorsWithFakeFiles(t *testing.T) {
   406  	tests := []struct {
   407  		name             string
   408  		path             string
   409  		fakeFileInfo     fs.FileInfo
   410  		fakeFileBytes    []byte
   411  		wantErr          error
   412  		wantResultMetric stats.FileExtractedResult
   413  	}{
   414  		{
   415  			name: "invalid_zip_file",
   416  			path: "testdata/does_not_exist.egg",
   417  			fakeFileInfo: fakefs.FakeFileInfo{
   418  				FileName: "does_not_exist.egg",
   419  				FileMode: fs.ModePerm,
   420  				FileSize: 1000,
   421  			},
   422  			fakeFileBytes:    []byte("invalid zip file"),
   423  			wantErr:          cmpopts.AnyError,
   424  			wantResultMetric: stats.FileExtractedResultErrorUnknown,
   425  		},
   426  	}
   427  
   428  	for _, tt := range tests {
   429  		t.Run(tt.name, func(t *testing.T) {
   430  			info := tt.fakeFileInfo
   431  			r := bytes.NewReader(tt.fakeFileBytes)
   432  
   433  			collector := testcollector.New()
   434  			cfg := wheelegg.Config{Stats: collector}
   435  
   436  			input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: r}
   437  			e := wheelegg.New(defaultConfigWith(cfg))
   438  			_, err := e.Extract(t.Context(), input)
   439  			if err == nil {
   440  				t.Fatalf("Extract(%+v) succeeded, want error: %v", tt.name, tt.wantErr)
   441  			}
   442  			if !cmp.Equal(err, tt.wantErr, cmpopts.EquateErrors()) {
   443  				t.Fatalf("Extract(%+v) error: got %v, want %v", tt.name, err, tt.wantErr)
   444  			}
   445  
   446  			wantResultMetric := tt.wantResultMetric
   447  			if wantResultMetric == "" && tt.wantErr == nil {
   448  				wantResultMetric = stats.FileExtractedResultSuccess
   449  			}
   450  			gotResultMetric := collector.FileExtractedResult(tt.path)
   451  			if gotResultMetric != wantResultMetric {
   452  				t.Errorf("Extract(%s) recorded result metric %v, want result metric %v", tt.path, gotResultMetric, wantResultMetric)
   453  			}
   454  
   455  			gotFileSizeMetric := collector.FileExtractedFileSize(tt.path)
   456  			if gotFileSizeMetric != info.Size() {
   457  				t.Errorf("Extract(%s) recorded file size %v, want file size %v", tt.path, gotFileSizeMetric, info.Size())
   458  			}
   459  		})
   460  	}
   461  }
   462  
   463  type reader struct {
   464  	f fs.File
   465  }
   466  
   467  func (r reader) Read(p []byte) (n int, err error) {
   468  	return r.f.Read(p)
   469  }
   470  
   471  func (r reader) Stat() (fs.FileInfo, error) {
   472  	return r.f.Stat()
   473  }
   474  
   475  func TestExtractEggWithoutSize(t *testing.T) {
   476  	fsys := scalibrfs.DirFS(".")
   477  	path := "testdata/monotonic-1.6-py3.10.egg"
   478  
   479  	r, err := fsys.Open(path)
   480  	defer func() {
   481  		if err = r.Close(); err != nil {
   482  			t.Errorf("Close(): %v", err)
   483  		}
   484  	}()
   485  	if err != nil {
   486  		t.Fatal(err)
   487  	}
   488  
   489  	// Set FileInfo to nil, which does not allow input.info.Size(). This is required for unzipping the
   490  	// egg file.
   491  	var info fs.FileInfo
   492  
   493  	input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: path, Info: info, Reader: r}
   494  	e := wheelegg.Extractor{}
   495  	_, gotErr := e.Extract(t.Context(), input)
   496  	wantErr := wheelegg.ErrSizeNotSet
   497  	if !errors.Is(gotErr, wantErr) {
   498  		t.Fatalf("Extract(%s) got err: '%v', want err: '%v'", path, gotErr, wantErr)
   499  	}
   500  }