github.com/google/osv-scalibr@v0.4.1/extractor/filesystem/language/java/archive/archive_test.go (about)

     1  // Copyright 2025 Google LLC
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package archive_test
    16  
    17  import (
    18  	"archive/zip"
    19  	"errors"
    20  	"fmt"
    21  	"io"
    22  	"io/fs"
    23  	"os"
    24  	"path/filepath"
    25  	"testing"
    26  
    27  	"github.com/google/go-cmp/cmp"
    28  	"github.com/google/go-cmp/cmp/cmpopts"
    29  	"github.com/google/osv-scalibr/extractor"
    30  	"github.com/google/osv-scalibr/extractor/filesystem"
    31  	"github.com/google/osv-scalibr/extractor/filesystem/language/java/archive"
    32  	archivemeta "github.com/google/osv-scalibr/extractor/filesystem/language/java/archive/metadata"
    33  	"github.com/google/osv-scalibr/extractor/filesystem/simplefileapi"
    34  	scalibrfs "github.com/google/osv-scalibr/fs"
    35  	"github.com/google/osv-scalibr/inventory"
    36  	"github.com/google/osv-scalibr/log"
    37  	"github.com/google/osv-scalibr/purl"
    38  	"github.com/google/osv-scalibr/stats"
    39  	"github.com/google/osv-scalibr/testing/fakefs"
    40  	"github.com/google/osv-scalibr/testing/testcollector"
    41  )
    42  
    43  var (
    44  	errAny = errors.New("any error")
    45  )
    46  
    47  func TestFileRequired(t *testing.T) {
    48  	tests := []struct {
    49  		name             string
    50  		path             string
    51  		fileSizeBytes    int64
    52  		maxFileSizeBytes int64
    53  		wantRequired     bool
    54  		wantResultMetric stats.FileRequiredResult
    55  	}{
    56  		{
    57  			name:         ".jar",
    58  			path:         filepath.FromSlash("some/path/a.jar"),
    59  			wantRequired: true,
    60  		},
    61  		{
    62  			name:         ".JAR",
    63  			path:         filepath.FromSlash("some/path/a.JAR"),
    64  			wantRequired: true,
    65  		},
    66  		{
    67  			name:         ".war",
    68  			path:         filepath.FromSlash("some/path/a.war"),
    69  			wantRequired: true,
    70  		},
    71  		{
    72  			name:         ".ear",
    73  			path:         filepath.FromSlash("some/path/a.ear"),
    74  			wantRequired: true,
    75  		},
    76  		{
    77  			name:         ".jmod",
    78  			path:         filepath.FromSlash("some/path/a.jmod"),
    79  			wantRequired: true,
    80  		},
    81  		{
    82  			name:         ".par",
    83  			path:         filepath.FromSlash("some/path/a.par"),
    84  			wantRequired: true,
    85  		},
    86  		{
    87  			name:         ".sar",
    88  			path:         filepath.FromSlash("some/path/a.sar"),
    89  			wantRequired: true,
    90  		},
    91  		{
    92  			name:         ".jpi",
    93  			path:         filepath.FromSlash("some/path/a.jpi"),
    94  			wantRequired: true,
    95  		},
    96  		{
    97  			name:         ".hpi",
    98  			path:         filepath.FromSlash("some/path/a.hpi"),
    99  			wantRequired: true,
   100  		},
   101  		{
   102  			name:         ".lpkg",
   103  			path:         filepath.FromSlash("some/path/a.lpkg"),
   104  			wantRequired: true,
   105  		},
   106  		{
   107  			name:         ".nar",
   108  			path:         filepath.FromSlash("some/path/a.nar"),
   109  			wantRequired: true,
   110  		},
   111  		{
   112  			name:         "not archive file",
   113  			path:         filepath.FromSlash("some/path/a.txt"),
   114  			wantRequired: false,
   115  		},
   116  		{
   117  			name:         "no extension should be ignored",
   118  			path:         filepath.FromSlash("some/path/a"),
   119  			wantRequired: false,
   120  		},
   121  		{
   122  			name:             ".jar required if size less than maxFileSizeBytes",
   123  			path:             filepath.FromSlash("some/path/a.jar"),
   124  			maxFileSizeBytes: 1000,
   125  			fileSizeBytes:    100,
   126  			wantRequired:     true,
   127  		},
   128  		{
   129  			name:             ".war required if size equal to maxFileSizeBytes",
   130  			path:             filepath.FromSlash("some/path/a.jar"),
   131  			maxFileSizeBytes: 1000,
   132  			fileSizeBytes:    1000,
   133  			wantRequired:     true,
   134  		},
   135  		{
   136  			name:             ".jar not required if size greater than maxFileSizeBytes",
   137  			path:             filepath.FromSlash("some/path/a.jar"),
   138  			maxFileSizeBytes: 100,
   139  			fileSizeBytes:    1000,
   140  			wantRequired:     false,
   141  			wantResultMetric: stats.FileRequiredResultSizeLimitExceeded,
   142  		},
   143  		{
   144  			name:             ".jar required if maxFileSizeBytes explicitly set to 0",
   145  			path:             filepath.FromSlash("some/path/a.jar"),
   146  			maxFileSizeBytes: 0,
   147  			fileSizeBytes:    1000,
   148  			wantRequired:     true,
   149  		},
   150  	}
   151  
   152  	for _, tt := range tests {
   153  		t.Run(tt.name, func(t *testing.T) {
   154  			collector := testcollector.New()
   155  			cfg := defaultConfigWith(archive.Config{
   156  				MaxFileSizeBytes: tt.maxFileSizeBytes,
   157  				Stats:            collector,
   158  			})
   159  
   160  			var e filesystem.Extractor = archive.New(cfg)
   161  
   162  			if got := e.FileRequired(simplefileapi.New(tt.path, fakefs.FakeFileInfo{
   163  				FileName: filepath.Base(tt.path),
   164  				FileMode: fs.ModePerm,
   165  				FileSize: tt.fileSizeBytes,
   166  			})); got != tt.wantRequired {
   167  				t.Fatalf("FileRequired(%s): got %v, want %v", tt.path, got, tt.wantRequired)
   168  			}
   169  
   170  			gotResultMetric := collector.FileRequiredResult(tt.path)
   171  			if tt.wantResultMetric != "" && tt.wantResultMetric != gotResultMetric {
   172  				t.Fatalf("FileRequired(%s): recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric)
   173  			}
   174  		})
   175  	}
   176  }
   177  
   178  func TestExtract(t *testing.T) {
   179  	tests := []struct {
   180  		name             string
   181  		description      string
   182  		cfg              archive.Config
   183  		path             string
   184  		contentPath      string
   185  		want             []*extractor.Package
   186  		wantErr          error
   187  		wantResultMetric stats.FileExtractedResult
   188  	}{
   189  		{
   190  			name: "Empty_jar_file_should_not_return_anything",
   191  			path: filepath.FromSlash("testdata/empty.jar"),
   192  		},
   193  		{
   194  			name:    "Not a valid jar file",
   195  			path:    filepath.FromSlash("testdata/not_jar"),
   196  			wantErr: errAny,
   197  		},
   198  		{
   199  			name:    "Invalid jar file",
   200  			path:    filepath.FromSlash("testdata/invalid_jar.jar"),
   201  			wantErr: errAny,
   202  		},
   203  		{
   204  			name:        "Jar file with no pom.properties",
   205  			description: "Contains other files but no pom.properties.",
   206  			path:        filepath.FromSlash("testdata/no_pom_properties.jar"),
   207  			want:        []*extractor.Package{},
   208  		},
   209  		{
   210  			name:        "Jar file with invalid pom.properties",
   211  			description: "Contains a pom.properties which is missing the `groupId` field and so it is ignored.",
   212  			path:        filepath.FromSlash("testdata/pom_missing_group_id.jar"),
   213  			want:        []*extractor.Package{},
   214  		},
   215  		{
   216  			name: "Jar_file_with_pom.properties",
   217  			path: filepath.FromSlash("testdata/simple.jar"),
   218  			want: []*extractor.Package{{
   219  				Name:     "com.some.package:package-name",
   220  				Version:  "1.2.3",
   221  				PURLType: purl.TypeMaven,
   222  				Metadata: &archivemeta.Metadata{ArtifactID: "package-name", GroupID: "com.some.package"},
   223  				Locations: []string{
   224  					filepath.FromSlash("testdata/simple.jar"),
   225  					filepath.FromSlash("testdata/simple.jar/pom.properties"),
   226  				},
   227  			}},
   228  		},
   229  		{
   230  			name:        "Jar file with no pom.properties, and IdentifyByFilename enabled",
   231  			description: "Contains other files but no pom.properties. Has invalid filename.",
   232  			path:        filepath.FromSlash("testdata/no_pom_properties.jar"),
   233  			cfg: archive.Config{
   234  				ExtractFromFilename: true,
   235  			},
   236  			want: []*extractor.Package{},
   237  		},
   238  		{
   239  			name:        "Jar file with pom.properties, IdentifyByFilename enabled",
   240  			description: "Contains valid pom.properties, won't be identified by filename.",
   241  			path:        filepath.FromSlash("testdata/simple.jar"),
   242  			cfg: archive.Config{
   243  				ExtractFromFilename: true,
   244  			},
   245  			want: []*extractor.Package{{
   246  				Name:     "com.some.package:package-name",
   247  				Version:  "1.2.3",
   248  				PURLType: purl.TypeMaven,
   249  				Metadata: &archivemeta.Metadata{ArtifactID: "package-name", GroupID: "com.some.package"},
   250  				Locations: []string{
   251  					filepath.FromSlash("testdata/simple.jar"),
   252  					filepath.FromSlash("testdata/simple.jar/pom.properties"),
   253  				},
   254  			}},
   255  		},
   256  		{
   257  			name:        "Jar file with no pom.properties and manifest, and IdentifyByFilename enabled",
   258  			description: "Contains other files but no pom.properties and manifest. Has valid filename.",
   259  			path:        filepath.FromSlash("testdata/no_pom_properties-2.4.0.jar"),
   260  			cfg: archive.Config{
   261  				ExtractFromFilename: true,
   262  			},
   263  			want: []*extractor.Package{{
   264  				Name:     "no_pom_properties:no_pom_properties",
   265  				Version:  "2.4.0",
   266  				PURLType: purl.TypeMaven,
   267  				Metadata: &archivemeta.Metadata{ArtifactID: "no_pom_properties", GroupID: "no_pom_properties"},
   268  				Locations: []string{
   269  					filepath.FromSlash("testdata/no_pom_properties-2.4.0.jar"),
   270  				},
   271  			}},
   272  		},
   273  		{
   274  			name:        "Jar file with no pom.properties but has manifest, and IdentifyByFilename enabled",
   275  			description: "Contains other files but no pom.properties. Has valid manifest with Group ID. Has valid filename.",
   276  			path:        filepath.FromSlash("testdata/no_pom_properties-2.4.0.jar"),
   277  			contentPath: filepath.FromSlash("testdata/combine-manifest-filename/MANIFEST.MF"),
   278  			cfg: archive.Config{
   279  				ExtractFromFilename: true,
   280  			},
   281  			want: []*extractor.Package{{
   282  				Name:     "org.apache.ivy:no_pom_properties",
   283  				Version:  "2.4.0",
   284  				PURLType: purl.TypeMaven,
   285  				Metadata: &archivemeta.Metadata{
   286  					ArtifactID: "no_pom_properties",
   287  					GroupID:    "org.apache.ivy", // Group ID overridden by manifest.
   288  				},
   289  				Locations: []string{
   290  					filepath.FromSlash("testdata/no_pom_properties-2.4.0.jar"),
   291  				},
   292  			}},
   293  		},
   294  		{
   295  			name:        "Jar file with no pom.properties but has manifest, and IdentifyByFilename enabled",
   296  			description: "Contains other files but no pom.properties. Has valid manifest without Group ID. Has valid filename.",
   297  			path:        filepath.FromSlash("testdata/no_pom_properties-2.4.0.jar"),
   298  			contentPath: filepath.FromSlash("testdata/manifest-no-group-id/MANIFEST.MF"),
   299  			cfg: archive.Config{
   300  				ExtractFromFilename: true,
   301  			},
   302  			want: []*extractor.Package{{
   303  				Name:     "no_pom_properties:no_pom_properties",
   304  				Version:  "2.4.0",
   305  				PURLType: purl.TypeMaven,
   306  				Metadata: &archivemeta.Metadata{
   307  					ArtifactID: "no_pom_properties",
   308  					// Group ID defaults to Artifact ID since there was no Group ID in the
   309  					// manifest.
   310  					GroupID: "no_pom_properties",
   311  				},
   312  				Locations: []string{
   313  					filepath.FromSlash("testdata/no_pom_properties-2.4.0.jar"),
   314  				},
   315  			}},
   316  		},
   317  		{
   318  			name:        "Jar file with invalid pom.properties and manifest, IdentifyByFilename enabled",
   319  			description: "Contains a pom.properties which is missing the `groupId` field and so it is ignored. Has no manifest. Has valid filename.",
   320  			path:        filepath.FromSlash("testdata/pom_missing_group_id-2.4.0.jar"),
   321  			cfg: archive.Config{
   322  				ExtractFromFilename: true,
   323  			},
   324  			want: []*extractor.Package{{
   325  				Name:     "pom_missing_group_id:pom_missing_group_id",
   326  				Version:  "2.4.0",
   327  				PURLType: purl.TypeMaven,
   328  				Metadata: &archivemeta.Metadata{ArtifactID: "pom_missing_group_id", GroupID: "pom_missing_group_id"},
   329  				Locations: []string{
   330  					filepath.FromSlash("testdata/pom_missing_group_id-2.4.0.jar"),
   331  				},
   332  			}},
   333  		},
   334  		{
   335  			name:        "Jar file with no pom.properties and manifest, and IdentifyByFilename enabled",
   336  			description: "Contains other files but no pom.properties and manifest. Has valid filename with groupID.",
   337  			path:        filepath.FromSlash("testdata/org.eclipse.sisu.inject-0.3.5.jar"),
   338  			cfg: archive.Config{
   339  				ExtractFromFilename: true,
   340  			},
   341  			want: []*extractor.Package{{
   342  				Name:     "org.eclipse.sisu:org.eclipse.sisu.inject",
   343  				Version:  "0.3.5",
   344  				PURLType: purl.TypeMaven,
   345  				Metadata: &archivemeta.Metadata{ArtifactID: "org.eclipse.sisu.inject", GroupID: "org.eclipse.sisu"},
   346  				Locations: []string{
   347  					filepath.FromSlash("testdata/org.eclipse.sisu.inject-0.3.5.jar"),
   348  				},
   349  			}},
   350  		},
   351  		{
   352  			name: "Nested_jars_with_pom.properties_at_depth_10",
   353  			path: filepath.FromSlash("testdata/nested_at_10.jar"),
   354  			cfg:  archive.Config{HashJars: true},
   355  			want: []*extractor.Package{{
   356  				Name:     "com.some.package:package-name",
   357  				Version:  "1.2.3",
   358  				PURLType: purl.TypeMaven,
   359  				Metadata: &archivemeta.Metadata{
   360  					ArtifactID: "package-name",
   361  					GroupID:    "com.some.package",
   362  					SHA1:       "PO6pevcX8f2Rkpv4xB6NYviFokQ=", // inner most nested.jar
   363  				},
   364  				Locations: []string{
   365  					filepath.FromSlash("testdata/nested_at_10.jar"),
   366  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar"),
   367  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar"),
   368  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar/nested.jar"),
   369  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar/nested.jar/nested.jar"),
   370  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar"),
   371  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar"),
   372  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar"),
   373  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar"),
   374  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar"),
   375  					filepath.FromSlash("testdata/nested_at_10.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/nested.jar/pom.properties"),
   376  				},
   377  			}},
   378  		},
   379  		{
   380  			name:        "Nested jars with pom.properties at depth 100",
   381  			description: "Returns error with no results because max depth is reached before getting to pom.properties",
   382  			path:        filepath.FromSlash("testdata/nested_at_100.jar"),
   383  			want:        []*extractor.Package{},
   384  			wantErr:     errAny,
   385  		},
   386  		{
   387  			name:        "Jar file with pom.properties at multiple depths",
   388  			description: "A jar file with pom.properties at complex.jar/pom.properties and another at complex.jar/BOOT-INF/lib/inner.jar/pom.properties",
   389  			path:        filepath.FromSlash("testdata/complex.jar"),
   390  			want: []*extractor.Package{
   391  				{
   392  					Name:     "com.some.package:package-name",
   393  					Version:  "1.2.3",
   394  					PURLType: purl.TypeMaven,
   395  					Metadata: &archivemeta.Metadata{ArtifactID: "package-name", GroupID: "com.some.package"},
   396  					Locations: []string{
   397  						filepath.FromSlash("testdata/complex.jar"),
   398  						filepath.FromSlash("testdata/complex.jar/pom.properties"),
   399  					},
   400  				},
   401  				{
   402  					Name:     "com.some.anotherpackage:another-package-name",
   403  					Version:  "3.2.1",
   404  					PURLType: purl.TypeMaven,
   405  					Metadata: &archivemeta.Metadata{ArtifactID: "another-package-name", GroupID: "com.some.anotherpackage"},
   406  					Locations: []string{
   407  						filepath.FromSlash("testdata/complex.jar"),
   408  						filepath.FromSlash("testdata/complex.jar/BOOT-INF/lib/inner.jar"),
   409  						filepath.FromSlash("testdata/complex.jar/BOOT-INF/lib/inner.jar/pom.properties"),
   410  					},
   411  				},
   412  			},
   413  		},
   414  		{
   415  			name:        "Ignore inner pom.properties because max opened bytes reached",
   416  			description: "A jar file with pom.properties at complex.jar/pom.properties and another at complex.jar/BOOT-INF/lib/inner.jar/pom.properties. The inner pom.properties is never extracted because MaxOpenedBytes is reached.",
   417  			cfg: archive.Config{
   418  				MaxOpenedBytes: 700,
   419  				Stats:          testcollector.New(),
   420  			},
   421  			path: filepath.FromSlash("testdata/complex.jar"),
   422  			want: []*extractor.Package{{
   423  				Name:     "com.some.package:package-name",
   424  				Version:  "1.2.3",
   425  				PURLType: purl.TypeMaven,
   426  				Metadata: &archivemeta.Metadata{ArtifactID: "package-name", GroupID: "com.some.package"},
   427  				Locations: []string{
   428  					filepath.FromSlash("testdata/complex.jar"),
   429  					filepath.FromSlash("testdata/complex.jar/pom.properties"),
   430  				},
   431  			}},
   432  			wantErr:          filesystem.ErrExtractorMemoryLimitExceeded,
   433  			wantResultMetric: stats.FileExtractedResultErrorMemoryLimitExceeded,
   434  		},
   435  		{
   436  			name: "Realistic_jar_file_with_pom.properties",
   437  			path: filepath.FromSlash("testdata/guava-31.1-jre.jar"),
   438  			cfg:  archive.Config{HashJars: true},
   439  			want: []*extractor.Package{
   440  				{
   441  					Name:     "com.google.guava:guava",
   442  					Version:  "31.1-jre",
   443  					PURLType: purl.TypeMaven,
   444  					Metadata: &archivemeta.Metadata{
   445  						ArtifactID: "guava",
   446  						GroupID:    "com.google.guava",
   447  						// openssl sha1 -binary third_party/scalibr/extractor/filesystem/language/java/archive/testdata/guava-31.1-jre.jar | base64
   448  						SHA1: "YEWPh30FXQyRFNnhou+3N7S8KCw=",
   449  					},
   450  					Locations: []string{
   451  						filepath.FromSlash("testdata/guava-31.1-jre.jar"),
   452  						filepath.FromSlash("testdata/guava-31.1-jre.jar/META-INF/maven/com.google.guava/guava/pom.properties"),
   453  					},
   454  				},
   455  			},
   456  		},
   457  		{
   458  			name: "Test_MANIFEST.MF_with_no_valid_ArtifactID",
   459  			path: filepath.FromSlash("testdata/com.google.src.yolo-0.1.2.jar"),
   460  			want: []*extractor.Package{},
   461  		},
   462  		{
   463  			name:        "Test MANIFEST.MF with symbolic name",
   464  			path:        filepath.FromSlash("testdata/manifest-symbolicname"),
   465  			contentPath: filepath.FromSlash("testdata/manifest-symbolicname/MANIFEST.MF"),
   466  			want: []*extractor.Package{{
   467  				Name:     "com.google.guava.failureaccess:failureaccess",
   468  				Version:  "1.0.1",
   469  				PURLType: purl.TypeMaven,
   470  				Metadata: &archivemeta.Metadata{
   471  					ArtifactID: "failureaccess",
   472  					GroupID:    "com.google.guava.failureaccess",
   473  				},
   474  				Locations: []string{
   475  					filepath.FromSlash("testdata/manifest-symbolicname"),
   476  					filepath.FromSlash("testdata/manifest-symbolicname/MANIFEST.MF"),
   477  				},
   478  			}},
   479  		},
   480  		{
   481  			name:        "Test invalid group or artifact id in manifest.mf",
   482  			path:        filepath.FromSlash("testdata/invalid-ids"),
   483  			contentPath: filepath.FromSlash("testdata/invalid-ids/MANIFEST.MF"),
   484  			want: []*extractor.Package{{
   485  				Name:     "test.group:correct.name",
   486  				Version:  "1.2.3",
   487  				PURLType: purl.TypeMaven,
   488  				Metadata: &archivemeta.Metadata{
   489  					ArtifactID: "correct.name",
   490  					GroupID:    "test.group",
   491  				},
   492  				Locations: []string{
   493  					filepath.FromSlash("testdata/invalid-ids"),
   494  					filepath.FromSlash("testdata/invalid-ids/MANIFEST.MF"),
   495  				},
   496  			}},
   497  		},
   498  		{
   499  			name:        "Test artifact ID that is mapped to a known group ID",
   500  			path:        filepath.FromSlash("testdata/known-group-id"),
   501  			contentPath: filepath.FromSlash("testdata/known-group-id/MANIFEST.MF"),
   502  			want: []*extractor.Package{{
   503  				Name:     "org.springframework:spring-web",
   504  				Version:  "5.3.26",
   505  				PURLType: purl.TypeMaven,
   506  				Metadata: &archivemeta.Metadata{
   507  					ArtifactID: "spring-web",
   508  					GroupID:    "org.springframework",
   509  				},
   510  				Locations: []string{
   511  					filepath.FromSlash("testdata/known-group-id"),
   512  					filepath.FromSlash("testdata/known-group-id/MANIFEST.MF"),
   513  				},
   514  			}},
   515  		},
   516  		{
   517  			name:        "Test combination of manifest and filename",
   518  			path:        filepath.FromSlash("testdata/ivy-2.4.0.jar"),
   519  			contentPath: filepath.FromSlash("testdata/combine-manifest-filename/MANIFEST.MF"),
   520  			cfg:         archive.Config{ExtractFromFilename: true},
   521  			want: []*extractor.Package{{
   522  				Name:     "org.apache.ivy:ivy",
   523  				Version:  "2.4.0",
   524  				PURLType: purl.TypeMaven,
   525  				Metadata: &archivemeta.Metadata{
   526  					ArtifactID: "ivy",
   527  					GroupID:    "org.apache.ivy",
   528  				},
   529  				Locations: []string{
   530  					filepath.FromSlash("testdata/ivy-2.4.0.jar"),
   531  				},
   532  			}},
   533  		},
   534  		{
   535  			name:        "Test combination of filename and manifest with group ID transform",
   536  			description: "The manifest has a Implementation-Title field with more data than just the group ID and we want to extract just the group ID.",
   537  			path:        filepath.FromSlash("testdata/no_pom_properties-2.4.0.jar"),
   538  			contentPath: filepath.FromSlash("testdata/manifest-implementation-title/MANIFEST.MF"),
   539  			cfg:         archive.Config{ExtractFromFilename: true},
   540  			want: []*extractor.Package{{
   541  				Name:     "org.elasticsearch:no_pom_properties",
   542  				Version:  "2.4.0",
   543  				PURLType: purl.TypeMaven,
   544  				Metadata: &archivemeta.Metadata{
   545  					ArtifactID: "no_pom_properties",
   546  					GroupID:    "org.elasticsearch",
   547  				},
   548  				Locations: []string{
   549  					filepath.FromSlash("testdata/no_pom_properties-2.4.0.jar"),
   550  				},
   551  			}},
   552  		},
   553  		{
   554  			name:        "Apache Axis package with incorrect artifact and group ID and space in version",
   555  			description: "The MANIFEST.MF file has 4 main issues: 1) The Name field is `org/apache/axis` which is incorrect. 2) The Implementation-Title field is `Apache Axis` which is incorrect. 3) The Implementation-Version field is has spaces `1.4 1855 April 22 2006`. 4) There is a blank new line in the file.",
   556  			path:        filepath.FromSlash("testdata/axis"),
   557  			contentPath: filepath.FromSlash("testdata/axis/MANIFEST.MF"),
   558  			cfg: archive.Config{
   559  				ExtractFromFilename: true,
   560  			},
   561  			want: []*extractor.Package{{
   562  				Name:     "org.apache.axis:axis",
   563  				Version:  "1.4",
   564  				PURLType: purl.TypeMaven,
   565  				Metadata: &archivemeta.Metadata{ArtifactID: "axis", GroupID: "org.apache.axis"},
   566  				Locations: []string{
   567  					filepath.FromSlash("testdata/axis"),
   568  					filepath.FromSlash("testdata/axis/MANIFEST.MF"),
   569  				},
   570  			}},
   571  		},
   572  	}
   573  
   574  	for _, tt := range tests {
   575  		t.Run(tt.name, func(t *testing.T) {
   576  			var f *os.File
   577  			var err error
   578  			if tt.contentPath != "" {
   579  				f = mustJar(t, tt.contentPath)
   580  			} else {
   581  				f, err = os.Open(tt.path)
   582  				if err != nil {
   583  					t.Fatalf("os.Open(%s) unexpected error: %v", tt.path, err)
   584  				}
   585  			}
   586  			defer f.Close()
   587  
   588  			info, err := f.Stat()
   589  			if err != nil {
   590  				t.Fatalf("f.Stat() for %q unexpected error: %v", tt.path, err)
   591  			}
   592  
   593  			// os.Open returns a ReaderAt per default. In case MaxOpenedBytes is set, we want to have no
   594  			// ReaderAt, such that we can test the MaxOpenedBytes limit.
   595  			var r io.Reader = f
   596  			if tt.cfg.MaxOpenedBytes > 0 {
   597  				r = noReaderAt{r: r}
   598  			}
   599  
   600  			collector := testcollector.New()
   601  			tt.cfg.Stats = collector
   602  
   603  			input := &filesystem.ScanInput{FS: scalibrfs.DirFS("."), Path: tt.path, Info: info, Reader: r}
   604  
   605  			log.SetLogger(&log.DefaultLogger{Verbose: true})
   606  			e := archive.New(defaultConfigWith(tt.cfg))
   607  			got, err := e.Extract(t.Context(), input)
   608  			if err != nil && errors.Is(tt.wantErr, errAny) {
   609  				err = errAny
   610  			}
   611  			if !errors.Is(err, tt.wantErr) {
   612  				t.Fatalf("Extract(%s) got error: %v, want error: %v", tt.path, err, tt.wantErr)
   613  			}
   614  			sort := func(a, b *extractor.Package) bool { return a.Name < b.Name }
   615  			if diff := cmp.Diff(inventory.Inventory{Packages: tt.want}, got, cmpopts.SortSlices(sort)); diff != "" {
   616  				t.Fatalf("Extract(%s) (-want +got):\n%s", tt.path, diff)
   617  			}
   618  
   619  			gotResultMetric := collector.FileExtractedResult(tt.path)
   620  			if tt.wantResultMetric != "" && tt.wantResultMetric != gotResultMetric {
   621  				t.Fatalf("Extract(%s): recorded result metric %v, want result metric %v", tt.path, gotResultMetric, tt.wantResultMetric)
   622  			}
   623  
   624  			gotFileSizeMetric := collector.FileExtractedFileSize(tt.path)
   625  			if gotFileSizeMetric != info.Size() {
   626  				t.Errorf("Extract(%s) recorded file size %v, want file size %v", tt.path, gotFileSizeMetric, info.Size())
   627  			}
   628  		})
   629  	}
   630  }
   631  
   632  type noReaderAt struct {
   633  	r io.Reader
   634  }
   635  
   636  func (r noReaderAt) Read(p []byte) (n int, err error) {
   637  	return r.r.Read(p)
   638  }
   639  
   640  // defaultConfigWith combines any non-zero fields of cfg with archive.DefaultConfig().
   641  func defaultConfigWith(cfg archive.Config) archive.Config {
   642  	newCfg := archive.DefaultConfig()
   643  
   644  	if cfg.MaxZipDepth > 0 {
   645  		newCfg.MaxZipDepth = cfg.MaxZipDepth
   646  	}
   647  	if cfg.MaxOpenedBytes > 0 {
   648  		newCfg.MaxOpenedBytes = cfg.MaxOpenedBytes
   649  	}
   650  	if cfg.MinZipBytes > 0 {
   651  		newCfg.MinZipBytes = cfg.MinZipBytes
   652  	}
   653  	if cfg.MaxFileSizeBytes > 0 {
   654  		newCfg.MaxFileSizeBytes = cfg.MaxFileSizeBytes
   655  	}
   656  	if cfg.Stats != nil {
   657  		newCfg.Stats = cfg.Stats
   658  	}
   659  	// ignores defaults
   660  	newCfg.ExtractFromFilename = cfg.ExtractFromFilename
   661  	newCfg.HashJars = cfg.HashJars
   662  	return newCfg
   663  }
   664  
   665  // mustJar creates a temporary jar file that contains the file from path and returns it opened.
   666  func mustJar(t *testing.T, path string) *os.File {
   667  	t.Helper()
   668  
   669  	dir := filepath.Dir(path)
   670  	dirEntry, err := os.ReadDir(dir)
   671  	if err != nil {
   672  		t.Fatalf("os.ReadDir(%s) unexpected error: %v", path, err)
   673  	}
   674  	fmt.Printf("%+v", dirEntry)
   675  	dir = filepath.Dir(path)
   676  	dirEntry, err = os.ReadDir(dir)
   677  	if err != nil {
   678  		t.Fatalf("os.ReadDir(%s) unexpected error: %v", path, err)
   679  	}
   680  	fmt.Printf("%+v", dirEntry)
   681  
   682  	f, err := os.Open(path)
   683  	if err != nil {
   684  		t.Fatalf("os.Open(%s) unexpected error: %v", path, err)
   685  	}
   686  	defer f.Close()
   687  
   688  	content, err := os.ReadFile(path)
   689  	if err != nil {
   690  		t.Fatalf("os.ReadFile(%s) unexpected error: %v", path, err)
   691  	}
   692  
   693  	jarFile, err := os.CreateTemp(t.TempDir(), "temp-*.jar")
   694  	if err != nil {
   695  		t.Fatalf("os.CreateTemp(\"temp-*.jar\") unexpected error: %v", err)
   696  	}
   697  	//nolint:errcheck
   698  	defer jarFile.Sync()
   699  
   700  	zipWriter := zip.NewWriter(jarFile)
   701  
   702  	fileWriter, err := zipWriter.Create(filepath.Base(path))
   703  	if err != nil {
   704  		t.Fatalf("zipWriter.Create(%s) unexpected error: %v", filepath.Base(path), err)
   705  	}
   706  	_, err = fileWriter.Write(content)
   707  	if err != nil {
   708  		t.Fatalf("fileWriter.Write(%s) unexpected error: %v", filepath.Base(path), err)
   709  	}
   710  
   711  	err = zipWriter.Close()
   712  	if err != nil {
   713  		t.Fatalf("zipWriter.Close() unexpected error: %v", err)
   714  	}
   715  
   716  	return jarFile
   717  }