github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/parse_pom_xml_test.go (about)

     1  package java
     2  
     3  import (
     4  	"encoding/base64"
     5  	"io"
     6  	"os"
     7  	"strings"
     8  	"testing"
     9  
    10  	"github.com/stretchr/testify/assert"
    11  	"github.com/stretchr/testify/require"
    12  	"github.com/vifraa/gopom"
    13  
    14  	"github.com/anchore/syft/syft/cataloging"
    15  	"github.com/anchore/syft/syft/file"
    16  	"github.com/anchore/syft/syft/license"
    17  	"github.com/anchore/syft/syft/pkg"
    18  	"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
    19  )
    20  
    21  func Test_parserPomXML(t *testing.T) {
    22  	tests := []struct {
    23  		input    string
    24  		expected []pkg.Package
    25  	}{
    26  		{
    27  			input: "test-fixtures/pom/pom.xml",
    28  			expected: []pkg.Package{
    29  				{
    30  					Name:     "joda-time",
    31  					Version:  "2.9.2",
    32  					PURL:     "pkg:maven/com.joda/joda-time@2.9.2",
    33  					Language: pkg.Java,
    34  					Type:     pkg.JavaPkg,
    35  					Metadata: pkg.JavaArchive{
    36  						PomProperties: &pkg.JavaPomProperties{
    37  							GroupID:    "com.joda",
    38  							ArtifactID: "joda-time",
    39  						},
    40  					},
    41  				},
    42  				{
    43  					Name:     "junit",
    44  					Version:  "4.12",
    45  					PURL:     "pkg:maven/junit/junit@4.12",
    46  					Language: pkg.Java,
    47  					Type:     pkg.JavaPkg,
    48  					Metadata: pkg.JavaArchive{
    49  						PomProperties: &pkg.JavaPomProperties{
    50  							GroupID:    "junit",
    51  							ArtifactID: "junit",
    52  							Scope:      "test",
    53  						},
    54  					},
    55  				},
    56  			},
    57  		},
    58  	}
    59  
    60  	for _, test := range tests {
    61  		t.Run(test.input, func(t *testing.T) {
    62  			for i := range test.expected {
    63  				test.expected[i].Locations.Add(file.NewLocation(test.input))
    64  			}
    65  
    66  			gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
    67  				ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
    68  					IncludeIndexedArchives:   true,
    69  					IncludeUnindexedArchives: true,
    70  				},
    71  			})
    72  
    73  			pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
    74  		})
    75  	}
    76  }
    77  
    78  func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) {
    79  	// regression for https://github.com/anchore/syft/issues/2044
    80  
    81  	// we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the
    82  	// file, which is extremely important for this test.
    83  
    84  	// for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically
    85  	// convert the file to UTF-8, which will break this test:
    86  
    87  	// xxd with the original pom.xml
    88  	// 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020  id>..
    89  	// 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69   <name>J.r.me Mi
    90  	// 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020  rc</name>..
    91  
    92  	// xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ)
    93  	// 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020  id>..
    94  	// 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d   <name>J...r...m
    95  	// 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20  e Mirc</name>..
    96  
    97  	// Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save
    98  	// is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character.
    99  	// The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character.
   100  	// This is quite silly on the part of IntelliJ, but it is what it is.
   101  
   102  	cases := []struct {
   103  		name    string
   104  		fixture string
   105  	}{
   106  		{
   107  			name:    "undeclared encoding",
   108  			fixture: "test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64",
   109  		},
   110  		{
   111  			name:    "declared encoding",
   112  			fixture: "test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64",
   113  		},
   114  	}
   115  
   116  	for _, c := range cases {
   117  		t.Run(c.name, func(t *testing.T) {
   118  			fh, err := os.Open(c.fixture)
   119  			require.NoError(t, err)
   120  
   121  			decoder := base64.NewDecoder(base64.StdEncoding, fh)
   122  
   123  			proj, err := decodePomXML(decoder)
   124  
   125  			require.NoError(t, err)
   126  			require.NotEmpty(t, proj.Developers)
   127  		})
   128  	}
   129  
   130  }
   131  
   132  func Test_parseCommonsTextPomXMLProject(t *testing.T) {
   133  	tests := []struct {
   134  		input    string
   135  		expected []pkg.Package
   136  	}{
   137  		{
   138  			input: "test-fixtures/pom/commons-text.pom.xml",
   139  			expected: []pkg.Package{
   140  				{
   141  					Name:     "commons-lang3",
   142  					Version:  "3.12.0",
   143  					PURL:     "pkg:maven/org.apache.commons/commons-lang3@3.12.0",
   144  					Language: pkg.Java,
   145  					Type:     pkg.JavaPkg,
   146  					Metadata: pkg.JavaArchive{
   147  						PomProperties: &pkg.JavaPomProperties{
   148  							GroupID:    "org.apache.commons",
   149  							ArtifactID: "commons-lang3",
   150  						},
   151  					},
   152  				},
   153  				{
   154  					Name:     "junit-jupiter",
   155  					Version:  "",
   156  					PURL:     "pkg:maven/org.junit.jupiter/junit-jupiter",
   157  					Language: pkg.Java,
   158  					Type:     pkg.JavaPkg,
   159  					Metadata: pkg.JavaArchive{
   160  						PomProperties: &pkg.JavaPomProperties{
   161  							GroupID:    "org.junit.jupiter",
   162  							ArtifactID: "junit-jupiter",
   163  							Scope:      "test",
   164  						},
   165  					},
   166  				},
   167  				{
   168  					Name:     "assertj-core",
   169  					Version:  "3.23.1",
   170  					PURL:     "pkg:maven/org.assertj/assertj-core@3.23.1",
   171  					Language: pkg.Java,
   172  					Type:     pkg.JavaPkg,
   173  					Metadata: pkg.JavaArchive{
   174  						PomProperties: &pkg.JavaPomProperties{
   175  							GroupID:    "org.assertj",
   176  							ArtifactID: "assertj-core",
   177  							Scope:      "test",
   178  						},
   179  					},
   180  				},
   181  				{
   182  					Name:     "commons-io",
   183  					Version:  "2.11.0",
   184  					PURL:     "pkg:maven/commons-io/commons-io@2.11.0",
   185  					Language: pkg.Java,
   186  					Type:     pkg.JavaPkg,
   187  					Metadata: pkg.JavaArchive{
   188  						PomProperties: &pkg.JavaPomProperties{
   189  							GroupID:    "commons-io",
   190  							ArtifactID: "commons-io",
   191  							Scope:      "test",
   192  						},
   193  					},
   194  				},
   195  				{
   196  					Name:     "mockito-inline",
   197  					Version:  "4.8.0",
   198  					PURL:     "pkg:maven/org.mockito/mockito-inline@4.8.0",
   199  					Language: pkg.Java,
   200  					Type:     pkg.JavaPkg,
   201  					Metadata: pkg.JavaArchive{
   202  						PomProperties: &pkg.JavaPomProperties{
   203  							GroupID:    "org.mockito",
   204  							ArtifactID: "mockito-inline",
   205  							Scope:      "test",
   206  						},
   207  					},
   208  				},
   209  				{
   210  					Name:     "js",
   211  					Version:  "22.0.0.2",
   212  					PURL:     "pkg:maven/org.graalvm.js/js@22.0.0.2",
   213  					Language: pkg.Java,
   214  					Type:     pkg.JavaPkg,
   215  					Metadata: pkg.JavaArchive{
   216  						PomProperties: &pkg.JavaPomProperties{
   217  							GroupID:    "org.graalvm.js",
   218  							ArtifactID: "js",
   219  							Scope:      "test",
   220  						},
   221  					},
   222  				},
   223  				{
   224  					Name:     "js-scriptengine",
   225  					Version:  "22.0.0.2",
   226  					PURL:     "pkg:maven/org.graalvm.js/js-scriptengine@22.0.0.2",
   227  					Language: pkg.Java,
   228  					Type:     pkg.JavaPkg,
   229  					Metadata: pkg.JavaArchive{
   230  						PomProperties: &pkg.JavaPomProperties{
   231  							GroupID:    "org.graalvm.js",
   232  							ArtifactID: "js-scriptengine",
   233  							Scope:      "test",
   234  						},
   235  					},
   236  				},
   237  				{
   238  					Name:     "commons-rng-simple",
   239  					Version:  "1.4",
   240  					PURL:     "pkg:maven/org.apache.commons/commons-rng-simple@1.4",
   241  					Language: pkg.Java,
   242  					Type:     pkg.JavaPkg,
   243  					Metadata: pkg.JavaArchive{
   244  						PomProperties: &pkg.JavaPomProperties{
   245  							GroupID:    "org.apache.commons",
   246  							ArtifactID: "commons-rng-simple",
   247  							Scope:      "test",
   248  						},
   249  					},
   250  				},
   251  				{
   252  					Name:     "jmh-core",
   253  					Version:  "1.35",
   254  					PURL:     "pkg:maven/org.openjdk.jmh/jmh-core@1.35",
   255  					Language: pkg.Java,
   256  					Type:     pkg.JavaPkg,
   257  					Metadata: pkg.JavaArchive{
   258  						PomProperties: &pkg.JavaPomProperties{
   259  							GroupID:    "org.openjdk.jmh",
   260  							ArtifactID: "jmh-core",
   261  							Scope:      "test",
   262  						},
   263  					},
   264  				},
   265  				{
   266  					Name:     "jmh-generator-annprocess",
   267  					Version:  "1.35",
   268  					PURL:     "pkg:maven/org.openjdk.jmh/jmh-generator-annprocess@1.35",
   269  					Language: pkg.Java,
   270  					Type:     pkg.JavaPkg,
   271  					Metadata: pkg.JavaArchive{
   272  						PomProperties: &pkg.JavaPomProperties{
   273  							GroupID:    "org.openjdk.jmh",
   274  							ArtifactID: "jmh-generator-annprocess",
   275  							Scope:      "test",
   276  						},
   277  					},
   278  				},
   279  			},
   280  		},
   281  	}
   282  
   283  	for _, test := range tests {
   284  		t.Run(test.input, func(t *testing.T) {
   285  			for i := range test.expected {
   286  				test.expected[i].Locations.Add(file.NewLocation(test.input))
   287  			}
   288  
   289  			gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{
   290  				ArchiveSearchConfig: cataloging.ArchiveSearchConfig{
   291  					IncludeIndexedArchives:   true,
   292  					IncludeUnindexedArchives: true,
   293  				},
   294  			})
   295  			pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil)
   296  		})
   297  	}
   298  }
   299  
   300  func Test_parsePomXMLProject(t *testing.T) {
   301  	// TODO: ideally we would have the path to the contained pom.xml, not the jar
   302  	jarLocation := file.NewLocation("path/to/archive.jar")
   303  	tests := []struct {
   304  		name     string
   305  		expected parsedPomProject
   306  	}{
   307  		{
   308  			name: "go case",
   309  			expected: parsedPomProject{
   310  				JavaPomProject: &pkg.JavaPomProject{
   311  					Path: "test-fixtures/pom/commons-codec.pom.xml",
   312  					Parent: &pkg.JavaPomParent{
   313  						GroupID:    "org.apache.commons",
   314  						ArtifactID: "commons-parent",
   315  						Version:    "42",
   316  					},
   317  					GroupID:     "commons-codec",
   318  					ArtifactID:  "commons-codec",
   319  					Version:     "1.11",
   320  					Name:        "Apache Commons Codec",
   321  					Description: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal.  In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.",
   322  					URL:         "http://commons.apache.org/proper/commons-codec/",
   323  				},
   324  			},
   325  		},
   326  		{
   327  			name: "with license data",
   328  			expected: parsedPomProject{
   329  				JavaPomProject: &pkg.JavaPomProject{
   330  					Path: "test-fixtures/pom/neo4j-license-maven-plugin.pom.xml",
   331  					Parent: &pkg.JavaPomParent{
   332  						GroupID:    "org.sonatype.oss",
   333  						ArtifactID: "oss-parent",
   334  						Version:    "7",
   335  					},
   336  					GroupID:     "org.neo4j.build.plugins",
   337  					ArtifactID:  "license-maven-plugin",
   338  					Version:     "4-SNAPSHOT",
   339  					Name:        "${project.artifactId}", // TODO: this is not an ideal answer
   340  					Description: "Maven 2 plugin to check and update license headers in source files",
   341  					URL:         "http://components.neo4j.org/${project.artifactId}/${project.version}", // TODO: this is not an ideal answer
   342  				},
   343  				Licenses: []pkg.License{
   344  					{
   345  						Value:          "The Apache Software License, Version 2.0",
   346  						SPDXExpression: "", // TODO: ideally we would parse this title to get Apache-2.0 (created issue #2210 https://github.com/anchore/syft/issues/2210)
   347  						Type:           license.Declared,
   348  						URLs:           []string{"http://www.apache.org/licenses/LICENSE-2.0.txt"},
   349  						Locations:      file.NewLocationSet(jarLocation),
   350  					},
   351  					{
   352  						Value:          "MIT",
   353  						SPDXExpression: "MIT",
   354  						Type:           license.Declared,
   355  						Locations:      file.NewLocationSet(jarLocation),
   356  					},
   357  					{
   358  						Type:      license.Declared,
   359  						URLs:      []string{"https://opensource.org/license/unlicense/"},
   360  						Locations: file.NewLocationSet(jarLocation),
   361  					},
   362  				},
   363  			},
   364  		},
   365  	}
   366  
   367  	for _, test := range tests {
   368  		t.Run(test.name, func(t *testing.T) {
   369  			fixture, err := os.Open(test.expected.Path)
   370  			assert.NoError(t, err)
   371  
   372  			actual, err := parsePomXMLProject(fixture.Name(), fixture, jarLocation)
   373  			assert.NoError(t, err)
   374  
   375  			assert.Equal(t, &test.expected, actual)
   376  		})
   377  	}
   378  }
   379  
   380  func Test_pomParent(t *testing.T) {
   381  	tests := []struct {
   382  		name     string
   383  		input    *gopom.Parent
   384  		expected *pkg.JavaPomParent
   385  	}{
   386  		{
   387  			name: "only group ID",
   388  			input: &gopom.Parent{
   389  				GroupID: stringPointer("org.something"),
   390  			},
   391  			expected: &pkg.JavaPomParent{
   392  				GroupID: "org.something",
   393  			},
   394  		},
   395  		{
   396  			name: "only artifact ID",
   397  			input: &gopom.Parent{
   398  				ArtifactID: stringPointer("something"),
   399  			},
   400  			expected: &pkg.JavaPomParent{
   401  				ArtifactID: "something",
   402  			},
   403  		},
   404  		{
   405  			name: "only Version",
   406  			input: &gopom.Parent{
   407  				Version: stringPointer("something"),
   408  			},
   409  			expected: &pkg.JavaPomParent{
   410  				Version: "something",
   411  			},
   412  		},
   413  		{
   414  			name:     "nil",
   415  			input:    nil,
   416  			expected: nil,
   417  		},
   418  		{
   419  			name:     "empty",
   420  			input:    &gopom.Parent{},
   421  			expected: nil,
   422  		},
   423  		{
   424  			name: "unused field",
   425  			input: &gopom.Parent{
   426  				RelativePath: stringPointer("something"),
   427  			},
   428  			expected: nil,
   429  		},
   430  	}
   431  
   432  	for _, test := range tests {
   433  		t.Run(test.name, func(t *testing.T) {
   434  			assert.Equal(t, test.expected, pomParent(gopom.Project{}, test.input))
   435  		})
   436  	}
   437  }
   438  
   439  func Test_cleanDescription(t *testing.T) {
   440  	tests := []struct {
   441  		name     string
   442  		input    string
   443  		expected string
   444  	}{
   445  		{
   446  			name: "indent + multiline",
   447  			input: `        The Apache Commons Codec package contains simple encoder and decoders for
   448          various formats such as Base64 and Hexadecimal.  In addition to these
   449          widely used encoders and decoders, the codec package also maintains a
   450          collection of phonetic encoding utilities.`,
   451  			expected: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal.  In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.",
   452  		},
   453  	}
   454  
   455  	for _, test := range tests {
   456  		t.Run(test.name, func(t *testing.T) {
   457  			assert.Equal(t, test.expected, cleanDescription(stringPointer(test.input)))
   458  		})
   459  	}
   460  }
   461  
   462  func Test_resolveProperty(t *testing.T) {
   463  	tests := []struct {
   464  		name     string
   465  		property string
   466  		pom      gopom.Project
   467  		expected string
   468  	}{
   469  		{
   470  			name:     "property",
   471  			property: "${version.number}",
   472  			pom: gopom.Project{
   473  				Properties: &gopom.Properties{
   474  					Entries: map[string]string{
   475  						"version.number": "12.5.0",
   476  					},
   477  				},
   478  			},
   479  			expected: "12.5.0",
   480  		},
   481  		{
   482  			name:     "groupId",
   483  			property: "${project.groupId}",
   484  			pom: gopom.Project{
   485  				GroupID: stringPointer("org.some.group"),
   486  			},
   487  			expected: "org.some.group",
   488  		},
   489  		{
   490  			name:     "parent groupId",
   491  			property: "${project.parent.groupId}",
   492  			pom: gopom.Project{
   493  				Parent: &gopom.Parent{
   494  					GroupID: stringPointer("org.some.parent"),
   495  				},
   496  			},
   497  			expected: "org.some.parent",
   498  		},
   499  		{
   500  			name:     "nil pointer halts search",
   501  			property: "${project.parent.groupId}",
   502  			pom: gopom.Project{
   503  				Parent: nil,
   504  			},
   505  			expected: "",
   506  		},
   507  		{
   508  			name:     "nil string pointer halts search",
   509  			property: "${project.parent.groupId}",
   510  			pom: gopom.Project{
   511  				Parent: &gopom.Parent{
   512  					GroupID: nil,
   513  				},
   514  			},
   515  			expected: "",
   516  		},
   517  		{
   518  			name:     "double dereference",
   519  			property: "${springboot.version}",
   520  			pom: gopom.Project{
   521  				Parent: &gopom.Parent{
   522  					Version: stringPointer("1.2.3"),
   523  				},
   524  				Properties: &gopom.Properties{
   525  					Entries: map[string]string{
   526  						"springboot.version": "${project.parent.version}",
   527  					},
   528  				},
   529  			},
   530  			expected: "1.2.3",
   531  		},
   532  		{
   533  			name:     "map missing stops double dereference",
   534  			property: "${springboot.version}",
   535  			pom: gopom.Project{
   536  				Parent: &gopom.Parent{
   537  					Version: stringPointer("1.2.3"),
   538  				},
   539  			},
   540  			expected: "",
   541  		},
   542  		{
   543  			name:     "resolution halts even if it resolves to a variable",
   544  			property: "${springboot.version}",
   545  			pom: gopom.Project{
   546  				Parent: &gopom.Parent{
   547  					Version: stringPointer("${undefined.version}"),
   548  				},
   549  				Properties: &gopom.Properties{
   550  					Entries: map[string]string{
   551  						"springboot.version": "${project.parent.version}",
   552  					},
   553  				},
   554  			},
   555  			expected: "",
   556  		},
   557  		{
   558  			name:     "resolution halts even if cyclic",
   559  			property: "${springboot.version}",
   560  			pom: gopom.Project{
   561  				Properties: &gopom.Properties{
   562  					Entries: map[string]string{
   563  						"springboot.version": "${springboot.version}",
   564  					},
   565  				},
   566  			},
   567  			expected: "",
   568  		},
   569  		{
   570  			name:     "resolution halts even if cyclic more steps",
   571  			property: "${cyclic.version}",
   572  			pom: gopom.Project{
   573  				Properties: &gopom.Properties{
   574  					Entries: map[string]string{
   575  						"other.version":      "${cyclic.version}",
   576  						"springboot.version": "${other.version}",
   577  						"cyclic.version":     "${springboot.version}",
   578  					},
   579  				},
   580  			},
   581  			expected: "",
   582  		},
   583  		{
   584  			name:     "resolution  halts even if cyclic involving parent",
   585  			property: "${cyclic.version}",
   586  			pom: gopom.Project{
   587  				Parent: &gopom.Parent{
   588  					Version: stringPointer("${cyclic.version}"),
   589  				},
   590  				Properties: &gopom.Properties{
   591  					Entries: map[string]string{
   592  						"other.version":      "${parent.version}",
   593  						"springboot.version": "${other.version}",
   594  						"cyclic.version":     "${springboot.version}",
   595  					},
   596  				},
   597  			},
   598  			expected: "",
   599  		},
   600  	}
   601  
   602  	for _, test := range tests {
   603  		t.Run(test.name, func(t *testing.T) {
   604  			resolved := resolveProperty(test.pom, stringPointer(test.property), test.name)
   605  			assert.Equal(t, test.expected, resolved)
   606  		})
   607  	}
   608  }
   609  
   610  func stringPointer(s string) *string {
   611  	return &s
   612  }
   613  
   614  func Test_getUtf8Reader(t *testing.T) {
   615  	tests := []struct {
   616  		name     string
   617  		contents string
   618  	}{
   619  		{
   620  			name: "unknown encoding",
   621  			// random binary contents
   622  			contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==",
   623  		},
   624  	}
   625  	for _, tt := range tests {
   626  		t.Run(tt.name, func(t *testing.T) {
   627  			decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents))
   628  
   629  			got, err := getUtf8Reader(decoder)
   630  			require.NoError(t, err)
   631  			gotBytes, err := io.ReadAll(got)
   632  			require.NoError(t, err)
   633  			// if we couldn't decode the section as UTF-8, we should get a replacement character
   634  			assert.Contains(t, string(gotBytes), "�")
   635  		})
   636  	}
   637  }