github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/java/parse_pom_xml_test.go (about)

     1  package java
     2  
     3  import (
     4  	"encoding/base64"
     5  	"io"
     6  	"os"
     7  	"strings"
     8  	"testing"
     9  
    10  	"github.com/stretchr/testify/assert"
    11  	"github.com/stretchr/testify/require"
    12  	"github.com/vifraa/gopom"
    13  
    14  	"github.com/anchore/syft/syft/file"
    15  	"github.com/anchore/syft/syft/license"
    16  	"github.com/anchore/syft/syft/pkg"
    17  	"github.com/lineaje-labs/syft/syft/pkg/cataloger/internal/pkgtest"
    18  )
    19  
    20  func Test_parserPomXML(t *testing.T) {
    21  	tests := []struct {
    22  		input    string
    23  		expected []pkg.Package
    24  	}{
    25  		{
    26  			input: "test-fixtures/pom/pom.xml",
    27  			expected: []pkg.Package{
    28  				{
    29  					Name:     "joda-time",
    30  					Version:  "2.9.2",
    31  					PURL:     "pkg:maven/com.joda/joda-time@2.9.2",
    32  					Language: pkg.Java,
    33  					Type:     pkg.JavaPkg,
    34  					Metadata: pkg.JavaArchive{
    35  						PomProperties: &pkg.JavaPomProperties{
    36  							GroupID:    "com.joda",
    37  							ArtifactID: "joda-time",
    38  						},
    39  					},
    40  				},
    41  				{
    42  					Name:     "junit",
    43  					Version:  "4.12",
    44  					PURL:     "pkg:maven/junit/junit@4.12",
    45  					Language: pkg.Java,
    46  					Type:     pkg.JavaPkg,
    47  					Metadata: pkg.JavaArchive{
    48  						PomProperties: &pkg.JavaPomProperties{
    49  							GroupID:    "junit",
    50  							ArtifactID: "junit",
    51  							Scope:      "test",
    52  						},
    53  					},
    54  				},
    55  			},
    56  		},
    57  	}
    58  
    59  	for _, test := range tests {
    60  		t.Run(test.input, func(t *testing.T) {
    61  			for i := range test.expected {
    62  				test.expected[i].Locations.Add(file.NewLocation(test.input))
    63  			}
    64  			pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
    65  		})
    66  	}
    67  }
    68  
    69  func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) {
    70  	// regression for https://github.com/anchore/syft/issues/2044
    71  
    72  	// we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the
    73  	// file, which is extremely important for this test.
    74  
    75  	// for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically
    76  	// convert the file to UTF-8, which will break this test:
    77  
    78  	// xxd with the original pom.xml
    79  	// 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020  id>..
    80  	// 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69   <name>J.r.me Mi
    81  	// 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020  rc</name>..
    82  
    83  	// xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ)
    84  	// 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020  id>..
    85  	// 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d   <name>J...r...m
    86  	// 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20  e Mirc</name>..
    87  
    88  	// Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save
    89  	// is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character.
    90  	// The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character.
    91  	// This is quite silly on the part of IntelliJ, but it is what it is.
    92  
    93  	cases := []struct {
    94  		name    string
    95  		fixture string
    96  	}{
    97  		{
    98  			name:    "undeclared encoding",
    99  			fixture: "test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64",
   100  		},
   101  		{
   102  			name:    "declared encoding",
   103  			fixture: "test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64",
   104  		},
   105  	}
   106  
   107  	for _, c := range cases {
   108  		t.Run(c.name, func(t *testing.T) {
   109  			fh, err := os.Open(c.fixture)
   110  			require.NoError(t, err)
   111  
   112  			decoder := base64.NewDecoder(base64.StdEncoding, fh)
   113  
   114  			proj, err := decodePomXML(decoder)
   115  
   116  			require.NoError(t, err)
   117  			require.NotEmpty(t, proj.Developers)
   118  		})
   119  	}
   120  
   121  }
   122  
   123  func Test_parseCommonsTextPomXMLProject(t *testing.T) {
   124  	tests := []struct {
   125  		input    string
   126  		expected []pkg.Package
   127  	}{
   128  		{
   129  			input: "test-fixtures/pom/commons-text.pom.xml",
   130  			expected: []pkg.Package{
   131  				{
   132  					Name:     "commons-lang3",
   133  					Version:  "3.12.0",
   134  					PURL:     "pkg:maven/org.apache.commons/commons-lang3@3.12.0",
   135  					Language: pkg.Java,
   136  					Type:     pkg.JavaPkg,
   137  					Metadata: pkg.JavaArchive{
   138  						PomProperties: &pkg.JavaPomProperties{
   139  							GroupID:    "org.apache.commons",
   140  							ArtifactID: "commons-lang3",
   141  						},
   142  					},
   143  				},
   144  				{
   145  					Name:     "junit-jupiter",
   146  					Version:  "",
   147  					PURL:     "pkg:maven/org.junit.jupiter/junit-jupiter",
   148  					Language: pkg.Java,
   149  					Type:     pkg.JavaPkg,
   150  					Metadata: pkg.JavaArchive{
   151  						PomProperties: &pkg.JavaPomProperties{
   152  							GroupID:    "org.junit.jupiter",
   153  							ArtifactID: "junit-jupiter",
   154  							Scope:      "test",
   155  						},
   156  					},
   157  				},
   158  				{
   159  					Name:     "assertj-core",
   160  					Version:  "3.23.1",
   161  					PURL:     "pkg:maven/org.assertj/assertj-core@3.23.1",
   162  					Language: pkg.Java,
   163  					Type:     pkg.JavaPkg,
   164  					Metadata: pkg.JavaArchive{
   165  						PomProperties: &pkg.JavaPomProperties{
   166  							GroupID:    "org.assertj",
   167  							ArtifactID: "assertj-core",
   168  							Scope:      "test",
   169  						},
   170  					},
   171  				},
   172  				{
   173  					Name:     "commons-io",
   174  					Version:  "2.11.0",
   175  					PURL:     "pkg:maven/commons-io/commons-io@2.11.0",
   176  					Language: pkg.Java,
   177  					Type:     pkg.JavaPkg,
   178  					Metadata: pkg.JavaArchive{
   179  						PomProperties: &pkg.JavaPomProperties{
   180  							GroupID:    "commons-io",
   181  							ArtifactID: "commons-io",
   182  							Scope:      "test",
   183  						},
   184  					},
   185  				},
   186  				{
   187  					Name:     "mockito-inline",
   188  					Version:  "4.8.0",
   189  					PURL:     "pkg:maven/org.mockito/mockito-inline@4.8.0",
   190  					Language: pkg.Java,
   191  					Type:     pkg.JavaPkg,
   192  					Metadata: pkg.JavaArchive{
   193  						PomProperties: &pkg.JavaPomProperties{
   194  							GroupID:    "org.mockito",
   195  							ArtifactID: "mockito-inline",
   196  							Scope:      "test",
   197  						},
   198  					},
   199  				},
   200  				{
   201  					Name:     "js",
   202  					Version:  "22.0.0.2",
   203  					PURL:     "pkg:maven/org.graalvm.js/js@22.0.0.2",
   204  					Language: pkg.Java,
   205  					Type:     pkg.JavaPkg,
   206  					Metadata: pkg.JavaArchive{
   207  						PomProperties: &pkg.JavaPomProperties{
   208  							GroupID:    "org.graalvm.js",
   209  							ArtifactID: "js",
   210  							Scope:      "test",
   211  						},
   212  					},
   213  				},
   214  				{
   215  					Name:     "js-scriptengine",
   216  					Version:  "22.0.0.2",
   217  					PURL:     "pkg:maven/org.graalvm.js/js-scriptengine@22.0.0.2",
   218  					Language: pkg.Java,
   219  					Type:     pkg.JavaPkg,
   220  					Metadata: pkg.JavaArchive{
   221  						PomProperties: &pkg.JavaPomProperties{
   222  							GroupID:    "org.graalvm.js",
   223  							ArtifactID: "js-scriptengine",
   224  							Scope:      "test",
   225  						},
   226  					},
   227  				},
   228  				{
   229  					Name:     "commons-rng-simple",
   230  					Version:  "1.4",
   231  					PURL:     "pkg:maven/org.apache.commons/commons-rng-simple@1.4",
   232  					Language: pkg.Java,
   233  					Type:     pkg.JavaPkg,
   234  					Metadata: pkg.JavaArchive{
   235  						PomProperties: &pkg.JavaPomProperties{
   236  							GroupID:    "org.apache.commons",
   237  							ArtifactID: "commons-rng-simple",
   238  							Scope:      "test",
   239  						},
   240  					},
   241  				},
   242  				{
   243  					Name:     "jmh-core",
   244  					Version:  "1.35",
   245  					PURL:     "pkg:maven/org.openjdk.jmh/jmh-core@1.35",
   246  					Language: pkg.Java,
   247  					Type:     pkg.JavaPkg,
   248  					Metadata: pkg.JavaArchive{
   249  						PomProperties: &pkg.JavaPomProperties{
   250  							GroupID:    "org.openjdk.jmh",
   251  							ArtifactID: "jmh-core",
   252  							Scope:      "test",
   253  						},
   254  					},
   255  				},
   256  				{
   257  					Name:     "jmh-generator-annprocess",
   258  					Version:  "1.35",
   259  					PURL:     "pkg:maven/org.openjdk.jmh/jmh-generator-annprocess@1.35",
   260  					Language: pkg.Java,
   261  					Type:     pkg.JavaPkg,
   262  					Metadata: pkg.JavaArchive{
   263  						PomProperties: &pkg.JavaPomProperties{
   264  							GroupID:    "org.openjdk.jmh",
   265  							ArtifactID: "jmh-generator-annprocess",
   266  							Scope:      "test",
   267  						},
   268  					},
   269  				},
   270  			},
   271  		},
   272  	}
   273  
   274  	for _, test := range tests {
   275  		t.Run(test.input, func(t *testing.T) {
   276  			for i := range test.expected {
   277  				test.expected[i].Locations.Add(file.NewLocation(test.input))
   278  			}
   279  			pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
   280  		})
   281  	}
   282  }
   283  
   284  func Test_parsePomXMLProject(t *testing.T) {
   285  	// TODO: ideally we would have the path to the contained pom.xml, not the jar
   286  	jarLocation := file.NewLocation("path/to/archive.jar")
   287  	tests := []struct {
   288  		name     string
   289  		expected parsedPomProject
   290  	}{
   291  		{
   292  			name: "go case",
   293  			expected: parsedPomProject{
   294  				JavaPomProject: &pkg.JavaPomProject{
   295  					Path: "test-fixtures/pom/commons-codec.pom.xml",
   296  					Parent: &pkg.JavaPomParent{
   297  						GroupID:    "org.apache.commons",
   298  						ArtifactID: "commons-parent",
   299  						Version:    "42",
   300  					},
   301  					GroupID:     "commons-codec",
   302  					ArtifactID:  "commons-codec",
   303  					Version:     "1.11",
   304  					Name:        "Apache Commons Codec",
   305  					Description: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal.  In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.",
   306  					URL:         "http://commons.apache.org/proper/commons-codec/",
   307  				},
   308  			},
   309  		},
   310  		{
   311  			name: "with license data",
   312  			expected: parsedPomProject{
   313  				JavaPomProject: &pkg.JavaPomProject{
   314  					Path: "test-fixtures/pom/neo4j-license-maven-plugin.pom.xml",
   315  					Parent: &pkg.JavaPomParent{
   316  						GroupID:    "org.sonatype.oss",
   317  						ArtifactID: "oss-parent",
   318  						Version:    "7",
   319  					},
   320  					GroupID:     "org.neo4j.build.plugins",
   321  					ArtifactID:  "license-maven-plugin",
   322  					Version:     "4-SNAPSHOT",
   323  					Name:        "${project.artifactId}", // TODO: this is not an ideal answer
   324  					Description: "Maven 2 plugin to check and update license headers in source files",
   325  					URL:         "http://components.neo4j.org/${project.artifactId}/${project.version}", // TODO: this is not an ideal answer
   326  				},
   327  				Licenses: []pkg.License{
   328  					{
   329  						Value:          "The Apache Software License, Version 2.0",
   330  						SPDXExpression: "", // TODO: ideally we would parse this title to get Apache-2.0 (created issue #2210 https://github.com/anchore/syft/issues/2210)
   331  						Type:           license.Declared,
   332  						URLs:           []string{"http://www.apache.org/licenses/LICENSE-2.0.txt"},
   333  						Locations:      file.NewLocationSet(jarLocation),
   334  					},
   335  					{
   336  						Value:          "MIT",
   337  						SPDXExpression: "MIT",
   338  						Type:           license.Declared,
   339  						Locations:      file.NewLocationSet(jarLocation),
   340  					},
   341  					{
   342  						Type:      license.Declared,
   343  						URLs:      []string{"https://opensource.org/license/unlicense/"},
   344  						Locations: file.NewLocationSet(jarLocation),
   345  					},
   346  				},
   347  			},
   348  		},
   349  	}
   350  
   351  	for _, test := range tests {
   352  		t.Run(test.name, func(t *testing.T) {
   353  			fixture, err := os.Open(test.expected.Path)
   354  			assert.NoError(t, err)
   355  
   356  			actual, err := parsePomXMLProject(fixture.Name(), fixture, jarLocation)
   357  			assert.NoError(t, err)
   358  
   359  			assert.Equal(t, &test.expected, actual)
   360  		})
   361  	}
   362  }
   363  
   364  func Test_pomParent(t *testing.T) {
   365  	tests := []struct {
   366  		name     string
   367  		input    *gopom.Parent
   368  		expected *pkg.JavaPomParent
   369  	}{
   370  		{
   371  			name: "only group ID",
   372  			input: &gopom.Parent{
   373  				GroupID: stringPointer("org.something"),
   374  			},
   375  			expected: &pkg.JavaPomParent{
   376  				GroupID: "org.something",
   377  			},
   378  		},
   379  		{
   380  			name: "only artifact ID",
   381  			input: &gopom.Parent{
   382  				ArtifactID: stringPointer("something"),
   383  			},
   384  			expected: &pkg.JavaPomParent{
   385  				ArtifactID: "something",
   386  			},
   387  		},
   388  		{
   389  			name: "only Version",
   390  			input: &gopom.Parent{
   391  				Version: stringPointer("something"),
   392  			},
   393  			expected: &pkg.JavaPomParent{
   394  				Version: "something",
   395  			},
   396  		},
   397  		{
   398  			name:     "nil",
   399  			input:    nil,
   400  			expected: nil,
   401  		},
   402  		{
   403  			name:     "empty",
   404  			input:    &gopom.Parent{},
   405  			expected: nil,
   406  		},
   407  		{
   408  			name: "unused field",
   409  			input: &gopom.Parent{
   410  				RelativePath: stringPointer("something"),
   411  			},
   412  			expected: nil,
   413  		},
   414  	}
   415  
   416  	for _, test := range tests {
   417  		t.Run(test.name, func(t *testing.T) {
   418  			assert.Equal(t, test.expected, pomParent(gopom.Project{}, test.input))
   419  		})
   420  	}
   421  }
   422  
   423  func Test_cleanDescription(t *testing.T) {
   424  	tests := []struct {
   425  		name     string
   426  		input    string
   427  		expected string
   428  	}{
   429  		{
   430  			name: "indent + multiline",
   431  			input: `        The Apache Commons Codec package contains simple encoder and decoders for
   432          various formats such as Base64 and Hexadecimal.  In addition to these
   433          widely used encoders and decoders, the codec package also maintains a
   434          collection of phonetic encoding utilities.`,
   435  			expected: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal.  In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.",
   436  		},
   437  	}
   438  
   439  	for _, test := range tests {
   440  		t.Run(test.name, func(t *testing.T) {
   441  			assert.Equal(t, test.expected, cleanDescription(stringPointer(test.input)))
   442  		})
   443  	}
   444  }
   445  
   446  func Test_resolveProperty(t *testing.T) {
   447  	tests := []struct {
   448  		name     string
   449  		property string
   450  		pom      gopom.Project
   451  		expected string
   452  	}{
   453  		{
   454  			name:     "property",
   455  			property: "${version.number}",
   456  			pom: gopom.Project{
   457  				Properties: &gopom.Properties{
   458  					Entries: map[string]string{
   459  						"version.number": "12.5.0",
   460  					},
   461  				},
   462  			},
   463  			expected: "12.5.0",
   464  		},
   465  		{
   466  			name:     "groupId",
   467  			property: "${project.groupId}",
   468  			pom: gopom.Project{
   469  				GroupID: stringPointer("org.some.group"),
   470  			},
   471  			expected: "org.some.group",
   472  		},
   473  		{
   474  			name:     "parent groupId",
   475  			property: "${project.parent.groupId}",
   476  			pom: gopom.Project{
   477  				Parent: &gopom.Parent{
   478  					GroupID: stringPointer("org.some.parent"),
   479  				},
   480  			},
   481  			expected: "org.some.parent",
   482  		},
   483  		{
   484  			name:     "nil pointer halts search",
   485  			property: "${project.parent.groupId}",
   486  			pom: gopom.Project{
   487  				Parent: nil,
   488  			},
   489  			expected: "${project.parent.groupId}",
   490  		},
   491  		{
   492  			name:     "nil string pointer halts search",
   493  			property: "${project.parent.groupId}",
   494  			pom: gopom.Project{
   495  				Parent: &gopom.Parent{
   496  					GroupID: nil,
   497  				},
   498  			},
   499  			expected: "${project.parent.groupId}",
   500  		},
   501  	}
   502  
   503  	for _, test := range tests {
   504  		t.Run(test.name, func(t *testing.T) {
   505  			resolved := resolveProperty(test.pom, stringPointer(test.property), test.name)
   506  			assert.Equal(t, test.expected, resolved)
   507  		})
   508  	}
   509  }
   510  
   511  func stringPointer(s string) *string {
   512  	return &s
   513  }
   514  
   515  func Test_getUtf8Reader(t *testing.T) {
   516  	tests := []struct {
   517  		name     string
   518  		contents string
   519  	}{
   520  		{
   521  			name: "unknown encoding",
   522  			// random binary contents
   523  			contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==",
   524  		},
   525  	}
   526  	for _, tt := range tests {
   527  		t.Run(tt.name, func(t *testing.T) {
   528  			decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents))
   529  
   530  			got, err := getUtf8Reader(decoder)
   531  			require.NoError(t, err)
   532  			gotBytes, err := io.ReadAll(got)
   533  			require.NoError(t, err)
   534  			// if we couldn't decode the section as UTF-8, we should get a replacement character
   535  			assert.Contains(t, string(gotBytes), "�")
   536  		})
   537  	}
   538  }