github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/java/parse_pom_xml_test.go (about)

     1  package java
     2  
     3  import (
     4  	"encoding/base64"
     5  	"io"
     6  	"os"
     7  	"strings"
     8  	"testing"
     9  
    10  	"github.com/stretchr/testify/assert"
    11  	"github.com/stretchr/testify/require"
    12  	"github.com/vifraa/gopom"
    13  
    14  	"github.com/anchore/syft/syft/file"
    15  	"github.com/anchore/syft/syft/pkg"
    16  	"github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest"
    17  )
    18  
    19  func Test_parserPomXML(t *testing.T) {
    20  	tests := []struct {
    21  		input    string
    22  		expected []pkg.Package
    23  	}{
    24  		{
    25  			input: "test-fixtures/pom/pom.xml",
    26  			expected: []pkg.Package{
    27  				{
    28  					Name:         "joda-time",
    29  					Version:      "2.9.2",
    30  					PURL:         "pkg:maven/com.joda/joda-time@2.9.2",
    31  					Language:     pkg.Java,
    32  					Type:         pkg.JavaPkg,
    33  					MetadataType: pkg.JavaMetadataType,
    34  					Metadata: pkg.JavaMetadata{
    35  						PomProperties: &pkg.PomProperties{
    36  							GroupID:    "com.joda",
    37  							ArtifactID: "joda-time",
    38  						},
    39  					},
    40  				},
    41  				{
    42  					Name:         "junit",
    43  					Version:      "4.12",
    44  					PURL:         "pkg:maven/junit/junit@4.12",
    45  					Language:     pkg.Java,
    46  					Type:         pkg.JavaPkg,
    47  					MetadataType: pkg.JavaMetadataType,
    48  					Metadata: pkg.JavaMetadata{
    49  						PomProperties: &pkg.PomProperties{
    50  							GroupID:    "junit",
    51  							ArtifactID: "junit",
    52  							Scope:      "test",
    53  						},
    54  					},
    55  				},
    56  			},
    57  		},
    58  	}
    59  
    60  	for _, test := range tests {
    61  		t.Run(test.input, func(t *testing.T) {
    62  			for i := range test.expected {
    63  				test.expected[i].Locations.Add(file.NewLocation(test.input))
    64  			}
    65  			pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
    66  		})
    67  	}
    68  }
    69  
    70  func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) {
    71  	// regression for https://github.com/anchore/syft/issues/2044
    72  
    73  	// we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the
    74  	// file, which is extremely important for this test.
    75  
    76  	// for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically
    77  	// convert the file to UTF-8, which will break this test:
    78  
    79  	// xxd with the original pom.xml
    80  	// 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020  id>..
    81  	// 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69   <name>J.r.me Mi
    82  	// 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020  rc</name>..
    83  
    84  	// xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ)
    85  	// 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020  id>..
    86  	// 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d   <name>J...r...m
    87  	// 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20  e Mirc</name>..
    88  
    89  	// Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save
    90  	// is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character.
    91  	// The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character.
    92  	// This is quite silly on the part of IntelliJ, but it is what it is.
    93  
    94  	cases := []struct {
    95  		name    string
    96  		fixture string
    97  	}{
    98  		{
    99  			name:    "undeclared encoding",
   100  			fixture: "test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64",
   101  		},
   102  		{
   103  			name:    "declared encoding",
   104  			fixture: "test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64",
   105  		},
   106  	}
   107  
   108  	for _, c := range cases {
   109  		t.Run(c.name, func(t *testing.T) {
   110  			fh, err := os.Open(c.fixture)
   111  			require.NoError(t, err)
   112  
   113  			decoder := base64.NewDecoder(base64.StdEncoding, fh)
   114  
   115  			proj, err := decodePomXML(decoder)
   116  
   117  			require.NoError(t, err)
   118  			require.NotEmpty(t, proj.Developers)
   119  		})
   120  	}
   121  
   122  }
   123  
   124  func Test_parseCommonsTextPomXMLProject(t *testing.T) {
   125  	tests := []struct {
   126  		input    string
   127  		expected []pkg.Package
   128  	}{
   129  		{
   130  			input: "test-fixtures/pom/commons-text.pom.xml",
   131  			expected: []pkg.Package{
   132  				{
   133  					Name:         "commons-lang3",
   134  					Version:      "3.12.0",
   135  					PURL:         "pkg:maven/org.apache.commons/commons-lang3@3.12.0",
   136  					Language:     pkg.Java,
   137  					Type:         pkg.JavaPkg,
   138  					MetadataType: pkg.JavaMetadataType,
   139  					Metadata: pkg.JavaMetadata{
   140  						PomProperties: &pkg.PomProperties{
   141  							GroupID:    "org.apache.commons",
   142  							ArtifactID: "commons-lang3",
   143  						},
   144  					},
   145  				},
   146  				{
   147  					Name:         "junit-jupiter",
   148  					Version:      "",
   149  					PURL:         "pkg:maven/org.junit.jupiter/junit-jupiter",
   150  					Language:     pkg.Java,
   151  					Type:         pkg.JavaPkg,
   152  					MetadataType: pkg.JavaMetadataType,
   153  					Metadata: pkg.JavaMetadata{
   154  						PomProperties: &pkg.PomProperties{
   155  							GroupID:    "org.junit.jupiter",
   156  							ArtifactID: "junit-jupiter",
   157  							Scope:      "test",
   158  						},
   159  					},
   160  				},
   161  				{
   162  					Name:         "assertj-core",
   163  					Version:      "3.23.1",
   164  					PURL:         "pkg:maven/org.assertj/assertj-core@3.23.1",
   165  					Language:     pkg.Java,
   166  					Type:         pkg.JavaPkg,
   167  					MetadataType: pkg.JavaMetadataType,
   168  					Metadata: pkg.JavaMetadata{
   169  						PomProperties: &pkg.PomProperties{
   170  							GroupID:    "org.assertj",
   171  							ArtifactID: "assertj-core",
   172  							Scope:      "test",
   173  						},
   174  					},
   175  				},
   176  				{
   177  					Name:         "commons-io",
   178  					Version:      "2.11.0",
   179  					PURL:         "pkg:maven/commons-io/commons-io@2.11.0",
   180  					Language:     pkg.Java,
   181  					Type:         pkg.JavaPkg,
   182  					MetadataType: pkg.JavaMetadataType,
   183  					Metadata: pkg.JavaMetadata{
   184  						PomProperties: &pkg.PomProperties{
   185  							GroupID:    "commons-io",
   186  							ArtifactID: "commons-io",
   187  							Scope:      "test",
   188  						},
   189  					},
   190  				},
   191  				{
   192  					Name:         "mockito-inline",
   193  					Version:      "4.8.0",
   194  					PURL:         "pkg:maven/org.mockito/mockito-inline@4.8.0",
   195  					Language:     pkg.Java,
   196  					Type:         pkg.JavaPkg,
   197  					MetadataType: pkg.JavaMetadataType,
   198  					Metadata: pkg.JavaMetadata{
   199  						PomProperties: &pkg.PomProperties{
   200  							GroupID:    "org.mockito",
   201  							ArtifactID: "mockito-inline",
   202  							Scope:      "test",
   203  						},
   204  					},
   205  				},
   206  				{
   207  					Name:         "js",
   208  					Version:      "22.0.0.2",
   209  					PURL:         "pkg:maven/org.graalvm.js/js@22.0.0.2",
   210  					Language:     pkg.Java,
   211  					Type:         pkg.JavaPkg,
   212  					MetadataType: pkg.JavaMetadataType,
   213  					Metadata: pkg.JavaMetadata{
   214  						PomProperties: &pkg.PomProperties{
   215  							GroupID:    "org.graalvm.js",
   216  							ArtifactID: "js",
   217  							Scope:      "test",
   218  						},
   219  					},
   220  				},
   221  				{
   222  					Name:         "js-scriptengine",
   223  					Version:      "22.0.0.2",
   224  					PURL:         "pkg:maven/org.graalvm.js/js-scriptengine@22.0.0.2",
   225  					Language:     pkg.Java,
   226  					Type:         pkg.JavaPkg,
   227  					MetadataType: pkg.JavaMetadataType,
   228  					Metadata: pkg.JavaMetadata{
   229  						PomProperties: &pkg.PomProperties{
   230  							GroupID:    "org.graalvm.js",
   231  							ArtifactID: "js-scriptengine",
   232  							Scope:      "test",
   233  						},
   234  					},
   235  				},
   236  				{
   237  					Name:         "commons-rng-simple",
   238  					Version:      "1.4",
   239  					PURL:         "pkg:maven/org.apache.commons/commons-rng-simple@1.4",
   240  					Language:     pkg.Java,
   241  					Type:         pkg.JavaPkg,
   242  					MetadataType: pkg.JavaMetadataType,
   243  					Metadata: pkg.JavaMetadata{
   244  						PomProperties: &pkg.PomProperties{
   245  							GroupID:    "org.apache.commons",
   246  							ArtifactID: "commons-rng-simple",
   247  							Scope:      "test",
   248  						},
   249  					},
   250  				},
   251  				{
   252  					Name:         "jmh-core",
   253  					Version:      "1.35",
   254  					PURL:         "pkg:maven/org.openjdk.jmh/jmh-core@1.35",
   255  					Language:     pkg.Java,
   256  					Type:         pkg.JavaPkg,
   257  					MetadataType: pkg.JavaMetadataType,
   258  					Metadata: pkg.JavaMetadata{
   259  						PomProperties: &pkg.PomProperties{
   260  							GroupID:    "org.openjdk.jmh",
   261  							ArtifactID: "jmh-core",
   262  							Scope:      "test",
   263  						},
   264  					},
   265  				},
   266  				{
   267  					Name:         "jmh-generator-annprocess",
   268  					Version:      "1.35",
   269  					PURL:         "pkg:maven/org.openjdk.jmh/jmh-generator-annprocess@1.35",
   270  					Language:     pkg.Java,
   271  					Type:         pkg.JavaPkg,
   272  					MetadataType: pkg.JavaMetadataType,
   273  					Metadata: pkg.JavaMetadata{
   274  						PomProperties: &pkg.PomProperties{
   275  							GroupID:    "org.openjdk.jmh",
   276  							ArtifactID: "jmh-generator-annprocess",
   277  							Scope:      "test",
   278  						},
   279  					},
   280  				},
   281  			},
   282  		},
   283  	}
   284  
   285  	for _, test := range tests {
   286  		t.Run(test.input, func(t *testing.T) {
   287  			for i := range test.expected {
   288  				test.expected[i].Locations.Add(file.NewLocation(test.input))
   289  			}
   290  			pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil)
   291  		})
   292  	}
   293  }
   294  
   295  func Test_parsePomXMLProject(t *testing.T) {
   296  	tests := []struct {
   297  		expected pkg.PomProject
   298  	}{
   299  		{
   300  			expected: pkg.PomProject{
   301  				Path: "test-fixtures/pom/commons-codec.pom.xml",
   302  				Parent: &pkg.PomParent{
   303  					GroupID:    "org.apache.commons",
   304  					ArtifactID: "commons-parent",
   305  					Version:    "42",
   306  				},
   307  				GroupID:     "commons-codec",
   308  				ArtifactID:  "commons-codec",
   309  				Version:     "1.11",
   310  				Name:        "Apache Commons Codec",
   311  				Description: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal.  In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.",
   312  				URL:         "http://commons.apache.org/proper/commons-codec/",
   313  			},
   314  		},
   315  	}
   316  
   317  	for _, test := range tests {
   318  		t.Run(test.expected.Path, func(t *testing.T) {
   319  			fixture, err := os.Open(test.expected.Path)
   320  			assert.NoError(t, err)
   321  
   322  			actual, err := parsePomXMLProject(fixture.Name(), fixture)
   323  			assert.NoError(t, err)
   324  
   325  			assert.Equal(t, &test.expected, actual)
   326  		})
   327  	}
   328  }
   329  
   330  func Test_pomParent(t *testing.T) {
   331  	tests := []struct {
   332  		name     string
   333  		input    *gopom.Parent
   334  		expected *pkg.PomParent
   335  	}{
   336  		{
   337  			name: "only group ID",
   338  			input: &gopom.Parent{
   339  				GroupID: stringPointer("org.something"),
   340  			},
   341  			expected: &pkg.PomParent{
   342  				GroupID: "org.something",
   343  			},
   344  		},
   345  		{
   346  			name: "only artifact ID",
   347  			input: &gopom.Parent{
   348  				ArtifactID: stringPointer("something"),
   349  			},
   350  			expected: &pkg.PomParent{
   351  				ArtifactID: "something",
   352  			},
   353  		},
   354  		{
   355  			name: "only Version",
   356  			input: &gopom.Parent{
   357  				Version: stringPointer("something"),
   358  			},
   359  			expected: &pkg.PomParent{
   360  				Version: "something",
   361  			},
   362  		},
   363  		{
   364  			name:     "nil",
   365  			input:    nil,
   366  			expected: nil,
   367  		},
   368  		{
   369  			name:     "empty",
   370  			input:    &gopom.Parent{},
   371  			expected: nil,
   372  		},
   373  		{
   374  			name: "unused field",
   375  			input: &gopom.Parent{
   376  				RelativePath: stringPointer("something"),
   377  			},
   378  			expected: nil,
   379  		},
   380  	}
   381  
   382  	for _, test := range tests {
   383  		t.Run(test.name, func(t *testing.T) {
   384  			assert.Equal(t, test.expected, pomParent(gopom.Project{}, test.input))
   385  		})
   386  	}
   387  }
   388  
   389  func Test_cleanDescription(t *testing.T) {
   390  	tests := []struct {
   391  		name     string
   392  		input    string
   393  		expected string
   394  	}{
   395  		{
   396  			name: "indent + multiline",
   397  			input: `        The Apache Commons Codec package contains simple encoder and decoders for
   398          various formats such as Base64 and Hexadecimal.  In addition to these
   399          widely used encoders and decoders, the codec package also maintains a
   400          collection of phonetic encoding utilities.`,
   401  			expected: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal.  In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.",
   402  		},
   403  	}
   404  
   405  	for _, test := range tests {
   406  		t.Run(test.name, func(t *testing.T) {
   407  			assert.Equal(t, test.expected, cleanDescription(stringPointer(test.input)))
   408  		})
   409  	}
   410  }
   411  
   412  func Test_resolveProperty(t *testing.T) {
   413  	tests := []struct {
   414  		name     string
   415  		property string
   416  		pom      gopom.Project
   417  		expected string
   418  	}{
   419  		{
   420  			name:     "property",
   421  			property: "${version.number}",
   422  			pom: gopom.Project{
   423  				Properties: &gopom.Properties{
   424  					Entries: map[string]string{
   425  						"version.number": "12.5.0",
   426  					},
   427  				},
   428  			},
   429  			expected: "12.5.0",
   430  		},
   431  		{
   432  			name:     "groupId",
   433  			property: "${project.groupId}",
   434  			pom: gopom.Project{
   435  				GroupID: stringPointer("org.some.group"),
   436  			},
   437  			expected: "org.some.group",
   438  		},
   439  		{
   440  			name:     "parent groupId",
   441  			property: "${project.parent.groupId}",
   442  			pom: gopom.Project{
   443  				Parent: &gopom.Parent{
   444  					GroupID: stringPointer("org.some.parent"),
   445  				},
   446  			},
   447  			expected: "org.some.parent",
   448  		},
   449  		{
   450  			name:     "nil pointer halts search",
   451  			property: "${project.parent.groupId}",
   452  			pom: gopom.Project{
   453  				Parent: nil,
   454  			},
   455  			expected: "${project.parent.groupId}",
   456  		},
   457  		{
   458  			name:     "nil string pointer halts search",
   459  			property: "${project.parent.groupId}",
   460  			pom: gopom.Project{
   461  				Parent: &gopom.Parent{
   462  					GroupID: nil,
   463  				},
   464  			},
   465  			expected: "${project.parent.groupId}",
   466  		},
   467  	}
   468  
   469  	for _, test := range tests {
   470  		t.Run(test.name, func(t *testing.T) {
   471  			resolved := resolveProperty(test.pom, stringPointer(test.property), test.name)
   472  			assert.Equal(t, test.expected, resolved)
   473  		})
   474  	}
   475  }
   476  
   477  func stringPointer(s string) *string {
   478  	return &s
   479  }
   480  
   481  func Test_getUtf8Reader(t *testing.T) {
   482  	tests := []struct {
   483  		name     string
   484  		contents string
   485  	}{
   486  		{
   487  			name: "unknown encoding",
   488  			// random binary contents
   489  			contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==",
   490  		},
   491  	}
   492  	for _, tt := range tests {
   493  		t.Run(tt.name, func(t *testing.T) {
   494  			decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents))
   495  
   496  			got, err := getUtf8Reader(decoder)
   497  			require.NoError(t, err)
   498  			gotBytes, err := io.ReadAll(got)
   499  			require.NoError(t, err)
   500  			// if we couldn't decode the section as UTF-8, we should get a replacement character
   501  			assert.Contains(t, string(gotBytes), "�")
   502  		})
   503  	}
   504  }