github.com/noqcks/syft@v0.0.0-20230920222752-a9e2c4e288e5/syft/pkg/cataloger/java/parse_pom_xml_test.go (about) 1 package java 2 3 import ( 4 "encoding/base64" 5 "io" 6 "os" 7 "strings" 8 "testing" 9 10 "github.com/stretchr/testify/assert" 11 "github.com/stretchr/testify/require" 12 "github.com/vifraa/gopom" 13 14 "github.com/anchore/syft/syft/file" 15 "github.com/anchore/syft/syft/pkg" 16 "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" 17 ) 18 19 func Test_parserPomXML(t *testing.T) { 20 tests := []struct { 21 input string 22 expected []pkg.Package 23 }{ 24 { 25 input: "test-fixtures/pom/pom.xml", 26 expected: []pkg.Package{ 27 { 28 Name: "joda-time", 29 Version: "2.9.2", 30 PURL: "pkg:maven/com.joda/joda-time@2.9.2", 31 Language: pkg.Java, 32 Type: pkg.JavaPkg, 33 MetadataType: pkg.JavaMetadataType, 34 Metadata: pkg.JavaMetadata{ 35 PomProperties: &pkg.PomProperties{ 36 GroupID: "com.joda", 37 ArtifactID: "joda-time", 38 }, 39 }, 40 }, 41 { 42 Name: "junit", 43 Version: "4.12", 44 PURL: "pkg:maven/junit/junit@4.12", 45 Language: pkg.Java, 46 Type: pkg.JavaPkg, 47 MetadataType: pkg.JavaMetadataType, 48 Metadata: pkg.JavaMetadata{ 49 PomProperties: &pkg.PomProperties{ 50 GroupID: "junit", 51 ArtifactID: "junit", 52 Scope: "test", 53 }, 54 }, 55 }, 56 }, 57 }, 58 } 59 60 for _, test := range tests { 61 t.Run(test.input, func(t *testing.T) { 62 for i := range test.expected { 63 test.expected[i].Locations.Add(file.NewLocation(test.input)) 64 } 65 pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil) 66 }) 67 } 68 } 69 70 func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) { 71 // regression for https://github.com/anchore/syft/issues/2044 72 73 // we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the 74 // file, which is extremely important for this test. 75 76 // for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically 77 // convert the file to UTF-8, which will break this test: 78 79 // xxd with the original pom.xml 80 // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. 81 // 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69 <name>J.r.me Mi 82 // 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020 rc</name>.. 83 84 // xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ) 85 // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. 86 // 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d <name>J...r...m 87 // 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20 e Mirc</name>.. 88 89 // Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save 90 // is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character. 91 // The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character. 92 // This is quite silly on the part of IntelliJ, but it is what it is. 93 94 cases := []struct { 95 name string 96 fixture string 97 }{ 98 { 99 name: "undeclared encoding", 100 fixture: "test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64", 101 }, 102 { 103 name: "declared encoding", 104 fixture: "test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64", 105 }, 106 } 107 108 for _, c := range cases { 109 t.Run(c.name, func(t *testing.T) { 110 fh, err := os.Open(c.fixture) 111 require.NoError(t, err) 112 113 decoder := base64.NewDecoder(base64.StdEncoding, fh) 114 115 proj, err := decodePomXML(decoder) 116 117 require.NoError(t, err) 118 require.NotEmpty(t, proj.Developers) 119 }) 120 } 121 122 } 123 124 func Test_parseCommonsTextPomXMLProject(t *testing.T) { 125 tests := []struct { 126 input string 127 expected []pkg.Package 128 }{ 129 { 130 input: "test-fixtures/pom/commons-text.pom.xml", 131 expected: []pkg.Package{ 132 { 133 Name: "commons-lang3", 134 Version: "3.12.0", 135 PURL: "pkg:maven/org.apache.commons/commons-lang3@3.12.0", 136 Language: pkg.Java, 137 Type: pkg.JavaPkg, 138 MetadataType: pkg.JavaMetadataType, 139 Metadata: pkg.JavaMetadata{ 140 PomProperties: &pkg.PomProperties{ 141 GroupID: "org.apache.commons", 142 ArtifactID: "commons-lang3", 143 }, 144 }, 145 }, 146 { 147 Name: "junit-jupiter", 148 Version: "", 149 PURL: "pkg:maven/org.junit.jupiter/junit-jupiter", 150 Language: pkg.Java, 151 Type: pkg.JavaPkg, 152 MetadataType: pkg.JavaMetadataType, 153 Metadata: pkg.JavaMetadata{ 154 PomProperties: &pkg.PomProperties{ 155 GroupID: "org.junit.jupiter", 156 ArtifactID: "junit-jupiter", 157 Scope: "test", 158 }, 159 }, 160 }, 161 { 162 Name: "assertj-core", 163 Version: "3.23.1", 164 PURL: "pkg:maven/org.assertj/assertj-core@3.23.1", 165 Language: pkg.Java, 166 Type: pkg.JavaPkg, 167 MetadataType: pkg.JavaMetadataType, 168 Metadata: pkg.JavaMetadata{ 169 PomProperties: &pkg.PomProperties{ 170 GroupID: "org.assertj", 171 ArtifactID: "assertj-core", 172 Scope: "test", 173 }, 174 }, 175 }, 176 { 177 Name: "commons-io", 178 Version: "2.11.0", 179 PURL: "pkg:maven/commons-io/commons-io@2.11.0", 180 Language: pkg.Java, 181 Type: pkg.JavaPkg, 182 MetadataType: pkg.JavaMetadataType, 183 Metadata: pkg.JavaMetadata{ 184 PomProperties: &pkg.PomProperties{ 185 GroupID: "commons-io", 186 ArtifactID: "commons-io", 187 Scope: "test", 188 }, 189 }, 190 }, 191 { 192 Name: "mockito-inline", 193 Version: "4.8.0", 194 PURL: "pkg:maven/org.mockito/mockito-inline@4.8.0", 195 Language: pkg.Java, 196 Type: pkg.JavaPkg, 197 MetadataType: pkg.JavaMetadataType, 198 Metadata: pkg.JavaMetadata{ 199 PomProperties: &pkg.PomProperties{ 200 GroupID: "org.mockito", 201 ArtifactID: "mockito-inline", 202 Scope: "test", 203 }, 204 }, 205 }, 206 { 207 Name: "js", 208 Version: "22.0.0.2", 209 PURL: "pkg:maven/org.graalvm.js/js@22.0.0.2", 210 Language: pkg.Java, 211 Type: pkg.JavaPkg, 212 MetadataType: pkg.JavaMetadataType, 213 Metadata: pkg.JavaMetadata{ 214 PomProperties: &pkg.PomProperties{ 215 GroupID: "org.graalvm.js", 216 ArtifactID: "js", 217 Scope: "test", 218 }, 219 }, 220 }, 221 { 222 Name: "js-scriptengine", 223 Version: "22.0.0.2", 224 PURL: "pkg:maven/org.graalvm.js/js-scriptengine@22.0.0.2", 225 Language: pkg.Java, 226 Type: pkg.JavaPkg, 227 MetadataType: pkg.JavaMetadataType, 228 Metadata: pkg.JavaMetadata{ 229 PomProperties: &pkg.PomProperties{ 230 GroupID: "org.graalvm.js", 231 ArtifactID: "js-scriptengine", 232 Scope: "test", 233 }, 234 }, 235 }, 236 { 237 Name: "commons-rng-simple", 238 Version: "1.4", 239 PURL: "pkg:maven/org.apache.commons/commons-rng-simple@1.4", 240 Language: pkg.Java, 241 Type: pkg.JavaPkg, 242 MetadataType: pkg.JavaMetadataType, 243 Metadata: pkg.JavaMetadata{ 244 PomProperties: &pkg.PomProperties{ 245 GroupID: "org.apache.commons", 246 ArtifactID: "commons-rng-simple", 247 Scope: "test", 248 }, 249 }, 250 }, 251 { 252 Name: "jmh-core", 253 Version: "1.35", 254 PURL: "pkg:maven/org.openjdk.jmh/jmh-core@1.35", 255 Language: pkg.Java, 256 Type: pkg.JavaPkg, 257 MetadataType: pkg.JavaMetadataType, 258 Metadata: pkg.JavaMetadata{ 259 PomProperties: &pkg.PomProperties{ 260 GroupID: "org.openjdk.jmh", 261 ArtifactID: "jmh-core", 262 Scope: "test", 263 }, 264 }, 265 }, 266 { 267 Name: "jmh-generator-annprocess", 268 Version: "1.35", 269 PURL: "pkg:maven/org.openjdk.jmh/jmh-generator-annprocess@1.35", 270 Language: pkg.Java, 271 Type: pkg.JavaPkg, 272 MetadataType: pkg.JavaMetadataType, 273 Metadata: pkg.JavaMetadata{ 274 PomProperties: &pkg.PomProperties{ 275 GroupID: "org.openjdk.jmh", 276 ArtifactID: "jmh-generator-annprocess", 277 Scope: "test", 278 }, 279 }, 280 }, 281 }, 282 }, 283 } 284 285 for _, test := range tests { 286 t.Run(test.input, func(t *testing.T) { 287 for i := range test.expected { 288 test.expected[i].Locations.Add(file.NewLocation(test.input)) 289 } 290 pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil) 291 }) 292 } 293 } 294 295 func Test_parsePomXMLProject(t *testing.T) { 296 tests := []struct { 297 expected pkg.PomProject 298 }{ 299 { 300 expected: pkg.PomProject{ 301 Path: "test-fixtures/pom/commons-codec.pom.xml", 302 Parent: &pkg.PomParent{ 303 GroupID: "org.apache.commons", 304 ArtifactID: "commons-parent", 305 Version: "42", 306 }, 307 GroupID: "commons-codec", 308 ArtifactID: "commons-codec", 309 Version: "1.11", 310 Name: "Apache Commons Codec", 311 Description: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.", 312 URL: "http://commons.apache.org/proper/commons-codec/", 313 }, 314 }, 315 } 316 317 for _, test := range tests { 318 t.Run(test.expected.Path, func(t *testing.T) { 319 fixture, err := os.Open(test.expected.Path) 320 assert.NoError(t, err) 321 322 actual, err := parsePomXMLProject(fixture.Name(), fixture) 323 assert.NoError(t, err) 324 325 assert.Equal(t, &test.expected, actual) 326 }) 327 } 328 } 329 330 func Test_pomParent(t *testing.T) { 331 tests := []struct { 332 name string 333 input *gopom.Parent 334 expected *pkg.PomParent 335 }{ 336 { 337 name: "only group ID", 338 input: &gopom.Parent{ 339 GroupID: stringPointer("org.something"), 340 }, 341 expected: &pkg.PomParent{ 342 GroupID: "org.something", 343 }, 344 }, 345 { 346 name: "only artifact ID", 347 input: &gopom.Parent{ 348 ArtifactID: stringPointer("something"), 349 }, 350 expected: &pkg.PomParent{ 351 ArtifactID: "something", 352 }, 353 }, 354 { 355 name: "only Version", 356 input: &gopom.Parent{ 357 Version: stringPointer("something"), 358 }, 359 expected: &pkg.PomParent{ 360 Version: "something", 361 }, 362 }, 363 { 364 name: "nil", 365 input: nil, 366 expected: nil, 367 }, 368 { 369 name: "empty", 370 input: &gopom.Parent{}, 371 expected: nil, 372 }, 373 { 374 name: "unused field", 375 input: &gopom.Parent{ 376 RelativePath: stringPointer("something"), 377 }, 378 expected: nil, 379 }, 380 } 381 382 for _, test := range tests { 383 t.Run(test.name, func(t *testing.T) { 384 assert.Equal(t, test.expected, pomParent(gopom.Project{}, test.input)) 385 }) 386 } 387 } 388 389 func Test_cleanDescription(t *testing.T) { 390 tests := []struct { 391 name string 392 input string 393 expected string 394 }{ 395 { 396 name: "indent + multiline", 397 input: ` The Apache Commons Codec package contains simple encoder and decoders for 398 various formats such as Base64 and Hexadecimal. In addition to these 399 widely used encoders and decoders, the codec package also maintains a 400 collection of phonetic encoding utilities.`, 401 expected: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.", 402 }, 403 } 404 405 for _, test := range tests { 406 t.Run(test.name, func(t *testing.T) { 407 assert.Equal(t, test.expected, cleanDescription(stringPointer(test.input))) 408 }) 409 } 410 } 411 412 func Test_resolveProperty(t *testing.T) { 413 tests := []struct { 414 name string 415 property string 416 pom gopom.Project 417 expected string 418 }{ 419 { 420 name: "property", 421 property: "${version.number}", 422 pom: gopom.Project{ 423 Properties: &gopom.Properties{ 424 Entries: map[string]string{ 425 "version.number": "12.5.0", 426 }, 427 }, 428 }, 429 expected: "12.5.0", 430 }, 431 { 432 name: "groupId", 433 property: "${project.groupId}", 434 pom: gopom.Project{ 435 GroupID: stringPointer("org.some.group"), 436 }, 437 expected: "org.some.group", 438 }, 439 { 440 name: "parent groupId", 441 property: "${project.parent.groupId}", 442 pom: gopom.Project{ 443 Parent: &gopom.Parent{ 444 GroupID: stringPointer("org.some.parent"), 445 }, 446 }, 447 expected: "org.some.parent", 448 }, 449 { 450 name: "nil pointer halts search", 451 property: "${project.parent.groupId}", 452 pom: gopom.Project{ 453 Parent: nil, 454 }, 455 expected: "${project.parent.groupId}", 456 }, 457 { 458 name: "nil string pointer halts search", 459 property: "${project.parent.groupId}", 460 pom: gopom.Project{ 461 Parent: &gopom.Parent{ 462 GroupID: nil, 463 }, 464 }, 465 expected: "${project.parent.groupId}", 466 }, 467 } 468 469 for _, test := range tests { 470 t.Run(test.name, func(t *testing.T) { 471 resolved := resolveProperty(test.pom, stringPointer(test.property), test.name) 472 assert.Equal(t, test.expected, resolved) 473 }) 474 } 475 } 476 477 func stringPointer(s string) *string { 478 return &s 479 } 480 481 func Test_getUtf8Reader(t *testing.T) { 482 tests := []struct { 483 name string 484 contents string 485 }{ 486 { 487 name: "unknown encoding", 488 // random binary contents 489 contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==", 490 }, 491 } 492 for _, tt := range tests { 493 t.Run(tt.name, func(t *testing.T) { 494 decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents)) 495 496 got, err := getUtf8Reader(decoder) 497 require.NoError(t, err) 498 gotBytes, err := io.ReadAll(got) 499 require.NoError(t, err) 500 // if we couldn't decode the section as UTF-8, we should get a replacement character 501 assert.Contains(t, string(gotBytes), "�") 502 }) 503 } 504 }