github.com/anchore/syft@v1.4.2-0.20240516191711-1bec1fc5d397/syft/pkg/cataloger/java/parse_pom_xml_test.go (about) 1 package java 2 3 import ( 4 "encoding/base64" 5 "io" 6 "os" 7 "strings" 8 "testing" 9 10 "github.com/stretchr/testify/assert" 11 "github.com/stretchr/testify/require" 12 "github.com/vifraa/gopom" 13 14 "github.com/anchore/syft/syft/cataloging" 15 "github.com/anchore/syft/syft/file" 16 "github.com/anchore/syft/syft/license" 17 "github.com/anchore/syft/syft/pkg" 18 "github.com/anchore/syft/syft/pkg/cataloger/internal/pkgtest" 19 ) 20 21 func Test_parserPomXML(t *testing.T) { 22 tests := []struct { 23 input string 24 expected []pkg.Package 25 }{ 26 { 27 input: "test-fixtures/pom/pom.xml", 28 expected: []pkg.Package{ 29 { 30 Name: "joda-time", 31 Version: "2.9.2", 32 PURL: "pkg:maven/com.joda/joda-time@2.9.2", 33 Language: pkg.Java, 34 Type: pkg.JavaPkg, 35 Metadata: pkg.JavaArchive{ 36 PomProperties: &pkg.JavaPomProperties{ 37 GroupID: "com.joda", 38 ArtifactID: "joda-time", 39 }, 40 }, 41 }, 42 { 43 Name: "junit", 44 Version: "4.12", 45 PURL: "pkg:maven/junit/junit@4.12", 46 Language: pkg.Java, 47 Type: pkg.JavaPkg, 48 Metadata: pkg.JavaArchive{ 49 PomProperties: &pkg.JavaPomProperties{ 50 GroupID: "junit", 51 ArtifactID: "junit", 52 Scope: "test", 53 }, 54 }, 55 }, 56 }, 57 }, 58 } 59 60 for _, test := range tests { 61 t.Run(test.input, func(t *testing.T) { 62 for i := range test.expected { 63 test.expected[i].Locations.Add(file.NewLocation(test.input)) 64 } 65 66 gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{ 67 ArchiveSearchConfig: cataloging.ArchiveSearchConfig{ 68 IncludeIndexedArchives: true, 69 IncludeUnindexedArchives: true, 70 }, 71 }) 72 73 pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil) 74 }) 75 } 76 } 77 78 func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) { 79 // regression for https://github.com/anchore/syft/issues/2044 80 81 // we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the 82 // file, which is extremely important for this test. 83 84 // for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically 85 // convert the file to UTF-8, which will break this test: 86 87 // xxd with the original pom.xml 88 // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. 89 // 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69 <name>J.r.me Mi 90 // 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020 rc</name>.. 91 92 // xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ) 93 // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. 94 // 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d <name>J...r...m 95 // 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20 e Mirc</name>.. 96 97 // Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save 98 // is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character. 99 // The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character. 100 // This is quite silly on the part of IntelliJ, but it is what it is. 101 102 cases := []struct { 103 name string 104 fixture string 105 }{ 106 { 107 name: "undeclared encoding", 108 fixture: "test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64", 109 }, 110 { 111 name: "declared encoding", 112 fixture: "test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64", 113 }, 114 } 115 116 for _, c := range cases { 117 t.Run(c.name, func(t *testing.T) { 118 fh, err := os.Open(c.fixture) 119 require.NoError(t, err) 120 121 decoder := base64.NewDecoder(base64.StdEncoding, fh) 122 123 proj, err := decodePomXML(decoder) 124 125 require.NoError(t, err) 126 require.NotEmpty(t, proj.Developers) 127 }) 128 } 129 130 } 131 132 func Test_parseCommonsTextPomXMLProject(t *testing.T) { 133 tests := []struct { 134 input string 135 expected []pkg.Package 136 }{ 137 { 138 input: "test-fixtures/pom/commons-text.pom.xml", 139 expected: []pkg.Package{ 140 { 141 Name: "commons-lang3", 142 Version: "3.12.0", 143 PURL: "pkg:maven/org.apache.commons/commons-lang3@3.12.0", 144 Language: pkg.Java, 145 Type: pkg.JavaPkg, 146 Metadata: pkg.JavaArchive{ 147 PomProperties: &pkg.JavaPomProperties{ 148 GroupID: "org.apache.commons", 149 ArtifactID: "commons-lang3", 150 }, 151 }, 152 }, 153 { 154 Name: "junit-jupiter", 155 Version: "", 156 PURL: "pkg:maven/org.junit.jupiter/junit-jupiter", 157 Language: pkg.Java, 158 Type: pkg.JavaPkg, 159 Metadata: pkg.JavaArchive{ 160 PomProperties: &pkg.JavaPomProperties{ 161 GroupID: "org.junit.jupiter", 162 ArtifactID: "junit-jupiter", 163 Scope: "test", 164 }, 165 }, 166 }, 167 { 168 Name: "assertj-core", 169 Version: "3.23.1", 170 PURL: "pkg:maven/org.assertj/assertj-core@3.23.1", 171 Language: pkg.Java, 172 Type: pkg.JavaPkg, 173 Metadata: pkg.JavaArchive{ 174 PomProperties: &pkg.JavaPomProperties{ 175 GroupID: "org.assertj", 176 ArtifactID: "assertj-core", 177 Scope: "test", 178 }, 179 }, 180 }, 181 { 182 Name: "commons-io", 183 Version: "2.11.0", 184 PURL: "pkg:maven/commons-io/commons-io@2.11.0", 185 Language: pkg.Java, 186 Type: pkg.JavaPkg, 187 Metadata: pkg.JavaArchive{ 188 PomProperties: &pkg.JavaPomProperties{ 189 GroupID: "commons-io", 190 ArtifactID: "commons-io", 191 Scope: "test", 192 }, 193 }, 194 }, 195 { 196 Name: "mockito-inline", 197 Version: "4.8.0", 198 PURL: "pkg:maven/org.mockito/mockito-inline@4.8.0", 199 Language: pkg.Java, 200 Type: pkg.JavaPkg, 201 Metadata: pkg.JavaArchive{ 202 PomProperties: &pkg.JavaPomProperties{ 203 GroupID: "org.mockito", 204 ArtifactID: "mockito-inline", 205 Scope: "test", 206 }, 207 }, 208 }, 209 { 210 Name: "js", 211 Version: "22.0.0.2", 212 PURL: "pkg:maven/org.graalvm.js/js@22.0.0.2", 213 Language: pkg.Java, 214 Type: pkg.JavaPkg, 215 Metadata: pkg.JavaArchive{ 216 PomProperties: &pkg.JavaPomProperties{ 217 GroupID: "org.graalvm.js", 218 ArtifactID: "js", 219 Scope: "test", 220 }, 221 }, 222 }, 223 { 224 Name: "js-scriptengine", 225 Version: "22.0.0.2", 226 PURL: "pkg:maven/org.graalvm.js/js-scriptengine@22.0.0.2", 227 Language: pkg.Java, 228 Type: pkg.JavaPkg, 229 Metadata: pkg.JavaArchive{ 230 PomProperties: &pkg.JavaPomProperties{ 231 GroupID: "org.graalvm.js", 232 ArtifactID: "js-scriptengine", 233 Scope: "test", 234 }, 235 }, 236 }, 237 { 238 Name: "commons-rng-simple", 239 Version: "1.4", 240 PURL: "pkg:maven/org.apache.commons/commons-rng-simple@1.4", 241 Language: pkg.Java, 242 Type: pkg.JavaPkg, 243 Metadata: pkg.JavaArchive{ 244 PomProperties: &pkg.JavaPomProperties{ 245 GroupID: "org.apache.commons", 246 ArtifactID: "commons-rng-simple", 247 Scope: "test", 248 }, 249 }, 250 }, 251 { 252 Name: "jmh-core", 253 Version: "1.35", 254 PURL: "pkg:maven/org.openjdk.jmh/jmh-core@1.35", 255 Language: pkg.Java, 256 Type: pkg.JavaPkg, 257 Metadata: pkg.JavaArchive{ 258 PomProperties: &pkg.JavaPomProperties{ 259 GroupID: "org.openjdk.jmh", 260 ArtifactID: "jmh-core", 261 Scope: "test", 262 }, 263 }, 264 }, 265 { 266 Name: "jmh-generator-annprocess", 267 Version: "1.35", 268 PURL: "pkg:maven/org.openjdk.jmh/jmh-generator-annprocess@1.35", 269 Language: pkg.Java, 270 Type: pkg.JavaPkg, 271 Metadata: pkg.JavaArchive{ 272 PomProperties: &pkg.JavaPomProperties{ 273 GroupID: "org.openjdk.jmh", 274 ArtifactID: "jmh-generator-annprocess", 275 Scope: "test", 276 }, 277 }, 278 }, 279 }, 280 }, 281 } 282 283 for _, test := range tests { 284 t.Run(test.input, func(t *testing.T) { 285 for i := range test.expected { 286 test.expected[i].Locations.Add(file.NewLocation(test.input)) 287 } 288 289 gap := newGenericArchiveParserAdapter(ArchiveCatalogerConfig{ 290 ArchiveSearchConfig: cataloging.ArchiveSearchConfig{ 291 IncludeIndexedArchives: true, 292 IncludeUnindexedArchives: true, 293 }, 294 }) 295 pkgtest.TestFileParser(t, test.input, gap.parserPomXML, test.expected, nil) 296 }) 297 } 298 } 299 300 func Test_parsePomXMLProject(t *testing.T) { 301 // TODO: ideally we would have the path to the contained pom.xml, not the jar 302 jarLocation := file.NewLocation("path/to/archive.jar") 303 tests := []struct { 304 name string 305 expected parsedPomProject 306 }{ 307 { 308 name: "go case", 309 expected: parsedPomProject{ 310 JavaPomProject: &pkg.JavaPomProject{ 311 Path: "test-fixtures/pom/commons-codec.pom.xml", 312 Parent: &pkg.JavaPomParent{ 313 GroupID: "org.apache.commons", 314 ArtifactID: "commons-parent", 315 Version: "42", 316 }, 317 GroupID: "commons-codec", 318 ArtifactID: "commons-codec", 319 Version: "1.11", 320 Name: "Apache Commons Codec", 321 Description: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.", 322 URL: "http://commons.apache.org/proper/commons-codec/", 323 }, 324 }, 325 }, 326 { 327 name: "with license data", 328 expected: parsedPomProject{ 329 JavaPomProject: &pkg.JavaPomProject{ 330 Path: "test-fixtures/pom/neo4j-license-maven-plugin.pom.xml", 331 Parent: &pkg.JavaPomParent{ 332 GroupID: "org.sonatype.oss", 333 ArtifactID: "oss-parent", 334 Version: "7", 335 }, 336 GroupID: "org.neo4j.build.plugins", 337 ArtifactID: "license-maven-plugin", 338 Version: "4-SNAPSHOT", 339 Name: "${project.artifactId}", // TODO: this is not an ideal answer 340 Description: "Maven 2 plugin to check and update license headers in source files", 341 URL: "http://components.neo4j.org/${project.artifactId}/${project.version}", // TODO: this is not an ideal answer 342 }, 343 Licenses: []pkg.License{ 344 { 345 Value: "The Apache Software License, Version 2.0", 346 SPDXExpression: "", // TODO: ideally we would parse this title to get Apache-2.0 (created issue #2210 https://github.com/anchore/syft/issues/2210) 347 Type: license.Declared, 348 URLs: []string{"http://www.apache.org/licenses/LICENSE-2.0.txt"}, 349 Locations: file.NewLocationSet(jarLocation), 350 }, 351 { 352 Value: "MIT", 353 SPDXExpression: "MIT", 354 Type: license.Declared, 355 Locations: file.NewLocationSet(jarLocation), 356 }, 357 { 358 Type: license.Declared, 359 URLs: []string{"https://opensource.org/license/unlicense/"}, 360 Locations: file.NewLocationSet(jarLocation), 361 }, 362 }, 363 }, 364 }, 365 } 366 367 for _, test := range tests { 368 t.Run(test.name, func(t *testing.T) { 369 fixture, err := os.Open(test.expected.Path) 370 assert.NoError(t, err) 371 372 actual, err := parsePomXMLProject(fixture.Name(), fixture, jarLocation) 373 assert.NoError(t, err) 374 375 assert.Equal(t, &test.expected, actual) 376 }) 377 } 378 } 379 380 func Test_pomParent(t *testing.T) { 381 tests := []struct { 382 name string 383 input *gopom.Parent 384 expected *pkg.JavaPomParent 385 }{ 386 { 387 name: "only group ID", 388 input: &gopom.Parent{ 389 GroupID: stringPointer("org.something"), 390 }, 391 expected: &pkg.JavaPomParent{ 392 GroupID: "org.something", 393 }, 394 }, 395 { 396 name: "only artifact ID", 397 input: &gopom.Parent{ 398 ArtifactID: stringPointer("something"), 399 }, 400 expected: &pkg.JavaPomParent{ 401 ArtifactID: "something", 402 }, 403 }, 404 { 405 name: "only Version", 406 input: &gopom.Parent{ 407 Version: stringPointer("something"), 408 }, 409 expected: &pkg.JavaPomParent{ 410 Version: "something", 411 }, 412 }, 413 { 414 name: "nil", 415 input: nil, 416 expected: nil, 417 }, 418 { 419 name: "empty", 420 input: &gopom.Parent{}, 421 expected: nil, 422 }, 423 { 424 name: "unused field", 425 input: &gopom.Parent{ 426 RelativePath: stringPointer("something"), 427 }, 428 expected: nil, 429 }, 430 } 431 432 for _, test := range tests { 433 t.Run(test.name, func(t *testing.T) { 434 assert.Equal(t, test.expected, pomParent(gopom.Project{}, test.input)) 435 }) 436 } 437 } 438 439 func Test_cleanDescription(t *testing.T) { 440 tests := []struct { 441 name string 442 input string 443 expected string 444 }{ 445 { 446 name: "indent + multiline", 447 input: ` The Apache Commons Codec package contains simple encoder and decoders for 448 various formats such as Base64 and Hexadecimal. In addition to these 449 widely used encoders and decoders, the codec package also maintains a 450 collection of phonetic encoding utilities.`, 451 expected: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.", 452 }, 453 } 454 455 for _, test := range tests { 456 t.Run(test.name, func(t *testing.T) { 457 assert.Equal(t, test.expected, cleanDescription(stringPointer(test.input))) 458 }) 459 } 460 } 461 462 func Test_resolveProperty(t *testing.T) { 463 tests := []struct { 464 name string 465 property string 466 pom gopom.Project 467 expected string 468 }{ 469 { 470 name: "property", 471 property: "${version.number}", 472 pom: gopom.Project{ 473 Properties: &gopom.Properties{ 474 Entries: map[string]string{ 475 "version.number": "12.5.0", 476 }, 477 }, 478 }, 479 expected: "12.5.0", 480 }, 481 { 482 name: "groupId", 483 property: "${project.groupId}", 484 pom: gopom.Project{ 485 GroupID: stringPointer("org.some.group"), 486 }, 487 expected: "org.some.group", 488 }, 489 { 490 name: "parent groupId", 491 property: "${project.parent.groupId}", 492 pom: gopom.Project{ 493 Parent: &gopom.Parent{ 494 GroupID: stringPointer("org.some.parent"), 495 }, 496 }, 497 expected: "org.some.parent", 498 }, 499 { 500 name: "nil pointer halts search", 501 property: "${project.parent.groupId}", 502 pom: gopom.Project{ 503 Parent: nil, 504 }, 505 expected: "", 506 }, 507 { 508 name: "nil string pointer halts search", 509 property: "${project.parent.groupId}", 510 pom: gopom.Project{ 511 Parent: &gopom.Parent{ 512 GroupID: nil, 513 }, 514 }, 515 expected: "", 516 }, 517 { 518 name: "double dereference", 519 property: "${springboot.version}", 520 pom: gopom.Project{ 521 Parent: &gopom.Parent{ 522 Version: stringPointer("1.2.3"), 523 }, 524 Properties: &gopom.Properties{ 525 Entries: map[string]string{ 526 "springboot.version": "${project.parent.version}", 527 }, 528 }, 529 }, 530 expected: "1.2.3", 531 }, 532 { 533 name: "map missing stops double dereference", 534 property: "${springboot.version}", 535 pom: gopom.Project{ 536 Parent: &gopom.Parent{ 537 Version: stringPointer("1.2.3"), 538 }, 539 }, 540 expected: "", 541 }, 542 { 543 name: "resolution halts even if it resolves to a variable", 544 property: "${springboot.version}", 545 pom: gopom.Project{ 546 Parent: &gopom.Parent{ 547 Version: stringPointer("${undefined.version}"), 548 }, 549 Properties: &gopom.Properties{ 550 Entries: map[string]string{ 551 "springboot.version": "${project.parent.version}", 552 }, 553 }, 554 }, 555 expected: "", 556 }, 557 { 558 name: "resolution halts even if cyclic", 559 property: "${springboot.version}", 560 pom: gopom.Project{ 561 Properties: &gopom.Properties{ 562 Entries: map[string]string{ 563 "springboot.version": "${springboot.version}", 564 }, 565 }, 566 }, 567 expected: "", 568 }, 569 { 570 name: "resolution halts even if cyclic more steps", 571 property: "${cyclic.version}", 572 pom: gopom.Project{ 573 Properties: &gopom.Properties{ 574 Entries: map[string]string{ 575 "other.version": "${cyclic.version}", 576 "springboot.version": "${other.version}", 577 "cyclic.version": "${springboot.version}", 578 }, 579 }, 580 }, 581 expected: "", 582 }, 583 { 584 name: "resolution halts even if cyclic involving parent", 585 property: "${cyclic.version}", 586 pom: gopom.Project{ 587 Parent: &gopom.Parent{ 588 Version: stringPointer("${cyclic.version}"), 589 }, 590 Properties: &gopom.Properties{ 591 Entries: map[string]string{ 592 "other.version": "${parent.version}", 593 "springboot.version": "${other.version}", 594 "cyclic.version": "${springboot.version}", 595 }, 596 }, 597 }, 598 expected: "", 599 }, 600 } 601 602 for _, test := range tests { 603 t.Run(test.name, func(t *testing.T) { 604 resolved := resolveProperty(test.pom, stringPointer(test.property), test.name) 605 assert.Equal(t, test.expected, resolved) 606 }) 607 } 608 } 609 610 func stringPointer(s string) *string { 611 return &s 612 } 613 614 func Test_getUtf8Reader(t *testing.T) { 615 tests := []struct { 616 name string 617 contents string 618 }{ 619 { 620 name: "unknown encoding", 621 // random binary contents 622 contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==", 623 }, 624 } 625 for _, tt := range tests { 626 t.Run(tt.name, func(t *testing.T) { 627 decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents)) 628 629 got, err := getUtf8Reader(decoder) 630 require.NoError(t, err) 631 gotBytes, err := io.ReadAll(got) 632 require.NoError(t, err) 633 // if we couldn't decode the section as UTF-8, we should get a replacement character 634 assert.Contains(t, string(gotBytes), "�") 635 }) 636 } 637 }