github.com/lineaje-labs/syft@v0.98.1-0.20231227153149-9e393f60ff1b/syft/pkg/cataloger/java/parse_pom_xml_test.go (about) 1 package java 2 3 import ( 4 "encoding/base64" 5 "io" 6 "os" 7 "strings" 8 "testing" 9 10 "github.com/stretchr/testify/assert" 11 "github.com/stretchr/testify/require" 12 "github.com/vifraa/gopom" 13 14 "github.com/anchore/syft/syft/file" 15 "github.com/anchore/syft/syft/license" 16 "github.com/anchore/syft/syft/pkg" 17 "github.com/lineaje-labs/syft/syft/pkg/cataloger/internal/pkgtest" 18 ) 19 20 func Test_parserPomXML(t *testing.T) { 21 tests := []struct { 22 input string 23 expected []pkg.Package 24 }{ 25 { 26 input: "test-fixtures/pom/pom.xml", 27 expected: []pkg.Package{ 28 { 29 Name: "joda-time", 30 Version: "2.9.2", 31 PURL: "pkg:maven/com.joda/joda-time@2.9.2", 32 Language: pkg.Java, 33 Type: pkg.JavaPkg, 34 Metadata: pkg.JavaArchive{ 35 PomProperties: &pkg.JavaPomProperties{ 36 GroupID: "com.joda", 37 ArtifactID: "joda-time", 38 }, 39 }, 40 }, 41 { 42 Name: "junit", 43 Version: "4.12", 44 PURL: "pkg:maven/junit/junit@4.12", 45 Language: pkg.Java, 46 Type: pkg.JavaPkg, 47 Metadata: pkg.JavaArchive{ 48 PomProperties: &pkg.JavaPomProperties{ 49 GroupID: "junit", 50 ArtifactID: "junit", 51 Scope: "test", 52 }, 53 }, 54 }, 55 }, 56 }, 57 } 58 59 for _, test := range tests { 60 t.Run(test.input, func(t *testing.T) { 61 for i := range test.expected { 62 test.expected[i].Locations.Add(file.NewLocation(test.input)) 63 } 64 pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil) 65 }) 66 } 67 } 68 69 func Test_decodePomXML_surviveNonUtf8Encoding(t *testing.T) { 70 // regression for https://github.com/anchore/syft/issues/2044 71 72 // we are storing the base64 contents of the pom.xml file. We are doing this to prevent accidental changes to the 73 // file, which is extremely important for this test. 74 75 // for instance, even changing a single character in the file and saving in an IntelliJ IDE will automatically 76 // convert the file to UTF-8, which will break this test: 77 78 // xxd with the original pom.xml 79 // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. 80 // 00000790: 203c 6e61 6d65 3e4a e972 f46d 6520 4d69 <name>J.r.me Mi 81 // 000007a0: 7263 3c2f 6e61 6d65 3e0d 0a20 2020 2020 rc</name>.. 82 83 // xxd with the pom.xml converted to UTF-8 (from a simple change with IntelliJ) 84 // 00000780: 6964 3e0d 0a20 2020 2020 2020 2020 2020 id>.. 85 // 00000790: 203c 6e61 6d65 3e4a efbf bd72 efbf bd6d <name>J...r...m 86 // 000007a0: 6520 4d69 7263 3c2f 6e61 6d65 3e0d 0a20 e Mirc</name>.. 87 88 // Note that the name "Jérôme Mirc" was originally interpreted as "J.r.me Mi" and after the save 89 // is now encoded as "J...r...m" which is not what we want (note the extra bytes for each non UTF-8 character. 90 // The original 0xe9 byte (é) was converted to 0xefbfbd (�) which is the UTF-8 replacement character. 91 // This is quite silly on the part of IntelliJ, but it is what it is. 92 93 cases := []struct { 94 name string 95 fixture string 96 }{ 97 { 98 name: "undeclared encoding", 99 fixture: "test-fixtures/pom/undeclared-iso-8859-encoded-pom.xml.base64", 100 }, 101 { 102 name: "declared encoding", 103 fixture: "test-fixtures/pom/declared-iso-8859-encoded-pom.xml.base64", 104 }, 105 } 106 107 for _, c := range cases { 108 t.Run(c.name, func(t *testing.T) { 109 fh, err := os.Open(c.fixture) 110 require.NoError(t, err) 111 112 decoder := base64.NewDecoder(base64.StdEncoding, fh) 113 114 proj, err := decodePomXML(decoder) 115 116 require.NoError(t, err) 117 require.NotEmpty(t, proj.Developers) 118 }) 119 } 120 121 } 122 123 func Test_parseCommonsTextPomXMLProject(t *testing.T) { 124 tests := []struct { 125 input string 126 expected []pkg.Package 127 }{ 128 { 129 input: "test-fixtures/pom/commons-text.pom.xml", 130 expected: []pkg.Package{ 131 { 132 Name: "commons-lang3", 133 Version: "3.12.0", 134 PURL: "pkg:maven/org.apache.commons/commons-lang3@3.12.0", 135 Language: pkg.Java, 136 Type: pkg.JavaPkg, 137 Metadata: pkg.JavaArchive{ 138 PomProperties: &pkg.JavaPomProperties{ 139 GroupID: "org.apache.commons", 140 ArtifactID: "commons-lang3", 141 }, 142 }, 143 }, 144 { 145 Name: "junit-jupiter", 146 Version: "", 147 PURL: "pkg:maven/org.junit.jupiter/junit-jupiter", 148 Language: pkg.Java, 149 Type: pkg.JavaPkg, 150 Metadata: pkg.JavaArchive{ 151 PomProperties: &pkg.JavaPomProperties{ 152 GroupID: "org.junit.jupiter", 153 ArtifactID: "junit-jupiter", 154 Scope: "test", 155 }, 156 }, 157 }, 158 { 159 Name: "assertj-core", 160 Version: "3.23.1", 161 PURL: "pkg:maven/org.assertj/assertj-core@3.23.1", 162 Language: pkg.Java, 163 Type: pkg.JavaPkg, 164 Metadata: pkg.JavaArchive{ 165 PomProperties: &pkg.JavaPomProperties{ 166 GroupID: "org.assertj", 167 ArtifactID: "assertj-core", 168 Scope: "test", 169 }, 170 }, 171 }, 172 { 173 Name: "commons-io", 174 Version: "2.11.0", 175 PURL: "pkg:maven/commons-io/commons-io@2.11.0", 176 Language: pkg.Java, 177 Type: pkg.JavaPkg, 178 Metadata: pkg.JavaArchive{ 179 PomProperties: &pkg.JavaPomProperties{ 180 GroupID: "commons-io", 181 ArtifactID: "commons-io", 182 Scope: "test", 183 }, 184 }, 185 }, 186 { 187 Name: "mockito-inline", 188 Version: "4.8.0", 189 PURL: "pkg:maven/org.mockito/mockito-inline@4.8.0", 190 Language: pkg.Java, 191 Type: pkg.JavaPkg, 192 Metadata: pkg.JavaArchive{ 193 PomProperties: &pkg.JavaPomProperties{ 194 GroupID: "org.mockito", 195 ArtifactID: "mockito-inline", 196 Scope: "test", 197 }, 198 }, 199 }, 200 { 201 Name: "js", 202 Version: "22.0.0.2", 203 PURL: "pkg:maven/org.graalvm.js/js@22.0.0.2", 204 Language: pkg.Java, 205 Type: pkg.JavaPkg, 206 Metadata: pkg.JavaArchive{ 207 PomProperties: &pkg.JavaPomProperties{ 208 GroupID: "org.graalvm.js", 209 ArtifactID: "js", 210 Scope: "test", 211 }, 212 }, 213 }, 214 { 215 Name: "js-scriptengine", 216 Version: "22.0.0.2", 217 PURL: "pkg:maven/org.graalvm.js/js-scriptengine@22.0.0.2", 218 Language: pkg.Java, 219 Type: pkg.JavaPkg, 220 Metadata: pkg.JavaArchive{ 221 PomProperties: &pkg.JavaPomProperties{ 222 GroupID: "org.graalvm.js", 223 ArtifactID: "js-scriptengine", 224 Scope: "test", 225 }, 226 }, 227 }, 228 { 229 Name: "commons-rng-simple", 230 Version: "1.4", 231 PURL: "pkg:maven/org.apache.commons/commons-rng-simple@1.4", 232 Language: pkg.Java, 233 Type: pkg.JavaPkg, 234 Metadata: pkg.JavaArchive{ 235 PomProperties: &pkg.JavaPomProperties{ 236 GroupID: "org.apache.commons", 237 ArtifactID: "commons-rng-simple", 238 Scope: "test", 239 }, 240 }, 241 }, 242 { 243 Name: "jmh-core", 244 Version: "1.35", 245 PURL: "pkg:maven/org.openjdk.jmh/jmh-core@1.35", 246 Language: pkg.Java, 247 Type: pkg.JavaPkg, 248 Metadata: pkg.JavaArchive{ 249 PomProperties: &pkg.JavaPomProperties{ 250 GroupID: "org.openjdk.jmh", 251 ArtifactID: "jmh-core", 252 Scope: "test", 253 }, 254 }, 255 }, 256 { 257 Name: "jmh-generator-annprocess", 258 Version: "1.35", 259 PURL: "pkg:maven/org.openjdk.jmh/jmh-generator-annprocess@1.35", 260 Language: pkg.Java, 261 Type: pkg.JavaPkg, 262 Metadata: pkg.JavaArchive{ 263 PomProperties: &pkg.JavaPomProperties{ 264 GroupID: "org.openjdk.jmh", 265 ArtifactID: "jmh-generator-annprocess", 266 Scope: "test", 267 }, 268 }, 269 }, 270 }, 271 }, 272 } 273 274 for _, test := range tests { 275 t.Run(test.input, func(t *testing.T) { 276 for i := range test.expected { 277 test.expected[i].Locations.Add(file.NewLocation(test.input)) 278 } 279 pkgtest.TestFileParser(t, test.input, parserPomXML, test.expected, nil) 280 }) 281 } 282 } 283 284 func Test_parsePomXMLProject(t *testing.T) { 285 // TODO: ideally we would have the path to the contained pom.xml, not the jar 286 jarLocation := file.NewLocation("path/to/archive.jar") 287 tests := []struct { 288 name string 289 expected parsedPomProject 290 }{ 291 { 292 name: "go case", 293 expected: parsedPomProject{ 294 JavaPomProject: &pkg.JavaPomProject{ 295 Path: "test-fixtures/pom/commons-codec.pom.xml", 296 Parent: &pkg.JavaPomParent{ 297 GroupID: "org.apache.commons", 298 ArtifactID: "commons-parent", 299 Version: "42", 300 }, 301 GroupID: "commons-codec", 302 ArtifactID: "commons-codec", 303 Version: "1.11", 304 Name: "Apache Commons Codec", 305 Description: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.", 306 URL: "http://commons.apache.org/proper/commons-codec/", 307 }, 308 }, 309 }, 310 { 311 name: "with license data", 312 expected: parsedPomProject{ 313 JavaPomProject: &pkg.JavaPomProject{ 314 Path: "test-fixtures/pom/neo4j-license-maven-plugin.pom.xml", 315 Parent: &pkg.JavaPomParent{ 316 GroupID: "org.sonatype.oss", 317 ArtifactID: "oss-parent", 318 Version: "7", 319 }, 320 GroupID: "org.neo4j.build.plugins", 321 ArtifactID: "license-maven-plugin", 322 Version: "4-SNAPSHOT", 323 Name: "${project.artifactId}", // TODO: this is not an ideal answer 324 Description: "Maven 2 plugin to check and update license headers in source files", 325 URL: "http://components.neo4j.org/${project.artifactId}/${project.version}", // TODO: this is not an ideal answer 326 }, 327 Licenses: []pkg.License{ 328 { 329 Value: "The Apache Software License, Version 2.0", 330 SPDXExpression: "", // TODO: ideally we would parse this title to get Apache-2.0 (created issue #2210 https://github.com/anchore/syft/issues/2210) 331 Type: license.Declared, 332 URLs: []string{"http://www.apache.org/licenses/LICENSE-2.0.txt"}, 333 Locations: file.NewLocationSet(jarLocation), 334 }, 335 { 336 Value: "MIT", 337 SPDXExpression: "MIT", 338 Type: license.Declared, 339 Locations: file.NewLocationSet(jarLocation), 340 }, 341 { 342 Type: license.Declared, 343 URLs: []string{"https://opensource.org/license/unlicense/"}, 344 Locations: file.NewLocationSet(jarLocation), 345 }, 346 }, 347 }, 348 }, 349 } 350 351 for _, test := range tests { 352 t.Run(test.name, func(t *testing.T) { 353 fixture, err := os.Open(test.expected.Path) 354 assert.NoError(t, err) 355 356 actual, err := parsePomXMLProject(fixture.Name(), fixture, jarLocation) 357 assert.NoError(t, err) 358 359 assert.Equal(t, &test.expected, actual) 360 }) 361 } 362 } 363 364 func Test_pomParent(t *testing.T) { 365 tests := []struct { 366 name string 367 input *gopom.Parent 368 expected *pkg.JavaPomParent 369 }{ 370 { 371 name: "only group ID", 372 input: &gopom.Parent{ 373 GroupID: stringPointer("org.something"), 374 }, 375 expected: &pkg.JavaPomParent{ 376 GroupID: "org.something", 377 }, 378 }, 379 { 380 name: "only artifact ID", 381 input: &gopom.Parent{ 382 ArtifactID: stringPointer("something"), 383 }, 384 expected: &pkg.JavaPomParent{ 385 ArtifactID: "something", 386 }, 387 }, 388 { 389 name: "only Version", 390 input: &gopom.Parent{ 391 Version: stringPointer("something"), 392 }, 393 expected: &pkg.JavaPomParent{ 394 Version: "something", 395 }, 396 }, 397 { 398 name: "nil", 399 input: nil, 400 expected: nil, 401 }, 402 { 403 name: "empty", 404 input: &gopom.Parent{}, 405 expected: nil, 406 }, 407 { 408 name: "unused field", 409 input: &gopom.Parent{ 410 RelativePath: stringPointer("something"), 411 }, 412 expected: nil, 413 }, 414 } 415 416 for _, test := range tests { 417 t.Run(test.name, func(t *testing.T) { 418 assert.Equal(t, test.expected, pomParent(gopom.Project{}, test.input)) 419 }) 420 } 421 } 422 423 func Test_cleanDescription(t *testing.T) { 424 tests := []struct { 425 name string 426 input string 427 expected string 428 }{ 429 { 430 name: "indent + multiline", 431 input: ` The Apache Commons Codec package contains simple encoder and decoders for 432 various formats such as Base64 and Hexadecimal. In addition to these 433 widely used encoders and decoders, the codec package also maintains a 434 collection of phonetic encoding utilities.`, 435 expected: "The Apache Commons Codec package contains simple encoder and decoders for various formats such as Base64 and Hexadecimal. In addition to these widely used encoders and decoders, the codec package also maintains a collection of phonetic encoding utilities.", 436 }, 437 } 438 439 for _, test := range tests { 440 t.Run(test.name, func(t *testing.T) { 441 assert.Equal(t, test.expected, cleanDescription(stringPointer(test.input))) 442 }) 443 } 444 } 445 446 func Test_resolveProperty(t *testing.T) { 447 tests := []struct { 448 name string 449 property string 450 pom gopom.Project 451 expected string 452 }{ 453 { 454 name: "property", 455 property: "${version.number}", 456 pom: gopom.Project{ 457 Properties: &gopom.Properties{ 458 Entries: map[string]string{ 459 "version.number": "12.5.0", 460 }, 461 }, 462 }, 463 expected: "12.5.0", 464 }, 465 { 466 name: "groupId", 467 property: "${project.groupId}", 468 pom: gopom.Project{ 469 GroupID: stringPointer("org.some.group"), 470 }, 471 expected: "org.some.group", 472 }, 473 { 474 name: "parent groupId", 475 property: "${project.parent.groupId}", 476 pom: gopom.Project{ 477 Parent: &gopom.Parent{ 478 GroupID: stringPointer("org.some.parent"), 479 }, 480 }, 481 expected: "org.some.parent", 482 }, 483 { 484 name: "nil pointer halts search", 485 property: "${project.parent.groupId}", 486 pom: gopom.Project{ 487 Parent: nil, 488 }, 489 expected: "${project.parent.groupId}", 490 }, 491 { 492 name: "nil string pointer halts search", 493 property: "${project.parent.groupId}", 494 pom: gopom.Project{ 495 Parent: &gopom.Parent{ 496 GroupID: nil, 497 }, 498 }, 499 expected: "${project.parent.groupId}", 500 }, 501 } 502 503 for _, test := range tests { 504 t.Run(test.name, func(t *testing.T) { 505 resolved := resolveProperty(test.pom, stringPointer(test.property), test.name) 506 assert.Equal(t, test.expected, resolved) 507 }) 508 } 509 } 510 511 func stringPointer(s string) *string { 512 return &s 513 } 514 515 func Test_getUtf8Reader(t *testing.T) { 516 tests := []struct { 517 name string 518 contents string 519 }{ 520 { 521 name: "unknown encoding", 522 // random binary contents 523 contents: "BkiJz02JyEWE0nXR6TH///9NicpJweEETIucJIgAAABJicxPjQwhTY1JCE05WQh0BU2J0eunTYshTIusJIAAAAAPHwBNOeV1BUUx2+tWTIlUJDhMiUwkSEyJRCQgSIl8JFBMiQ==", 524 }, 525 } 526 for _, tt := range tests { 527 t.Run(tt.name, func(t *testing.T) { 528 decoder := base64.NewDecoder(base64.StdEncoding, strings.NewReader(tt.contents)) 529 530 got, err := getUtf8Reader(decoder) 531 require.NoError(t, err) 532 gotBytes, err := io.ReadAll(got) 533 require.NoError(t, err) 534 // if we couldn't decode the section as UTF-8, we should get a replacement character 535 assert.Contains(t, string(gotBytes), "�") 536 }) 537 } 538 }