github.com/moby/docker@v26.1.3+incompatible/pkg/tarsum/tarsum_test.go (about) 1 package tarsum // import "github.com/docker/docker/pkg/tarsum" 2 3 import ( 4 "archive/tar" 5 "bytes" 6 "compress/gzip" 7 "crypto/md5" // #nosec G501 8 "crypto/rand" 9 "crypto/sha1" // #nosec G505 10 "crypto/sha256" 11 "crypto/sha512" 12 "encoding/hex" 13 "fmt" 14 "io" 15 "os" 16 "strings" 17 "testing" 18 19 "gotest.tools/v3/assert" 20 is "gotest.tools/v3/assert/cmp" 21 ) 22 23 type testLayer struct { 24 filename string 25 options *sizedOptions 26 jsonfile string 27 gzip bool 28 tarsum string 29 version Version 30 hash THash 31 } 32 33 var testLayers = []testLayer{ 34 { 35 filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar", 36 jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json", 37 version: Version0, 38 tarsum: "tarsum+sha256:4095cc12fa5fdb1ab2760377e1cd0c4ecdd3e61b4f9b82319d96fcea6c9a41c6", 39 }, 40 { 41 filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar", 42 jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json", 43 version: VersionDev, 44 tarsum: "tarsum.dev+sha256:db56e35eec6ce65ba1588c20ba6b1ea23743b59e81fb6b7f358ccbde5580345c", 45 }, 46 { 47 filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar", 48 jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json", 49 gzip: true, 50 tarsum: "tarsum+sha256:4095cc12fa5fdb1ab2760377e1cd0c4ecdd3e61b4f9b82319d96fcea6c9a41c6", 51 }, 52 { 53 // Tests existing version of TarSum when xattrs are present 54 filename: "testdata/xattr/layer.tar", 55 jsonfile: "testdata/xattr/json", 56 version: Version0, 57 tarsum: "tarsum+sha256:07e304a8dbcb215b37649fde1a699f8aeea47e60815707f1cdf4d55d25ff6ab4", 58 }, 59 { 60 // Tests next version of TarSum when xattrs are present 61 filename: "testdata/xattr/layer.tar", 62 jsonfile: "testdata/xattr/json", 63 version: VersionDev, 64 tarsum: "tarsum.dev+sha256:6c58917892d77b3b357b0f9ad1e28e1f4ae4de3a8006bd3beb8beda214d8fd16", 65 }, 66 { 67 filename: "testdata/511136ea3c5a64f264b78b5433614aec563103b4d4702f3ba7d4d2698e22c158/layer.tar", 68 jsonfile: "testdata/511136ea3c5a64f264b78b5433614aec563103b4d4702f3ba7d4d2698e22c158/json", 69 tarsum: "tarsum+sha256:c66bd5ec9f87b8f4c6135ca37684618f486a3dd1d113b138d0a177bfa39c2571", 70 }, 71 { 72 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 73 tarsum: "tarsum+sha256:75258b2c5dcd9adfe24ce71eeca5fc5019c7e669912f15703ede92b1a60cb11f", 74 }, 75 { 76 // this tar has two files with the same path 77 filename: "testdata/collision/collision-0.tar", 78 tarsum: "tarsum+sha256:7cabb5e9128bb4a93ff867b9464d7c66a644ae51ea2e90e6ef313f3bef93f077", 79 }, 80 { 81 // this tar has the same two files (with the same path), but reversed order. ensuring is has different hash than above 82 filename: "testdata/collision/collision-1.tar", 83 tarsum: "tarsum+sha256:805fd393cfd58900b10c5636cf9bab48b2406d9b66523122f2352620c85dc7f9", 84 }, 85 { 86 // this tar has newer of collider-0.tar, ensuring is has different hash 87 filename: "testdata/collision/collision-2.tar", 88 tarsum: "tarsum+sha256:85d2b8389f077659d78aca898f9e632ed9161f553f144aef100648eac540147b", 89 }, 90 { 91 // this tar has newer of collider-1.tar, ensuring is has different hash 92 filename: "testdata/collision/collision-3.tar", 93 tarsum: "tarsum+sha256:cbe4dee79fe979d69c16c2bccd032e3205716a562f4a3c1ca1cbeed7b256eb19", 94 }, 95 { 96 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 97 tarsum: "tarsum+md5:3a6cdb475d90459ac0d3280703d17be2", 98 hash: md5THash, 99 }, 100 { 101 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 102 tarsum: "tarsum+sha1:14b5e0d12a0c50a4281e86e92153fa06d55d00c6", 103 hash: sha1Hash, 104 }, 105 { 106 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 107 tarsum: "tarsum+sha224:dd8925b7a4c71b13f3a68a0f9428a757c76b93752c398f272a9062d5", 108 hash: sha224Hash, 109 }, 110 { 111 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 112 tarsum: "tarsum+sha384:e39e82f40005134bed13fb632d1a5f2aa4675c9ddb4a136fbcec202797e68d2f635e1200dee2e3a8d7f69d54d3f2fd27", 113 hash: sha384Hash, 114 }, 115 { 116 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 117 tarsum: "tarsum+sha512:7c56de40b2d1ed3863ff25d83b59cdc8f53e67d1c01c3ee8f201f8e4dec3107da976d0c0ec9109c962a152b32699fe329b2dab13966020e400c32878a0761a7e", 118 hash: sha512Hash, 119 }, 120 } 121 122 type sizedOptions struct { 123 num int64 124 size int64 125 isRand bool 126 realFile bool 127 } 128 129 // make a tar: 130 // * num is the number of files the tar should have 131 // * size is the bytes per file 132 // * isRand is whether the contents of the files should be a random chunk (otherwise it's all zeros) 133 // * realFile will write to a TempFile, instead of an in memory buffer 134 func sizedTar(opts sizedOptions) io.Reader { 135 var ( 136 fh io.ReadWriter 137 err error 138 ) 139 if opts.realFile { 140 fh, err = os.CreateTemp("", "tarsum") 141 if err != nil { 142 return nil 143 } 144 } else { 145 fh = bytes.NewBuffer([]byte{}) 146 } 147 tarW := tar.NewWriter(fh) 148 defer tarW.Close() 149 for i := int64(0); i < opts.num; i++ { 150 err := tarW.WriteHeader(&tar.Header{ 151 Name: fmt.Sprintf("/testdata%d", i), 152 Mode: 0o755, 153 Uid: 0, 154 Gid: 0, 155 Size: opts.size, 156 Typeflag: tar.TypeReg, 157 }) 158 if err != nil { 159 return nil 160 } 161 var rBuf []byte 162 if opts.isRand { 163 rBuf = make([]byte, 8) 164 _, err = rand.Read(rBuf) 165 if err != nil { 166 return nil 167 } 168 } else { 169 rBuf = []byte{0, 0, 0, 0, 0, 0, 0, 0} 170 } 171 172 for i := int64(0); i < opts.size/int64(8); i++ { 173 tarW.Write(rBuf) 174 } 175 } 176 return fh 177 } 178 179 func emptyTarSum(gzip bool) (TarSum, error) { 180 reader, writer := io.Pipe() 181 tarWriter := tar.NewWriter(writer) 182 183 // Immediately close tarWriter and write-end of the 184 // Pipe in a separate goroutine so we don't block. 185 go func() { 186 tarWriter.Close() 187 writer.Close() 188 }() 189 190 return NewTarSum(reader, !gzip, Version0) 191 } 192 193 // Test errors on NewTarsumForLabel 194 func TestNewTarSumForLabelInvalid(t *testing.T) { 195 reader := strings.NewReader("") 196 197 if _, err := NewTarSumForLabel(reader, true, "invalidlabel"); err == nil { 198 t.Fatalf("Expected an error, got nothing.") 199 } 200 201 if _, err := NewTarSumForLabel(reader, true, "invalid+sha256"); err == nil { 202 t.Fatalf("Expected an error, got nothing.") 203 } 204 if _, err := NewTarSumForLabel(reader, true, "tarsum.v1+invalid"); err == nil { 205 t.Fatalf("Expected an error, got nothing.") 206 } 207 } 208 209 func TestNewTarSumForLabel(t *testing.T) { 210 layer := testLayers[0] 211 212 reader, err := os.Open(layer.filename) 213 if err != nil { 214 t.Fatal(err) 215 } 216 defer reader.Close() 217 218 label := strings.Split(layer.tarsum, ":")[0] 219 ts, err := NewTarSumForLabel(reader, false, label) 220 if err != nil { 221 t.Fatal(err) 222 } 223 224 // Make sure it actually worked by reading a little bit of it 225 nbByteToRead := 8 * 1024 226 dBuf := make([]byte, nbByteToRead) 227 _, err = ts.Read(dBuf) 228 if err != nil { 229 t.Errorf("failed to read %vKB from %s: %s", nbByteToRead, layer.filename, err) 230 } 231 } 232 233 // TestEmptyTar tests that tarsum does not fail to read an empty tar 234 // and correctly returns the hex digest of an empty hash. 235 func TestEmptyTar(t *testing.T) { 236 // Test without gzip. 237 ts, err := emptyTarSum(false) 238 assert.NilError(t, err) 239 240 zeroBlock := make([]byte, 1024) 241 buf := new(bytes.Buffer) 242 243 n, err := io.Copy(buf, ts) 244 assert.NilError(t, err) 245 246 if n != int64(len(zeroBlock)) || !bytes.Equal(buf.Bytes(), zeroBlock) { 247 t.Fatalf("tarSum did not write the correct number of zeroed bytes: %d", n) 248 } 249 250 expectedSum := ts.Version().String() + "+sha256:" + hex.EncodeToString(sha256.New().Sum(nil)) 251 resultSum := ts.Sum(nil) 252 253 if resultSum != expectedSum { 254 t.Fatalf("expected [%s] but got [%s]", expectedSum, resultSum) 255 } 256 257 // Test with gzip. 258 ts, err = emptyTarSum(true) 259 assert.NilError(t, err) 260 buf.Reset() 261 262 _, err = io.Copy(buf, ts) 263 assert.NilError(t, err) 264 265 bufgz := new(bytes.Buffer) 266 gz := gzip.NewWriter(bufgz) 267 n, err = io.Copy(gz, bytes.NewBuffer(zeroBlock)) 268 assert.NilError(t, err) 269 gz.Close() 270 gzBytes := bufgz.Bytes() 271 272 if n != int64(len(zeroBlock)) || !bytes.Equal(buf.Bytes(), gzBytes) { 273 t.Fatalf("tarSum did not write the correct number of gzipped-zeroed bytes: %d", n) 274 } 275 276 resultSum = ts.Sum(nil) 277 278 if resultSum != expectedSum { 279 t.Fatalf("expected [%s] but got [%s]", expectedSum, resultSum) 280 } 281 282 // Test without ever actually writing anything. 283 if ts, err = NewTarSum(bytes.NewReader([]byte{}), true, Version0); err != nil { 284 t.Fatal(err) 285 } 286 287 resultSum = ts.Sum(nil) 288 assert.Check(t, is.Equal(expectedSum, resultSum)) 289 } 290 291 var ( 292 md5THash = NewTHash("md5", md5.New) 293 sha1Hash = NewTHash("sha1", sha1.New) 294 sha224Hash = NewTHash("sha224", sha256.New224) 295 sha384Hash = NewTHash("sha384", sha512.New384) 296 sha512Hash = NewTHash("sha512", sha512.New) 297 ) 298 299 // Test all the build-in read size : buf8K, buf16K, buf32K and more 300 func TestTarSumsReadSize(t *testing.T) { 301 // Test always on the same layer (that is big enough) 302 layer := testLayers[0] 303 304 for i := 0; i < 5; i++ { 305 reader, err := os.Open(layer.filename) 306 if err != nil { 307 t.Fatal(err) 308 } 309 defer reader.Close() 310 311 ts, err := NewTarSum(reader, false, layer.version) 312 if err != nil { 313 t.Fatal(err) 314 } 315 316 // Read and discard bytes so that it populates sums 317 nbByteToRead := (i + 1) * 8 * 1024 318 dBuf := make([]byte, nbByteToRead) 319 _, err = ts.Read(dBuf) 320 if err != nil { 321 t.Errorf("failed to read %vKB from %s: %s", nbByteToRead, layer.filename, err) 322 continue 323 } 324 } 325 } 326 327 func TestTarSums(t *testing.T) { 328 for _, layer := range testLayers { 329 var ( 330 fh io.Reader 331 err error 332 ) 333 if len(layer.filename) > 0 { 334 fh, err = os.Open(layer.filename) 335 if err != nil { 336 t.Errorf("failed to open %s: %s", layer.filename, err) 337 continue 338 } 339 } else if layer.options != nil { 340 fh = sizedTar(*layer.options) 341 } else { 342 // What else is there to test? 343 t.Errorf("what to do with %#v", layer) 344 continue 345 } 346 if file, ok := fh.(*os.File); ok { 347 defer file.Close() 348 } 349 350 var ts TarSum 351 if layer.hash == nil { 352 // double negatives! 353 ts, err = NewTarSum(fh, !layer.gzip, layer.version) 354 } else { 355 ts, err = NewTarSumHash(fh, !layer.gzip, layer.version, layer.hash) 356 } 357 if err != nil { 358 t.Errorf("%q :: %q", err, layer.filename) 359 continue 360 } 361 362 // Read variable number of bytes to test dynamic buffer 363 dBuf := make([]byte, 1) 364 _, err = ts.Read(dBuf) 365 if err != nil { 366 t.Errorf("failed to read 1B from %s: %s", layer.filename, err) 367 continue 368 } 369 dBuf = make([]byte, 16*1024) 370 _, err = ts.Read(dBuf) 371 if err != nil { 372 t.Errorf("failed to read 16KB from %s: %s", layer.filename, err) 373 continue 374 } 375 376 // Read and discard remaining bytes 377 _, err = io.Copy(io.Discard, ts) 378 if err != nil { 379 t.Errorf("failed to copy from %s: %s", layer.filename, err) 380 continue 381 } 382 var gotSum string 383 if len(layer.jsonfile) > 0 { 384 jfh, err := os.Open(layer.jsonfile) 385 if err != nil { 386 t.Errorf("failed to open %s: %s", layer.jsonfile, err) 387 continue 388 } 389 defer jfh.Close() 390 391 buf, err := io.ReadAll(jfh) 392 if err != nil { 393 t.Errorf("failed to readAll %s: %s", layer.jsonfile, err) 394 continue 395 } 396 gotSum = ts.Sum(buf) 397 } else { 398 gotSum = ts.Sum(nil) 399 } 400 401 if layer.tarsum != gotSum { 402 t.Errorf("expecting [%s], but got [%s]", layer.tarsum, gotSum) 403 } 404 var expectedHashName string 405 if layer.hash != nil { 406 expectedHashName = layer.hash.Name() 407 } else { 408 expectedHashName = DefaultTHash.Name() 409 } 410 if expectedHashName != ts.Hash().Name() { 411 t.Errorf("expecting hash [%v], but got [%s]", expectedHashName, ts.Hash().Name()) 412 } 413 } 414 } 415 416 func TestIteration(t *testing.T) { 417 headerTests := []struct { 418 expectedSum string // TODO(vbatts) it would be nice to get individual sums of each 419 version Version 420 hdr *tar.Header 421 data []byte 422 }{ 423 { 424 "tarsum+sha256:626c4a2e9a467d65c33ae81f7f3dedd4de8ccaee72af73223c4bc4718cbc7bbd", 425 Version0, 426 &tar.Header{ 427 Name: "file.txt", 428 Size: 0, 429 Typeflag: tar.TypeReg, 430 Devminor: 0, 431 Devmajor: 0, 432 }, 433 []byte(""), 434 }, 435 { 436 "tarsum.dev+sha256:6ffd43a1573a9913325b4918e124ee982a99c0f3cba90fc032a65f5e20bdd465", 437 VersionDev, 438 &tar.Header{ 439 Name: "file.txt", 440 Size: 0, 441 Typeflag: tar.TypeReg, 442 Devminor: 0, 443 Devmajor: 0, 444 }, 445 []byte(""), 446 }, 447 { 448 "tarsum.dev+sha256:862964db95e0fa7e42836ae4caab3576ab1df8d275720a45bdd01a5a3730cc63", 449 VersionDev, 450 &tar.Header{ 451 Name: "another.txt", 452 Uid: 1000, 453 Gid: 1000, 454 Uname: "slartibartfast", 455 Gname: "users", 456 Size: 4, 457 Typeflag: tar.TypeReg, 458 Devminor: 0, 459 Devmajor: 0, 460 }, 461 []byte("test"), 462 }, 463 { 464 "tarsum.dev+sha256:4b1ba03544b49d96a32bacc77f8113220bd2f6a77e7e6d1e7b33cd87117d88e7", 465 VersionDev, 466 &tar.Header{ 467 Name: "xattrs.txt", 468 Uid: 1000, 469 Gid: 1000, 470 Uname: "slartibartfast", 471 Gname: "users", 472 Size: 4, 473 Typeflag: tar.TypeReg, 474 Xattrs: map[string]string{ 475 "user.key1": "value1", 476 "user.key2": "value2", 477 }, 478 }, 479 []byte("test"), 480 }, 481 { 482 "tarsum.dev+sha256:410b602c898bd4e82e800050f89848fc2cf20fd52aa59c1ce29df76b878b84a6", 483 VersionDev, 484 &tar.Header{ 485 Name: "xattrs.txt", 486 Uid: 1000, 487 Gid: 1000, 488 Uname: "slartibartfast", 489 Gname: "users", 490 Size: 4, 491 Typeflag: tar.TypeReg, 492 Xattrs: map[string]string{ 493 "user.KEY1": "value1", // adding different case to ensure different sum 494 "user.key2": "value2", 495 }, 496 }, 497 []byte("test"), 498 }, 499 { 500 "tarsum+sha256:b1f97eab73abd7593c245e51070f9fbdb1824c6b00a0b7a3d7f0015cd05e9e86", 501 Version0, 502 &tar.Header{ 503 Name: "xattrs.txt", 504 Uid: 1000, 505 Gid: 1000, 506 Uname: "slartibartfast", 507 Gname: "users", 508 Size: 4, 509 Typeflag: tar.TypeReg, 510 Xattrs: map[string]string{ 511 "user.NOT": "CALCULATED", 512 }, 513 }, 514 []byte("test"), 515 }, 516 } 517 for _, htest := range headerTests { 518 s, err := renderSumForHeader(htest.version, htest.hdr, htest.data) 519 if err != nil { 520 t.Fatal(err) 521 } 522 523 if s != htest.expectedSum { 524 t.Errorf("expected sum: %q, got: %q", htest.expectedSum, s) 525 } 526 } 527 } 528 529 func renderSumForHeader(v Version, h *tar.Header, data []byte) (string, error) { 530 buf := bytes.NewBuffer(nil) 531 // first build our test tar 532 tw := tar.NewWriter(buf) 533 if err := tw.WriteHeader(h); err != nil { 534 return "", err 535 } 536 if _, err := tw.Write(data); err != nil { 537 return "", err 538 } 539 tw.Close() 540 541 ts, err := NewTarSum(buf, true, v) 542 if err != nil { 543 return "", err 544 } 545 tr := tar.NewReader(ts) 546 for { 547 hdr, err := tr.Next() 548 if hdr == nil || err == io.EOF { 549 // Signals the end of the archive. 550 break 551 } 552 if err != nil { 553 return "", err 554 } 555 if _, err = io.Copy(io.Discard, tr); err != nil { 556 return "", err 557 } 558 } 559 return ts.Sum(nil), nil 560 } 561 562 func Benchmark9kTar(b *testing.B) { 563 buf := bytes.NewBuffer([]byte{}) 564 fh, err := os.Open("testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar") 565 if err != nil { 566 b.Error(err) 567 return 568 } 569 defer fh.Close() 570 571 n, err := io.Copy(buf, fh) 572 if err != nil { 573 b.Error(err) 574 return 575 } 576 577 reader := bytes.NewReader(buf.Bytes()) 578 579 b.SetBytes(n) 580 b.ResetTimer() 581 for i := 0; i < b.N; i++ { 582 reader.Seek(0, 0) 583 ts, err := NewTarSum(reader, true, Version0) 584 if err != nil { 585 b.Error(err) 586 return 587 } 588 io.Copy(io.Discard, ts) 589 ts.Sum(nil) 590 } 591 } 592 593 func Benchmark9kTarGzip(b *testing.B) { 594 buf := bytes.NewBuffer([]byte{}) 595 fh, err := os.Open("testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar") 596 if err != nil { 597 b.Error(err) 598 return 599 } 600 defer fh.Close() 601 602 n, err := io.Copy(buf, fh) 603 if err != nil { 604 b.Error(err) 605 return 606 } 607 608 reader := bytes.NewReader(buf.Bytes()) 609 610 b.SetBytes(n) 611 b.ResetTimer() 612 for i := 0; i < b.N; i++ { 613 reader.Seek(0, 0) 614 ts, err := NewTarSum(reader, false, Version0) 615 if err != nil { 616 b.Error(err) 617 return 618 } 619 io.Copy(io.Discard, ts) 620 ts.Sum(nil) 621 } 622 } 623 624 // this is a single big file in the tar archive 625 func Benchmark1mbSingleFileTar(b *testing.B) { 626 benchmarkTar(b, sizedOptions{1, 1024 * 1024, true, true}, false) 627 } 628 629 // this is a single big file in the tar archive 630 func Benchmark1mbSingleFileTarGzip(b *testing.B) { 631 benchmarkTar(b, sizedOptions{1, 1024 * 1024, true, true}, true) 632 } 633 634 // this is 1024 1k files in the tar archive 635 func Benchmark1kFilesTar(b *testing.B) { 636 benchmarkTar(b, sizedOptions{1024, 1024, true, true}, false) 637 } 638 639 // this is 1024 1k files in the tar archive 640 func Benchmark1kFilesTarGzip(b *testing.B) { 641 benchmarkTar(b, sizedOptions{1024, 1024, true, true}, true) 642 } 643 644 func benchmarkTar(b *testing.B, opts sizedOptions, isGzip bool) { 645 var fh *os.File 646 tarReader := sizedTar(opts) 647 if br, ok := tarReader.(*os.File); ok { 648 fh = br 649 } 650 defer os.Remove(fh.Name()) 651 defer fh.Close() 652 653 b.SetBytes(opts.size * opts.num) 654 b.ResetTimer() 655 for i := 0; i < b.N; i++ { 656 ts, err := NewTarSum(fh, !isGzip, Version0) 657 if err != nil { 658 b.Error(err) 659 return 660 } 661 io.Copy(io.Discard, ts) 662 ts.Sum(nil) 663 fh.Seek(0, 0) 664 } 665 }