github.com/demonoid81/moby@v0.0.0-20200517203328-62dd8e17c460/pkg/tarsum/tarsum_test.go (about) 1 package tarsum // import "github.com/demonoid81/moby/pkg/tarsum" 2 3 import ( 4 "archive/tar" 5 "bytes" 6 "compress/gzip" 7 "crypto/md5" // #nosec G501 8 "crypto/rand" 9 "crypto/sha1" // #nosec G505 10 "crypto/sha256" 11 "crypto/sha512" 12 "encoding/hex" 13 "fmt" 14 "io" 15 "io/ioutil" 16 "os" 17 "strings" 18 "testing" 19 20 "gotest.tools/v3/assert" 21 is "gotest.tools/v3/assert/cmp" 22 ) 23 24 type testLayer struct { 25 filename string 26 options *sizedOptions 27 jsonfile string 28 gzip bool 29 tarsum string 30 version Version 31 hash THash 32 } 33 34 var testLayers = []testLayer{ 35 { 36 filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar", 37 jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json", 38 version: Version0, 39 tarsum: "tarsum+sha256:4095cc12fa5fdb1ab2760377e1cd0c4ecdd3e61b4f9b82319d96fcea6c9a41c6"}, 40 { 41 filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar", 42 jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json", 43 version: VersionDev, 44 tarsum: "tarsum.dev+sha256:db56e35eec6ce65ba1588c20ba6b1ea23743b59e81fb6b7f358ccbde5580345c"}, 45 { 46 filename: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar", 47 jsonfile: "testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/json", 48 gzip: true, 49 tarsum: "tarsum+sha256:4095cc12fa5fdb1ab2760377e1cd0c4ecdd3e61b4f9b82319d96fcea6c9a41c6"}, 50 { 51 // Tests existing version of TarSum when xattrs are present 52 filename: "testdata/xattr/layer.tar", 53 jsonfile: "testdata/xattr/json", 54 version: Version0, 55 tarsum: "tarsum+sha256:07e304a8dbcb215b37649fde1a699f8aeea47e60815707f1cdf4d55d25ff6ab4"}, 56 { 57 // Tests next version of TarSum when xattrs are present 58 filename: "testdata/xattr/layer.tar", 59 jsonfile: "testdata/xattr/json", 60 version: VersionDev, 61 tarsum: "tarsum.dev+sha256:6c58917892d77b3b357b0f9ad1e28e1f4ae4de3a8006bd3beb8beda214d8fd16"}, 62 { 63 filename: "testdata/511136ea3c5a64f264b78b5433614aec563103b4d4702f3ba7d4d2698e22c158/layer.tar", 64 jsonfile: "testdata/511136ea3c5a64f264b78b5433614aec563103b4d4702f3ba7d4d2698e22c158/json", 65 tarsum: "tarsum+sha256:c66bd5ec9f87b8f4c6135ca37684618f486a3dd1d113b138d0a177bfa39c2571"}, 66 { 67 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 68 tarsum: "tarsum+sha256:75258b2c5dcd9adfe24ce71eeca5fc5019c7e669912f15703ede92b1a60cb11f"}, 69 { 70 // this tar has two files with the same path 71 filename: "testdata/collision/collision-0.tar", 72 tarsum: "tarsum+sha256:7cabb5e9128bb4a93ff867b9464d7c66a644ae51ea2e90e6ef313f3bef93f077"}, 73 { 74 // this tar has the same two files (with the same path), but reversed order. ensuring is has different hash than above 75 filename: "testdata/collision/collision-1.tar", 76 tarsum: "tarsum+sha256:805fd393cfd58900b10c5636cf9bab48b2406d9b66523122f2352620c85dc7f9"}, 77 { 78 // this tar has newer of collider-0.tar, ensuring is has different hash 79 filename: "testdata/collision/collision-2.tar", 80 tarsum: "tarsum+sha256:85d2b8389f077659d78aca898f9e632ed9161f553f144aef100648eac540147b"}, 81 { 82 // this tar has newer of collider-1.tar, ensuring is has different hash 83 filename: "testdata/collision/collision-3.tar", 84 tarsum: "tarsum+sha256:cbe4dee79fe979d69c16c2bccd032e3205716a562f4a3c1ca1cbeed7b256eb19"}, 85 { 86 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 87 tarsum: "tarsum+md5:3a6cdb475d90459ac0d3280703d17be2", 88 hash: md5THash, 89 }, 90 { 91 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 92 tarsum: "tarsum+sha1:14b5e0d12a0c50a4281e86e92153fa06d55d00c6", 93 hash: sha1Hash, 94 }, 95 { 96 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 97 tarsum: "tarsum+sha224:dd8925b7a4c71b13f3a68a0f9428a757c76b93752c398f272a9062d5", 98 hash: sha224Hash, 99 }, 100 { 101 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 102 tarsum: "tarsum+sha384:e39e82f40005134bed13fb632d1a5f2aa4675c9ddb4a136fbcec202797e68d2f635e1200dee2e3a8d7f69d54d3f2fd27", 103 hash: sha384Hash, 104 }, 105 { 106 options: &sizedOptions{1, 1024 * 1024, false, false}, // a 1mb file (in memory) 107 tarsum: "tarsum+sha512:7c56de40b2d1ed3863ff25d83b59cdc8f53e67d1c01c3ee8f201f8e4dec3107da976d0c0ec9109c962a152b32699fe329b2dab13966020e400c32878a0761a7e", 108 hash: sha512Hash, 109 }, 110 } 111 112 type sizedOptions struct { 113 num int64 114 size int64 115 isRand bool 116 realFile bool 117 } 118 119 // make a tar: 120 // * num is the number of files the tar should have 121 // * size is the bytes per file 122 // * isRand is whether the contents of the files should be a random chunk (otherwise it's all zeros) 123 // * realFile will write to a TempFile, instead of an in memory buffer 124 func sizedTar(opts sizedOptions) io.Reader { 125 var ( 126 fh io.ReadWriter 127 err error 128 ) 129 if opts.realFile { 130 fh, err = ioutil.TempFile("", "tarsum") 131 if err != nil { 132 return nil 133 } 134 } else { 135 fh = bytes.NewBuffer([]byte{}) 136 } 137 tarW := tar.NewWriter(fh) 138 defer tarW.Close() 139 for i := int64(0); i < opts.num; i++ { 140 err := tarW.WriteHeader(&tar.Header{ 141 Name: fmt.Sprintf("/testdata%d", i), 142 Mode: 0755, 143 Uid: 0, 144 Gid: 0, 145 Size: opts.size, 146 Typeflag: tar.TypeReg, 147 }) 148 if err != nil { 149 return nil 150 } 151 var rBuf []byte 152 if opts.isRand { 153 rBuf = make([]byte, 8) 154 _, err = rand.Read(rBuf) 155 if err != nil { 156 return nil 157 } 158 } else { 159 rBuf = []byte{0, 0, 0, 0, 0, 0, 0, 0} 160 } 161 162 for i := int64(0); i < opts.size/int64(8); i++ { 163 tarW.Write(rBuf) 164 } 165 } 166 return fh 167 } 168 169 func emptyTarSum(gzip bool) (TarSum, error) { 170 reader, writer := io.Pipe() 171 tarWriter := tar.NewWriter(writer) 172 173 // Immediately close tarWriter and write-end of the 174 // Pipe in a separate goroutine so we don't block. 175 go func() { 176 tarWriter.Close() 177 writer.Close() 178 }() 179 180 return NewTarSum(reader, !gzip, Version0) 181 } 182 183 // Test errors on NewTarsumForLabel 184 func TestNewTarSumForLabelInvalid(t *testing.T) { 185 reader := strings.NewReader("") 186 187 if _, err := NewTarSumForLabel(reader, true, "invalidlabel"); err == nil { 188 t.Fatalf("Expected an error, got nothing.") 189 } 190 191 if _, err := NewTarSumForLabel(reader, true, "invalid+sha256"); err == nil { 192 t.Fatalf("Expected an error, got nothing.") 193 } 194 if _, err := NewTarSumForLabel(reader, true, "tarsum.v1+invalid"); err == nil { 195 t.Fatalf("Expected an error, got nothing.") 196 } 197 } 198 199 func TestNewTarSumForLabel(t *testing.T) { 200 201 layer := testLayers[0] 202 203 reader, err := os.Open(layer.filename) 204 if err != nil { 205 t.Fatal(err) 206 } 207 defer reader.Close() 208 209 label := strings.Split(layer.tarsum, ":")[0] 210 ts, err := NewTarSumForLabel(reader, false, label) 211 if err != nil { 212 t.Fatal(err) 213 } 214 215 // Make sure it actually worked by reading a little bit of it 216 nbByteToRead := 8 * 1024 217 dBuf := make([]byte, nbByteToRead) 218 _, err = ts.Read(dBuf) 219 if err != nil { 220 t.Errorf("failed to read %vKB from %s: %s", nbByteToRead, layer.filename, err) 221 } 222 } 223 224 // TestEmptyTar tests that tarsum does not fail to read an empty tar 225 // and correctly returns the hex digest of an empty hash. 226 func TestEmptyTar(t *testing.T) { 227 // Test without gzip. 228 ts, err := emptyTarSum(false) 229 assert.NilError(t, err) 230 231 zeroBlock := make([]byte, 1024) 232 buf := new(bytes.Buffer) 233 234 n, err := io.Copy(buf, ts) 235 assert.NilError(t, err) 236 237 if n != int64(len(zeroBlock)) || !bytes.Equal(buf.Bytes(), zeroBlock) { 238 t.Fatalf("tarSum did not write the correct number of zeroed bytes: %d", n) 239 } 240 241 expectedSum := ts.Version().String() + "+sha256:" + hex.EncodeToString(sha256.New().Sum(nil)) 242 resultSum := ts.Sum(nil) 243 244 if resultSum != expectedSum { 245 t.Fatalf("expected [%s] but got [%s]", expectedSum, resultSum) 246 } 247 248 // Test with gzip. 249 ts, err = emptyTarSum(true) 250 assert.NilError(t, err) 251 buf.Reset() 252 253 _, err = io.Copy(buf, ts) 254 assert.NilError(t, err) 255 256 bufgz := new(bytes.Buffer) 257 gz := gzip.NewWriter(bufgz) 258 n, err = io.Copy(gz, bytes.NewBuffer(zeroBlock)) 259 assert.NilError(t, err) 260 gz.Close() 261 gzBytes := bufgz.Bytes() 262 263 if n != int64(len(zeroBlock)) || !bytes.Equal(buf.Bytes(), gzBytes) { 264 t.Fatalf("tarSum did not write the correct number of gzipped-zeroed bytes: %d", n) 265 } 266 267 resultSum = ts.Sum(nil) 268 269 if resultSum != expectedSum { 270 t.Fatalf("expected [%s] but got [%s]", expectedSum, resultSum) 271 } 272 273 // Test without ever actually writing anything. 274 if ts, err = NewTarSum(bytes.NewReader([]byte{}), true, Version0); err != nil { 275 t.Fatal(err) 276 } 277 278 resultSum = ts.Sum(nil) 279 assert.Check(t, is.Equal(expectedSum, resultSum)) 280 } 281 282 var ( 283 md5THash = NewTHash("md5", md5.New) 284 sha1Hash = NewTHash("sha1", sha1.New) 285 sha224Hash = NewTHash("sha224", sha256.New224) 286 sha384Hash = NewTHash("sha384", sha512.New384) 287 sha512Hash = NewTHash("sha512", sha512.New) 288 ) 289 290 // Test all the build-in read size : buf8K, buf16K, buf32K and more 291 func TestTarSumsReadSize(t *testing.T) { 292 // Test always on the same layer (that is big enough) 293 layer := testLayers[0] 294 295 for i := 0; i < 5; i++ { 296 297 reader, err := os.Open(layer.filename) 298 if err != nil { 299 t.Fatal(err) 300 } 301 defer reader.Close() 302 303 ts, err := NewTarSum(reader, false, layer.version) 304 if err != nil { 305 t.Fatal(err) 306 } 307 308 // Read and discard bytes so that it populates sums 309 nbByteToRead := (i + 1) * 8 * 1024 310 dBuf := make([]byte, nbByteToRead) 311 _, err = ts.Read(dBuf) 312 if err != nil { 313 t.Errorf("failed to read %vKB from %s: %s", nbByteToRead, layer.filename, err) 314 continue 315 } 316 } 317 } 318 319 func TestTarSums(t *testing.T) { 320 for _, layer := range testLayers { 321 var ( 322 fh io.Reader 323 err error 324 ) 325 if len(layer.filename) > 0 { 326 fh, err = os.Open(layer.filename) 327 if err != nil { 328 t.Errorf("failed to open %s: %s", layer.filename, err) 329 continue 330 } 331 } else if layer.options != nil { 332 fh = sizedTar(*layer.options) 333 } else { 334 // What else is there to test? 335 t.Errorf("what to do with %#v", layer) 336 continue 337 } 338 if file, ok := fh.(*os.File); ok { 339 defer file.Close() 340 } 341 342 var ts TarSum 343 if layer.hash == nil { 344 // double negatives! 345 ts, err = NewTarSum(fh, !layer.gzip, layer.version) 346 } else { 347 ts, err = NewTarSumHash(fh, !layer.gzip, layer.version, layer.hash) 348 } 349 if err != nil { 350 t.Errorf("%q :: %q", err, layer.filename) 351 continue 352 } 353 354 // Read variable number of bytes to test dynamic buffer 355 dBuf := make([]byte, 1) 356 _, err = ts.Read(dBuf) 357 if err != nil { 358 t.Errorf("failed to read 1B from %s: %s", layer.filename, err) 359 continue 360 } 361 dBuf = make([]byte, 16*1024) 362 _, err = ts.Read(dBuf) 363 if err != nil { 364 t.Errorf("failed to read 16KB from %s: %s", layer.filename, err) 365 continue 366 } 367 368 // Read and discard remaining bytes 369 _, err = io.Copy(ioutil.Discard, ts) 370 if err != nil { 371 t.Errorf("failed to copy from %s: %s", layer.filename, err) 372 continue 373 } 374 var gotSum string 375 if len(layer.jsonfile) > 0 { 376 jfh, err := os.Open(layer.jsonfile) 377 if err != nil { 378 t.Errorf("failed to open %s: %s", layer.jsonfile, err) 379 continue 380 } 381 defer jfh.Close() 382 383 buf, err := ioutil.ReadAll(jfh) 384 if err != nil { 385 t.Errorf("failed to readAll %s: %s", layer.jsonfile, err) 386 continue 387 } 388 gotSum = ts.Sum(buf) 389 } else { 390 gotSum = ts.Sum(nil) 391 } 392 393 if layer.tarsum != gotSum { 394 t.Errorf("expecting [%s], but got [%s]", layer.tarsum, gotSum) 395 } 396 var expectedHashName string 397 if layer.hash != nil { 398 expectedHashName = layer.hash.Name() 399 } else { 400 expectedHashName = DefaultTHash.Name() 401 } 402 if expectedHashName != ts.Hash().Name() { 403 t.Errorf("expecting hash [%v], but got [%s]", expectedHashName, ts.Hash().Name()) 404 } 405 } 406 } 407 408 func TestIteration(t *testing.T) { 409 headerTests := []struct { 410 expectedSum string // TODO(vbatts) it would be nice to get individual sums of each 411 version Version 412 hdr *tar.Header 413 data []byte 414 }{ 415 { 416 "tarsum+sha256:626c4a2e9a467d65c33ae81f7f3dedd4de8ccaee72af73223c4bc4718cbc7bbd", 417 Version0, 418 &tar.Header{ 419 Name: "file.txt", 420 Size: 0, 421 Typeflag: tar.TypeReg, 422 Devminor: 0, 423 Devmajor: 0, 424 }, 425 []byte(""), 426 }, 427 { 428 "tarsum.dev+sha256:6ffd43a1573a9913325b4918e124ee982a99c0f3cba90fc032a65f5e20bdd465", 429 VersionDev, 430 &tar.Header{ 431 Name: "file.txt", 432 Size: 0, 433 Typeflag: tar.TypeReg, 434 Devminor: 0, 435 Devmajor: 0, 436 }, 437 []byte(""), 438 }, 439 { 440 "tarsum.dev+sha256:862964db95e0fa7e42836ae4caab3576ab1df8d275720a45bdd01a5a3730cc63", 441 VersionDev, 442 &tar.Header{ 443 Name: "another.txt", 444 Uid: 1000, 445 Gid: 1000, 446 Uname: "slartibartfast", 447 Gname: "users", 448 Size: 4, 449 Typeflag: tar.TypeReg, 450 Devminor: 0, 451 Devmajor: 0, 452 }, 453 []byte("test"), 454 }, 455 { 456 "tarsum.dev+sha256:4b1ba03544b49d96a32bacc77f8113220bd2f6a77e7e6d1e7b33cd87117d88e7", 457 VersionDev, 458 &tar.Header{ 459 Name: "xattrs.txt", 460 Uid: 1000, 461 Gid: 1000, 462 Uname: "slartibartfast", 463 Gname: "users", 464 Size: 4, 465 Typeflag: tar.TypeReg, 466 Xattrs: map[string]string{ 467 "user.key1": "value1", 468 "user.key2": "value2", 469 }, 470 }, 471 []byte("test"), 472 }, 473 { 474 "tarsum.dev+sha256:410b602c898bd4e82e800050f89848fc2cf20fd52aa59c1ce29df76b878b84a6", 475 VersionDev, 476 &tar.Header{ 477 Name: "xattrs.txt", 478 Uid: 1000, 479 Gid: 1000, 480 Uname: "slartibartfast", 481 Gname: "users", 482 Size: 4, 483 Typeflag: tar.TypeReg, 484 Xattrs: map[string]string{ 485 "user.KEY1": "value1", // adding different case to ensure different sum 486 "user.key2": "value2", 487 }, 488 }, 489 []byte("test"), 490 }, 491 { 492 "tarsum+sha256:b1f97eab73abd7593c245e51070f9fbdb1824c6b00a0b7a3d7f0015cd05e9e86", 493 Version0, 494 &tar.Header{ 495 Name: "xattrs.txt", 496 Uid: 1000, 497 Gid: 1000, 498 Uname: "slartibartfast", 499 Gname: "users", 500 Size: 4, 501 Typeflag: tar.TypeReg, 502 Xattrs: map[string]string{ 503 "user.NOT": "CALCULATED", 504 }, 505 }, 506 []byte("test"), 507 }, 508 } 509 for _, htest := range headerTests { 510 s, err := renderSumForHeader(htest.version, htest.hdr, htest.data) 511 if err != nil { 512 t.Fatal(err) 513 } 514 515 if s != htest.expectedSum { 516 t.Errorf("expected sum: %q, got: %q", htest.expectedSum, s) 517 } 518 } 519 520 } 521 522 func renderSumForHeader(v Version, h *tar.Header, data []byte) (string, error) { 523 buf := bytes.NewBuffer(nil) 524 // first build our test tar 525 tw := tar.NewWriter(buf) 526 if err := tw.WriteHeader(h); err != nil { 527 return "", err 528 } 529 if _, err := tw.Write(data); err != nil { 530 return "", err 531 } 532 tw.Close() 533 534 ts, err := NewTarSum(buf, true, v) 535 if err != nil { 536 return "", err 537 } 538 tr := tar.NewReader(ts) 539 for { 540 hdr, err := tr.Next() 541 if hdr == nil || err == io.EOF { 542 // Signals the end of the archive. 543 break 544 } 545 if err != nil { 546 return "", err 547 } 548 if _, err = io.Copy(ioutil.Discard, tr); err != nil { 549 return "", err 550 } 551 } 552 return ts.Sum(nil), nil 553 } 554 555 func Benchmark9kTar(b *testing.B) { 556 buf := bytes.NewBuffer([]byte{}) 557 fh, err := os.Open("testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar") 558 if err != nil { 559 b.Error(err) 560 return 561 } 562 defer fh.Close() 563 564 n, err := io.Copy(buf, fh) 565 if err != nil { 566 b.Error(err) 567 return 568 } 569 570 reader := bytes.NewReader(buf.Bytes()) 571 572 b.SetBytes(n) 573 b.ResetTimer() 574 for i := 0; i < b.N; i++ { 575 reader.Seek(0, 0) 576 ts, err := NewTarSum(reader, true, Version0) 577 if err != nil { 578 b.Error(err) 579 return 580 } 581 io.Copy(ioutil.Discard, ts) 582 ts.Sum(nil) 583 } 584 } 585 586 func Benchmark9kTarGzip(b *testing.B) { 587 buf := bytes.NewBuffer([]byte{}) 588 fh, err := os.Open("testdata/46af0962ab5afeb5ce6740d4d91652e69206fc991fd5328c1a94d364ad00e457/layer.tar") 589 if err != nil { 590 b.Error(err) 591 return 592 } 593 defer fh.Close() 594 595 n, err := io.Copy(buf, fh) 596 if err != nil { 597 b.Error(err) 598 return 599 } 600 601 reader := bytes.NewReader(buf.Bytes()) 602 603 b.SetBytes(n) 604 b.ResetTimer() 605 for i := 0; i < b.N; i++ { 606 reader.Seek(0, 0) 607 ts, err := NewTarSum(reader, false, Version0) 608 if err != nil { 609 b.Error(err) 610 return 611 } 612 io.Copy(ioutil.Discard, ts) 613 ts.Sum(nil) 614 } 615 } 616 617 // this is a single big file in the tar archive 618 func Benchmark1mbSingleFileTar(b *testing.B) { 619 benchmarkTar(b, sizedOptions{1, 1024 * 1024, true, true}, false) 620 } 621 622 // this is a single big file in the tar archive 623 func Benchmark1mbSingleFileTarGzip(b *testing.B) { 624 benchmarkTar(b, sizedOptions{1, 1024 * 1024, true, true}, true) 625 } 626 627 // this is 1024 1k files in the tar archive 628 func Benchmark1kFilesTar(b *testing.B) { 629 benchmarkTar(b, sizedOptions{1024, 1024, true, true}, false) 630 } 631 632 // this is 1024 1k files in the tar archive 633 func Benchmark1kFilesTarGzip(b *testing.B) { 634 benchmarkTar(b, sizedOptions{1024, 1024, true, true}, true) 635 } 636 637 func benchmarkTar(b *testing.B, opts sizedOptions, isGzip bool) { 638 var fh *os.File 639 tarReader := sizedTar(opts) 640 if br, ok := tarReader.(*os.File); ok { 641 fh = br 642 } 643 defer os.Remove(fh.Name()) 644 defer fh.Close() 645 646 b.SetBytes(opts.size * opts.num) 647 b.ResetTimer() 648 for i := 0; i < b.N; i++ { 649 ts, err := NewTarSum(fh, !isGzip, Version0) 650 if err != nil { 651 b.Error(err) 652 return 653 } 654 io.Copy(ioutil.Discard, ts) 655 ts.Sum(nil) 656 fh.Seek(0, 0) 657 } 658 }