// Origin: github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/golang/snappy/snappy_test.go

// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package snappy

import (
	"bytes"
	"encoding/binary"
	"flag"
	"fmt"
	"io"
	"io/ioutil"
	"math/rand"
	"net/http"
	"os"
	"os/exec"
	"path/filepath"
	"runtime"
	"strings"
	"testing"
)

// download is the -download command line flag. When it is false, tests and
// benchmarks that need missing data files are skipped instead of fetching
// them over the network.
var download = flag.Bool("download", false, "If true, download any missing files before running benchmarks")

// goEncoderShouldMatchCppEncoder is whether to test that the algorithm used by
// Go's encoder matches byte-for-byte what the C++ snappy encoder produces, on
// this GOARCH. There is more than one valid encoding of any given input, and
// there is more than one good algorithm along the frontier of trading off
// throughput for output size. Nonetheless, we presume that the C++ encoder's
// algorithm is a good one and has been tested on a wide range of inputs, so
// matching that exactly should mean that the Go encoder's algorithm is also
// good, without needing to gather our own corpus of test data.
//
// The exact algorithm used by the C++ code is potentially endian dependent, as
// it puns a byte pointer to a uint32 pointer to load, hash and compare 4 bytes
// at a time. The Go implementation is endian agnostic, in that its output is
// the same (as little-endian C++ code), regardless of the CPU's endianness.
//
// Thus, when comparing Go's output to C++ output generated beforehand, such as
// the "testdata/pi.txt.rawsnappy" file generated by C++ code on a little-
// endian system, we can run that test regardless of the runtime.GOARCH value.
//
// When comparing Go's output to dynamically generated C++ output, i.e. the
// result of fork/exec'ing a C++ program, we can run that test only on
// little-endian systems, because the C++ output might be different on
// big-endian systems. The runtime package doesn't export endianness per se,
// but we can restrict this match-C++ test to common little-endian systems.
const goEncoderShouldMatchCppEncoder = runtime.GOARCH == "386" || runtime.GOARCH == "amd64" || runtime.GOARCH == "arm"

// TestMaxEncodedLenOfMaxBlockSize checks that maxEncodedLenOfMaxBlockSize
// agrees with the value computed by MaxEncodedLen(maxBlockSize).
func TestMaxEncodedLenOfMaxBlockSize(t *testing.T) {
	got := maxEncodedLenOfMaxBlockSize
	want := MaxEncodedLen(maxBlockSize)
	if got != want {
		t.Fatalf("got %d, want %d", got, want)
	}
}

// cmp returns nil if a and b hold the same bytes. Otherwise it returns an
// error describing either the length mismatch or the first differing byte. In
// the error text, a is reported as "got" and b as "want".
func cmp(a, b []byte) error {
	if bytes.Equal(a, b) {
		return nil
	}
	if len(a) != len(b) {
		return fmt.Errorf("got %d bytes, want %d", len(a), len(b))
	}
	for i := range a {
		if a[i] != b[i] {
			return fmt.Errorf("byte #%d: got 0x%02x, want 0x%02x", i, a[i], b[i])
		}
	}
	return nil
}

// roundtrip encodes b, passing ebuf as Encode's dst argument, then decodes the
// result, passing dbuf as Decode's dst argument. It returns an error if
// decoding fails or if the decoded bytes differ from b.
func roundtrip(b, ebuf, dbuf []byte) error {
	d, err := Decode(dbuf, Encode(ebuf, b))
	if err != nil {
		return fmt.Errorf("decoding error: %v", err)
	}
	if err := cmp(d, b); err != nil {
		return fmt.Errorf("roundtrip mismatch: %v", err)
	}
	return nil
}

// TestEmpty round-trips a nil input with nil destination buffers.
func TestEmpty(t *testing.T) {
	if err := roundtrip(nil, nil, nil); err != nil {
		t.Fatal(err)
	}
}

// TestSmallCopy round-trips short inputs of the form "aaaa" + i*"b" +
// "aaaabbbb" for i in [0, 32), for every combination of encode and decode
// destination buffers drawn from nil, 20 bytes and 64 bytes.
func TestSmallCopy(t *testing.T) {
	for _, ebuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
		for _, dbuf := range [][]byte{nil, make([]byte, 20), make([]byte, 64)} {
			for i := 0; i < 32; i++ {
				s := "aaaa" + strings.Repeat("b", i) + "aaaabbbb"
				if err := roundtrip([]byte(s), ebuf, dbuf); err != nil {
					t.Errorf("len(ebuf)=%d, len(dbuf)=%d, i=%d: %v", len(ebuf), len(dbuf), i, err)
				}
			}
		}
	}
}

// TestSmallRand round-trips pseudo-random inputs (fixed seed, so runs are
// reproducible) of lengths 1, 24, 47, ... below 20000.
func TestSmallRand(t *testing.T) {
	rng := rand.New(rand.NewSource(1))
	for n := 1; n < 20000; n += 23 {
		b := make([]byte, n)
		for i := range b {
			b[i] = uint8(rng.Intn(256))
		}
		if err := roundtrip(b, nil, nil); err != nil {
			t.Fatal(err)
		}
	}
}

// TestSmallRegular round-trips inputs made of the repeating ten-byte pattern
// 'a'..'j', for lengths 1, 24, 47, ... below 20000.
func TestSmallRegular(t *testing.T) {
	for n := 1; n < 20000; n += 23 {
		b := make([]byte, n)
		for i := range b {
			b[i] = uint8(i%10 + 'a')
		}
		if err := roundtrip(b, nil, nil); err != nil {
			t.Fatal(err)
		}
	}
}

// TestInvalidVarint checks that both DecodedLen and Decode report ErrCorrupt
// when the leading uncompressed-length varint is malformed or too large.
func TestInvalidVarint(t *testing.T) {
	testCases := []struct {
		desc  string
		input string
	}{{
		"invalid varint, final byte has continuation bit set",
		"\xff",
	}, {
		"invalid varint, value overflows uint64",
		"\xff\xff\xff\xff\xff\xff\xff\xff\xff\xff\x00",
	}, {
		// https://github.com/google/snappy/blob/master/format_description.txt
		// says that "the stream starts with the uncompressed length [as a
		// varint] (up to a maximum of 2^32 - 1)".
		"valid varint (as uint64), but value overflows uint32",
		"\x80\x80\x80\x80\x10",
	}}

	for _, tc := range testCases {
		input := []byte(tc.input)
		if _, err := DecodedLen(input); err != ErrCorrupt {
			t.Errorf("%s: DecodedLen: got %v, want ErrCorrupt", tc.desc, err)
		}
		if _, err := Decode(nil, input); err != ErrCorrupt {
			t.Errorf("%s: Decode: got %v, want ErrCorrupt", tc.desc, err)
		}
	}
}

// TestDecode is a table-driven test of Decode on hand-assembled inputs. Each
// case gives the raw encoded bytes, the expected decoded string and the
// expected error. In addition to comparing results, it pre-fills the
// destination buffer with sentinel bytes to detect writes past dBuf[:dLen].
func TestDecode(t *testing.T) {
	lit40Bytes := make([]byte, 40)
	for i := range lit40Bytes {
		lit40Bytes[i] = byte(i)
	}
	lit40 := string(lit40Bytes)

	testCases := []struct {
		desc    string
		input   string
		want    string
		wantErr error
	}{{
		`decodedLen=0; valid input`,
		"\x00",
		"",
		nil,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; valid input`,
		"\x03" + "\x08\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=2; tagLiteral, 0-byte length; length=3; not enough dst bytes`,
		"\x02" + "\x08\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 0-byte length; length=3; not enough src bytes`,
		"\x03" + "\x08\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=40; tagLiteral, 0-byte length; length=40; valid input`,
		"\x28" + "\x9c" + lit40,
		lit40,
		nil,
	}, {
		`decodedLen=1; tagLiteral, 1-byte length; not enough length bytes`,
		"\x01" + "\xf0",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 1-byte length; length=3; valid input`,
		"\x03" + "\xf0\x02\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 2-byte length; not enough length bytes`,
		"\x01" + "\xf4\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 2-byte length; length=3; valid input`,
		"\x03" + "\xf4\x02\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 3-byte length; not enough length bytes`,
		"\x01" + "\xf8\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 3-byte length; length=3; valid input`,
		"\x03" + "\xf8\x02\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; not enough length bytes`,
		"\x01" + "\xfc\x00\x00\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=1; tagLiteral, 4-byte length; length=3; not enough dst bytes`,
		"\x01" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagLiteral, 4-byte length; length=3; not enough src bytes`,
		"\x04" + "\xfc\x02\x00\x00\x00\xff",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=3; tagLiteral, 4-byte length; length=3; valid input`,
		"\x03" + "\xfc\x02\x00\x00\x00\xff\xff\xff",
		"\xff\xff\xff",
		nil,
	}, {
		`decodedLen=4; tagCopy1, 1 extra length|offset byte; not enough extra bytes`,
		"\x04" + "\x01",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy2, 2 extra length|offset bytes; not enough extra bytes`,
		"\x04" + "\x02\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=4; tagCopy4; unsupported COPY_4 tag`,
		"\x04" + "\x03\x00\x00\x00\x00",
		"",
		errUnsupportedCopy4Tag,
	}, {
		`decodedLen=4; tagLiteral (4 bytes "abcd"); valid input`,
		"\x04" + "\x0cabcd",
		"abcd",
		nil,
	}, {
		`decodedLen=13; tagLiteral (4 bytes "abcd"); tagCopy1; length=9 offset=4; valid input`,
		"\x0d" + "\x0cabcd" + "\x15\x04",
		"abcdabcdabcda",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x04",
		"abcdabcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=2; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x02",
		"abcdcdcd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=1; valid input`,
		"\x08" + "\x0cabcd" + "\x01\x01",
		"abcddddd",
		nil,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=0; zero offset`,
		"\x08" + "\x0cabcd" + "\x01\x00",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=9; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; inconsistent dLen`,
		"\x09" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=8; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=5; offset too large`,
		"\x08" + "\x0cabcd" + "\x01\x05",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=7; tagLiteral (4 bytes "abcd"); tagCopy1; length=4 offset=4; length too large`,
		"\x07" + "\x0cabcd" + "\x01\x04",
		"",
		ErrCorrupt,
	}, {
		`decodedLen=6; tagLiteral (4 bytes "abcd"); tagCopy2; length=2 offset=3; valid input`,
		"\x06" + "\x0cabcd" + "\x06\x03\x00",
		"abcdbc",
		nil,
	}}

	const (
		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to dBuf
		// to check that Decode does not write bytes past the end of
		// dBuf[:dLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)

	var dBuf [100]byte
loop:
	for i, tc := range testCases {
		input := []byte(tc.input)
		// The sentinel check below is only meaningful if the input itself
		// never contains a sentinel byte value.
		for _, x := range input {
			if notPresentBase <= x && x < notPresentBase+notPresentLen {
				t.Errorf("#%d (%s): input shouldn't contain %#02x\ninput: % x", i, tc.desc, x, input)
				continue loop
			}
		}

		// Parse the declared decoded length independently of Decode, so that
		// we know how much of dBuf Decode is allowed to touch.
		dLen, n := binary.Uvarint(input)
		if n <= 0 {
			t.Errorf("#%d (%s): invalid varint-encoded dLen", i, tc.desc)
			continue
		}
		if dLen > uint64(len(dBuf)) {
			t.Errorf("#%d (%s): dLen %d is too large", i, tc.desc, dLen)
			continue
		}

		// Fill dBuf with the sentinel pattern before every case.
		for j := range dBuf {
			dBuf[j] = byte(notPresentBase + j%notPresentLen)
		}
		g, gotErr := Decode(dBuf[:], input)
		if got := string(g); got != tc.want || gotErr != tc.wantErr {
			t.Errorf("#%d (%s):\ngot %q, %v\nwant %q, %v",
				i, tc.desc, got, gotErr, tc.want, tc.wantErr)
			continue
		}
		// Everything at or beyond dLen must still hold the sentinel pattern.
		for j, x := range dBuf {
			if uint64(j) < dLen {
				continue
			}
			if w := byte(notPresentBase + j%notPresentLen); x != w {
				t.Errorf("#%d (%s): Decode overrun: dBuf[%d] was modified: got %#02x, want %#02x\ndBuf: % x",
					i, tc.desc, j, x, w, dBuf)
				continue loop
			}
		}
	}
}

// TestDecodeLengthOffset tests decoding an encoding of the form literal +
// copy-length-offset + literal. For example: "abcdefghijkl" + "efghij" + "AB".
func TestDecodeLengthOffset(t *testing.T) {
	const (
		prefix = "abcdefghijklmnopqr"
		suffix = "ABCDEFGHIJKLMNOPQR"

		// notPresentXxx defines a range of byte values [0xa0, 0xc5) that are
		// not present in either the input or the output. It is written to
		// gotBuf to check that Decode does not write bytes past the end of
		// gotBuf[:totalLen].
		//
		// The magic number 37 was chosen because it is prime. A more 'natural'
		// number like 32 might lead to a false negative if, for example, a
		// byte was incorrectly copied 4*8 bytes later.
		notPresentBase = 0xa0
		notPresentLen  = 37
	)
	var gotBuf, wantBuf, inputBuf [128]byte
	for length := 1; length <= 18; length++ {
		for offset := 1; offset <= 18; offset++ {
		loop:
			for suffixLen := 0; suffixLen <= 18; suffixLen++ {
				totalLen := len(prefix) + length + suffixLen

				// Assemble the encoded input by hand: varint length, a
				// literal holding prefix, a tagCopy2 with the given length
				// and offset, and optionally a literal holding the suffix.
				inputLen := binary.PutUvarint(inputBuf[:], uint64(totalLen))
				inputBuf[inputLen] = tagLiteral + 4*byte(len(prefix)-1)
				inputLen++
				inputLen += copy(inputBuf[inputLen:], prefix)
				inputBuf[inputLen+0] = tagCopy2 + 4*byte(length-1)
				inputBuf[inputLen+1] = byte(offset)
				inputBuf[inputLen+2] = 0x00
				inputLen += 3
				if suffixLen > 0 {
					inputBuf[inputLen] = tagLiteral + 4*byte(suffixLen-1)
					inputLen++
					inputLen += copy(inputBuf[inputLen:], suffix[:suffixLen])
				}
				input := inputBuf[:inputLen]

				// Fill gotBuf with the sentinel pattern before decoding.
				for i := range gotBuf {
					gotBuf[i] = byte(notPresentBase + i%notPresentLen)
				}
				got, err := Decode(gotBuf[:], input)
				if err != nil {
					t.Errorf("length=%d, offset=%d; suffixLen=%d: %v", length, offset, suffixLen, err)
					continue
				}

				// Compute the expected output with a simple byte-at-a-time
				// forward copy.
				wantLen := 0
				wantLen += copy(wantBuf[wantLen:], prefix)
				for i := 0; i < length; i++ {
					wantBuf[wantLen] = wantBuf[wantLen-offset]
					wantLen++
				}
				wantLen += copy(wantBuf[wantLen:], suffix[:suffixLen])
				want := wantBuf[:wantLen]

				for _, x := range input {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: input shouldn't contain %#02x\ninput: % x",
							length, offset, suffixLen, x, input)
						continue loop
					}
				}
				for i, x := range gotBuf {
					if i < totalLen {
						continue
					}
					if w := byte(notPresentBase + i%notPresentLen); x != w {
						t.Errorf("length=%d, offset=%d; suffixLen=%d; totalLen=%d: "+
							"Decode overrun: gotBuf[%d] was modified: got %#02x, want %#02x\ngotBuf: % x",
							length, offset, suffixLen, totalLen, i, x, w, gotBuf)
						continue loop
					}
				}
				for _, x := range want {
					if notPresentBase <= x && x < notPresentBase+notPresentLen {
						t.Errorf("length=%d, offset=%d; suffixLen=%d: want shouldn't contain %#02x\nwant: % x",
							length, offset, suffixLen, x, want)
						continue loop
					}
				}

				if !bytes.Equal(got, want) {
					t.Errorf("length=%d, offset=%d; suffixLen=%d:\ninput % x\ngot % x\nwant % x",
						length, offset, suffixLen, input, got, want)
					continue
				}
			}
		}
	}
}

// Paths of the golden plain-text file and its pre-compressed counterpart,
// relative to the package directory.
const (
	goldenText       = "testdata/Mark.Twain-Tom.Sawyer.txt"
	goldenCompressed = goldenText + ".rawsnappy"
)

// TestDecodeGoldenInput checks that decoding the golden compressed file
// yields exactly the golden text file.
func TestDecodeGoldenInput(t *testing.T) {
	src, err := ioutil.ReadFile(goldenCompressed)
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	got, err := Decode(nil, src)
	if err != nil {
		t.Fatalf("Decode: %v", err)
	}
	want, err := ioutil.ReadFile(goldenText)
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if err := cmp(got, want); err != nil {
		t.Fatal(err)
	}
}

// TestEncodeGoldenInput checks that encoding the golden text file yields
// exactly the golden compressed file.
func TestEncodeGoldenInput(t *testing.T) {
	src, err := ioutil.ReadFile(goldenText)
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	got := Encode(nil, src)
	want, err := ioutil.ReadFile(goldenCompressed)
	if err != nil {
		t.Fatalf("ReadFile: %v", err)
	}
	if err := cmp(got, want); err != nil {
		t.Fatal(err)
	}
}

// snappytoolCmdName is the relative path of the external program that
// runTestSameEncodingAsCpp executes to obtain the reference encoding.
const snappytoolCmdName = "cmd/snappytool/snappytool"

// skipTestSameEncodingAsCpp returns a non-empty message explaining why the
// match-C++ tests should be skipped (GOARCH not covered by
// goEncoderShouldMatchCppEncoder, or the snappytool binary cannot be stat'ed),
// or "" if they can run.
func skipTestSameEncodingAsCpp() (msg string) {
	if !goEncoderShouldMatchCppEncoder {
		return fmt.Sprintf("skipping testing that the encoding is byte-for-byte identical to C++: GOARCH=%s", runtime.GOARCH)
	}
	if _, err := os.Stat(snappytoolCmdName); err != nil {
		return fmt.Sprintf("could not find snappytool: %v", err)
	}
	return ""
}

// runTestSameEncodingAsCpp compares Encode(nil, src) against the standard
// output of running snappytool with the "-e" argument and src on its standard
// input. It returns an error if the tool cannot be run or the outputs differ.
func runTestSameEncodingAsCpp(src []byte) error {
	got := Encode(nil, src)

	cmd := exec.Command(snappytoolCmdName, "-e")
	cmd.Stdin = bytes.NewReader(src)
	want, err := cmd.Output()
	if err != nil {
		return fmt.Errorf("could not run snappytool: %v", err)
	}
	return cmp(got, want)
}

// TestSameEncodingAsCppShortCopies compares the Go and snappytool encodings
// of runs of the byte 'a' with lengths 0 through 20.
func TestSameEncodingAsCppShortCopies(t *testing.T) {
	if msg := skipTestSameEncodingAsCpp(); msg != "" {
		t.Skip(msg)
	}
	src := bytes.Repeat([]byte{'a'}, 20)
	for i := 0; i <= len(src); i++ {
		if err := runTestSameEncodingAsCpp(src[:i]); err != nil {
			t.Errorf("i=%d: %v", i, err)
		}
	}
}

// TestSameEncodingAsCppLongFiles compares the Go and snappytool encodings of
// every entry in testFiles, truncated to the entry's sizeLimit when one is
// set.
func TestSameEncodingAsCppLongFiles(t *testing.T) {
	if msg := skipTestSameEncodingAsCpp(); msg != "" {
		t.Skip(msg)
	}
	failed := false
	for i, tf := range testFiles {
		if err := downloadBenchmarkFiles(t, tf.filename); err != nil {
			t.Fatalf("failed to download testdata: %s", err)
		}
		data := readFile(t, filepath.Join(benchDir, tf.filename))
		if n := tf.sizeLimit; 0 < n && n < len(data) {
			data = data[:n]
		}
		if err := runTestSameEncodingAsCpp(data); err != nil {
			t.Errorf("i=%d: %v", i, err)
			failed = true
		}
	}
	if failed {
		// NOTE(review): the URL host in this message looks like the result of
		// a vendoring path rewrite of github.com/google/snappy — confirm
		// before relying on the link.
		t.Errorf("was the snappytool program built against the C++ snappy library version " +
			"d53de187 or later, commited on 2016-04-05? See " +
			"https://yougam/libraries/google/snappy/commit/d53de18799418e113e44444252a39b12a0e4e0cc")
	}
}

// TestSlowForwardCopyOverrun tests the "expand the pattern" algorithm
// described in decode_amd64.s and its claim of a 10 byte overrun worst case.
func TestSlowForwardCopyOverrun(t *testing.T) {
	const base = 100

	for length := 1; length < 18; length++ {
		for offset := 1; offset < 18; offset++ {
			// This is a pure arithmetic simulation: d, a, l and o are
			// positions and counts, and highWaterMark tracks the furthest
			// position that any simulated 8-byte store would reach.
			highWaterMark := base
			d := base
			l := length
			o := offset

			// makeOffsetAtLeast8
			for o < 8 {
				if end := d + 8; highWaterMark < end {
					highWaterMark = end
				}
				l -= o
				d += o
				o += o
			}

			// fixUpSlowForwardCopy
			a := d
			d += l

			// finishSlowForwardCopy
			for l > 0 {
				if end := a + 8; highWaterMark < end {
					highWaterMark = end
				}
				a += 8
				l -= 8
			}

			dWant := base + length
			overrun := highWaterMark - dWant
			if d != dWant || overrun < 0 || 10 < overrun {
				t.Errorf("length=%d, offset=%d: d and overrun: got (%d, %d), want (%d, something in [0, 10])",
					length, offset, d, overrun, dWant)
			}
		}
	}
}

// TestEncodeNoiseThenRepeats encodes input for which the first half is very
// incompressible and the second half is very compressible. The encoded form's
// length should be closer to 50% of the original length than 100%.
func TestEncodeNoiseThenRepeats(t *testing.T) {
	for _, origLen := range []int{256 * 1024, 2048 * 1024} {
		src := make([]byte, origLen)
		rng := rand.New(rand.NewSource(1))
		firstHalf, secondHalf := src[:origLen/2], src[origLen/2:]
		for i := range firstHalf {
			firstHalf[i] = uint8(rng.Intn(256))
		}
		for i := range secondHalf {
			secondHalf[i] = uint8(i >> 8)
		}
		dst := Encode(nil, src)
		// The threshold is 75% of the original length.
		if got, want := len(dst), origLen*3/4; got >= want {
			t.Errorf("origLen=%d: got %d encoded bytes, want less than %d", origLen, got, want)
		}
	}
}

// TestFramingFormat writes a mixed compressible/incompressible input through
// NewWriter and reads it back through NewReader, checking that the bytes
// survive the round trip.
func TestFramingFormat(t *testing.T) {
	// src is comprised of alternating 1e5-sized sequences of random
	// (incompressible) bytes and repeated (compressible) bytes. 1e5 was chosen
	// because it is larger than maxBlockSize (64k).
	src := make([]byte, 1e6)
	rng := rand.New(rand.NewSource(1))
	for i := 0; i < 10; i++ {
		if i%2 == 0 {
			for j := 0; j < 1e5; j++ {
				src[1e5*i+j] = uint8(rng.Intn(256))
			}
		} else {
			for j := 0; j < 1e5; j++ {
				src[1e5*i+j] = uint8(i)
			}
		}
	}

	buf := new(bytes.Buffer)
	if _, err := NewWriter(buf).Write(src); err != nil {
		t.Fatalf("Write: encoding: %v", err)
	}
	dst, err := ioutil.ReadAll(NewReader(buf))
	if err != nil {
		t.Fatalf("ReadAll: decoding: %v", err)
	}
	if err := cmp(dst, src); err != nil {
		t.Fatal(err)
	}
}

// TestWriterGoldenOutput checks the exact bytes that a buffered Writer emits
// for three flushed chunks: one incompressible and two compressible.
func TestWriterGoldenOutput(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewBufferedWriter(buf)
	defer w.Close()
	w.Write([]byte("abcd")) // Not compressible.
	w.Flush()
	w.Write(bytes.Repeat([]byte{'A'}, 150)) // Compressible.
	w.Flush()
	// The next chunk is also compressible, but a naive, greedy encoding of the
	// overall length 67 copy as a length 64 copy (the longest expressible as a
	// tagCopy1 or tagCopy2) plus a length 3 remainder would be two 3-byte
	// tagCopy2 tags (6 bytes), since the minimum length for a tagCopy1 is 4
	// bytes. Instead, we could do it shorter, in 5 bytes: a 3-byte tagCopy2
	// (of length 60) and a 2-byte tagCopy1 (of length 7).
	w.Write(bytes.Repeat([]byte{'B'}, 68))
	w.Flush()

	got := buf.String()
	want := strings.Join([]string{
		magicChunk,
		"\x01\x08\x00\x00", // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
		"\x68\x10\xe6\xb6", // Checksum.
		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
		"\x00\x11\x00\x00", // Compressed chunk, 17 bytes long (including 4 byte checksum).
		"\x5f\xeb\xf2\x10", // Checksum.
		"\x96\x01",         // Compressed payload: Uncompressed length (varint encoded): 150.
		"\x00\x41",         // Compressed payload: tagLiteral, length=1, "A".
		"\xfe\x01\x00",     // Compressed payload: tagCopy2, length=64, offset=1.
		"\xfe\x01\x00",     // Compressed payload: tagCopy2, length=64, offset=1.
		"\x52\x01\x00",     // Compressed payload: tagCopy2, length=21, offset=1.
		"\x00\x0c\x00\x00", // Compressed chunk, 12 bytes long (including 4 byte checksum).
		"\x27\x50\xe4\x4e", // Checksum.
		"\x44",             // Compressed payload: Uncompressed length (varint encoded): 68.
		"\x00\x42",         // Compressed payload: tagLiteral, length=1, "B".
		"\xee\x01\x00",     // Compressed payload: tagCopy2, length=60, offset=1.
		"\x0d\x01",         // Compressed payload: tagCopy1, length=7, offset=1.
	}, "")
	if got != want {
		t.Fatalf("\ngot: % x\nwant: % x", got, want)
	}
}

// TestNewBufferedWriter writes every subset of a fixed list of inputs through
// a buffered Writer, closes it, and checks that reading the result back
// yields the concatenation of the chosen inputs.
func TestNewBufferedWriter(t *testing.T) {
	// Test all 32 possible sub-sequences of these 5 input slices.
	//
	// Their lengths sum to 400,000, which is over 6 times the Writer ibuf
	// capacity: 6 * maxBlockSize is 393,216.
	inputs := [][]byte{
		bytes.Repeat([]byte{'a'}, 40000),
		bytes.Repeat([]byte{'b'}, 150000),
		bytes.Repeat([]byte{'c'}, 60000),
		bytes.Repeat([]byte{'d'}, 120000),
		bytes.Repeat([]byte{'e'}, 30000),
	}
loop:
	for i := 0; i < 1<<uint(len(inputs)); i++ {
		var want []byte
		buf := new(bytes.Buffer)
		w := NewBufferedWriter(buf)
		for j, input := range inputs {
			// Bit j of i selects whether inputs[j] is part of this subset.
			if i&(1<<uint(j)) == 0 {
				continue
			}
			if _, err := w.Write(input); err != nil {
				t.Errorf("i=%#02x: j=%d: Write: %v", i, j, err)
				continue loop
			}
			want = append(want, input...)
		}
		if err := w.Close(); err != nil {
			t.Errorf("i=%#02x: Close: %v", i, err)
			continue
		}
		got, err := ioutil.ReadAll(NewReader(buf))
		if err != nil {
			t.Errorf("i=%#02x: ReadAll: %v", i, err)
			continue
		}
		if err := cmp(got, want); err != nil {
			t.Errorf("i=%#02x: %v", i, err)
			continue
		}
	}
}

// TestFlush checks that a buffered Writer passes nothing to the underlying
// io.Writer after a small Write, and passes something once Flush is called.
func TestFlush(t *testing.T) {
	buf := new(bytes.Buffer)
	w := NewBufferedWriter(buf)
	defer w.Close()
	if _, err := w.Write(bytes.Repeat([]byte{'x'}, 20)); err != nil {
		t.Fatalf("Write: %v", err)
	}
	if n := buf.Len(); n != 0 {
		t.Fatalf("before Flush: %d bytes were written to the underlying io.Writer, want 0", n)
	}
	if err := w.Flush(); err != nil {
		t.Fatalf("Flush: %v", err)
	}
	if n := buf.Len(); n == 0 {
		t.Fatalf("after Flush: %d bytes were written to the underlying io.Writer, want non-0", n)
	}
}

// TestReaderUncompressedDataOK checks that a Reader decodes a stream holding
// a single well-formed uncompressed chunk.
func TestReaderUncompressedDataOK(t *testing.T) {
	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x08\x00\x00" + // Uncompressed chunk, 8 bytes long (including 4 byte checksum).
		"\x68\x10\xe6\xb6" + // Checksum.
		"\x61\x62\x63\x64", // Uncompressed payload: "abcd".
	))
	g, err := ioutil.ReadAll(r)
	if err != nil {
		t.Fatal(err)
	}
	if got, want := string(g), "abcd"; got != want {
		t.Fatalf("got %q, want %q", got, want)
	}
}

// TestReaderUncompressedDataNoPayload checks that a Reader reports ErrCorrupt
// for an uncompressed chunk whose header is not followed by any chunk data.
func TestReaderUncompressedDataNoPayload(t *testing.T) {
	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x04\x00\x00" + // Uncompressed chunk, 4 bytes long.
		"", // No payload; corrupt input.
	))
	if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
		t.Fatalf("got %v, want %v", err, ErrCorrupt)
	}
}

// TestReaderUncompressedDataTooLong checks that a Reader reports ErrCorrupt
// for an uncompressed chunk one byte longer than the maximum legal length.
func TestReaderUncompressedDataTooLong(t *testing.T) {
	// https://github.com/google/snappy/blob/master/framing_format.txt section
	// 4.3 says that "the maximum legal chunk length... is 65540", or 0x10004.
	const n = 0x10005

	r := NewReader(strings.NewReader(magicChunk +
		"\x01\x05\x00\x01" + // Uncompressed chunk, n bytes long.
		strings.Repeat("\x00", n),
	))
	if _, err := ioutil.ReadAll(r); err != ErrCorrupt {
		t.Fatalf("got %v, want %v", err, ErrCorrupt)
	}
}

// TestReaderReset checks that a single Reader can be Reset and reused across
// a sequence of valid streams, invalid streams and partially read streams.
func TestReaderReset(t *testing.T) {
	gold := bytes.Repeat([]byte("All that is gold does not glitter,\n"), 10000)
	buf := new(bytes.Buffer)
	if _, err := NewWriter(buf).Write(gold); err != nil {
		t.Fatalf("Write: %v", err)
	}
	encoded, invalid, partial := buf.String(), "invalid", "partial"
	r := NewReader(nil)
	for i, s := range []string{encoded, invalid, partial, encoded, partial, invalid, encoded, encoded} {
		if s == partial {
			// "partial" is a marker, not stream content: read only the first
			// 101 bytes of a valid stream and leave the rest unread.
			r.Reset(strings.NewReader(encoded))
			if _, err := r.Read(make([]byte, 101)); err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
			continue
		}
		r.Reset(strings.NewReader(s))
		got, err := ioutil.ReadAll(r)
		switch s {
		case encoded:
			if err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
			if err := cmp(got, gold); err != nil {
				t.Errorf("#%d: %v", i, err)
				continue
			}
		case invalid:
			if err == nil {
				t.Errorf("#%d: got nil error, want non-nil", i)
				continue
			}
		}
	}
}

// TestWriterReset checks that a Writer, both unbuffered and buffered, can be
// Reset onto a fresh destination repeatedly, and that each destination
// decodes to exactly what was written after that Reset.
func TestWriterReset(t *testing.T) {
	gold := bytes.Repeat([]byte("Not all those who wander are lost;\n"), 10000)
	const n = 20
	for _, buffered := range []bool{false, true} {
		var w *Writer
		if buffered {
			w = NewBufferedWriter(nil)
			defer w.Close()
		} else {
			w = NewWriter(nil)
		}

		var gots, wants [][]byte
		failed := false
		for i := 0; i <= n; i++ {
			buf := new(bytes.Buffer)
			w.Reset(buf)
			// Round i writes the first i/n fraction of gold.
			want := gold[:len(gold)*i/n]
			if _, err := w.Write(want); err != nil {
				t.Errorf("#%d: Write: %v", i, err)
				failed = true
				continue
			}
			if buffered {
				if err := w.Flush(); err != nil {
					t.Errorf("#%d: Flush: %v", i, err)
					failed = true
					continue
				}
			}
			got, err := ioutil.ReadAll(NewReader(buf))
			if err != nil {
				t.Errorf("#%d: ReadAll: %v", i, err)
				failed = true
				continue
			}
			gots = append(gots, got)
			wants = append(wants, want)
		}
		// gots and wants are only index-aligned with the round number if no
		// round was skipped, so compare them only when nothing failed.
		if failed {
			continue
		}
		for i := range gots {
			if err := cmp(gots[i], wants[i]); err != nil {
				t.Errorf("#%d: %v", i, err)
			}
		}
	}
}

// TestWriterResetWithoutFlush checks that bytes written to a buffered Writer
// but not flushed before Reset do not show up in the new destination.
func TestWriterResetWithoutFlush(t *testing.T) {
	buf0 := new(bytes.Buffer)
	buf1 := new(bytes.Buffer)
	w := NewBufferedWriter(buf0)
	if _, err := w.Write([]byte("xxx")); err != nil {
		t.Fatalf("Write #0: %v", err)
	}
	// Note that we don't Flush the Writer before calling Reset.
	w.Reset(buf1)
	if _, err := w.Write([]byte("yyy")); err != nil {
		t.Fatalf("Write #1: %v", err)
	}
	if err := w.Flush(); err != nil {
		t.Fatalf("Flush: %v", err)
	}
	got, err := ioutil.ReadAll(NewReader(buf1))
	if err != nil {
		t.Fatalf("ReadAll: %v", err)
	}
	if err := cmp(got, []byte("yyy")); err != nil {
		t.Fatal(err)
	}
}

// writeCounter is an io.Writer that discards its input and counts how many
// times Write has been called.
type writeCounter int

// Write increments the counter and reports all of p as written.
func (c *writeCounter) Write(p []byte) (int, error) {
	*c++
	return len(p), nil
}

// TestNumUnderlyingWrites tests that each Writer flush only makes one or two
// Write calls on its underlying io.Writer, depending on whether or not the
// flushed buffer was compressible.
922 func TestNumUnderlyingWrites(t *testing.T) { 923 testCases := []struct { 924 input []byte 925 want int 926 }{ 927 {bytes.Repeat([]byte{'x'}, 100), 1}, 928 {bytes.Repeat([]byte{'y'}, 100), 1}, 929 {[]byte("ABCDEFGHIJKLMNOPQRST"), 2}, 930 } 931 932 var c writeCounter 933 w := NewBufferedWriter(&c) 934 defer w.Close() 935 for i, tc := range testCases { 936 c = 0 937 if _, err := w.Write(tc.input); err != nil { 938 t.Errorf("#%d: Write: %v", i, err) 939 continue 940 } 941 if err := w.Flush(); err != nil { 942 t.Errorf("#%d: Flush: %v", i, err) 943 continue 944 } 945 if int(c) != tc.want { 946 t.Errorf("#%d: got %d underlying writes, want %d", i, c, tc.want) 947 continue 948 } 949 } 950 } 951 952 func benchDecode(b *testing.B, src []byte) { 953 encoded := Encode(nil, src) 954 // Bandwidth is in amount of uncompressed data. 955 b.SetBytes(int64(len(src))) 956 b.ResetTimer() 957 for i := 0; i < b.N; i++ { 958 Decode(src, encoded) 959 } 960 } 961 962 func benchEncode(b *testing.B, src []byte) { 963 // Bandwidth is in amount of uncompressed data. 964 b.SetBytes(int64(len(src))) 965 dst := make([]byte, MaxEncodedLen(len(src))) 966 b.ResetTimer() 967 for i := 0; i < b.N; i++ { 968 Encode(dst, src) 969 } 970 } 971 972 func testOrBenchmark(b testing.TB) string { 973 if _, ok := b.(*testing.B); ok { 974 return "benchmark" 975 } 976 return "test" 977 } 978 979 func readFile(b testing.TB, filename string) []byte { 980 src, err := ioutil.ReadFile(filename) 981 if err != nil { 982 b.Skipf("skipping %s: %v", testOrBenchmark(b), err) 983 } 984 if len(src) == 0 { 985 b.Fatalf("%s has zero length", filename) 986 } 987 return src 988 } 989 990 // expand returns a slice of length n containing repeated copies of src. 
991 func expand(src []byte, n int) []byte { 992 dst := make([]byte, n) 993 for x := dst; len(x) > 0; { 994 i := copy(x, src) 995 x = x[i:] 996 } 997 return dst 998 } 999 1000 func benchWords(b *testing.B, n int, decode bool) { 1001 // Note: the file is OS-language dependent so the resulting values are not 1002 // directly comparable for non-US-English OS installations. 1003 data := expand(readFile(b, "/usr/share/dict/words"), n) 1004 if decode { 1005 benchDecode(b, data) 1006 } else { 1007 benchEncode(b, data) 1008 } 1009 } 1010 1011 func BenchmarkWordsDecode1e1(b *testing.B) { benchWords(b, 1e1, true) } 1012 func BenchmarkWordsDecode1e2(b *testing.B) { benchWords(b, 1e2, true) } 1013 func BenchmarkWordsDecode1e3(b *testing.B) { benchWords(b, 1e3, true) } 1014 func BenchmarkWordsDecode1e4(b *testing.B) { benchWords(b, 1e4, true) } 1015 func BenchmarkWordsDecode1e5(b *testing.B) { benchWords(b, 1e5, true) } 1016 func BenchmarkWordsDecode1e6(b *testing.B) { benchWords(b, 1e6, true) } 1017 func BenchmarkWordsEncode1e1(b *testing.B) { benchWords(b, 1e1, false) } 1018 func BenchmarkWordsEncode1e2(b *testing.B) { benchWords(b, 1e2, false) } 1019 func BenchmarkWordsEncode1e3(b *testing.B) { benchWords(b, 1e3, false) } 1020 func BenchmarkWordsEncode1e4(b *testing.B) { benchWords(b, 1e4, false) } 1021 func BenchmarkWordsEncode1e5(b *testing.B) { benchWords(b, 1e5, false) } 1022 func BenchmarkWordsEncode1e6(b *testing.B) { benchWords(b, 1e6, false) } 1023 1024 func BenchmarkRandomEncode(b *testing.B) { 1025 rng := rand.New(rand.NewSource(1)) 1026 data := make([]byte, 1<<20) 1027 for i := range data { 1028 data[i] = uint8(rng.Intn(256)) 1029 } 1030 benchEncode(b, data) 1031 } 1032 1033 // testFiles' values are copied directly from 1034 // https://raw.githubusercontent.com/google/snappy/master/snappy_unittest.cc 1035 // The label field is unused in snappy-go. 
1036 var testFiles = []struct { 1037 label string 1038 filename string 1039 sizeLimit int 1040 }{ 1041 {"html", "html", 0}, 1042 {"urls", "urls.10K", 0}, 1043 {"jpg", "fireworks.jpeg", 0}, 1044 {"jpg_200", "fireworks.jpeg", 200}, 1045 {"pdf", "paper-100k.pdf", 0}, 1046 {"html4", "html_x_4", 0}, 1047 {"txt1", "alice29.txt", 0}, 1048 {"txt2", "asyoulik.txt", 0}, 1049 {"txt3", "lcet10.txt", 0}, 1050 {"txt4", "plrabn12.txt", 0}, 1051 {"pb", "geo.protodata", 0}, 1052 {"gaviota", "kppkn.gtb", 0}, 1053 } 1054 1055 const ( 1056 // The benchmark data files are at this canonical URL. 1057 benchURL = "https://raw.githubusercontent.com/google/snappy/master/testdata/" 1058 1059 // They are copied to this local directory. 1060 benchDir = "testdata/bench" 1061 ) 1062 1063 func downloadBenchmarkFiles(b testing.TB, basename string) (errRet error) { 1064 filename := filepath.Join(benchDir, basename) 1065 if stat, err := os.Stat(filename); err == nil && stat.Size() != 0 { 1066 return nil 1067 } 1068 1069 if !*download { 1070 b.Skipf("test data not found; skipping %s without the -download flag", testOrBenchmark(b)) 1071 } 1072 // Download the official snappy C++ implementation reference test data 1073 // files for benchmarking. 
1074 if err := os.MkdirAll(benchDir, 0777); err != nil && !os.IsExist(err) { 1075 return fmt.Errorf("failed to create %s: %s", benchDir, err) 1076 } 1077 1078 f, err := os.Create(filename) 1079 if err != nil { 1080 return fmt.Errorf("failed to create %s: %s", filename, err) 1081 } 1082 defer f.Close() 1083 defer func() { 1084 if errRet != nil { 1085 os.Remove(filename) 1086 } 1087 }() 1088 url := benchURL + basename 1089 resp, err := http.Get(url) 1090 if err != nil { 1091 return fmt.Errorf("failed to download %s: %s", url, err) 1092 } 1093 defer resp.Body.Close() 1094 if s := resp.StatusCode; s != http.StatusOK { 1095 return fmt.Errorf("downloading %s: HTTP status code %d (%s)", url, s, http.StatusText(s)) 1096 } 1097 _, err = io.Copy(f, resp.Body) 1098 if err != nil { 1099 return fmt.Errorf("failed to download %s to %s: %s", url, filename, err) 1100 } 1101 return nil 1102 } 1103 1104 func benchFile(b *testing.B, i int, decode bool) { 1105 if err := downloadBenchmarkFiles(b, testFiles[i].filename); err != nil { 1106 b.Fatalf("failed to download testdata: %s", err) 1107 } 1108 data := readFile(b, filepath.Join(benchDir, testFiles[i].filename)) 1109 if n := testFiles[i].sizeLimit; 0 < n && n < len(data) { 1110 data = data[:n] 1111 } 1112 if decode { 1113 benchDecode(b, data) 1114 } else { 1115 benchEncode(b, data) 1116 } 1117 } 1118 1119 // Naming convention is kept similar to what snappy's C++ implementation uses. 
1120 func Benchmark_UFlat0(b *testing.B) { benchFile(b, 0, true) } 1121 func Benchmark_UFlat1(b *testing.B) { benchFile(b, 1, true) } 1122 func Benchmark_UFlat2(b *testing.B) { benchFile(b, 2, true) } 1123 func Benchmark_UFlat3(b *testing.B) { benchFile(b, 3, true) } 1124 func Benchmark_UFlat4(b *testing.B) { benchFile(b, 4, true) } 1125 func Benchmark_UFlat5(b *testing.B) { benchFile(b, 5, true) } 1126 func Benchmark_UFlat6(b *testing.B) { benchFile(b, 6, true) } 1127 func Benchmark_UFlat7(b *testing.B) { benchFile(b, 7, true) } 1128 func Benchmark_UFlat8(b *testing.B) { benchFile(b, 8, true) } 1129 func Benchmark_UFlat9(b *testing.B) { benchFile(b, 9, true) } 1130 func Benchmark_UFlat10(b *testing.B) { benchFile(b, 10, true) } 1131 func Benchmark_UFlat11(b *testing.B) { benchFile(b, 11, true) } 1132 func Benchmark_ZFlat0(b *testing.B) { benchFile(b, 0, false) } 1133 func Benchmark_ZFlat1(b *testing.B) { benchFile(b, 1, false) } 1134 func Benchmark_ZFlat2(b *testing.B) { benchFile(b, 2, false) } 1135 func Benchmark_ZFlat3(b *testing.B) { benchFile(b, 3, false) } 1136 func Benchmark_ZFlat4(b *testing.B) { benchFile(b, 4, false) } 1137 func Benchmark_ZFlat5(b *testing.B) { benchFile(b, 5, false) } 1138 func Benchmark_ZFlat6(b *testing.B) { benchFile(b, 6, false) } 1139 func Benchmark_ZFlat7(b *testing.B) { benchFile(b, 7, false) } 1140 func Benchmark_ZFlat8(b *testing.B) { benchFile(b, 8, false) } 1141 func Benchmark_ZFlat9(b *testing.B) { benchFile(b, 9, false) } 1142 func Benchmark_ZFlat10(b *testing.B) { benchFile(b, 10, false) } 1143 func Benchmark_ZFlat11(b *testing.B) { benchFile(b, 11, false) }