// github.com/insionng/yougam@v0.0.0-20170714101924-2bc18d833463/libraries/klauspost/compress/snappy/encode.go

// Copyright 2011 The Snappy-Go Authors. All rights reserved.
// Copyright 2016 Klaus Post. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package snappy

import (
	"encoding/binary"
	"errors"
	"io"
	"sync"
)

// We limit how far copy back-references can go, the same as the C++ code.
const maxOffset = 1 << 15

// emitLiteral writes a literal chunk and returns the number of bytes written.
func emitLiteral(dst, lit []byte) int {
	i, n := 0, uint(len(lit)-1)
	switch {
	case n < 60:
		dst[0] = uint8(n)<<2 | tagLiteral
		i = 1
	case n < 1<<8:
		dst[0] = 60<<2 | tagLiteral
		dst[1] = uint8(n)
		i = 2
	case n < 1<<16:
		dst[0] = 61<<2 | tagLiteral
		dst[1] = uint8(n)
		dst[2] = uint8(n >> 8)
		i = 3
	case n < 1<<24:
		dst[0] = 62<<2 | tagLiteral
		dst[1] = uint8(n)
		dst[2] = uint8(n >> 8)
		dst[3] = uint8(n >> 16)
		i = 4
	case int64(n) < 1<<32:
		dst[0] = 63<<2 | tagLiteral
		dst[1] = uint8(n)
		dst[2] = uint8(n >> 8)
		dst[3] = uint8(n >> 16)
		dst[4] = uint8(n >> 24)
		i = 5
	default:
		panic("snappy: source buffer is too long")
	}
	if copy(dst[i:], lit) != len(lit) {
		panic("snappy: destination buffer is too short")
	}
	return i + len(lit)
}

// emitCopy writes a copy chunk and returns the number of bytes written.
func emitCopy(dst []byte, offset, length int) int {
	i := 0
	for length > 0 {
		x := length - 4
		if 0 <= x && x < 1<<3 && offset < 1<<11 {
			dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1
			dst[i+1] = uint8(offset)
			i += 2
			break
		}

		x = length
		if x > 1<<6 {
			x = 1 << 6
		}
		dst[i+0] = uint8(x-1)<<2 | tagCopy2
		dst[i+1] = uint8(offset)
		dst[i+2] = uint8(offset >> 8)
		i += 3
		length -= x
	}
	return i
}

var encPool = sync.Pool{New: func() interface{} { return new(encoder) }}

// Encode returns the encoded form of src. The returned slice may be a
// sub-slice of dst if dst was large enough to hold the entire encoded block.
// Otherwise, a newly allocated slice will be returned.
// It is valid to pass a nil dst.
//
// The source may not be larger than 2^32 - 1 bytes (4GB).
// This is a Snappy format limitation. Use the framing format Writer
// for larger data sizes.
func Encode(dst, src []byte) []byte {
	e := encPool.Get().(*encoder)
	if n := MaxEncodedLen(len(src)); n < 0 {
		panic(ErrTooLarge)
	} else if len(dst) < n {
		dst = make([]byte, n)
	}

	// The block starts with the varint-encoded length of the decompressed bytes.
	d := binary.PutUvarint(dst, uint64(len(src)))

	for len(src) > 0 {
		p := src
		src = nil
		if len(p) > maxInternalEncodeSrcLen {
			p, src = p[:maxInternalEncodeSrcLen], p[maxInternalEncodeSrcLen:]
		}
		d += e.encode(dst[d:], p)
	}
	encPool.Put(e)
	return dst[:d]
}

const tableBits = 14             // Bits used in the table
const tableSize = 1 << tableBits // Size of the table

var useSSE42 bool
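// A minimal round-trip sketch for Encode (illustrative caller code, not part
// of this file; it assumes the package is imported as "snappy" and that the
// companion Decode function from decode.go is available):
//
//	src := bytes.Repeat([]byte("hello "), 100)
//	enc := snappy.Encode(nil, src)      // nil dst: Encode allocates.
//	dec, err := snappy.Decode(nil, enc) // round-trip back to src.
//	if err != nil || !bytes.Equal(dec, src) {
//		panic("round-trip failed")
//	}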
// maxInternalEncodeSrcLen must be less than math.MaxInt32, so that in the
// (internal) encode function, it is safe for the s variable (which indexes
// the src slice), and therefore the hash table entries, to have type int32
// instead of int.
const maxInternalEncodeSrcLen = 0x40000000

type encoder struct {
	table [tableSize]int32
	cur   int
}

// encode encodes a non-empty src to a guaranteed-large-enough dst. It assumes
// that the varint-encoded length of the decompressed bytes has already been
// written.
//
// It also assumes that:
//	len(dst) >= MaxEncodedLen(len(src)) &&
//	0 < len(src) &&
//	len(src) <= maxInternalEncodeSrcLen &&
//	maxInternalEncodeSrcLen < math.MaxInt32.
func (e *encoder) encode(dst, src []byte) (d int) {
	// Return early if src is short.
	if len(src) <= 4 {
		if len(src) != 0 {
			d += emitLiteral(dst[d:], src)
		}
		e.cur += len(src)
		return d
	}
	if useSSE42 {
		return e.encSSE4(dst, src)
	}
	return e.enc(dst, src)
}

const skipBits = 4
const maxSkip = (1 << 7) - 1

func (e *encoder) enc(dst, src []byte) (d int) {
	// Ensure that e.cur doesn't wrap.
	if e.cur > maxInternalEncodeSrcLen {
		e.cur = 0
	}

	// Iterate over the source bytes.
	var (
		s    int          // The iterator position.
		t    int          // The last position with the same hash as s.
		lit  int          // The start position of any pending literal bytes.
		tadd = -1 - e.cur // Added to t to adjust the match position to an offset.
		sadd = 1 + e.cur  // Added to s to adjust the match position to an offset.
	)
	for s+3 < len(src) {
		// Update the hash table.
		b0, b1, b2, b3 := src[s], src[s+1], src[s+2], src[s+3]
		h := uint32(b0) | uint32(b1)<<8 | uint32(b2)<<16 | uint32(b3)<<24
		p := &e.table[(h*0x1e35a7bd)>>(32-tableBits)]
		// We need to store values in [-1, inf) in the table. To save some
		// initialization time, (re)use the table's zero value and shift the
		// values against this zero: add 1 on writes, subtract 1 on reads.
		t, *p = int(*p)+tadd, int32(s+sadd)

		// Calculate the offset in the current buffer. If t >= s, the
		// subtraction is negative, and converting it to a uint always
		// yields a value greater than maxOffset.
		offset := uint(s - t - 1)

		// If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte.
		if t < 0 || offset >= (maxOffset-1) || b0 != src[t] || b1 != src[t+1] || b2 != src[t+2] || b3 != src[t+3] {
			// Skip ahead more aggressively the longer it has been since the last match.
			s += 1 + (((s - lit) >> skipBits) & maxSkip)
			continue
		}

		// Otherwise, we have a match. First, emit any pending literal bytes.
		if lit != s {
			d += emitLiteral(dst[d:], src[lit:s])
		}
		// Extend the match to be as long as possible.
		s0 := s
		s, t = s+4, t+4
		for s < len(src) && src[s] == src[t] {
			s++
			t++
		}
		// Emit the copied bytes.
		d += emitCopy(dst[d:], s-t, s-s0)
		lit = s
	}

	// Emit any final pending literal bytes and return.
	if lit != len(src) {
		d += emitLiteral(dst[d:], src[lit:])
	}

	e.cur += len(src)
	return d
}
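// To illustrate the skip heuristic used by enc and encSSE4: the step size
//
//	step := 1 + (((s - lit) >> skipBits) & maxSkip)
//
// grows with the length of the pending literal run. With skipBits = 4, the
// scanner advances 1 byte per hash probe while s-lit is in [0, 15], 2 bytes
// while it is in [16, 31], 3 bytes in [32, 47], and so on, capped at
// 1+maxSkip = 128 bytes per step. On incompressible input this greatly
// reduces the number of hash probes, at a small cost in missed matches.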
func (e *encoder) encSSE4(dst, src []byte) (d int) {
	// Ensure that e.cur doesn't wrap.
	if e.cur > maxInternalEncodeSrcLen {
		e.cur = 0
	}

	// Iterate over the source bytes.
	var (
		s    int          // The iterator position.
		t    int          // The last position with the same hash as s.
		lit  int          // The start position of any pending literal bytes.
		tadd = -1 - e.cur // Added to t to adjust the match position to an offset.
		sadd = 1 + e.cur  // Added to s to adjust the match position to an offset.
	)
	for s+3 < len(src) {
		// Update the hash table.
		h := uint32(src[s]) | uint32(src[s+1])<<8 | uint32(src[s+2])<<16 | uint32(src[s+3])<<24
		p := &e.table[(h*0x1e35a7bd)>>(32-tableBits)]
		// We need to store values in [-1, inf) in the table. To save some
		// initialization time, (re)use the table's zero value and shift the
		// values against this zero: add 1 on writes, subtract 1 on reads.
		t, *p = int(*p)+tadd, int32(s+sadd)

		// Calculate the offset in the current buffer. If t >= s, the
		// subtraction is negative, and converting it to a uint always
		// yields a value greater than maxOffset.
		offset := uint(s - t - 1)

		// If t is invalid, accumulate a literal byte. This also saves the
		// branch that would test whether t >= s, which would indicate a
		// forward reference resulting from e.cur wrapping.
		if t < 0 || offset >= maxOffset-1 {
			// Skip ahead more aggressively the longer it has been since the last match.
			s += 1 + (((s - lit) >> skipBits) & maxSkip)
			continue
		}

		length := len(src) - s

		// Extend the match to be as long as possible.
		match := matchLenSSE4(src[t:], src[s:], length)

		/* Debug check against the reference implementation:
		match2 := matchLenSSE4Ref(src[t:], src[s:], length)
		if match != match2 {
			fmt.Printf("%v\n%v\nlen: %d\n", src[t:t+length], src[s:s+length], len(src)-s)
			s := fmt.Sprintf("got %d != %d expected", match, match2)
			panic(s)
		}
		*/

		// If the match is too short, keep accumulating literal bytes.
		if match < 4 {
			s += 1 + (((s - lit) >> skipBits) & maxSkip)
			continue
		}

		// Otherwise, we have a match. First, emit any pending literal bytes.
		if lit != s {
			d += emitLiteral(dst[d:], src[lit:s])
		}

		// Emit the copied bytes.
		d += emitCopy(dst[d:], s-t, match)
		s += match
		lit = s
	}

	// Emit any final pending literal bytes and return.
	if lit != len(src) {
		d += emitLiteral(dst[d:], src[lit:])
	}

	e.cur += len(src)
	return d
}

// MaxEncodedLen returns the maximum length of a snappy block, given its
// uncompressed length.
//
// It will return a negative value if srcLen is too large to encode.
func MaxEncodedLen(srcLen int) int {
	n := uint64(srcLen)
	if n > 0xffffffff {
		return -1
	}
	// Compressed data can be defined as:
	//	compressed := item* literal*
	//	item       := literal* copy
	//
	// The trailing literal sequence has a space blowup of at most 62/60
	// since a literal of length 60 needs one tag byte + one extra byte
	// for length information.
	//
	// Item blowup is trickier to measure. Suppose the "copy" op copies
	// 4 bytes of data. Because of a special check in the encoding code,
	// we produce a 4-byte copy only if the offset is < 65536. Therefore
	// the copy op takes 3 bytes to encode, and this type of item leads
	// to at most the 62/60 blowup for representing literals.
	//
	// Suppose the "copy" op copies 5 bytes of data. If the offset is big
	// enough, it will take 5 bytes to encode the copy op. Therefore the
	// worst case here is a one-byte literal followed by a five-byte copy.
	// That is, 6 bytes of input turn into 7 bytes of "compressed" data.
	//
	// This last factor dominates the blowup, so the final estimate is:
	n = 32 + n + n/6
	if n > 0xffffffff {
		return -1
	}
	return int(n)
}

var errClosed = errors.New("snappy: Writer is closed")
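// As a worked example of the MaxEncodedLen bound above: for srcLen = 1<<20
// (1 MiB), the result is 32 + 1048576 + 1048576/6 = 32 + 1048576 + 174762 =
// 1223370 bytes, a worst-case expansion of roughly 16.7%. Callers can use it
// to size dst so that Encode never reallocates (illustrative caller code,
// not part of this file):
//
//	dst := make([]byte, snappy.MaxEncodedLen(len(src)))
//	dst = snappy.Encode(dst, src)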
// NewWriter returns a new Writer that compresses to w.
//
// The Writer returned does not buffer writes. There is no need to Flush or
// Close such a Writer.
//
// Deprecated: the Writer returned is not suitable for many small writes, only
// for a few large writes. Use NewBufferedWriter instead, which is efficient
// regardless of the frequency and shape of the writes, and remember to Close
// that Writer when done.
func NewWriter(w io.Writer) *Writer {
	return &Writer{
		w:    w,
		e:    encPool.Get().(*encoder),
		obuf: make([]byte, obufLen),
	}
}

// NewBufferedWriter returns a new Writer that compresses to w, using the
// framing format described at
// https://github.com/google/snappy/blob/master/framing_format.txt
//
// The Writer returned buffers writes. Users must call Close to guarantee all
// data has been forwarded to the underlying io.Writer. They may also call
// Flush zero or more times before calling Close.
func NewBufferedWriter(w io.Writer) *Writer {
	return &Writer{
		w:    w,
		e:    encPool.Get().(*encoder),
		ibuf: make([]byte, 0, maxUncompressedChunkLen),
		obuf: make([]byte, obufLen),
	}
}

// Writer is an io.Writer that can write Snappy-compressed bytes.
type Writer struct {
	w   io.Writer
	err error
	e   *encoder

	// ibuf is a buffer for the incoming (uncompressed) bytes.
	//
	// Its use is optional. For backwards compatibility, Writers created by the
	// NewWriter function have ibuf == nil, do not buffer incoming bytes, and
	// therefore do not need to be Flush'ed or Close'd.
	ibuf []byte

	// obuf is a buffer for the outgoing (compressed) bytes.
	obuf []byte

	// wroteStreamHeader is whether we have written the stream header.
	wroteStreamHeader bool
}

// Reset discards the writer's state and switches the Snappy writer to write to
// w. This permits reusing a Writer rather than allocating a new one.
func (w *Writer) Reset(writer io.Writer) {
	w.w = writer
	w.err = nil
	if w.ibuf != nil {
		w.ibuf = w.ibuf[:0]
	}
	w.wroteStreamHeader = false
}
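// A minimal usage sketch for the buffered writer (illustrative caller code,
// not part of this file; it assumes a bytes.Buffer as the destination and a
// data []byte to compress):
//
//	var buf bytes.Buffer
//	w := snappy.NewBufferedWriter(&buf)
//	if _, err := w.Write(data); err != nil {
//		log.Fatal(err)
//	}
//	if err := w.Close(); err != nil { // Close flushes any buffered bytes.
//		log.Fatal(err)
//	}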
// Write satisfies the io.Writer interface.
func (w *Writer) Write(p []byte) (nRet int, errRet error) {
	if w.ibuf == nil {
		// Do not buffer incoming bytes. This does not perform or compress well
		// if the caller of Writer.Write writes many small slices. This
		// behavior is therefore deprecated, but still supported for backwards
		// compatibility with code that doesn't explicitly Flush or Close.
		return w.write(p)
	}

	// The remainder of this method is based on bufio.Writer.Write from the
	// standard library.

	for len(p) > (cap(w.ibuf)-len(w.ibuf)) && w.err == nil {
		var n int
		if len(w.ibuf) == 0 {
			// Large write, empty buffer.
			// Write directly from p to avoid a copy.
			n, _ = w.write(p)
		} else {
			n = copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
			w.ibuf = w.ibuf[:len(w.ibuf)+n]
			w.Flush()
		}
		nRet += n
		p = p[n:]
	}
	if w.err != nil {
		return nRet, w.err
	}
	n := copy(w.ibuf[len(w.ibuf):cap(w.ibuf)], p)
	w.ibuf = w.ibuf[:len(w.ibuf)+n]
	nRet += n
	return nRet, nil
}

func (w *Writer) write(p []byte) (nRet int, errRet error) {
	if w.err != nil {
		return 0, w.err
	}
	for len(p) > 0 {
		obufStart := len(magicChunk)
		if !w.wroteStreamHeader {
			w.wroteStreamHeader = true
			copy(w.obuf, magicChunk)
			obufStart = 0
		}

		var uncompressed []byte
		if len(p) > maxUncompressedChunkLen {
			uncompressed, p = p[:maxUncompressedChunkLen], p[maxUncompressedChunkLen:]
		} else {
			uncompressed, p = p, nil
		}
		checksum := crc(uncompressed)

		// Compress the buffer, discarding the result if the improvement
		// isn't at least 12.5%.

		// The block starts with the varint-encoded length of the decompressed bytes.
		d := binary.PutUvarint(w.obuf[obufHeaderLen:], uint64(len(uncompressed)))
		d += w.e.encode(w.obuf[obufHeaderLen+d:], uncompressed)

		chunkType := uint8(chunkTypeCompressedData)
		chunkLen := 4 + d
		obufEnd := obufHeaderLen + d
		if d >= len(uncompressed)-len(uncompressed)/8 {
			chunkType = chunkTypeUncompressedData
			chunkLen = 4 + len(uncompressed)
			obufEnd = obufHeaderLen
		}

		// Fill in the per-chunk header that comes before the body.
		w.obuf[len(magicChunk)+0] = chunkType
		w.obuf[len(magicChunk)+1] = uint8(chunkLen >> 0)
		w.obuf[len(magicChunk)+2] = uint8(chunkLen >> 8)
		w.obuf[len(magicChunk)+3] = uint8(chunkLen >> 16)
		w.obuf[len(magicChunk)+4] = uint8(checksum >> 0)
		w.obuf[len(magicChunk)+5] = uint8(checksum >> 8)
		w.obuf[len(magicChunk)+6] = uint8(checksum >> 16)
		w.obuf[len(magicChunk)+7] = uint8(checksum >> 24)

		if _, err := w.w.Write(w.obuf[obufStart:obufEnd]); err != nil {
			w.err = err
			return nRet, err
		}
		if chunkType == chunkTypeUncompressedData {
			if _, err := w.w.Write(uncompressed); err != nil {
				w.err = err
				return nRet, err
			}
		}
		nRet += len(uncompressed)
	}
	return nRet, nil
}

// Flush flushes the Writer to its underlying io.Writer.
func (w *Writer) Flush() error {
	if w.err != nil {
		return w.err
	}
	if len(w.ibuf) == 0 {
		return nil
	}
	w.write(w.ibuf)
	w.ibuf = w.ibuf[:0]
	return w.err
}

// Close calls Flush and then closes the Writer.
func (w *Writer) Close() error {
	w.Flush()
	ret := w.err
	if w.err == nil {
		w.err = errClosed
	}
	return ret
}
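// A sketch of reusing a Writer across several streams via Reset (illustrative
// caller code, not part of this file; f1 and f2 are hypothetical io.Writers).
// Reset clears the error state, including the errClosed set by Close, so a
// closed Writer becomes usable again:
//
//	w := snappy.NewBufferedWriter(f1)
//	w.Write(data1)
//	w.Close()
//	w.Reset(f2) // reuse the Writer and its buffers for a new stream
//	w.Write(data2)
//	w.Close()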