github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/database/leveldb.chai2010/src/snappy.cc

// Copyright 2005 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "snappy.h"
#include "snappy-internal.h"
#include "snappy-sinksource.h"

#include <stdio.h>

#include <algorithm>
#include <string>
#include <vector>


namespace snappy {

// Any hash function will produce a valid compressed bitstream, but a good
// hash function reduces the number of collisions and thus yields better
// compression for compressible input, and more speed for incompressible
// input. Of course, it doesn't hurt if the hash function is reasonably fast
// either, as it gets called a lot.
static inline uint32 HashBytes(uint32 bytes, int shift) {
  uint32 kMul = 0x1e35a7bd;
  return (bytes * kMul) >> shift;
}
static inline uint32 Hash(const char* p, int shift) {
  return HashBytes(UNALIGNED_LOAD32(p), shift);
}

size_t MaxCompressedLength(size_t source_len) {
  // Compressed data can be defined as:
  //    compressed := item* literal*
  //    item       := literal* copy
  //
  // The trailing literal sequence has a space blowup of at most 62/60
  // since a literal of length 60 needs one tag byte + one extra byte
  // for length information.
  //
  // Item blowup is trickier to measure. Suppose the "copy" op copies
  // 4 bytes of data. Because of a special check in the encoding code,
  // we produce a 4-byte copy only if the offset is < 65536. Therefore
  // the copy op takes 3 bytes to encode, and this type of item leads
  // to at most the 62/60 blowup for representing literals.
  //
  // Suppose the "copy" op copies 5 bytes of data. If the offset is big
  // enough, it will take 5 bytes to encode the copy op. Therefore the
  // worst case here is a one-byte literal followed by a five-byte copy.
  // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
  //
  // This last factor dominates the blowup, so the final estimate is:
  return 32 + source_len + source_len/6;
}

enum {
  LITERAL = 0,
  COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
  COPY_2_BYTE_OFFSET = 2,
  COPY_4_BYTE_OFFSET = 3
};

// Copy "len" bytes from "src" to "op", one byte at a time. Used for
// handling COPY operations where the input and output regions may
// overlap. For example, suppose:
//    src    == "ab"
//    op     == src + 2
//    len    == 20
// After IncrementalCopy(src, op, len), the result will have
// eleven copies of "ab"
//    ababababababababababab
// Note that this does not match the semantics of either memcpy()
// or memmove().
static inline void IncrementalCopy(const char* src, char* op, int len) {
  assert(len > 0);
  do {
    *op++ = *src++;
  } while (--len > 0);
}

// Equivalent to IncrementalCopy except that it can write up to ten extra
// bytes after the end of the copy, and that it is faster.
//
// The main part of this loop is a simple copy of eight bytes at a time until
// we've copied (at least) the requested amount of bytes. However, if op and
// src are less than eight bytes apart (indicating a repeating pattern of
// length < 8), we first need to expand the pattern in order to get the correct
// results. For instance, if the buffer looks like this, with the eight-byte
// <src> and <op> patterns marked as intervals:
//
//    abxxxxxxxxxxxx
//    [------]           src
//      [------]         op
//
// a single eight-byte copy from <src> to <op> will repeat the pattern once,
// after which we can move <op> two bytes without moving <src>:
//
//    ababxxxxxxxxxx
//    [------]           src
//        [------]       op
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// op - src == 1 and len == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.

namespace {

const int kMaxIncrementCopyOverflow = 10;

}  // namespace

static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
  while (op - src < 8) {
    UnalignedCopy64(src, op);
    len -= op - src;
    op += op - src;
  }
  while (len > 0) {
    UnalignedCopy64(src, op);
    src += 8;
    op += 8;
    len -= 8;
  }
}

static inline char* EmitLiteral(char* op,
                                const char* literal,
                                int len,
                                bool allow_fast_path) {
  int n = len - 1;      // Zero-length literals are disallowed
  if (n < 60) {
    // Fits in tag byte
    *op++ = LITERAL | (n << 2);

    // The vast majority of copies are below 16 bytes, for which a
    // call to memcpy is overkill. This fast path can sometimes
    // copy up to 15 bytes too much, but that is okay in the
    // main loop, since we have a bit to go on for both sides:
    //
    //   - The input will always have kInputMarginBytes = 15 extra
    //     available bytes, as long as we're in the main loop, and
    //     if not, allow_fast_path = false.
    //   - The output will always have 32 spare bytes (see
    //     MaxCompressedLength).
    if (allow_fast_path && len <= 16) {
      UnalignedCopy64(literal, op);
      UnalignedCopy64(literal + 8, op + 8);
      return op + len;
    }
  } else {
    // Encode in upcoming bytes
    char* base = op;
    int count = 0;
    op++;
    while (n > 0) {
      *op++ = n & 0xff;
      n >>= 8;
      count++;
    }
    assert(count >= 1);
    assert(count <= 4);
    *base = LITERAL | ((59+count) << 2);
  }
  memcpy(op, literal, len);
  return op + len;
}

static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
  assert(len <= 64);
  assert(len >= 4);
  assert(offset < 65536);

  if ((len < 12) && (offset < 2048)) {
    size_t len_minus_4 = len - 4;
    assert(len_minus_4 < 8);            // Must fit in 3 bits
    *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
    *op++ = offset & 0xff;
  } else {
    *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
    LittleEndian::Store16(op, offset);
    op += 2;
  }
  return op;
}

static inline char* EmitCopy(char* op, size_t offset, int len) {
  // Emit 64 byte copies but make sure to keep at least four bytes reserved
  while (len >= 68) {
    op = EmitCopyLessThan64(op, offset, 64);
    len -= 64;
  }

  // Emit an extra 60 byte copy if we have too much data to fit in one copy
  if (len > 64) {
    op = EmitCopyLessThan64(op, offset, 60);
    len -= 60;
  }

  // Emit remainder
  op = EmitCopyLessThan64(op, offset, len);
  return op;
}


bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
  uint32 v = 0;
  const char* limit = start + n;
  if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
    *result = v;
    return true;
  } else {
    return false;
  }
}

namespace internal {
uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
  // Use smaller hash table when input.size() is smaller, since we
  // fill the table, incurring O(hash table size) overhead for
  // compression, and if the input is short, we won't need that
  // many hash table entries anyway.
  assert(kMaxHashTableSize >= 256);
  size_t htsize = 256;
  while (htsize < kMaxHashTableSize && htsize < input_size) {
    htsize <<= 1;
  }

  uint16* table;
  if (htsize <= ARRAYSIZE(small_table_)) {
    table = small_table_;
  } else {
    if (large_table_ == NULL) {
      large_table_ = new uint16[kMaxHashTableSize];
    }
    table = large_table_;
  }

  *table_size = htsize;
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
}  // end namespace internal

// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
// equal UNALIGNED_LOAD32(p + offset). Motivation: On x86-64 hardware we have
// empirically found that overlapping loads such as
//  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
//
// We have different versions for 64- and 32-bit; ideally we would avoid the
// two functions and just inline the UNALIGNED_LOAD64 call into
// GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
// enough to avoid loading the value multiple times then. For 64-bit, the load
// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
// done at GetUint32AtOffset() time.

#ifdef ARCH_K8

typedef uint64 EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return UNALIGNED_LOAD64(ptr);
}

static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
  assert(offset >= 0);
  assert(offset <= 4);
  return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
}

#else

typedef const char* EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return ptr;
}

static inline uint32 GetUint32AtOffset(const char* v, int offset) {
  assert(offset >= 0);
  assert(offset <= 4);
  return UNALIGNED_LOAD32(v + offset);
}

#endif

// Flat array compression that does not emit the "uncompressed length"
// prefix. Compresses "input" string to the "*op" buffer.
//
// REQUIRES: "input" is at most "kBlockSize" bytes long.
// REQUIRES: "op" points to an array of memory that is at least
// "MaxCompressedLength(input.size())" in size.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
//
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
namespace internal {
char* CompressFragment(const char* input,
                       size_t input_size,
                       char* op,
                       uint16* table,
                       const int table_size) {
  // "ip" is the input pointer, and "op" is the output pointer.
  const char* ip = input;
  assert(input_size <= kBlockSize);
  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
  const int shift = 32 - Bits::Log2Floor(table_size);
  assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
  const char* ip_end = input + input_size;
  const char* base_ip = ip;
  // Bytes in [next_emit, ip) will be emitted as literal bytes. Or
  // [next_emit, ip_end) after the main loop.
  const char* next_emit = ip;

  const size_t kInputMarginBytes = 15;
  if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
    const char* ip_limit = input + input_size - kInputMarginBytes;

    for (uint32 next_hash = Hash(++ip, shift); ; ) {
      assert(next_emit < ip);
      // The body of this loop calls EmitLiteral once and then EmitCopy one or
      // more times. (The exception is that when we're close to exhausting
      // the input we goto emit_remainder.)
      //
      // In the first iteration of this loop we're just starting, so
      // there's nothing to copy, so calling EmitLiteral once is
      // necessary. And we only start a new iteration when the
      // current iteration has determined that a call to EmitLiteral will
      // precede the next call to EmitCopy (if any).
      //
      // Step 1: Scan forward in the input looking for a 4-byte-long match.
      // If we get close to exhausting the input then goto emit_remainder.
      //
      // Heuristic match skipping: If 32 bytes are scanned with no matches
      // found, start looking only at every other byte. If 32 more bytes are
      // scanned, look at every third byte, etc. When a match is found,
      // immediately go back to looking at every byte. This is a small loss
      // (~5% performance, ~0.1% density) for compressible data due to more
      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
      // win since the compressor quickly "realizes" the data is incompressible
      // and doesn't bother looking for matches everywhere.
      //
      // The "skip" variable keeps track of how many bytes there are since the
      // last match; dividing it by 32 (i.e. right-shifting by five) gives the
      // number of bytes to move ahead for each iteration.
      uint32 skip = 32;

      const char* next_ip = ip;
      const char* candidate;
      do {
        ip = next_ip;
        uint32 hash = next_hash;
        assert(hash == Hash(ip, shift));
        uint32 bytes_between_hash_lookups = skip++ >> 5;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = ip - base_ip;
      } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
                            UNALIGNED_LOAD32(candidate)));

      // Step 2: A 4-byte match has been found. We'll later see if more
      // than 4 bytes match. But, prior to the match, input
      // bytes [next_emit, ip) are unmatched. Emit them as "literal bytes."
      assert(next_emit + 16 <= ip_end);
      op = EmitLiteral(op, next_emit, ip - next_emit, true);

      // Step 3: Call EmitCopy, and then see if another EmitCopy could
      // be our next move. Repeat until we find no match for the
      // input immediately after what was consumed by the last EmitCopy call.
      //
      // If we exit this loop normally then we need to call EmitLiteral next,
      // though we don't yet know how big the literal will be. We handle that
      // by proceeding to the next iteration of the main loop. We also can exit
      // this loop via goto if we get close to exhausting the input.
      EightBytesReference input_bytes;
      uint32 candidate_bytes = 0;

      do {
        // We have a 4-byte match at ip, and no need to emit any
        // "literal bytes" prior to ip.
        const char* base = ip;
        int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
        ip += matched;
        size_t offset = base - candidate;
        assert(0 == memcmp(base, candidate, matched));
        op = EmitCopy(op, offset, matched);
        // We could immediately start working at ip now, but to improve
        // compression we first update table[Hash(ip - 1, ...)].
        const char* insert_tail = ip - 1;
        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        input_bytes = GetEightBytesAt(insert_tail);
        uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
        table[prev_hash] = ip - base_ip - 1;
        uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
        candidate = base_ip + table[cur_hash];
        candidate_bytes = UNALIGNED_LOAD32(candidate);
        table[cur_hash] = ip - base_ip;
      } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);

      next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
      ++ip;
    }
  }

 emit_remainder:
  // Emit the remaining bytes as a literal
  if (next_emit < ip_end) {
    op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
  }

  return op;
}
}  // end namespace internal

// Signature of output types needed by decompression code.
// The decompression code is templatized on a type that obeys this
// signature so that we do not pay virtual function call overhead in
// the middle of a tight decompression loop.
//
// class DecompressionWriter {
//  public:
//   // Called before decompression
//   void SetExpectedLength(size_t length);
//
//   // Called after decompression
//   bool CheckLength() const;
//
//   // Called repeatedly during decompression
//   bool Append(const char* ip, size_t length);
//   bool AppendFromSelf(uint32 offset, size_t length);
//
//   // The difference between TryFastAppend and Append is that TryFastAppend
//   // is allowed to read up to <available> bytes from the input buffer,
//   // whereas Append is allowed to read <length>.
//   //
//   // Also, TryFastAppend is allowed to return false, declining the append,
//   // without it being a fatal error -- just "return false" would be
//   // a perfectly legal implementation of TryFastAppend. The intention
//   // is for TryFastAppend to allow a fast path in the common case of
//   // a small append.
//   //
//   // NOTE(user): TryFastAppend must always decline (return false)
//   // if <length> is 61 or more, as in this case the literal length is not
//   // decoded fully. In practice, this should not be a big problem,
//   // as it is unlikely that one would implement a fast path accepting
//   // this much data.
//   bool TryFastAppend(const char* ip, size_t available, size_t length);
// };

// -----------------------------------------------------------------------
// Lookup table for decompression code. Generated by ComputeTable() below.
// -----------------------------------------------------------------------

// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
static const uint32 wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};

// Data stored per entry in lookup table:
//      Range   Bits-used       Description
//      ------------------------------------
//      1..64   0..7            Literal/copy length encoded in opcode byte
//      0..7    8..10           Copy offset encoded in opcode byte / 256
//      0..4    11..13          Extra bytes after opcode
//
// We use eight bits for the length even though 7 would have sufficed
// because of efficiency reasons:
//      (1) Extracting a byte is faster than a bit-field
//      (2) It properly aligns copy offset so we do not need a <<8
static const uint16 char_table[256] = {
  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
  0x0023, 0x0c06, 0x1023, 0x2023,
  0x0024, 0x0c07, 0x1024, 0x2024,
  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
};

// In debug mode, allow optional computation of the table at startup.
// Also, check that the decompression table is correct.
#ifndef NDEBUG
DEFINE_bool(snappy_dump_decompression_table, false,
            "If true, we print the decompression table at startup.");

static uint16 MakeEntry(unsigned int extra,
                        unsigned int len,
                        unsigned int copy_offset) {
  // Check that all of the fields fit within the allocated space
  assert(extra       == (extra & 0x7));        // At most 3 bits
  assert(copy_offset == (copy_offset & 0x7));  // At most 3 bits
  assert(len         == (len & 0x7f));         // At most 7 bits
  return len | (copy_offset << 8) | (extra << 11);
}

static void ComputeTable() {
  uint16 dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; i++) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries. We store (len-1) in the top 6 bits.
  for (unsigned int len = 1; len <= 60; len++) {
    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries. We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 5 bits. offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (unsigned int len = 4; len < 12; len++) {
    for (unsigned int offset = 0; offset < 2048; offset += 256) {
      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
        MakeEntry(1, len, offset>>8);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  if (assigned != 256) {
    fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
    abort();
  }
  for (int i = 0; i < 256; i++) {
    if (dst[i] == 0xffff) {
      fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
      abort();
    }
  }

  if (FLAGS_snappy_dump_decompression_table) {
    printf("static const uint16 char_table[256] = {\n  ");
    for (int i = 0; i < 256; i++) {
      printf("0x%04x%s",
             dst[i],
             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
    }
    printf("};\n");
  }

  // Check that computed table matched recorded table
  for (int i = 0; i < 256; i++) {
    if (dst[i] != char_table[i]) {
      fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
              i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
      abort();
    }
  }
}
#endif /* !NDEBUG */

// Helper class for decompression
class SnappyDecompressor {
 private:
  Source*       reader_;        // Underlying source of bytes to decompress
  const char*   ip_;            // Points to next buffered byte
  const char*   ip_limit_;      // Points just past buffered bytes
  uint32        peeked_;        // Bytes peeked from reader (need to skip)
  bool          eof_;           // Hit end of input without an error?
  char          scratch_[5];    // Temporary buffer for PeekFast() boundaries

  // Ensure that all of the tag metadata for the next tag is available
  // in [ip_..ip_limit_-1]. Also ensures that [ip,ip+4] is readable even
  // if (ip_limit_ - ip_ < 5).
  //
  // Returns true on success, false on error or end of input.
  bool RefillTag();

 public:
  explicit SnappyDecompressor(Source* reader)
      : reader_(reader),
        ip_(NULL),
        ip_limit_(NULL),
        peeked_(0),
        eof_(false) {
  }

  ~SnappyDecompressor() {
    // Advance past any bytes we peeked at from the reader
    reader_->Skip(peeked_);
  }

  // Returns true iff we have hit the end of the input without an error.
  bool eof() const {
    return eof_;
  }

  // Read the uncompressed length stored at the start of the compressed data.
  // On success, stores the length in *result and returns true.
  // On failure, returns false.
  bool ReadUncompressedLength(uint32* result) {
    assert(ip_ == NULL);       // Must not have read anything yet
    // Length is encoded in 1..5 bytes
    *result = 0;
    uint32 shift = 0;
    while (true) {
      if (shift >= 32) return false;
      size_t n;
      const char* ip = reader_->Peek(&n);
      if (n == 0) return false;
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
      reader_->Skip(1);
      *result |= static_cast<uint32>(c & 0x7f) << shift;
      if (c < 128) {
        break;
      }
      shift += 7;
    }
    return true;
  }

  // Process the items found in the input, passing uncompressed data to
  // *writer. Stops on error or end of input.
  template <class Writer>
  void DecompressAllTags(Writer* writer) {
    const char* ip = ip_;

    // We could have put this refill fragment only at the beginning of the loop.
    // However, duplicating it at the end of each branch gives the compiler more
    // scope to optimize the <ip_limit_ - ip> expression based on the local
    // context, which overall increases speed.
#define MAYBE_REFILL() \
    if (ip_limit_ - ip < 5) { \
      ip_ = ip; \
      if (!RefillTag()) return; \
      ip = ip_; \
    }

    MAYBE_REFILL();
    for ( ;; ) {
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

      if ((c & 0x3) == LITERAL) {
        size_t literal_length = (c >> 2) + 1u;
        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
          assert(literal_length < 61);
          ip += literal_length;
          MAYBE_REFILL();
          continue;
        }
        if (PREDICT_FALSE(literal_length >= 61)) {
          // Long literal.
          const size_t literal_length_length = literal_length - 60;
          literal_length =
              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
          ip += literal_length_length;
        }

        size_t avail = ip_limit_ - ip;
        while (avail < literal_length) {
          if (!writer->Append(ip, avail)) return;
          literal_length -= avail;
          reader_->Skip(peeked_);
          size_t n;
          ip = reader_->Peek(&n);
          avail = n;
          peeked_ = avail;
          if (avail == 0) return;  // Premature end of input
          ip_limit_ = ip + avail;
        }
        if (!writer->Append(ip, literal_length)) {
          return;
        }
        ip += literal_length;
        MAYBE_REFILL();
      } else {
        const uint32 entry = char_table[c];
        const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
        const uint32 length = entry & 0xff;
        ip += entry >> 11;

        // copy_offset/256 is encoded in bits 8..10. By just fetching
        // those bits, we get copy_offset (since the bit-field starts at
        // bit 8).
        const uint32 copy_offset = entry & 0x700;
        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
          return;
        }
        MAYBE_REFILL();
      }
    }

#undef MAYBE_REFILL
  }
};

bool SnappyDecompressor::RefillTag() {
  const char* ip = ip_;
  if (ip == ip_limit_) {
    // Fetch a new fragment from the reader
    reader_->Skip(peeked_);   // All peeked bytes are used up
    size_t n;
    ip = reader_->Peek(&n);
    peeked_ = n;
    if (n == 0) {
      eof_ = true;
      return false;
    }
    ip_limit_ = ip + n;
  }

  // Read the tag character
  assert(ip < ip_limit_);
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
  const uint32 entry = char_table[c];
  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
  assert(needed <= sizeof(scratch_));

  // Read more bytes from reader if needed
  uint32 nbuf = ip_limit_ - ip;
  if (nbuf < needed) {
    // Stitch together bytes from ip and reader to form the word
    // contents. We store the needed bytes in "scratch_". They
    // will be consumed immediately by the caller since we do not
    // read more than we need.
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    while (nbuf < needed) {
      size_t length;
      const char* src = reader_->Peek(&length);
      if (length == 0) return false;
      uint32 to_add = min<uint32>(needed - nbuf, length);
      memcpy(scratch_ + nbuf, src, to_add);
      nbuf += to_add;
      reader_->Skip(to_add);
    }
    assert(nbuf == needed);
    ip_ = scratch_;
    ip_limit_ = scratch_ + needed;
  } else if (nbuf < 5) {
    // Have enough bytes, but move into scratch_ so that we do not
    // read past end of input
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    ip_ = scratch_;
    ip_limit_ = scratch_ + nbuf;
  } else {
    // Pass pointer to buffer returned by reader_.
    ip_ = ip;
  }
  return true;
}

template <typename Writer>
static bool InternalUncompress(Source* r,
                               Writer* writer,
                               uint32 max_len) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
  return InternalUncompressAllTags(
      &decompressor, writer, uncompressed_len, max_len);
}

template <typename Writer>
static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                      Writer* writer,
                                      uint32 uncompressed_len,
                                      uint32 max_len) {
  // Protect against possible DoS attack
  if (static_cast<uint64>(uncompressed_len) > max_len) {
    return false;
  }

  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
  decompressor->DecompressAllTags(writer);
  return (decompressor->eof() && writer->CheckLength());
}

bool GetUncompressedLength(Source* source, uint32* result) {
  SnappyDecompressor decompressor(source);
  return decompressor.ReadUncompressedLength(result);
}

size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
  written += (p - ulength);

  internal::WorkingMemory wmem;
  char* scratch = NULL;
  char* scratch_output = NULL;

  while (N > 0) {
    // Get next block to compress (without copying if possible)
    size_t fragment_size;
    const char* fragment = reader->Peek(&fragment_size);
    assert(fragment_size != 0);  // premature end of input
    const size_t num_to_read = min(N, kBlockSize);
    size_t bytes_read = fragment_size;

    size_t pending_advance = 0;
    if (bytes_read >= num_to_read) {
      // Buffer returned by reader is large enough
      pending_advance = num_to_read;
      fragment_size = num_to_read;
    } else {
      // Read into scratch buffer
      if (scratch == NULL) {
        // If this is the last iteration, we want to allocate N bytes
        // of space, otherwise the max possible kBlockSize space.
        // num_to_read contains exactly the correct value
        scratch = new char[num_to_read];
      }
      memcpy(scratch, fragment, bytes_read);
      reader->Skip(bytes_read);

      while (bytes_read < num_to_read) {
        fragment = reader->Peek(&fragment_size);
        size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
        memcpy(scratch + bytes_read, fragment, n);
        bytes_read += n;
        reader->Skip(n);
      }
      assert(bytes_read == num_to_read);
      fragment = scratch;
      fragment_size = num_to_read;
    }
    assert(fragment_size == num_to_read);

    // Get encoding table for compression
    int table_size;
    uint16* table = wmem.GetHashTable(num_to_read, &table_size);

    // Compress input_fragment and append to dest
    const int max_output = MaxCompressedLength(num_to_read);

    // Need a scratch buffer for the output, in case the byte sink doesn't
    // have room for us directly.
    if (scratch_output == NULL) {
      scratch_output = new char[max_output];
    } else {
      // Since we encode kBlockSize regions followed by a region
      // which is <= kBlockSize in length, a previously allocated
      // scratch_output[] region is big enough for this iteration.
    }
    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
    char* end = internal::CompressFragment(fragment, fragment_size,
                                           dest, table, table_size);
    writer->Append(dest, end - dest);
    written += (end - dest);

    N -= num_to_read;
    reader->Skip(pending_advance);
  }

  delete[] scratch;
  delete[] scratch_output;

  return written;
}

// -----------------------------------------------------------------------
// Flat array interfaces
// -----------------------------------------------------------------------

// A type that writes to a flat array.
// Note that this is not a "ByteSink", but a type that matches the
// Writer template argument to SnappyDecompressor::DecompressAllTags().
class SnappyArrayWriter {
 private:
  char* base_;
  char* op_;
  char* op_limit_;

 public:
  inline explicit SnappyArrayWriter(char* dst)
      : base_(dst),
        op_(dst) {
  }

  inline void SetExpectedLength(size_t len) {
    op_limit_ = op_ + len;
  }

  inline bool CheckLength() const {
    return op_ == op_limit_;
  }

  inline bool Append(const char* ip, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (space_left < len) {
      return false;
    }
    memcpy(op, ip, len);
    op_ = op + len;
    return true;
  }

  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (len <= 16 && available >= 16 && space_left >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      UnalignedCopy64(ip, op);
      UnalignedCopy64(ip + 8, op + 8);
      op_ = op + len;
      return true;
    } else {
      return false;
    }
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;

    if (op - base_ <= offset - 1u) {  // -1u catches offset==0
      return false;
    }
    if (len <= 16 && offset >= 8 && space_left >= 16) {
      // Fast path, used for the majority (70-80%) of dynamic invocations.
      UnalignedCopy64(op - offset, op);
      UnalignedCopy64(op - offset + 8, op + 8);
    } else {
      if (space_left >= len + kMaxIncrementCopyOverflow) {
        IncrementalCopyFastPath(op - offset, op, len);
      } else {
        if (space_left < len) {
          return false;
        }
        IncrementalCopy(op - offset, op, len);
      }
    }

    op_ = op + len;
    return true;
  }
};

bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
  ByteArraySource reader(compressed, n);
  return RawUncompress(&reader, uncompressed);
}

bool RawUncompress(Source* compressed, char* uncompressed) {
  SnappyArrayWriter output(uncompressed);
  return InternalUncompress(compressed, &output, kuint32max);
}

bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
  size_t ulength;
  if (!GetUncompressedLength(compressed, n, &ulength)) {
    return false;
  }
  // Protect against possible DoS attack
  if ((static_cast<uint64>(ulength) + uncompressed->size()) >
      uncompressed->max_size()) {
    return false;
  }
  STLStringResizeUninitialized(uncompressed, ulength);
  return RawUncompress(compressed, n, string_as_array(uncompressed));
}


// A Writer that drops everything on the floor and just does validation
class SnappyDecompressionValidator {
 private:
  size_t expected_;
  size_t produced_;

 public:
  inline SnappyDecompressionValidator() : produced_(0) { }
  inline void SetExpectedLength(size_t len) {
    expected_ = len;
  }
  inline bool CheckLength() const {
    return expected_ == produced_;
  }
  inline bool Append(const char* ip, size_t len) {
    produced_ += len;
    return produced_ <= expected_;
  }
  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
    return false;
  }
  inline bool AppendFromSelf(size_t offset, size_t len) {
    if (produced_ <= offset - 1u) return false;  // -1u catches offset==0
    produced_ += len;
    return produced_ <= expected_;
  }
};

bool IsValidCompressedBuffer(const char* compressed, size_t n) {
  ByteArraySource reader(compressed, n);
  SnappyDecompressionValidator writer;
  return InternalUncompress(&reader, &writer, kuint32max);
}

void RawCompress(const char* input,
                 size_t input_length,
                 char* compressed,
                 size_t* compressed_length) {
  ByteArraySource reader(input, input_length);
  UncheckedByteArraySink writer(compressed);
  Compress(&reader, &writer);

  // Compute how many bytes were added
  *compressed_length = (writer.CurrentDestination() - compressed);
}

size_t Compress(const char* input, size_t input_length, string* compressed) {
  // Pre-grow the buffer to the max length of the compressed output
  compressed->resize(MaxCompressedLength(input_length));

  size_t compressed_length;
  RawCompress(input, input_length, string_as_array(compressed),
              &compressed_length);
  compressed->resize(compressed_length);
  return compressed_length;
}


}  // end namespace snappy
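
Below is a minimal caller-side sketch, not part of the original snappy.cc, showing how the public flat-array entry points defined above (Compress, Uncompress, IsValidCompressedBuffer, MaxCompressedLength, all declared in snappy.h) fit together in a round trip. The input string used here is an arbitrary example.

// Illustrative usage sketch only -- separate from the library source above.
#include <cassert>
#include <string>
#include "snappy.h"

int main() {
  // Highly compressible example input.
  const std::string original(10000, 'a');

  // Compress() pre-sizes its output internally via MaxCompressedLength(),
  // so the caller only supplies a string to receive the result.
  std::string compressed;
  snappy::Compress(original.data(), original.size(), &compressed);
  assert(compressed.size() <= snappy::MaxCompressedLength(original.size()));

  // Validate without allocating any output, then decompress for real.
  assert(snappy::IsValidCompressedBuffer(compressed.data(), compressed.size()));
  std::string roundtripped;
  snappy::Uncompress(compressed.data(), compressed.size(), &roundtripped);
  assert(roundtripped == original);
  return 0;
}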