github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/image/rawp/librawp/src/snappy/rawp-snappy.cc

// Copyright 2005 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "rawp-snappy.h"
#include "rawp-snappy-internal.h"
#include "rawp-snappy-sinksource.h"

#include <stdio.h>

#include <algorithm>
#include <string>
#include <vector>


namespace rawp {
namespace snappy {

// Any hash function will produce a valid compressed bitstream, but a good
// hash function reduces the number of collisions and thus yields better
// compression for compressible input, and more speed for incompressible
// input. Of course, it doesn't hurt if the hash function is reasonably fast
// either, as it gets called a lot.
static inline uint32 HashBytes(uint32 bytes, int shift) {
  uint32 kMul = 0x1e35a7bd;
  return (bytes * kMul) >> shift;
}
static inline uint32 Hash(const char* p, int shift) {
  return HashBytes(UNALIGNED_LOAD32(p), shift);
}

size_t MaxCompressedLength(size_t source_len) {
  // Compressed data can be defined as:
  //    compressed := item* literal*
  //    item       := literal* copy
  //
  // The trailing literal sequence has a space blowup of at most 62/60
  // since a literal of length 60 needs one tag byte + one extra byte
  // for length information.
  //
  // Item blowup is trickier to measure. Suppose the "copy" op copies
  // 4 bytes of data. Because of a special check in the encoding code,
  // we produce a 4-byte copy only if the offset is < 65536. Therefore
  // the copy op takes 3 bytes to encode, and this type of item leads
  // to at most the 62/60 blowup for representing literals.
  //
  // Suppose the "copy" op copies 5 bytes of data. If the offset is big
  // enough, it will take 5 bytes to encode the copy op. Therefore the
  // worst case here is a one-byte literal followed by a five-byte copy.
  // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
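  // (Worked instance of that case, for illustration: the one-byte literal
  // costs a tag byte plus one data byte, and a five-byte copy with a large
  // offset is a five-byte COPY_4_BYTE_OFFSET op, i.e. 2 + 5 = 7 output bytes
  // for 6 input bytes.)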
  //
  // This last factor dominates the blowup, so the final estimate is:
  return 32 + source_len + source_len/6;
}

enum {
  LITERAL = 0,
  COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
  COPY_2_BYTE_OFFSET = 2,
  COPY_4_BYTE_OFFSET = 3
};

// Copy "len" bytes from "src" to "op", one byte at a time.  Used for
// handling COPY operations where the input and output regions may
// overlap.  For example, suppose:
//    src    == "ab"
//    op     == src + 2
//    len    == 20
// After IncrementalCopy(src, op, len), the result will have
// eleven copies of "ab"
//    ababababababababababab
// Note that this does not match the semantics of either memcpy()
// or memmove().
static inline void IncrementalCopy(const char* src, char* op, int len) {
  assert(len > 0);
  do {
    *op++ = *src++;
  } while (--len > 0);
}

// Equivalent to IncrementalCopy except that it can write up to ten extra
// bytes after the end of the copy, and that it is faster.
//
// The main part of this loop is a simple copy of eight bytes at a time until
// we've copied (at least) the requested amount of bytes.  However, if op and
// src are less than eight bytes apart (indicating a repeating pattern of
// length < 8), we first need to expand the pattern in order to get the correct
// results. For instance, if the buffer looks like this, with the eight-byte
// <src> and <op> patterns marked as intervals:
//
//    abxxxxxxxxxxxx
//    [------]           src
//      [------]         op
//
// a single eight-byte copy from <src> to <op> will repeat the pattern once,
// after which we can move <op> two bytes without moving <src>:
//
//    ababxxxxxxxxxx
//    [------]           src
//        [------]       op
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// op - src == 1 and len == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.

namespace {

const int kMaxIncrementCopyOverflow = 10;

}  // namespace

static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
  while (op - src < 8) {
    UnalignedCopy64(src, op);
    len -= op - src;
    op += op - src;
  }
  while (len > 0) {
    UnalignedCopy64(src, op);
    src += 8;
    op += 8;
    len -= 8;
  }
}

static inline char* EmitLiteral(char* op,
                                const char* literal,
                                int len,
                                bool allow_fast_path) {
  int n = len - 1;      // Zero-length literals are disallowed
  if (n < 60) {
    // Fits in tag byte
    *op++ = LITERAL | (n << 2);

    // The vast majority of copies are below 16 bytes, for which a
    // call to memcpy is overkill. This fast path can sometimes
    // copy up to 15 bytes too much, but that is okay in the
    // main loop, since we have a bit to go on for both sides:
    //
    //   - The input will always have kInputMarginBytes = 15 extra
    //     available bytes, as long as we're in the main loop, and
    //     if not, allow_fast_path = false.
    //   - The output will always have 32 spare bytes (see
    //     MaxCompressedLength).
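    // (Illustrative worst case: a one-byte literal still copies 16 bytes on
    // this path, i.e. 15 bytes more than needed, which the two margins above
    // absorb.)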
    if (allow_fast_path && len <= 16) {
      UnalignedCopy64(literal, op);
      UnalignedCopy64(literal + 8, op + 8);
      return op + len;
    }
  } else {
    // Encode in upcoming bytes
    char* base = op;
    int count = 0;
    op++;
    while (n > 0) {
      *op++ = n & 0xff;
      n >>= 8;
      count++;
    }
    assert(count >= 1);
    assert(count <= 4);
    *base = LITERAL | ((59+count) << 2);
  }
  memcpy(op, literal, len);
  return op + len;
}

static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
  assert(len <= 64);
  assert(len >= 4);
  assert(offset < 65536);

  if ((len < 12) && (offset < 2048)) {
    size_t len_minus_4 = len - 4;
    assert(len_minus_4 < 8);            // Must fit in 3 bits
    *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
    *op++ = offset & 0xff;
  } else {
    *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
    LittleEndian::Store16(op, offset);
    op += 2;
  }
  return op;
}

static inline char* EmitCopy(char* op, size_t offset, int len) {
  // Emit 64-byte copies but make sure to keep at least four bytes reserved
  while (len >= 68) {
    op = EmitCopyLessThan64(op, offset, 64);
    len -= 64;
  }

  // Emit an extra 60-byte copy if we have too much data to fit in one copy
  if (len > 64) {
    op = EmitCopyLessThan64(op, offset, 60);
    len -= 60;
  }

  // Emit remainder
  op = EmitCopyLessThan64(op, offset, len);
  return op;
}


bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
  uint32 v = 0;
  const char* limit = start + n;
  if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
    *result = v;
    return true;
  } else {
    return false;
  }
}

namespace internal {
uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
  // Use smaller hash table when input.size() is smaller, since we
  // fill the table, incurring O(hash table size) overhead for
  // compression, and if the input is short, we won't need that
  // many hash table entries anyway.
  assert(kMaxHashTableSize >= 256);
  size_t htsize = 256;
  while (htsize < kMaxHashTableSize && htsize < input_size) {
    htsize <<= 1;
  }

  uint16* table;
  if (htsize <= ARRAYSIZE(small_table_)) {
    table = small_table_;
  } else {
    if (large_table_ == NULL) {
      large_table_ = new uint16[kMaxHashTableSize];
    }
    table = large_table_;
  }

  *table_size = htsize;
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
}  // end namespace internal

// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
// equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
// empirically found that overlapping loads such as
//  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
//
// We have different versions for 64- and 32-bit; ideally we would avoid the
// two functions and just inline the UNALIGNED_LOAD64 call into
// GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
// enough to avoid loading the value multiple times then. For 64-bit, the load
// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
// done at GetUint32AtOffset() time.
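//
// (Illustrative consequence of the contract above: on a little-endian
// machine, GetUint32AtOffset(GetEightBytesAt(p), 2) yields the 32-bit value
// formed by bytes p[2..5], exactly as UNALIGNED_LOAD32(p + 2) would.)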

#ifdef ARCH_K8

typedef uint64 EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return UNALIGNED_LOAD64(ptr);
}

static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
  assert(offset >= 0);
  assert(offset <= 4);
  return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
}

#else

typedef const char* EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return ptr;
}

static inline uint32 GetUint32AtOffset(const char* v, int offset) {
  assert(offset >= 0);
  assert(offset <= 4);
  return UNALIGNED_LOAD32(v + offset);
}

#endif

// Flat array compression that does not emit the "uncompressed length"
// prefix. Compresses "input" string to the "*op" buffer.
//
// REQUIRES: "input" is at most "kBlockSize" bytes long.
// REQUIRES: "op" points to an array of memory that is at least
// "MaxCompressedLength(input.size())" in size.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
//
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
namespace internal {
char* CompressFragment(const char* input,
                       size_t input_size,
                       char* op,
                       uint16* table,
                       const int table_size) {
  // "ip" is the input pointer, and "op" is the output pointer.
  const char* ip = input;
  assert(input_size <= kBlockSize);
  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
  const int shift = 32 - Bits::Log2Floor(table_size);
  assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
  const char* ip_end = input + input_size;
  const char* base_ip = ip;
  // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
  // [next_emit, ip_end) after the main loop.
  const char* next_emit = ip;

  const size_t kInputMarginBytes = 15;
  if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
    const char* ip_limit = input + input_size - kInputMarginBytes;

    for (uint32 next_hash = Hash(++ip, shift); ; ) {
      assert(next_emit < ip);
      // The body of this loop calls EmitLiteral once and then EmitCopy one or
      // more times.  (The exception is that when we're close to exhausting
      // the input we goto emit_remainder.)
      //
      // In the first iteration of this loop we're just starting, so
      // there's nothing to copy, so calling EmitLiteral once is
      // necessary.  And we only start a new iteration when the
      // current iteration has determined that a call to EmitLiteral will
      // precede the next call to EmitCopy (if any).
      //
      // Step 1: Scan forward in the input looking for a 4-byte-long match.
      // If we get close to exhausting the input then goto emit_remainder.
      //
      // Heuristic match skipping: If 32 bytes are scanned with no matches
      // found, start looking only at every other byte. If 32 more bytes are
      // scanned, look at every third byte, etc. When a match is found,
      // immediately go back to looking at every byte. This is a small loss
      // (~5% performance, ~0.1% density) for compressible data due to more
      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
      // win since the compressor quickly "realizes" the data is incompressible
      // and doesn't bother looking for matches everywhere.
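      // (Illustrative: since "skip" starts at 32 and is incremented once per
      // probe, the first 32 probes advance one byte each, the next 32 advance
      // two bytes each, and so on.)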
      //
      // The "skip" variable keeps track of how many bytes there are since the
      // last match; dividing it by 32 (i.e. right-shifting by five) gives the
      // number of bytes to move ahead for each iteration.
      uint32 skip = 32;

      const char* next_ip = ip;
      const char* candidate;
      do {
        ip = next_ip;
        uint32 hash = next_hash;
        assert(hash == Hash(ip, shift));
        uint32 bytes_between_hash_lookups = skip++ >> 5;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = ip - base_ip;
      } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
                            UNALIGNED_LOAD32(candidate)));

      // Step 2: A 4-byte match has been found.  We'll later see if more
      // than 4 bytes match.  But, prior to the match, input
      // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
      assert(next_emit + 16 <= ip_end);
      op = EmitLiteral(op, next_emit, ip - next_emit, true);

      // Step 3: Call EmitCopy, and then see if another EmitCopy could
      // be our next move.  Repeat until we find no match for the
      // input immediately after what was consumed by the last EmitCopy call.
      //
      // If we exit this loop normally then we need to call EmitLiteral next,
      // though we don't yet know how big the literal will be.  We handle that
      // by proceeding to the next iteration of the main loop.  We also can
      // exit this loop via goto if we get close to exhausting the input.
      EightBytesReference input_bytes;
      uint32 candidate_bytes = 0;

      do {
        // We have a 4-byte match at ip, and no need to emit any
        // "literal bytes" prior to ip.
        const char* base = ip;
        int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
        ip += matched;
        size_t offset = base - candidate;
        assert(0 == memcmp(base, candidate, matched));
        op = EmitCopy(op, offset, matched);
        // We could immediately start working at ip now, but to improve
        // compression we first update table[Hash(ip - 1, ...)].
        const char* insert_tail = ip - 1;
        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        input_bytes = GetEightBytesAt(insert_tail);
        uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
        table[prev_hash] = ip - base_ip - 1;
        uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
        candidate = base_ip + table[cur_hash];
        candidate_bytes = UNALIGNED_LOAD32(candidate);
        table[cur_hash] = ip - base_ip;
      } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);

      next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
      ++ip;
    }
  }

 emit_remainder:
  // Emit the remaining bytes as a literal
  if (next_emit < ip_end) {
    op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
  }

  return op;
}
}  // end namespace internal

// Signature of output types needed by decompression code.
// The decompression code is templatized on a type that obeys this
// signature so that we do not pay virtual function call overhead in
// the middle of a tight decompression loop.
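//
// (Two such writer types appear later in this file: SnappyArrayWriter, which
// writes the decompressed bytes into a flat array, and
// SnappyDecompressionValidator, which discards the bytes and only checks
// lengths and offsets.)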
//
// class DecompressionWriter {
//  public:
//   // Called before decompression
//   void SetExpectedLength(size_t length);
//
//   // Called after decompression
//   bool CheckLength() const;
//
//   // Called repeatedly during decompression
//   bool Append(const char* ip, size_t length);
//   bool AppendFromSelf(uint32 offset, size_t length);
//
//   // The difference between TryFastAppend and Append is that TryFastAppend
//   // is allowed to read up to <available> bytes from the input buffer,
//   // whereas Append is allowed to read <length>.
//   //
//   // Also, TryFastAppend is allowed to return false, declining the append,
//   // without it being a fatal error -- just "return false" would be
//   // a perfectly legal implementation of TryFastAppend. The intention
//   // is for TryFastAppend to allow a fast path in the common case of
//   // a small append.
//   //
//   // NOTE(user): TryFastAppend must always decline (return false)
//   // if <length> is 61 or more, as in this case the literal length is not
//   // decoded fully. In practice, this should not be a big problem,
//   // as it is unlikely that one would implement a fast path accepting
//   // this much data.
//   bool TryFastAppend(const char* ip, size_t available, size_t length);
// };

// -----------------------------------------------------------------------
// Lookup table for decompression code.  Generated by ComputeTable() below.
// -----------------------------------------------------------------------

// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
static const uint32 wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};

// Data stored per entry in lookup table:
//      Range   Bits-used       Description
//      ------------------------------------
//      1..64   0..7            Literal/copy length encoded in opcode byte
//      0..7    8..10           Copy offset encoded in opcode byte / 256
//      0..4    11..13          Extra bytes after opcode
//
// We use eight bits for the length even though 7 would have sufficed
// because of efficiency reasons:
//      (1) Extracting a byte is faster than a bit-field
//      (2) It properly aligns copy offset so we do not need a <<8
static const uint16 char_table[256] = {
  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
  0x0023, 0x0c06, 0x1023, 0x2023,
  0x0024, 0x0c07, 0x1024, 0x2024,
  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
};

// In debug mode, allow optional computation of the table at startup.
// Also, check that the decompression table is correct.
#ifndef NDEBUG
DEFINE_bool(snappy_dump_decompression_table, false,
            "If true, we print the decompression table at startup.");

static uint16 MakeEntry(unsigned int extra,
                        unsigned int len,
                        unsigned int copy_offset) {
  // Check that all of the fields fit within the allocated space
  assert(extra       == (extra & 0x7));        // At most 3 bits
  assert(copy_offset == (copy_offset & 0x7));  // At most 3 bits
  assert(len         == (len & 0x7f));         // At most 7 bits
  return len | (copy_offset << 8) | (extra << 11);
}

static void ComputeTable() {
  uint16 dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; i++) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries.  We store (len-1) in the top 6 bits.
  for (unsigned int len = 1; len <= 60; len++) {
    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries.  We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (unsigned int len = 4; len < 12; len++) {
    for (unsigned int offset = 0; offset < 2048; offset += 256) {
      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
        MakeEntry(1, len, offset>>8);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
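  // (Worked example, for illustration: len == 64 gives tag byte
  // COPY_4_BYTE_OFFSET | (63 << 2) == 0xff, and MakeEntry(4, 64, 0) == 0x2040,
  // matching the last entry of char_table[] above.)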
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  if (assigned != 256) {
    fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
    abort();
  }
  for (int i = 0; i < 256; i++) {
    if (dst[i] == 0xffff) {
      fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
      abort();
    }
  }

  if (FLAGS_snappy_dump_decompression_table) {
    printf("static const uint16 char_table[256] = {\n  ");
    for (int i = 0; i < 256; i++) {
      printf("0x%04x%s",
             dst[i],
             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
    }
    printf("};\n");
  }

  // Check that computed table matched recorded table
  for (int i = 0; i < 256; i++) {
    if (dst[i] != char_table[i]) {
      fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
              i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
      abort();
    }
  }
}
#endif /* !NDEBUG */

// Helper class for decompression
class SnappyDecompressor {
 private:
  Source*      reader_;       // Underlying source of bytes to decompress
  const char*  ip_;           // Points to next buffered byte
  const char*  ip_limit_;     // Points just past buffered bytes
  uint32       peeked_;       // Bytes peeked from reader (need to skip)
  bool         eof_;          // Hit end of input without an error?
  char         scratch_[5];   // Temporary buffer for PeekFast() boundaries

  // Ensure that all of the tag metadata for the next tag is available
  // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
  // if (ip_limit_ - ip_ < 5).
  //
  // Returns true on success, false on error or end of input.
  bool RefillTag();

 public:
  explicit SnappyDecompressor(Source* reader)
      : reader_(reader),
        ip_(NULL),
        ip_limit_(NULL),
        peeked_(0),
        eof_(false) {
  }

  ~SnappyDecompressor() {
    // Advance past any bytes we peeked at from the reader
    reader_->Skip(peeked_);
  }

  // Returns true iff we have hit the end of the input without an error.
  bool eof() const {
    return eof_;
  }

  // Read the uncompressed length stored at the start of the compressed data.
  // On success, stores the length in *result and returns true.
  // On failure, returns false.
  bool ReadUncompressedLength(uint32* result) {
    assert(ip_ == NULL);       // Must not have read anything yet
    // Length is encoded in 1..5 bytes
    *result = 0;
    uint32 shift = 0;
    while (true) {
      if (shift >= 32) return false;
      size_t n;
      const char* ip = reader_->Peek(&n);
      if (n == 0) return false;
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
      reader_->Skip(1);
      *result |= static_cast<uint32>(c & 0x7f) << shift;
      if (c < 128) {
        break;
      }
      shift += 7;
    }
    return true;
  }

  // Process all remaining items found in the input.
  // Returns when the input is exhausted or the writer reports an error.
  template <class Writer>
  void DecompressAllTags(Writer* writer) {
    const char* ip = ip_;

    // We could have put this refill fragment only at the beginning of the
    // loop. However, duplicating it at the end of each branch gives the
    // compiler more scope to optimize the <ip_limit_ - ip> expression based
    // on the local context, which overall increases speed.
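    // (For illustration: a tag plus its trailing metadata never needs more
    // than five bytes -- one opcode byte plus at most four extra bytes, per
    // the "Extra bytes after opcode" field of char_table -- which is why the
    // refill threshold below and scratch_[] both use 5.)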
#define MAYBE_REFILL() \
    if (ip_limit_ - ip < 5) { \
      ip_ = ip; \
      if (!RefillTag()) return; \
      ip = ip_; \
    }

    MAYBE_REFILL();
    for ( ;; ) {
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

      if ((c & 0x3) == LITERAL) {
        size_t literal_length = (c >> 2) + 1u;
        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
          assert(literal_length < 61);
          ip += literal_length;
          MAYBE_REFILL();
          continue;
        }
        if (PREDICT_FALSE(literal_length >= 61)) {
          // Long literal.
          const size_t literal_length_length = literal_length - 60;
          literal_length =
              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
          ip += literal_length_length;
        }

        size_t avail = ip_limit_ - ip;
        while (avail < literal_length) {
          if (!writer->Append(ip, avail)) return;
          literal_length -= avail;
          reader_->Skip(peeked_);
          size_t n;
          ip = reader_->Peek(&n);
          avail = n;
          peeked_ = avail;
          if (avail == 0) return;  // Premature end of input
          ip_limit_ = ip + avail;
        }
        if (!writer->Append(ip, literal_length)) {
          return;
        }
        ip += literal_length;
        MAYBE_REFILL();
      } else {
        const uint32 entry = char_table[c];
        const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
        const uint32 length = entry & 0xff;
        ip += entry >> 11;

        // copy_offset/256 is encoded in bits 8..10.  By just fetching
        // those bits, we get copy_offset (since the bit-field starts at
        // bit 8).
        const uint32 copy_offset = entry & 0x700;
        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
          return;
        }
        MAYBE_REFILL();
      }
    }

#undef MAYBE_REFILL
  }
};

bool SnappyDecompressor::RefillTag() {
  const char* ip = ip_;
  if (ip == ip_limit_) {
    // Fetch a new fragment from the reader
    reader_->Skip(peeked_);   // All peeked bytes are used up
    size_t n;
    ip = reader_->Peek(&n);
    peeked_ = n;
    if (n == 0) {
      eof_ = true;
      return false;
    }
    ip_limit_ = ip + n;
  }

  // Read the tag character
  assert(ip < ip_limit_);
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
  const uint32 entry = char_table[c];
  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
  assert(needed <= sizeof(scratch_));

  // Read more bytes from reader if needed
  uint32 nbuf = ip_limit_ - ip;
  if (nbuf < needed) {
    // Stitch together bytes from ip and reader to form the word
    // contents.  We store the needed bytes in "scratch_".  They
    // will be consumed immediately by the caller since we do not
    // read more than we need.
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    while (nbuf < needed) {
      size_t length;
      const char* src = reader_->Peek(&length);
      if (length == 0) return false;
      uint32 to_add = min<uint32>(needed - nbuf, length);
      memcpy(scratch_ + nbuf, src, to_add);
      nbuf += to_add;
      reader_->Skip(to_add);
    }
    assert(nbuf == needed);
    ip_ = scratch_;
    ip_limit_ = scratch_ + needed;
  } else if (nbuf < 5) {
    // Have enough bytes, but move into scratch_ so that we do not
    // read past end of input
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    ip_ = scratch_;
    ip_limit_ = scratch_ + nbuf;
  } else {
    // Pass pointer to buffer returned by reader_.
    ip_ = ip;
  }
  return true;
}

template <typename Writer>
static bool InternalUncompress(Source* r,
                               Writer* writer,
                               uint32 max_len) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
  return InternalUncompressAllTags(
      &decompressor, writer, uncompressed_len, max_len);
}

template <typename Writer>
static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                      Writer* writer,
                                      uint32 uncompressed_len,
                                      uint32 max_len) {
  // Protect against possible DoS attack
  if (static_cast<uint64>(uncompressed_len) > max_len) {
    return false;
  }

  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
  decompressor->DecompressAllTags(writer);
  return (decompressor->eof() && writer->CheckLength());
}

bool GetUncompressedLength(Source* source, uint32* result) {
  SnappyDecompressor decompressor(source);
  return decompressor.ReadUncompressedLength(result);
}

size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
  written += (p - ulength);

  internal::WorkingMemory wmem;
  char* scratch = NULL;
  char* scratch_output = NULL;

  while (N > 0) {
    // Get next block to compress (without copying if possible)
    size_t fragment_size;
    const char* fragment = reader->Peek(&fragment_size);
    assert(fragment_size != 0);  // premature end of input
    const size_t num_to_read = min(N, kBlockSize);
    size_t bytes_read = fragment_size;

    size_t pending_advance = 0;
    if (bytes_read >= num_to_read) {
      // Buffer returned by reader is large enough
      pending_advance = num_to_read;
      fragment_size = num_to_read;
    } else {
      // Read into scratch buffer
      if (scratch == NULL) {
        // If this is the last iteration, we want to allocate N bytes
        // of space, otherwise the max possible kBlockSize space.
        // num_to_read contains exactly the correct value
        scratch = new char[num_to_read];
      }
      memcpy(scratch, fragment, bytes_read);
      reader->Skip(bytes_read);

      while (bytes_read < num_to_read) {
        fragment = reader->Peek(&fragment_size);
        size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
        memcpy(scratch + bytes_read, fragment, n);
        bytes_read += n;
        reader->Skip(n);
      }
      assert(bytes_read == num_to_read);
      fragment = scratch;
      fragment_size = num_to_read;
    }
    assert(fragment_size == num_to_read);

    // Get encoding table for compression
    int table_size;
    uint16* table = wmem.GetHashTable(num_to_read, &table_size);

    // Compress input_fragment and append to dest
    const int max_output = MaxCompressedLength(num_to_read);

    // Need a scratch buffer for the output, in case the byte sink doesn't
    // have room for us directly.
    if (scratch_output == NULL) {
      scratch_output = new char[max_output];
    } else {
      // Since we encode kBlockSize regions followed by a region
      // which is <= kBlockSize in length, a previously allocated
      // scratch_output[] region is big enough for this iteration.
    }
    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
    char* end = internal::CompressFragment(fragment, fragment_size,
                                           dest, table, table_size);
    writer->Append(dest, end - dest);
    written += (end - dest);

    N -= num_to_read;
    reader->Skip(pending_advance);
  }

  delete[] scratch;
  delete[] scratch_output;

  return written;
}

// -----------------------------------------------------------------------
// Flat array interfaces
// -----------------------------------------------------------------------

// A type that writes to a flat array.
// Note that this is not a "ByteSink", but a type that matches the
// Writer template argument to SnappyDecompressor::DecompressAllTags().
class SnappyArrayWriter {
 private:
  char* base_;
  char* op_;
  char* op_limit_;

 public:
  inline explicit SnappyArrayWriter(char* dst)
      : base_(dst),
        op_(dst) {
  }

  inline void SetExpectedLength(size_t len) {
    op_limit_ = op_ + len;
  }

  inline bool CheckLength() const {
    return op_ == op_limit_;
  }

  inline bool Append(const char* ip, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (space_left < len) {
      return false;
    }
    memcpy(op, ip, len);
    op_ = op + len;
    return true;
  }

  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (len <= 16 && available >= 16 && space_left >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      UnalignedCopy64(ip, op);
      UnalignedCopy64(ip + 8, op + 8);
      op_ = op + len;
      return true;
    } else {
      return false;
    }
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;

    if (op - base_ <= offset - 1u) {  // -1u catches offset==0
      return false;
    }
    if (len <= 16 && offset >= 8 && space_left >= 16) {
      // Fast path, used for the majority (70-80%) of dynamic invocations.
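      // (Illustrative: with offset == 8 and len == 16, the first copy below
      // duplicates the previous eight bytes at op, and the second copy
      // re-reads those just-written bytes, extending the repeated pattern to
      // sixteen bytes, which is the intended overlapping-copy behaviour.)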
      UnalignedCopy64(op - offset, op);
      UnalignedCopy64(op - offset + 8, op + 8);
    } else {
      if (space_left >= len + kMaxIncrementCopyOverflow) {
        IncrementalCopyFastPath(op - offset, op, len);
      } else {
        if (space_left < len) {
          return false;
        }
        IncrementalCopy(op - offset, op, len);
      }
    }

    op_ = op + len;
    return true;
  }
};

bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
  ByteArraySource reader(compressed, n);
  return RawUncompress(&reader, uncompressed);
}

bool RawUncompress(Source* compressed, char* uncompressed) {
  SnappyArrayWriter output(uncompressed);
  return InternalUncompress(compressed, &output, kuint32max);
}

bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
  size_t ulength;
  if (!GetUncompressedLength(compressed, n, &ulength)) {
    return false;
  }
  // Protect against possible DoS attack
  if ((static_cast<uint64>(ulength) + uncompressed->size()) >
      uncompressed->max_size()) {
    return false;
  }
  STLStringResizeUninitialized(uncompressed, ulength);
  return RawUncompress(compressed, n, string_as_array(uncompressed));
}


// A Writer that drops everything on the floor and just does validation
class SnappyDecompressionValidator {
 private:
  size_t expected_;
  size_t produced_;

 public:
  inline SnappyDecompressionValidator() : produced_(0) { }
  inline void SetExpectedLength(size_t len) {
    expected_ = len;
  }
  inline bool CheckLength() const {
    return expected_ == produced_;
  }
  inline bool Append(const char* ip, size_t len) {
    produced_ += len;
    return produced_ <= expected_;
  }
  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
    return false;
  }
  inline bool AppendFromSelf(size_t offset, size_t len) {
    if (produced_ <= offset - 1u) return false;  // -1u catches offset==0
    produced_ += len;
    return produced_ <= expected_;
  }
};

bool IsValidCompressedBuffer(const char* compressed, size_t n) {
  ByteArraySource reader(compressed, n);
  SnappyDecompressionValidator writer;
  return InternalUncompress(&reader, &writer, kuint32max);
}

void RawCompress(const char* input,
                 size_t input_length,
                 char* compressed,
                 size_t* compressed_length) {
  ByteArraySource reader(input, input_length);
  UncheckedByteArraySink writer(compressed);
  Compress(&reader, &writer);

  // Compute how many bytes were added
  *compressed_length = (writer.CurrentDestination() - compressed);
}

size_t Compress(const char* input, size_t input_length, string* compressed) {
  // Pre-grow the buffer to the max length of the compressed output
  compressed->resize(MaxCompressedLength(input_length));

  size_t compressed_length;
  RawCompress(input, input_length, string_as_array(compressed),
              &compressed_length);
  compressed->resize(compressed_length);
  return compressed_length;
}


}  // end namespace snappy
}  // end namespace rawp
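
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the library). A minimal
// round-trip through the string/flat-array interfaces defined above, assuming
// "rawp-snappy.h" declares these functions with std::string and that the
// sketch is built and linked as its own program. Kept inside "#if 0" so it
// never affects the build of this file.
// ---------------------------------------------------------------------------
#if 0
#include <cassert>
#include <cstdio>
#include <string>

#include "rawp-snappy.h"

int main() {
  const std::string original(10000, 'a');  // Highly compressible input.

  // Compress() grows "compressed" to MaxCompressedLength() internally and
  // then shrinks it to the actual compressed size.
  std::string compressed;
  rawp::snappy::Compress(original.data(), original.size(), &compressed);

  // Validate without allocating output, then decompress for real.
  assert(rawp::snappy::IsValidCompressedBuffer(compressed.data(),
                                               compressed.size()));
  std::string uncompressed;
  if (!rawp::snappy::Uncompress(compressed.data(), compressed.size(),
                                &uncompressed)) {
    std::fprintf(stderr, "corrupt input\n");
    return 1;
  }
  assert(uncompressed == original);
  std::printf("%zu -> %zu bytes\n", original.size(), compressed.size());
  return 0;
}
#endif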