github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/database/leveldb.chai2010/src/snappy.cc

     1  // Copyright 2005 Google Inc. All Rights Reserved.
     2  //
     3  // Redistribution and use in source and binary forms, with or without
     4  // modification, are permitted provided that the following conditions are
     5  // met:
     6  //
     7  //     * Redistributions of source code must retain the above copyright
     8  // notice, this list of conditions and the following disclaimer.
     9  //     * Redistributions in binary form must reproduce the above
    10  // copyright notice, this list of conditions and the following disclaimer
    11  // in the documentation and/or other materials provided with the
    12  // distribution.
    13  //     * Neither the name of Google Inc. nor the names of its
    14  // contributors may be used to endorse or promote products derived from
    15  // this software without specific prior written permission.
    16  //
    17  // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
    18  // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
    19  // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
    20  // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
    21  // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
    22  // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
    23  // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
    24  // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
    25  // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
    26  // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
    27  // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
    28  
    29  #include "snappy.h"
    30  #include "snappy-internal.h"
    31  #include "snappy-sinksource.h"
    32  
    33  #include <stdio.h>
    34  
    35  #include <algorithm>
    36  #include <string>
    37  #include <vector>
    38  
    39  
    40  namespace snappy {
    41  
    42  // Any hash function will produce a valid compressed bitstream, but a good
    43  // hash function reduces the number of collisions and thus yields better
    44  // compression for compressible input, and more speed for incompressible
    45  // input. Of course, it doesn't hurt if the hash function is reasonably fast
    46  // either, as it gets called a lot.
    47  static inline uint32 HashBytes(uint32 bytes, int shift) {
    48    uint32 kMul = 0x1e35a7bd;
    49    return (bytes * kMul) >> shift;
    50  }
    51  static inline uint32 Hash(const char* p, int shift) {
    52    return HashBytes(UNALIGNED_LOAD32(p), shift);
    53  }
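         // Worked example (illustrative): the compressor pairs this hash with a
         // power-of-two table, using shift == 32 - log2(table_size) so that the
         // top bits of the 32-bit product select the bucket.  For a 16384-entry
         // table, shift == 32 - 14 == 18 and every hash value lands in [0, 16384):
         //
         //   uint32 h = HashBytes(UNALIGNED_LOAD32("abcd"), 18);  // h < 16384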
    54  
    55  size_t MaxCompressedLength(size_t source_len) {
    56    // Compressed data can be defined as:
    57    //    compressed := item* literal*
    58    //    item       := literal* copy
    59    //
    60    // The trailing literal sequence has a space blowup of at most 62/60
    61    // since a literal of length 60 needs one tag byte + one extra byte
    62    // for length information.
    63    //
    64    // Item blowup is trickier to measure.  Suppose the "copy" op copies
    65    // 4 bytes of data.  Because of a special check in the encoding code,
    66    // we produce a 4-byte copy only if the offset is < 65536.  Therefore
    67    // the copy op takes 3 bytes to encode, and this type of item leads
    68    // to at most the 62/60 blowup for representing literals.
    69    //
    70    // Suppose the "copy" op copies 5 bytes of data.  If the offset is big
    71    // enough, it will take 5 bytes to encode the copy op.  Therefore the
    72    // worst case here is a one-byte literal followed by a five-byte copy.
    73    // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
    74    //
    75    // This last factor dominates the blowup, so the final estimate is:
    76    return 32 + source_len + source_len/6;
    77  }
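         // Worked example (illustrative): the bound above is easy to evaluate by
         // hand.  A 60000-byte input can grow to at most
         //
         //   32 + 60000 + 60000/6 == 70032 bytes,
         //
         // and the 7/6 factor comes from the worst-case item described above: a
         // one-byte literal (2 output bytes) followed by a five-byte copy encoded
         // in 5 bytes, i.e. 6 input bytes turn into 7 output bytes.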
    78  
    79  enum {
    80    LITERAL = 0,
     81    COPY_1_BYTE_OFFSET = 1,  // 3-bit length + 3 bits of offset in opcode
    82    COPY_2_BYTE_OFFSET = 2,
    83    COPY_4_BYTE_OFFSET = 3
    84  };
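         // Illustrative tag bytes built from the element types above (the low two
         // bits select the type, the high six bits carry length information):
         //
         //   LITERAL | (2 << 2)                       == 0x08  -> 3-byte literal
         //   LITERAL | (60 << 2)                      == 0xf0  -> literal, length
         //                                                        in 1 extra byte
         //   COPY_1_BYTE_OFFSET | (6 << 2) | (0 << 5) == 0x19  -> copy, length 10,
         //                                                        offset < 256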
    85  
    86  // Copy "len" bytes from "src" to "op", one byte at a time.  Used for
    87  // handling COPY operations where the input and output regions may
    88  // overlap.  For example, suppose:
    89  //    src    == "ab"
    90  //    op     == src + 2
    91  //    len    == 20
    92  // After IncrementalCopy(src, op, len), the result will have
    93  // eleven copies of "ab"
    94  //    ababababababababababab
    95  // Note that this does not match the semantics of either memcpy()
    96  // or memmove().
    97  static inline void IncrementalCopy(const char* src, char* op, int len) {
    98    assert(len > 0);
    99    do {
   100      *op++ = *src++;
   101    } while (--len > 0);
   102  }
   103  
   104  // Equivalent to IncrementalCopy except that it can write up to ten extra
   105  // bytes after the end of the copy, and that it is faster.
   106  //
   107  // The main part of this loop is a simple copy of eight bytes at a time until
   108  // we've copied (at least) the requested amount of bytes.  However, if op and
   109  // src are less than eight bytes apart (indicating a repeating pattern of
   110  // length < 8), we first need to expand the pattern in order to get the correct
   111  // results. For instance, if the buffer looks like this, with the eight-byte
   112  // <src> and <op> patterns marked as intervals:
   113  //
   114  //    abxxxxxxxxxxxx
   115  //    [------]           src
   116  //      [------]         op
   117  //
   118  // a single eight-byte copy from <src> to <op> will repeat the pattern once,
   119  // after which we can move <op> two bytes without moving <src>:
   120  //
   121  //    ababxxxxxxxxxx
   122  //    [------]           src
   123  //        [------]       op
   124  //
   125  // and repeat the exercise until the two no longer overlap.
   126  //
   127  // This allows us to do very well in the special case of one single byte
   128  // repeated many times, without taking a big hit for more general cases.
   129  //
   130  // The worst case of extra writing past the end of the match occurs when
   131  // op - src == 1 and len == 1; the last copy will read from byte positions
   132  // [0..7] and write to [4..11], whereas it was only supposed to write to
   133  // position 1. Thus, ten excess bytes.
   134  
   135  namespace {
   136  
   137  const int kMaxIncrementCopyOverflow = 10;
   138  
   139  }  // namespace
   140  
   141  static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
   142    while (op - src < 8) {
   143      UnalignedCopy64(src, op);
   144      len -= op - src;
   145      op += op - src;
   146    }
   147    while (len > 0) {
   148      UnalignedCopy64(src, op);
   149      src += 8;
   150      op += 8;
   151      len -= 8;
   152    }
   153  }
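         // Illustrative trace of the loop above for op - src == 2 (an "ab"
         // pattern): the first UnalignedCopy64 appends 8 pattern bytes but op
         // only advances by 2, so op - src becomes 4; the next iteration makes it
         // 8, and the plain 8-bytes-at-a-time loop finishes the job.  The
         // materialized pattern length doubles on every iteration of the first
         // loop.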
   154  
   155  static inline char* EmitLiteral(char* op,
   156                                  const char* literal,
   157                                  int len,
   158                                  bool allow_fast_path) {
   159    int n = len - 1;      // Zero-length literals are disallowed
   160    if (n < 60) {
   161      // Fits in tag byte
   162      *op++ = LITERAL | (n << 2);
   163  
   164      // The vast majority of copies are below 16 bytes, for which a
   165      // call to memcpy is overkill. This fast path can sometimes
   166      // copy up to 15 bytes too much, but that is okay in the
   167      // main loop, since we have a bit to go on for both sides:
   168      //
   169      //   - The input will always have kInputMarginBytes = 15 extra
   170      //     available bytes, as long as we're in the main loop, and
   171      //     if not, allow_fast_path = false.
   172      //   - The output will always have 32 spare bytes (see
   173      //     MaxCompressedLength).
   174      if (allow_fast_path && len <= 16) {
   175        UnalignedCopy64(literal, op);
   176        UnalignedCopy64(literal + 8, op + 8);
   177        return op + len;
   178      }
   179    } else {
   180      // Encode in upcoming bytes
   181      char* base = op;
   182      int count = 0;
   183      op++;
   184      while (n > 0) {
   185        *op++ = n & 0xff;
   186        n >>= 8;
   187        count++;
   188      }
   189      assert(count >= 1);
   190      assert(count <= 4);
   191      *base = LITERAL | ((59+count) << 2);
   192    }
   193    memcpy(op, literal, len);
   194    return op + len;
   195  }
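         // Illustrative encodings produced by EmitLiteral:
         //
         //   len == 5:    n == 4 < 60, so one tag byte LITERAL | (4 << 2) == 0x10,
         //                followed by the 5 literal bytes.
         //   len == 1000: n == 999 needs count == 2 extra bytes, so the tag byte
         //                is LITERAL | (61 << 2) == 0xf4, followed by 0xe7 0x03
         //                (999 little-endian) and then the 1000 literal bytes.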
   196  
   197  static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
   198    assert(len <= 64);
   199    assert(len >= 4);
   200    assert(offset < 65536);
   201  
   202    if ((len < 12) && (offset < 2048)) {
   203      size_t len_minus_4 = len - 4;
   204      assert(len_minus_4 < 8);            // Must fit in 3 bits
   205      *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
   206      *op++ = offset & 0xff;
   207    } else {
   208      *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
   209      LittleEndian::Store16(op, offset);
   210      op += 2;
   211    }
   212    return op;
   213  }
   214  
   215  static inline char* EmitCopy(char* op, size_t offset, int len) {
    216    // Emit 64-byte copies, but make sure to keep at least four bytes reserved.
   217    while (len >= 68) {
   218      op = EmitCopyLessThan64(op, offset, 64);
   219      len -= 64;
   220    }
   221  
    222    // Emit an extra 60-byte copy if we have too much data to fit in one copy.
   223    if (len > 64) {
   224      op = EmitCopyLessThan64(op, offset, 60);
   225      len -= 60;
   226    }
   227  
   228    // Emit remainder
   229    op = EmitCopyLessThan64(op, offset, len);
   230    return op;
   231  }
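         // Illustrative encodings produced by EmitCopy:
         //
         //   len == 10, offset == 100:  one two-byte copy, 0x19 0x64
         //                              (COPY_1_BYTE_OFFSET form).
         //   len == 40, offset == 3000: one three-byte copy, 0x9e 0xb8 0x0b
         //                              (COPY_2_BYTE_OFFSET form).
         //   len == 130:                emitted as copies of 64, 60 and 6 bytes;
         //                              the 60-byte step keeps the final piece
         //                              >= 4 bytes, as EmitCopyLessThan64 requires.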
   232  
   233  
   234  bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
   235    uint32 v = 0;
   236    const char* limit = start + n;
   237    if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
   238      *result = v;
   239      return true;
   240    } else {
   241      return false;
   242    }
   243  }
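         // The length prefix parsed above is a little-endian base-128 varint:
         // 7 payload bits per byte, with the high bit set on every byte except
         // the last.  For illustration:
         //
         //   length 64  is stored as the single byte 0x40
         //   length 300 is stored as 0xac 0x02   (300 == 0x2c + (0x02 << 7))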
   244  
   245  namespace internal {
   246  uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
   247    // Use smaller hash table when input.size() is smaller, since we
   248    // fill the table, incurring O(hash table size) overhead for
   249    // compression, and if the input is short, we won't need that
   250    // many hash table entries anyway.
   251    assert(kMaxHashTableSize >= 256);
   252    size_t htsize = 256;
   253    while (htsize < kMaxHashTableSize && htsize < input_size) {
   254      htsize <<= 1;
   255    }
   256  
   257    uint16* table;
   258    if (htsize <= ARRAYSIZE(small_table_)) {
   259      table = small_table_;
   260    } else {
   261      if (large_table_ == NULL) {
   262        large_table_ = new uint16[kMaxHashTableSize];
   263      }
   264      table = large_table_;
   265    }
   266  
   267    *table_size = htsize;
   268    memset(table, 0, htsize * sizeof(*table));
   269    return table;
   270  }
   271  }  // end namespace internal
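         // Worked example (illustrative): for input_size == 3000 the loop above
         // doubles htsize 256 -> 512 -> 1024 -> 2048 -> 4096 and then stops,
         // since 4096 >= 3000; the table therefore has 4096 uint16 slots (still
         // capped at kMaxHashTableSize for large inputs).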
   272  
   273  // For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
   274  // equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
   275  // empirically found that overlapping loads such as
   276  //  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
   277  // are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
   278  //
   279  // We have different versions for 64- and 32-bit; ideally we would avoid the
   280  // two functions and just inline the UNALIGNED_LOAD64 call into
   281  // GetUint32AtOffset, but GCC (at least not as of 4.6) is seemingly not clever
   282  // enough to avoid loading the value multiple times then. For 64-bit, the load
   283  // is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
   284  // done at GetUint32AtOffset() time.
   285  
   286  #ifdef ARCH_K8
   287  
   288  typedef uint64 EightBytesReference;
   289  
   290  static inline EightBytesReference GetEightBytesAt(const char* ptr) {
   291    return UNALIGNED_LOAD64(ptr);
   292  }
   293  
   294  static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
   295    assert(offset >= 0);
   296    assert(offset <= 4);
   297    return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
   298  }
   299  
   300  #else
   301  
   302  typedef const char* EightBytesReference;
   303  
   304  static inline EightBytesReference GetEightBytesAt(const char* ptr) {
   305    return ptr;
   306  }
   307  
   308  static inline uint32 GetUint32AtOffset(const char* v, int offset) {
   309    assert(offset >= 0);
   310    assert(offset <= 4);
   311    return UNALIGNED_LOAD32(v + offset);
   312  }
   313  
   314  #endif
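         // Illustrative consequence of the contract above: on a little-endian
         // 64-bit build,
         //
         //   EightBytesReference v = GetEightBytesAt(p);
         //   GetUint32AtOffset(v, 2) == UNALIGNED_LOAD32(p + 2)
         //
         // because shifting the 64-bit load right by 16 bits discards exactly the
         // first two bytes, so one wide load replaces several overlapping ones.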
   315  
   316  // Flat array compression that does not emit the "uncompressed length"
   317  // prefix. Compresses "input" string to the "*op" buffer.
   318  //
   319  // REQUIRES: "input" is at most "kBlockSize" bytes long.
   320  // REQUIRES: "op" points to an array of memory that is at least
   321  // "MaxCompressedLength(input.size())" in size.
   322  // REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
   323  // REQUIRES: "table_size" is a power of two
   324  //
   325  // Returns an "end" pointer into "op" buffer.
   326  // "end - op" is the compressed size of "input".
   327  namespace internal {
   328  char* CompressFragment(const char* input,
   329                         size_t input_size,
   330                         char* op,
   331                         uint16* table,
   332                         const int table_size) {
   333    // "ip" is the input pointer, and "op" is the output pointer.
   334    const char* ip = input;
   335    assert(input_size <= kBlockSize);
   336    assert((table_size & (table_size - 1)) == 0); // table must be power of two
   337    const int shift = 32 - Bits::Log2Floor(table_size);
   338    assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
   339    const char* ip_end = input + input_size;
   340    const char* base_ip = ip;
   341    // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
   342    // [next_emit, ip_end) after the main loop.
   343    const char* next_emit = ip;
   344  
   345    const size_t kInputMarginBytes = 15;
   346    if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
   347      const char* ip_limit = input + input_size - kInputMarginBytes;
   348  
   349      for (uint32 next_hash = Hash(++ip, shift); ; ) {
   350        assert(next_emit < ip);
   351        // The body of this loop calls EmitLiteral once and then EmitCopy one or
   352        // more times.  (The exception is that when we're close to exhausting
   353        // the input we goto emit_remainder.)
   354        //
   355        // In the first iteration of this loop we're just starting, so
   356        // there's nothing to copy, so calling EmitLiteral once is
   357        // necessary.  And we only start a new iteration when the
   358        // current iteration has determined that a call to EmitLiteral will
   359        // precede the next call to EmitCopy (if any).
   360        //
   361        // Step 1: Scan forward in the input looking for a 4-byte-long match.
   362        // If we get close to exhausting the input then goto emit_remainder.
   363        //
   364        // Heuristic match skipping: If 32 bytes are scanned with no matches
   365        // found, start looking only at every other byte. If 32 more bytes are
   366        // scanned, look at every third byte, etc.. When a match is found,
    367      // scanned, look at every third byte, etc.  When a match is found,
   368        // (~5% performance, ~0.1% density) for compressible data due to more
   369        // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
   370        // win since the compressor quickly "realizes" the data is incompressible
   371        // and doesn't bother looking for matches everywhere.
   372        //
   373        // The "skip" variable keeps track of how many bytes there are since the
    374      // last match; dividing it by 32 (i.e. right-shifting by five) gives the
   375        // number of bytes to move ahead for each iteration.
   376        uint32 skip = 32;
   377  
   378        const char* next_ip = ip;
   379        const char* candidate;
   380        do {
   381          ip = next_ip;
   382          uint32 hash = next_hash;
   383          assert(hash == Hash(ip, shift));
   384          uint32 bytes_between_hash_lookups = skip++ >> 5;
   385          next_ip = ip + bytes_between_hash_lookups;
   386          if (PREDICT_FALSE(next_ip > ip_limit)) {
   387            goto emit_remainder;
   388          }
   389          next_hash = Hash(next_ip, shift);
   390          candidate = base_ip + table[hash];
   391          assert(candidate >= base_ip);
   392          assert(candidate < ip);
   393  
   394          table[hash] = ip - base_ip;
   395        } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
   396                              UNALIGNED_LOAD32(candidate)));
   397  
   398        // Step 2: A 4-byte match has been found.  We'll later see if more
   399        // than 4 bytes match.  But, prior to the match, input
   400        // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
   401        assert(next_emit + 16 <= ip_end);
   402        op = EmitLiteral(op, next_emit, ip - next_emit, true);
   403  
   404        // Step 3: Call EmitCopy, and then see if another EmitCopy could
   405        // be our next move.  Repeat until we find no match for the
   406        // input immediately after what was consumed by the last EmitCopy call.
   407        //
   408        // If we exit this loop normally then we need to call EmitLiteral next,
   409        // though we don't yet know how big the literal will be.  We handle that
   410        // by proceeding to the next iteration of the main loop.  We also can exit
   411        // this loop via goto if we get close to exhausting the input.
   412        EightBytesReference input_bytes;
   413        uint32 candidate_bytes = 0;
   414  
   415        do {
   416          // We have a 4-byte match at ip, and no need to emit any
   417          // "literal bytes" prior to ip.
   418          const char* base = ip;
   419          int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
   420          ip += matched;
   421          size_t offset = base - candidate;
   422          assert(0 == memcmp(base, candidate, matched));
   423          op = EmitCopy(op, offset, matched);
   424          // We could immediately start working at ip now, but to improve
   425          // compression we first update table[Hash(ip - 1, ...)].
   426          const char* insert_tail = ip - 1;
   427          next_emit = ip;
   428          if (PREDICT_FALSE(ip >= ip_limit)) {
   429            goto emit_remainder;
   430          }
   431          input_bytes = GetEightBytesAt(insert_tail);
   432          uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
   433          table[prev_hash] = ip - base_ip - 1;
   434          uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
   435          candidate = base_ip + table[cur_hash];
   436          candidate_bytes = UNALIGNED_LOAD32(candidate);
   437          table[cur_hash] = ip - base_ip;
   438        } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);
   439  
   440        next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
   441        ++ip;
   442      }
   443    }
   444  
   445   emit_remainder:
   446    // Emit the remaining bytes as a literal
   447    if (next_emit < ip_end) {
   448      op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
   449    }
   450  
   451    return op;
   452  }
   453  }  // end namespace internal
   454  
   455  // Signature of output types needed by decompression code.
   456  // The decompression code is templatized on a type that obeys this
   457  // signature so that we do not pay virtual function call overhead in
   458  // the middle of a tight decompression loop.
   459  //
   460  // class DecompressionWriter {
   461  //  public:
   462  //   // Called before decompression
   463  //   void SetExpectedLength(size_t length);
   464  //
   465  //   // Called after decompression
   466  //   bool CheckLength() const;
   467  //
   468  //   // Called repeatedly during decompression
   469  //   bool Append(const char* ip, size_t length);
   470  //   bool AppendFromSelf(uint32 offset, size_t length);
   471  //
   472  //   // The difference between TryFastAppend and Append is that TryFastAppend
   473  //   // is allowed to read up to <available> bytes from the input buffer,
   474  //   // whereas Append is allowed to read <length>.
   475  //   //
   476  //   // Also, TryFastAppend is allowed to return false, declining the append,
   477  //   // without it being a fatal error -- just "return false" would be
   478  //   // a perfectly legal implementation of TryFastAppend. The intention
   479  //   // is for TryFastAppend to allow a fast path in the common case of
   480  //   // a small append.
   481  //   //
    482  //   // NOTE(user): TryFastAppend must always decline (return false)
   483  //   // if <length> is 61 or more, as in this case the literal length is not
   484  //   // decoded fully. In practice, this should not be a big problem,
   485  //   // as it is unlikely that one would implement a fast path accepting
   486  //   // this much data.
   487  //   bool TryFastAppend(const char* ip, size_t available, size_t length);
   488  // };
   489  
   490  // -----------------------------------------------------------------------
   491  // Lookup table for decompression code.  Generated by ComputeTable() below.
   492  // -----------------------------------------------------------------------
   493  
   494  // Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
   495  static const uint32 wordmask[] = {
   496    0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
   497  };
   498  
   499  // Data stored per entry in lookup table:
   500  //      Range   Bits-used       Description
   501  //      ------------------------------------
   502  //      1..64   0..7            Literal/copy length encoded in opcode byte
   503  //      0..7    8..10           Copy offset encoded in opcode byte / 256
   504  //      0..4    11..13          Extra bytes after opcode
   505  //
    506  // We use eight bits for the length even though 7 would have sufficed,
    507  // for two efficiency reasons:
   508  //      (1) Extracting a byte is faster than a bit-field
   509  //      (2) It properly aligns copy offset so we do not need a <<8
   510  static const uint16 char_table[256] = {
   511    0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
   512    0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
   513    0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
   514    0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
   515    0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
   516    0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
   517    0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
   518    0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
   519    0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
   520    0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
   521    0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
   522    0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
   523    0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
   524    0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
   525    0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
   526    0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
   527    0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
   528    0x0023, 0x0c06, 0x1023, 0x2023, 0x0024, 0x0c07, 0x1024, 0x2024,
   529    0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
   530    0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
   531    0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
   532    0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
   533    0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
   534    0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
   535    0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
   536    0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
   537    0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
   538    0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
   539    0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
   540    0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
   541    0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
   542    0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
   543  };
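         // Illustrative readings of the table above, using the bit layout in the
         // preceding comment (bits 0..7 length, 8..10 offset/256, 11..13 extra
         // bytes):
         //
         //   char_table[0x00] == 0x0001: LITERAL, length 1, no extra bytes.
         //   char_table[0x19] == 0x080a: COPY_1_BYTE_OFFSET, length 10, one extra
         //                               byte holding offset % 256.
         //   char_table[0xf0] == 0x0801: long LITERAL, one extra byte encodes the
         //                               literal length minus one.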
   544  
   545  // In debug mode, allow optional computation of the table at startup.
   546  // Also, check that the decompression table is correct.
   547  #ifndef NDEBUG
   548  DEFINE_bool(snappy_dump_decompression_table, false,
   549              "If true, we print the decompression table at startup.");
   550  
   551  static uint16 MakeEntry(unsigned int extra,
   552                          unsigned int len,
   553                          unsigned int copy_offset) {
   554    // Check that all of the fields fit within the allocated space
   555    assert(extra       == (extra & 0x7));          // At most 3 bits
   556    assert(copy_offset == (copy_offset & 0x7));    // At most 3 bits
   557    assert(len         == (len & 0x7f));           // At most 7 bits
   558    return len | (copy_offset << 8) | (extra << 11);
   559  }
   560  
   561  static void ComputeTable() {
   562    uint16 dst[256];
   563  
   564    // Place invalid entries in all places to detect missing initialization
   565    int assigned = 0;
   566    for (int i = 0; i < 256; i++) {
   567      dst[i] = 0xffff;
   568    }
   569  
   570    // Small LITERAL entries.  We store (len-1) in the top 6 bits.
   571    for (unsigned int len = 1; len <= 60; len++) {
   572      dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
   573      assigned++;
   574    }
   575  
   576    // Large LITERAL entries.  We use 60..63 in the high 6 bits to
   577    // encode the number of bytes of length info that follow the opcode.
   578    for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
   579      // We set the length field in the lookup table to 1 because extra
   580      // bytes encode len-1.
   581      dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
   582      assigned++;
   583    }
   584  
   585    // COPY_1_BYTE_OFFSET.
   586    //
   587    // The tag byte in the compressed data stores len-4 in 3 bits, and
   588    // offset/256 in 5 bits.  offset%256 is stored in the next byte.
   589    //
   590    // This format is used for length in range [4..11] and offset in
   591    // range [0..2047]
   592    for (unsigned int len = 4; len < 12; len++) {
   593      for (unsigned int offset = 0; offset < 2048; offset += 256) {
   594        dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
   595          MakeEntry(1, len, offset>>8);
   596        assigned++;
   597      }
   598    }
   599  
   600    // COPY_2_BYTE_OFFSET.
   601    // Tag contains len-1 in top 6 bits, and offset in next two bytes.
   602    for (unsigned int len = 1; len <= 64; len++) {
   603      dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
   604      assigned++;
   605    }
   606  
   607    // COPY_4_BYTE_OFFSET.
    608    // Tag contains len-1 in top 6 bits, and offset in next four bytes.
   609    for (unsigned int len = 1; len <= 64; len++) {
   610      dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
   611      assigned++;
   612    }
   613  
   614    // Check that each entry was initialized exactly once.
   615    if (assigned != 256) {
   616      fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
   617      abort();
   618    }
   619    for (int i = 0; i < 256; i++) {
   620      if (dst[i] == 0xffff) {
   621        fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
   622        abort();
   623      }
   624    }
   625  
   626    if (FLAGS_snappy_dump_decompression_table) {
   627      printf("static const uint16 char_table[256] = {\n  ");
   628      for (int i = 0; i < 256; i++) {
   629        printf("0x%04x%s",
   630               dst[i],
   631               ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
   632      }
   633      printf("};\n");
   634    }
   635  
   636    // Check that computed table matched recorded table
   637    for (int i = 0; i < 256; i++) {
   638      if (dst[i] != char_table[i]) {
   639        fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
   640                i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
   641        abort();
   642      }
   643    }
   644  }
   645  #endif /* !NDEBUG */
   646  
   647  // Helper class for decompression
   648  class SnappyDecompressor {
   649   private:
   650    Source*       reader_;         // Underlying source of bytes to decompress
   651    const char*   ip_;             // Points to next buffered byte
   652    const char*   ip_limit_;       // Points just past buffered bytes
   653    uint32        peeked_;         // Bytes peeked from reader (need to skip)
   654    bool          eof_;            // Hit end of input without an error?
    655    char          scratch_[5];     // Temporary buffer used by RefillTag()
   656  
   657    // Ensure that all of the tag metadata for the next tag is available
   658    // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
   659    // if (ip_limit_ - ip_ < 5).
   660    //
   661    // Returns true on success, false on error or end of input.
   662    bool RefillTag();
   663  
   664   public:
   665    explicit SnappyDecompressor(Source* reader)
   666        : reader_(reader),
   667          ip_(NULL),
   668          ip_limit_(NULL),
   669          peeked_(0),
   670          eof_(false) {
   671    }
   672  
   673    ~SnappyDecompressor() {
   674      // Advance past any bytes we peeked at from the reader
   675      reader_->Skip(peeked_);
   676    }
   677  
   678    // Returns true iff we have hit the end of the input without an error.
   679    bool eof() const {
   680      return eof_;
   681    }
   682  
   683    // Read the uncompressed length stored at the start of the compressed data.
    684    // On success, stores the length in *result and returns true.
   685    // On failure, returns false.
   686    bool ReadUncompressedLength(uint32* result) {
   687      assert(ip_ == NULL);       // Must not have read anything yet
   688      // Length is encoded in 1..5 bytes
   689      *result = 0;
   690      uint32 shift = 0;
   691      while (true) {
   692        if (shift >= 32) return false;
   693        size_t n;
   694        const char* ip = reader_->Peek(&n);
   695        if (n == 0) return false;
   696        const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
   697        reader_->Skip(1);
   698        *result |= static_cast<uint32>(c & 0x7f) << shift;
   699        if (c < 128) {
   700          break;
   701        }
   702        shift += 7;
   703      }
   704      return true;
   705    }
   706  
    707    // Process all tags found in the input, writing output via "writer".
    708    // Returns when the input is exhausted or the writer reports an error.
   709    template <class Writer>
   710    void DecompressAllTags(Writer* writer) {
   711      const char* ip = ip_;
   712  
   713      // We could have put this refill fragment only at the beginning of the loop.
   714      // However, duplicating it at the end of each branch gives the compiler more
   715      // scope to optimize the <ip_limit_ - ip> expression based on the local
   716      // context, which overall increases speed.
   717      #define MAYBE_REFILL() \
   718          if (ip_limit_ - ip < 5) { \
   719            ip_ = ip; \
   720            if (!RefillTag()) return; \
   721            ip = ip_; \
   722          }
   723  
   724      MAYBE_REFILL();
   725      for ( ;; ) {
   726        const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));
   727  
   728        if ((c & 0x3) == LITERAL) {
   729          size_t literal_length = (c >> 2) + 1u;
   730          if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
   731            assert(literal_length < 61);
   732            ip += literal_length;
   733            MAYBE_REFILL();
   734            continue;
   735          }
   736          if (PREDICT_FALSE(literal_length >= 61)) {
   737            // Long literal.
   738            const size_t literal_length_length = literal_length - 60;
   739            literal_length =
   740                (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
   741            ip += literal_length_length;
   742          }
   743  
   744          size_t avail = ip_limit_ - ip;
   745          while (avail < literal_length) {
   746            if (!writer->Append(ip, avail)) return;
   747            literal_length -= avail;
   748            reader_->Skip(peeked_);
   749            size_t n;
   750            ip = reader_->Peek(&n);
   751            avail = n;
   752            peeked_ = avail;
   753            if (avail == 0) return;  // Premature end of input
   754            ip_limit_ = ip + avail;
   755          }
   756          if (!writer->Append(ip, literal_length)) {
   757            return;
   758          }
   759          ip += literal_length;
   760          MAYBE_REFILL();
   761        } else {
   762          const uint32 entry = char_table[c];
   763          const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
   764          const uint32 length = entry & 0xff;
   765          ip += entry >> 11;
   766  
   767          // copy_offset/256 is encoded in bits 8..10.  By just fetching
   768          // those bits, we get copy_offset (since the bit-field starts at
   769          // bit 8).
   770          const uint32 copy_offset = entry & 0x700;
   771          if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
   772            return;
   773          }
   774          MAYBE_REFILL();
   775        }
   776      }
   777  
   778  #undef MAYBE_REFILL
   779    }
   780  };
   781  
   782  bool SnappyDecompressor::RefillTag() {
   783    const char* ip = ip_;
   784    if (ip == ip_limit_) {
   785      // Fetch a new fragment from the reader
   786      reader_->Skip(peeked_);   // All peeked bytes are used up
   787      size_t n;
   788      ip = reader_->Peek(&n);
   789      peeked_ = n;
   790      if (n == 0) {
   791        eof_ = true;
   792        return false;
   793      }
   794      ip_limit_ = ip + n;
   795    }
   796  
   797    // Read the tag character
   798    assert(ip < ip_limit_);
   799    const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
   800    const uint32 entry = char_table[c];
   801    const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
   802    assert(needed <= sizeof(scratch_));
   803  
   804    // Read more bytes from reader if needed
   805    uint32 nbuf = ip_limit_ - ip;
   806    if (nbuf < needed) {
   807      // Stitch together bytes from ip and reader to form the word
   808      // contents.  We store the needed bytes in "scratch_".  They
   809      // will be consumed immediately by the caller since we do not
   810      // read more than we need.
   811      memmove(scratch_, ip, nbuf);
   812      reader_->Skip(peeked_);  // All peeked bytes are used up
   813      peeked_ = 0;
   814      while (nbuf < needed) {
   815        size_t length;
   816        const char* src = reader_->Peek(&length);
   817        if (length == 0) return false;
   818        uint32 to_add = min<uint32>(needed - nbuf, length);
   819        memcpy(scratch_ + nbuf, src, to_add);
   820        nbuf += to_add;
   821        reader_->Skip(to_add);
   822      }
   823      assert(nbuf == needed);
   824      ip_ = scratch_;
   825      ip_limit_ = scratch_ + needed;
   826    } else if (nbuf < 5) {
   827      // Have enough bytes, but move into scratch_ so that we do not
   828      // read past end of input
   829      memmove(scratch_, ip, nbuf);
   830      reader_->Skip(peeked_);  // All peeked bytes are used up
   831      peeked_ = 0;
   832      ip_ = scratch_;
   833      ip_limit_ = scratch_ + nbuf;
   834    } else {
   835      // Pass pointer to buffer returned by reader_.
   836      ip_ = ip;
   837    }
   838    return true;
   839  }
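         // Illustrative RefillTag scenario: a COPY_2_BYTE_OFFSET tag needs
         // needed == 3 bytes (tag + 2 offset bytes).  If the current fragment has
         // only the tag byte left, nbuf == 1 < 3, so that byte is moved into
         // scratch_, two more bytes are pulled from the reader, and ip_ is
         // pointed at scratch_ so DecompressAllTags can read the whole tag
         // contiguously.  Since needed is at most 1 + 4, the 5-byte scratch_
         // always suffices.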
   840  
   841  template <typename Writer>
   842  static bool InternalUncompress(Source* r,
   843                                 Writer* writer,
   844                                 uint32 max_len) {
   845    // Read the uncompressed length from the front of the compressed input
   846    SnappyDecompressor decompressor(r);
   847    uint32 uncompressed_len = 0;
   848    if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
   849    return InternalUncompressAllTags(
   850        &decompressor, writer, uncompressed_len, max_len);
   851  }
   852  
   853  template <typename Writer>
   854  static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
   855                                        Writer* writer,
   856                                        uint32 uncompressed_len,
   857                                        uint32 max_len) {
   858    // Protect against possible DoS attack
   859    if (static_cast<uint64>(uncompressed_len) > max_len) {
   860      return false;
   861    }
   862  
   863    writer->SetExpectedLength(uncompressed_len);
   864  
   865    // Process the entire input
   866    decompressor->DecompressAllTags(writer);
   867    return (decompressor->eof() && writer->CheckLength());
   868  }
   869  
   870  bool GetUncompressedLength(Source* source, uint32* result) {
   871    SnappyDecompressor decompressor(source);
   872    return decompressor.ReadUncompressedLength(result);
   873  }
   874  
   875  size_t Compress(Source* reader, Sink* writer) {
   876    size_t written = 0;
   877    size_t N = reader->Available();
   878    char ulength[Varint::kMax32];
   879    char* p = Varint::Encode32(ulength, N);
   880    writer->Append(ulength, p-ulength);
   881    written += (p - ulength);
   882  
   883    internal::WorkingMemory wmem;
   884    char* scratch = NULL;
   885    char* scratch_output = NULL;
   886  
   887    while (N > 0) {
   888      // Get next block to compress (without copying if possible)
   889      size_t fragment_size;
   890      const char* fragment = reader->Peek(&fragment_size);
   891      assert(fragment_size != 0);  // premature end of input
   892      const size_t num_to_read = min(N, kBlockSize);
   893      size_t bytes_read = fragment_size;
   894  
   895      size_t pending_advance = 0;
   896      if (bytes_read >= num_to_read) {
   897        // Buffer returned by reader is large enough
   898        pending_advance = num_to_read;
   899        fragment_size = num_to_read;
   900      } else {
   901        // Read into scratch buffer
   902        if (scratch == NULL) {
   903          // If this is the last iteration, we want to allocate N bytes
   904          // of space, otherwise the max possible kBlockSize space.
   905          // num_to_read contains exactly the correct value
   906          scratch = new char[num_to_read];
   907        }
   908        memcpy(scratch, fragment, bytes_read);
   909        reader->Skip(bytes_read);
   910  
   911        while (bytes_read < num_to_read) {
   912          fragment = reader->Peek(&fragment_size);
   913          size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
   914          memcpy(scratch + bytes_read, fragment, n);
   915          bytes_read += n;
   916          reader->Skip(n);
   917        }
   918        assert(bytes_read == num_to_read);
   919        fragment = scratch;
   920        fragment_size = num_to_read;
   921      }
   922      assert(fragment_size == num_to_read);
   923  
   924      // Get encoding table for compression
   925      int table_size;
   926      uint16* table = wmem.GetHashTable(num_to_read, &table_size);
   927  
   928      // Compress input_fragment and append to dest
   929      const int max_output = MaxCompressedLength(num_to_read);
   930  
   931      // Need a scratch buffer for the output, in case the byte sink doesn't
   932      // have room for us directly.
   933      if (scratch_output == NULL) {
   934        scratch_output = new char[max_output];
   935      } else {
   936        // Since we encode kBlockSize regions followed by a region
   937        // which is <= kBlockSize in length, a previously allocated
   938        // scratch_output[] region is big enough for this iteration.
   939      }
   940      char* dest = writer->GetAppendBuffer(max_output, scratch_output);
   941      char* end = internal::CompressFragment(fragment, fragment_size,
   942                                             dest, table, table_size);
   943      writer->Append(dest, end - dest);
   944      written += (end - dest);
   945  
   946      N -= num_to_read;
   947      reader->Skip(pending_advance);
   948    }
   949  
   950    delete[] scratch;
   951    delete[] scratch_output;
   952  
   953    return written;
   954  }
   955  
   956  // -----------------------------------------------------------------------
   957  // Flat array interfaces
   958  // -----------------------------------------------------------------------
   959  
   960  // A type that writes to a flat array.
   961  // Note that this is not a "ByteSink", but a type that matches the
   962  // Writer template argument to SnappyDecompressor::DecompressAllTags().
   963  class SnappyArrayWriter {
   964   private:
   965    char* base_;
   966    char* op_;
   967    char* op_limit_;
   968  
   969   public:
   970    inline explicit SnappyArrayWriter(char* dst)
   971        : base_(dst),
   972          op_(dst) {
   973    }
   974  
   975    inline void SetExpectedLength(size_t len) {
   976      op_limit_ = op_ + len;
   977    }
   978  
   979    inline bool CheckLength() const {
   980      return op_ == op_limit_;
   981    }
   982  
   983    inline bool Append(const char* ip, size_t len) {
   984      char* op = op_;
   985      const size_t space_left = op_limit_ - op;
   986      if (space_left < len) {
   987        return false;
   988      }
   989      memcpy(op, ip, len);
   990      op_ = op + len;
   991      return true;
   992    }
   993  
   994    inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
   995      char* op = op_;
   996      const size_t space_left = op_limit_ - op;
   997      if (len <= 16 && available >= 16 && space_left >= 16) {
   998        // Fast path, used for the majority (about 95%) of invocations.
   999        UnalignedCopy64(ip, op);
  1000        UnalignedCopy64(ip + 8, op + 8);
  1001        op_ = op + len;
  1002        return true;
  1003      } else {
  1004        return false;
  1005      }
  1006    }
  1007  
  1008    inline bool AppendFromSelf(size_t offset, size_t len) {
  1009      char* op = op_;
  1010      const size_t space_left = op_limit_ - op;
  1011  
  1012      if (op - base_ <= offset - 1u) {  // -1u catches offset==0
  1013        return false;
  1014      }
  1015      if (len <= 16 && offset >= 8 && space_left >= 16) {
  1016        // Fast path, used for the majority (70-80%) of dynamic invocations.
  1017        UnalignedCopy64(op - offset, op);
  1018        UnalignedCopy64(op - offset + 8, op + 8);
  1019      } else {
  1020        if (space_left >= len + kMaxIncrementCopyOverflow) {
  1021          IncrementalCopyFastPath(op - offset, op, len);
  1022        } else {
  1023          if (space_left < len) {
  1024            return false;
  1025          }
  1026          IncrementalCopy(op - offset, op, len);
  1027        }
  1028      }
  1029  
  1030      op_ = op + len;
  1031      return true;
  1032    }
  1033  };
  1034  
  1035  bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
  1036    ByteArraySource reader(compressed, n);
  1037    return RawUncompress(&reader, uncompressed);
  1038  }
  1039  
  1040  bool RawUncompress(Source* compressed, char* uncompressed) {
  1041    SnappyArrayWriter output(uncompressed);
  1042    return InternalUncompress(compressed, &output, kuint32max);
  1043  }
  1044  
  1045  bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
  1046    size_t ulength;
  1047    if (!GetUncompressedLength(compressed, n, &ulength)) {
  1048      return false;
  1049    }
  1050    // Protect against possible DoS attack
  1051    if ((static_cast<uint64>(ulength) + uncompressed->size()) >
  1052        uncompressed->max_size()) {
  1053      return false;
  1054    }
  1055    STLStringResizeUninitialized(uncompressed, ulength);
  1056    return RawUncompress(compressed, n, string_as_array(uncompressed));
  1057  }
  1058  
  1059  
  1060  // A Writer that drops everything on the floor and just does validation
  1061  class SnappyDecompressionValidator {
  1062   private:
  1063    size_t expected_;
  1064    size_t produced_;
  1065  
  1066   public:
  1067    inline SnappyDecompressionValidator() : produced_(0) { }
  1068    inline void SetExpectedLength(size_t len) {
  1069      expected_ = len;
  1070    }
  1071    inline bool CheckLength() const {
  1072      return expected_ == produced_;
  1073    }
  1074    inline bool Append(const char* ip, size_t len) {
  1075      produced_ += len;
  1076      return produced_ <= expected_;
  1077    }
  1078    inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
  1079      return false;
  1080    }
  1081    inline bool AppendFromSelf(size_t offset, size_t len) {
  1082      if (produced_ <= offset - 1u) return false;  // -1u catches offset==0
  1083      produced_ += len;
  1084      return produced_ <= expected_;
  1085    }
  1086  };
  1087  
  1088  bool IsValidCompressedBuffer(const char* compressed, size_t n) {
  1089    ByteArraySource reader(compressed, n);
  1090    SnappyDecompressionValidator writer;
  1091    return InternalUncompress(&reader, &writer, kuint32max);
  1092  }
  1093  
  1094  void RawCompress(const char* input,
  1095                   size_t input_length,
  1096                   char* compressed,
  1097                   size_t* compressed_length) {
  1098    ByteArraySource reader(input, input_length);
  1099    UncheckedByteArraySink writer(compressed);
  1100    Compress(&reader, &writer);
  1101  
  1102    // Compute how many bytes were added
  1103    *compressed_length = (writer.CurrentDestination() - compressed);
  1104  }
  1105  
  1106  size_t Compress(const char* input, size_t input_length, string* compressed) {
  1107    // Pre-grow the buffer to the max length of the compressed output
  1108    compressed->resize(MaxCompressedLength(input_length));
  1109  
  1110    size_t compressed_length;
  1111    RawCompress(input, input_length, string_as_array(compressed),
  1112                &compressed_length);
  1113    compressed->resize(compressed_length);
  1114    return compressed_length;
  1115  }
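         // Typical round-trip through the string-based API above (a sketch;
         // error handling omitted):
         //
         //   string original = ...;
         //   string compressed, restored;
         //   snappy::Compress(original.data(), original.size(), &compressed);
         //   if (snappy::Uncompress(compressed.data(), compressed.size(),
         //                          &restored)) {
         //     assert(restored == original);
         //   }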
  1116  
  1117  
  1118  } // end namespace snappy
  1119