github.com/cellofellow/gopkg@v0.0.0-20140722061823-eec0544a62ad/image/rawp/librawp/src/snappy/rawp-snappy.cc

// Copyright 2005 Google Inc. All Rights Reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "rawp-snappy.h"
#include "rawp-snappy-internal.h"
#include "rawp-snappy-sinksource.h"

#include <stdio.h>

#include <algorithm>
#include <string>
#include <vector>


namespace rawp {
namespace snappy {

// Any hash function will produce a valid compressed bitstream, but a good
// hash function reduces the number of collisions and thus yields better
// compression for compressible input, and more speed for incompressible
// input. Of course, it doesn't hurt if the hash function is reasonably fast
// either, as it gets called a lot.
static inline uint32 HashBytes(uint32 bytes, int shift) {
  uint32 kMul = 0x1e35a7bd;
  return (bytes * kMul) >> shift;
}
static inline uint32 Hash(const char* p, int shift) {
  return HashBytes(UNALIGNED_LOAD32(p), shift);
}

size_t MaxCompressedLength(size_t source_len) {
  // Compressed data can be defined as:
  //    compressed := item* literal*
  //    item       := literal* copy
  //
  // The trailing literal sequence has a space blowup of at most 62/60
  // since a literal of length 60 needs one tag byte + one extra byte
  // for length information.
  //
  // Item blowup is trickier to measure. Suppose the "copy" op copies
  // 4 bytes of data. Because of a special check in the encoding code,
  // we produce a 4-byte copy only if the offset is < 65536. Therefore
  // the copy op takes 3 bytes to encode, and this type of item leads
  // to at most the 62/60 blowup for representing literals.
  //
  // Suppose the "copy" op copies 5 bytes of data. If the offset is big
  // enough, it will take 5 bytes to encode the copy op. Therefore the
  // worst case here is a one-byte literal followed by a five-byte copy.
  // I.e., 6 bytes of input turn into 7 bytes of "compressed" data.
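  // (Worked instance of that case, for illustration: the one-byte literal
  // costs a tag byte plus one data byte, and a five-byte copy with a large
  // offset is a five-byte COPY_4_BYTE_OFFSET op, i.e. 2 + 5 = 7 output bytes
  // for 6 input bytes.)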
  //
  // This last factor dominates the blowup, so the final estimate is:
  return 32 + source_len + source_len/6;
}

enum {
  LITERAL = 0,
  COPY_1_BYTE_OFFSET = 1,  // 3 bit length + 3 bits of offset in opcode
  COPY_2_BYTE_OFFSET = 2,
  COPY_4_BYTE_OFFSET = 3
};

// Copy "len" bytes from "src" to "op", one byte at a time.  Used for
// handling COPY operations where the input and output regions may
// overlap.  For example, suppose:
//    src    == "ab"
//    op     == src + 2
//    len    == 20
// After IncrementalCopy(src, op, len), the result will have
// eleven copies of "ab"
//    ababababababababababab
// Note that this does not match the semantics of either memcpy()
// or memmove().
static inline void IncrementalCopy(const char* src, char* op, int len) {
  assert(len > 0);
  do {
    *op++ = *src++;
  } while (--len > 0);
}

// Equivalent to IncrementalCopy except that it can write up to ten extra
// bytes after the end of the copy, and that it is faster.
//
// The main part of this loop is a simple copy of eight bytes at a time until
// we've copied (at least) the requested amount of bytes.  However, if op and
// src are less than eight bytes apart (indicating a repeating pattern of
// length < 8), we first need to expand the pattern in order to get the correct
// results. For instance, if the buffer looks like this, with the eight-byte
// <src> and <op> patterns marked as intervals:
//
//    abxxxxxxxxxxxx
//    [------]           src
//      [------]         op
//
// a single eight-byte copy from <src> to <op> will repeat the pattern once,
// after which we can move <op> two bytes without moving <src>:
//
//    ababxxxxxxxxxx
//    [------]           src
//        [------]       op
//
// and repeat the exercise until the two no longer overlap.
//
// This allows us to do very well in the special case of one single byte
// repeated many times, without taking a big hit for more general cases.
//
// The worst case of extra writing past the end of the match occurs when
// op - src == 1 and len == 1; the last copy will read from byte positions
// [0..7] and write to [4..11], whereas it was only supposed to write to
// position 1. Thus, ten excess bytes.

namespace {

const int kMaxIncrementCopyOverflow = 10;

}  // namespace

static inline void IncrementalCopyFastPath(const char* src, char* op, int len) {
  while (op - src < 8) {
    UnalignedCopy64(src, op);
    len -= op - src;
    op += op - src;
  }
  while (len > 0) {
    UnalignedCopy64(src, op);
    src += 8;
    op += 8;
    len -= 8;
  }
}

static inline char* EmitLiteral(char* op,
                                const char* literal,
                                int len,
                                bool allow_fast_path) {
  int n = len - 1;      // Zero-length literals are disallowed
  if (n < 60) {
    // Fits in tag byte
    *op++ = LITERAL | (n << 2);

    // The vast majority of copies are below 16 bytes, for which a
    // call to memcpy is overkill. This fast path can sometimes
    // copy up to 15 bytes too much, but that is okay in the
    // main loop, since we have a bit to go on for both sides:
    //
    //   - The input will always have kInputMarginBytes = 15 extra
    //     available bytes, as long as we're in the main loop, and
    //     if not, allow_fast_path = false.
    //   - The output will always have 32 spare bytes (see
    //     MaxCompressedLength).
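    // (Illustrative worst case: a one-byte literal still copies 16 bytes on
    // this path, i.e. 15 bytes more than needed, which the two margins above
    // absorb.)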
    if (allow_fast_path && len <= 16) {
      UnalignedCopy64(literal, op);
      UnalignedCopy64(literal + 8, op + 8);
      return op + len;
    }
  } else {
    // Encode in upcoming bytes
    char* base = op;
    int count = 0;
    op++;
    while (n > 0) {
      *op++ = n & 0xff;
      n >>= 8;
      count++;
    }
    assert(count >= 1);
    assert(count <= 4);
    *base = LITERAL | ((59+count) << 2);
  }
  memcpy(op, literal, len);
  return op + len;
}

static inline char* EmitCopyLessThan64(char* op, size_t offset, int len) {
  assert(len <= 64);
  assert(len >= 4);
  assert(offset < 65536);

  if ((len < 12) && (offset < 2048)) {
    size_t len_minus_4 = len - 4;
    assert(len_minus_4 < 8);            // Must fit in 3 bits
    *op++ = COPY_1_BYTE_OFFSET + ((len_minus_4) << 2) + ((offset >> 8) << 5);
    *op++ = offset & 0xff;
  } else {
    *op++ = COPY_2_BYTE_OFFSET + ((len-1) << 2);
    LittleEndian::Store16(op, offset);
    op += 2;
  }
  return op;
}

static inline char* EmitCopy(char* op, size_t offset, int len) {
  // Emit 64-byte copies but make sure to keep at least four bytes reserved
  while (len >= 68) {
    op = EmitCopyLessThan64(op, offset, 64);
    len -= 64;
  }

  // Emit an extra 60-byte copy if we have too much data to fit in one copy
  if (len > 64) {
    op = EmitCopyLessThan64(op, offset, 60);
    len -= 60;
  }

  // Emit remainder
  op = EmitCopyLessThan64(op, offset, len);
  return op;
}


bool GetUncompressedLength(const char* start, size_t n, size_t* result) {
  uint32 v = 0;
  const char* limit = start + n;
  if (Varint::Parse32WithLimit(start, limit, &v) != NULL) {
    *result = v;
    return true;
  } else {
    return false;
  }
}

namespace internal {
uint16* WorkingMemory::GetHashTable(size_t input_size, int* table_size) {
  // Use smaller hash table when input.size() is smaller, since we
  // fill the table, incurring O(hash table size) overhead for
  // compression, and if the input is short, we won't need that
  // many hash table entries anyway.
  assert(kMaxHashTableSize >= 256);
  size_t htsize = 256;
  while (htsize < kMaxHashTableSize && htsize < input_size) {
    htsize <<= 1;
  }

  uint16* table;
  if (htsize <= ARRAYSIZE(small_table_)) {
    table = small_table_;
  } else {
    if (large_table_ == NULL) {
      large_table_ = new uint16[kMaxHashTableSize];
    }
    table = large_table_;
  }

  *table_size = htsize;
  memset(table, 0, htsize * sizeof(*table));
  return table;
}
}  // end namespace internal

// For 0 <= offset <= 4, GetUint32AtOffset(GetEightBytesAt(p), offset) will
// equal UNALIGNED_LOAD32(p + offset).  Motivation: On x86-64 hardware we have
// empirically found that overlapping loads such as
//  UNALIGNED_LOAD32(p) ... UNALIGNED_LOAD32(p+1) ... UNALIGNED_LOAD32(p+2)
// are slower than UNALIGNED_LOAD64(p) followed by shifts and casts to uint32.
//
// We have different versions for 64- and 32-bit; ideally we would avoid the
// two functions and just inline the UNALIGNED_LOAD64 call into
// GetUint32AtOffset, but GCC (at least as of 4.6) is seemingly not clever
// enough to avoid loading the value multiple times then. For 64-bit, the load
// is done when GetEightBytesAt() is called, whereas for 32-bit, the load is
// done at GetUint32AtOffset() time.
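//
// (Illustrative consequence of the contract above: on a little-endian
// machine, GetUint32AtOffset(GetEightBytesAt(p), 2) yields the 32-bit value
// formed by bytes p[2..5], exactly as UNALIGNED_LOAD32(p + 2) would.)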

#ifdef ARCH_K8

typedef uint64 EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return UNALIGNED_LOAD64(ptr);
}

static inline uint32 GetUint32AtOffset(uint64 v, int offset) {
  assert(offset >= 0);
  assert(offset <= 4);
  return v >> (LittleEndian::IsLittleEndian() ? 8 * offset : 32 - 8 * offset);
}

#else

typedef const char* EightBytesReference;

static inline EightBytesReference GetEightBytesAt(const char* ptr) {
  return ptr;
}

static inline uint32 GetUint32AtOffset(const char* v, int offset) {
  assert(offset >= 0);
  assert(offset <= 4);
  return UNALIGNED_LOAD32(v + offset);
}

#endif

// Flat array compression that does not emit the "uncompressed length"
// prefix. Compresses "input" string to the "*op" buffer.
//
// REQUIRES: "input" is at most "kBlockSize" bytes long.
// REQUIRES: "op" points to an array of memory that is at least
// "MaxCompressedLength(input.size())" in size.
// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
// REQUIRES: "table_size" is a power of two
//
// Returns an "end" pointer into "op" buffer.
// "end - op" is the compressed size of "input".
namespace internal {
char* CompressFragment(const char* input,
                       size_t input_size,
                       char* op,
                       uint16* table,
                       const int table_size) {
  // "ip" is the input pointer, and "op" is the output pointer.
  const char* ip = input;
  assert(input_size <= kBlockSize);
  assert((table_size & (table_size - 1)) == 0);  // table must be power of two
  const int shift = 32 - Bits::Log2Floor(table_size);
  assert(static_cast<int>(kuint32max >> shift) == table_size - 1);
  const char* ip_end = input + input_size;
  const char* base_ip = ip;
  // Bytes in [next_emit, ip) will be emitted as literal bytes.  Or
  // [next_emit, ip_end) after the main loop.
  const char* next_emit = ip;

  const size_t kInputMarginBytes = 15;
  if (PREDICT_TRUE(input_size >= kInputMarginBytes)) {
    const char* ip_limit = input + input_size - kInputMarginBytes;

    for (uint32 next_hash = Hash(++ip, shift); ; ) {
      assert(next_emit < ip);
      // The body of this loop calls EmitLiteral once and then EmitCopy one or
      // more times.  (The exception is that when we're close to exhausting
      // the input we goto emit_remainder.)
      //
      // In the first iteration of this loop we're just starting, so
      // there's nothing to copy, so calling EmitLiteral once is
      // necessary.  And we only start a new iteration when the
      // current iteration has determined that a call to EmitLiteral will
      // precede the next call to EmitCopy (if any).
      //
      // Step 1: Scan forward in the input looking for a 4-byte-long match.
      // If we get close to exhausting the input then goto emit_remainder.
      //
      // Heuristic match skipping: If 32 bytes are scanned with no matches
      // found, start looking only at every other byte. If 32 more bytes are
      // scanned, look at every third byte, etc. When a match is found,
      // immediately go back to looking at every byte. This is a small loss
      // (~5% performance, ~0.1% density) for compressible data due to more
      // bookkeeping, but for non-compressible data (such as JPEG) it's a huge
      // win since the compressor quickly "realizes" the data is incompressible
      // and doesn't bother looking for matches everywhere.
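      // (Illustrative: since "skip" starts at 32 and is incremented once per
      // probe, the first 32 probes advance one byte each, the next 32 advance
      // two bytes each, and so on.)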
      //
      // The "skip" variable keeps track of how many bytes there are since the
      // last match; dividing it by 32 (i.e. right-shifting by five) gives the
      // number of bytes to move ahead for each iteration.
      uint32 skip = 32;

      const char* next_ip = ip;
      const char* candidate;
      do {
        ip = next_ip;
        uint32 hash = next_hash;
        assert(hash == Hash(ip, shift));
        uint32 bytes_between_hash_lookups = skip++ >> 5;
        next_ip = ip + bytes_between_hash_lookups;
        if (PREDICT_FALSE(next_ip > ip_limit)) {
          goto emit_remainder;
        }
        next_hash = Hash(next_ip, shift);
        candidate = base_ip + table[hash];
        assert(candidate >= base_ip);
        assert(candidate < ip);

        table[hash] = ip - base_ip;
      } while (PREDICT_TRUE(UNALIGNED_LOAD32(ip) !=
                            UNALIGNED_LOAD32(candidate)));

      // Step 2: A 4-byte match has been found.  We'll later see if more
      // than 4 bytes match.  But, prior to the match, input
      // bytes [next_emit, ip) are unmatched.  Emit them as "literal bytes."
      assert(next_emit + 16 <= ip_end);
      op = EmitLiteral(op, next_emit, ip - next_emit, true);

      // Step 3: Call EmitCopy, and then see if another EmitCopy could
      // be our next move.  Repeat until we find no match for the
      // input immediately after what was consumed by the last EmitCopy call.
      //
      // If we exit this loop normally then we need to call EmitLiteral next,
      // though we don't yet know how big the literal will be.  We handle that
      // by proceeding to the next iteration of the main loop.  We also can
      // exit this loop via goto if we get close to exhausting the input.
      EightBytesReference input_bytes;
      uint32 candidate_bytes = 0;

      do {
        // We have a 4-byte match at ip, and no need to emit any
        // "literal bytes" prior to ip.
        const char* base = ip;
        int matched = 4 + FindMatchLength(candidate + 4, ip + 4, ip_end);
        ip += matched;
        size_t offset = base - candidate;
        assert(0 == memcmp(base, candidate, matched));
        op = EmitCopy(op, offset, matched);
        // We could immediately start working at ip now, but to improve
        // compression we first update table[Hash(ip - 1, ...)].
        const char* insert_tail = ip - 1;
        next_emit = ip;
        if (PREDICT_FALSE(ip >= ip_limit)) {
          goto emit_remainder;
        }
        input_bytes = GetEightBytesAt(insert_tail);
        uint32 prev_hash = HashBytes(GetUint32AtOffset(input_bytes, 0), shift);
        table[prev_hash] = ip - base_ip - 1;
        uint32 cur_hash = HashBytes(GetUint32AtOffset(input_bytes, 1), shift);
        candidate = base_ip + table[cur_hash];
        candidate_bytes = UNALIGNED_LOAD32(candidate);
        table[cur_hash] = ip - base_ip;
      } while (GetUint32AtOffset(input_bytes, 1) == candidate_bytes);

      next_hash = HashBytes(GetUint32AtOffset(input_bytes, 2), shift);
      ++ip;
    }
  }

 emit_remainder:
  // Emit the remaining bytes as a literal
  if (next_emit < ip_end) {
    op = EmitLiteral(op, next_emit, ip_end - next_emit, false);
  }

  return op;
}
}  // end namespace internal

// Signature of output types needed by decompression code.
// The decompression code is templatized on a type that obeys this
// signature so that we do not pay virtual function call overhead in
// the middle of a tight decompression loop.
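//
// (Two such writer types appear later in this file: SnappyArrayWriter, which
// writes the decompressed bytes into a flat array, and
// SnappyDecompressionValidator, which discards the bytes and only checks
// lengths and offsets.)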
//
// class DecompressionWriter {
//  public:
//   // Called before decompression
//   void SetExpectedLength(size_t length);
//
//   // Called after decompression
//   bool CheckLength() const;
//
//   // Called repeatedly during decompression
//   bool Append(const char* ip, size_t length);
//   bool AppendFromSelf(uint32 offset, size_t length);
//
//   // The difference between TryFastAppend and Append is that TryFastAppend
//   // is allowed to read up to <available> bytes from the input buffer,
//   // whereas Append is allowed to read <length>.
//   //
//   // Also, TryFastAppend is allowed to return false, declining the append,
//   // without it being a fatal error -- just "return false" would be
//   // a perfectly legal implementation of TryFastAppend. The intention
//   // is for TryFastAppend to allow a fast path in the common case of
//   // a small append.
//   //
//   // NOTE(user): TryFastAppend must always decline (return false)
//   // if <length> is 61 or more, as in this case the literal length is not
//   // decoded fully. In practice, this should not be a big problem,
//   // as it is unlikely that one would implement a fast path accepting
//   // this much data.
//   bool TryFastAppend(const char* ip, size_t available, size_t length);
// };

// -----------------------------------------------------------------------
// Lookup table for decompression code.  Generated by ComputeTable() below.
// -----------------------------------------------------------------------

// Mapping from i in range [0,4] to a mask to extract the bottom 8*i bits
static const uint32 wordmask[] = {
  0u, 0xffu, 0xffffu, 0xffffffu, 0xffffffffu
};

// Data stored per entry in lookup table:
//      Range   Bits-used       Description
//      ------------------------------------
//      1..64   0..7            Literal/copy length encoded in opcode byte
//      0..7    8..10           Copy offset encoded in opcode byte / 256
//      0..4    11..13          Extra bytes after opcode
//
// We use eight bits for the length even though 7 would have sufficed
// because of efficiency reasons:
//      (1) Extracting a byte is faster than a bit-field
//      (2) It properly aligns copy offset so we do not need a <<8
static const uint16 char_table[256] = {
  0x0001, 0x0804, 0x1001, 0x2001, 0x0002, 0x0805, 0x1002, 0x2002,
  0x0003, 0x0806, 0x1003, 0x2003, 0x0004, 0x0807, 0x1004, 0x2004,
  0x0005, 0x0808, 0x1005, 0x2005, 0x0006, 0x0809, 0x1006, 0x2006,
  0x0007, 0x080a, 0x1007, 0x2007, 0x0008, 0x080b, 0x1008, 0x2008,
  0x0009, 0x0904, 0x1009, 0x2009, 0x000a, 0x0905, 0x100a, 0x200a,
  0x000b, 0x0906, 0x100b, 0x200b, 0x000c, 0x0907, 0x100c, 0x200c,
  0x000d, 0x0908, 0x100d, 0x200d, 0x000e, 0x0909, 0x100e, 0x200e,
  0x000f, 0x090a, 0x100f, 0x200f, 0x0010, 0x090b, 0x1010, 0x2010,
  0x0011, 0x0a04, 0x1011, 0x2011, 0x0012, 0x0a05, 0x1012, 0x2012,
  0x0013, 0x0a06, 0x1013, 0x2013, 0x0014, 0x0a07, 0x1014, 0x2014,
  0x0015, 0x0a08, 0x1015, 0x2015, 0x0016, 0x0a09, 0x1016, 0x2016,
  0x0017, 0x0a0a, 0x1017, 0x2017, 0x0018, 0x0a0b, 0x1018, 0x2018,
  0x0019, 0x0b04, 0x1019, 0x2019, 0x001a, 0x0b05, 0x101a, 0x201a,
  0x001b, 0x0b06, 0x101b, 0x201b, 0x001c, 0x0b07, 0x101c, 0x201c,
  0x001d, 0x0b08, 0x101d, 0x201d, 0x001e, 0x0b09, 0x101e, 0x201e,
  0x001f, 0x0b0a, 0x101f, 0x201f, 0x0020, 0x0b0b, 0x1020, 0x2020,
  0x0021, 0x0c04, 0x1021, 0x2021, 0x0022, 0x0c05, 0x1022, 0x2022,
  0x0023, 0x0c06, 0x1023, 0x2023,
  0x0024, 0x0c07, 0x1024, 0x2024,
  0x0025, 0x0c08, 0x1025, 0x2025, 0x0026, 0x0c09, 0x1026, 0x2026,
  0x0027, 0x0c0a, 0x1027, 0x2027, 0x0028, 0x0c0b, 0x1028, 0x2028,
  0x0029, 0x0d04, 0x1029, 0x2029, 0x002a, 0x0d05, 0x102a, 0x202a,
  0x002b, 0x0d06, 0x102b, 0x202b, 0x002c, 0x0d07, 0x102c, 0x202c,
  0x002d, 0x0d08, 0x102d, 0x202d, 0x002e, 0x0d09, 0x102e, 0x202e,
  0x002f, 0x0d0a, 0x102f, 0x202f, 0x0030, 0x0d0b, 0x1030, 0x2030,
  0x0031, 0x0e04, 0x1031, 0x2031, 0x0032, 0x0e05, 0x1032, 0x2032,
  0x0033, 0x0e06, 0x1033, 0x2033, 0x0034, 0x0e07, 0x1034, 0x2034,
  0x0035, 0x0e08, 0x1035, 0x2035, 0x0036, 0x0e09, 0x1036, 0x2036,
  0x0037, 0x0e0a, 0x1037, 0x2037, 0x0038, 0x0e0b, 0x1038, 0x2038,
  0x0039, 0x0f04, 0x1039, 0x2039, 0x003a, 0x0f05, 0x103a, 0x203a,
  0x003b, 0x0f06, 0x103b, 0x203b, 0x003c, 0x0f07, 0x103c, 0x203c,
  0x0801, 0x0f08, 0x103d, 0x203d, 0x1001, 0x0f09, 0x103e, 0x203e,
  0x1801, 0x0f0a, 0x103f, 0x203f, 0x2001, 0x0f0b, 0x1040, 0x2040
};

// In debug mode, allow optional computation of the table at startup.
// Also, check that the decompression table is correct.
#ifndef NDEBUG
DEFINE_bool(snappy_dump_decompression_table, false,
            "If true, we print the decompression table at startup.");

static uint16 MakeEntry(unsigned int extra,
                        unsigned int len,
                        unsigned int copy_offset) {
  // Check that all of the fields fit within the allocated space
  assert(extra       == (extra & 0x7));        // At most 3 bits
  assert(copy_offset == (copy_offset & 0x7));  // At most 3 bits
  assert(len         == (len & 0x7f));         // At most 7 bits
  return len | (copy_offset << 8) | (extra << 11);
}

static void ComputeTable() {
  uint16 dst[256];

  // Place invalid entries in all places to detect missing initialization
  int assigned = 0;
  for (int i = 0; i < 256; i++) {
    dst[i] = 0xffff;
  }

  // Small LITERAL entries.  We store (len-1) in the top 6 bits.
  for (unsigned int len = 1; len <= 60; len++) {
    dst[LITERAL | ((len-1) << 2)] = MakeEntry(0, len, 0);
    assigned++;
  }

  // Large LITERAL entries.  We use 60..63 in the high 6 bits to
  // encode the number of bytes of length info that follow the opcode.
  for (unsigned int extra_bytes = 1; extra_bytes <= 4; extra_bytes++) {
    // We set the length field in the lookup table to 1 because extra
    // bytes encode len-1.
    dst[LITERAL | ((extra_bytes+59) << 2)] = MakeEntry(extra_bytes, 1, 0);
    assigned++;
  }

  // COPY_1_BYTE_OFFSET.
  //
  // The tag byte in the compressed data stores len-4 in 3 bits, and
  // offset/256 in 5 bits.  offset%256 is stored in the next byte.
  //
  // This format is used for length in range [4..11] and offset in
  // range [0..2047]
  for (unsigned int len = 4; len < 12; len++) {
    for (unsigned int offset = 0; offset < 2048; offset += 256) {
      dst[COPY_1_BYTE_OFFSET | ((len-4)<<2) | ((offset>>8)<<5)] =
        MakeEntry(1, len, offset>>8);
      assigned++;
    }
  }

  // COPY_2_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next two bytes.
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_2_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(2, len, 0);
    assigned++;
  }

  // COPY_4_BYTE_OFFSET.
  // Tag contains len-1 in top 6 bits, and offset in next four bytes.
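  // (Worked example, for illustration: len == 64 gives tag byte
  // COPY_4_BYTE_OFFSET | (63 << 2) == 0xff, and MakeEntry(4, 64, 0) == 0x2040,
  // matching the last entry of char_table[] above.)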
  for (unsigned int len = 1; len <= 64; len++) {
    dst[COPY_4_BYTE_OFFSET | ((len-1)<<2)] = MakeEntry(4, len, 0);
    assigned++;
  }

  // Check that each entry was initialized exactly once.
  if (assigned != 256) {
    fprintf(stderr, "ComputeTable: assigned only %d of 256\n", assigned);
    abort();
  }
  for (int i = 0; i < 256; i++) {
    if (dst[i] == 0xffff) {
      fprintf(stderr, "ComputeTable: did not assign byte %d\n", i);
      abort();
    }
  }

  if (FLAGS_snappy_dump_decompression_table) {
    printf("static const uint16 char_table[256] = {\n  ");
    for (int i = 0; i < 256; i++) {
      printf("0x%04x%s",
             dst[i],
             ((i == 255) ? "\n" : (((i%8) == 7) ? ",\n  " : ", ")));
    }
    printf("};\n");
  }

  // Check that computed table matched recorded table
  for (int i = 0; i < 256; i++) {
    if (dst[i] != char_table[i]) {
      fprintf(stderr, "ComputeTable: byte %d: computed (%x), expect (%x)\n",
              i, static_cast<int>(dst[i]), static_cast<int>(char_table[i]));
      abort();
    }
  }
}
#endif /* !NDEBUG */

// Helper class for decompression
class SnappyDecompressor {
 private:
  Source*      reader_;       // Underlying source of bytes to decompress
  const char*  ip_;           // Points to next buffered byte
  const char*  ip_limit_;     // Points just past buffered bytes
  uint32       peeked_;       // Bytes peeked from reader (need to skip)
  bool         eof_;          // Hit end of input without an error?
  char         scratch_[5];   // Temporary buffer for PeekFast() boundaries

  // Ensure that all of the tag metadata for the next tag is available
  // in [ip_..ip_limit_-1].  Also ensures that [ip,ip+4] is readable even
  // if (ip_limit_ - ip_ < 5).
  //
  // Returns true on success, false on error or end of input.
  bool RefillTag();

 public:
  explicit SnappyDecompressor(Source* reader)
      : reader_(reader),
        ip_(NULL),
        ip_limit_(NULL),
        peeked_(0),
        eof_(false) {
  }

  ~SnappyDecompressor() {
    // Advance past any bytes we peeked at from the reader
    reader_->Skip(peeked_);
  }

  // Returns true iff we have hit the end of the input without an error.
  bool eof() const {
    return eof_;
  }

  // Read the uncompressed length stored at the start of the compressed data.
  // On success, stores the length in *result and returns true.
  // On failure, returns false.
  bool ReadUncompressedLength(uint32* result) {
    assert(ip_ == NULL);       // Must not have read anything yet
    // Length is encoded in 1..5 bytes
    *result = 0;
    uint32 shift = 0;
    while (true) {
      if (shift >= 32) return false;
      size_t n;
      const char* ip = reader_->Peek(&n);
      if (n == 0) return false;
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
      reader_->Skip(1);
      *result |= static_cast<uint32>(c & 0x7f) << shift;
      if (c < 128) {
        break;
      }
      shift += 7;
    }
    return true;
  }

  // Process all remaining items found in the input.
  // Returns when the input is exhausted or the writer reports an error.
  template <class Writer>
  void DecompressAllTags(Writer* writer) {
    const char* ip = ip_;

    // We could have put this refill fragment only at the beginning of the
    // loop. However, duplicating it at the end of each branch gives the
    // compiler more scope to optimize the <ip_limit_ - ip> expression based
    // on the local context, which overall increases speed.
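    // (For illustration: a tag plus its trailing metadata never needs more
    // than five bytes -- one opcode byte plus at most four extra bytes, per
    // the "Extra bytes after opcode" field of char_table -- which is why the
    // refill threshold below and scratch_[] both use 5.)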
#define MAYBE_REFILL() \
    if (ip_limit_ - ip < 5) { \
      ip_ = ip; \
      if (!RefillTag()) return; \
      ip = ip_; \
    }

    MAYBE_REFILL();
    for ( ;; ) {
      const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip++));

      if ((c & 0x3) == LITERAL) {
        size_t literal_length = (c >> 2) + 1u;
        if (writer->TryFastAppend(ip, ip_limit_ - ip, literal_length)) {
          assert(literal_length < 61);
          ip += literal_length;
          MAYBE_REFILL();
          continue;
        }
        if (PREDICT_FALSE(literal_length >= 61)) {
          // Long literal.
          const size_t literal_length_length = literal_length - 60;
          literal_length =
              (LittleEndian::Load32(ip) & wordmask[literal_length_length]) + 1;
          ip += literal_length_length;
        }

        size_t avail = ip_limit_ - ip;
        while (avail < literal_length) {
          if (!writer->Append(ip, avail)) return;
          literal_length -= avail;
          reader_->Skip(peeked_);
          size_t n;
          ip = reader_->Peek(&n);
          avail = n;
          peeked_ = avail;
          if (avail == 0) return;  // Premature end of input
          ip_limit_ = ip + avail;
        }
        if (!writer->Append(ip, literal_length)) {
          return;
        }
        ip += literal_length;
        MAYBE_REFILL();
      } else {
        const uint32 entry = char_table[c];
        const uint32 trailer = LittleEndian::Load32(ip) & wordmask[entry >> 11];
        const uint32 length = entry & 0xff;
        ip += entry >> 11;

        // copy_offset/256 is encoded in bits 8..10.  By just fetching
        // those bits, we get copy_offset (since the bit-field starts at
        // bit 8).
        const uint32 copy_offset = entry & 0x700;
        if (!writer->AppendFromSelf(copy_offset + trailer, length)) {
          return;
        }
        MAYBE_REFILL();
      }
    }

#undef MAYBE_REFILL
  }
};

bool SnappyDecompressor::RefillTag() {
  const char* ip = ip_;
  if (ip == ip_limit_) {
    // Fetch a new fragment from the reader
    reader_->Skip(peeked_);   // All peeked bytes are used up
    size_t n;
    ip = reader_->Peek(&n);
    peeked_ = n;
    if (n == 0) {
      eof_ = true;
      return false;
    }
    ip_limit_ = ip + n;
  }

  // Read the tag character
  assert(ip < ip_limit_);
  const unsigned char c = *(reinterpret_cast<const unsigned char*>(ip));
  const uint32 entry = char_table[c];
  const uint32 needed = (entry >> 11) + 1;  // +1 byte for 'c'
  assert(needed <= sizeof(scratch_));

  // Read more bytes from reader if needed
  uint32 nbuf = ip_limit_ - ip;
  if (nbuf < needed) {
    // Stitch together bytes from ip and reader to form the word
    // contents.  We store the needed bytes in "scratch_".  They
    // will be consumed immediately by the caller since we do not
    // read more than we need.
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    while (nbuf < needed) {
      size_t length;
      const char* src = reader_->Peek(&length);
      if (length == 0) return false;
      uint32 to_add = min<uint32>(needed - nbuf, length);
      memcpy(scratch_ + nbuf, src, to_add);
      nbuf += to_add;
      reader_->Skip(to_add);
    }
    assert(nbuf == needed);
    ip_ = scratch_;
    ip_limit_ = scratch_ + needed;
  } else if (nbuf < 5) {
    // Have enough bytes, but move into scratch_ so that we do not
    // read past end of input
    memmove(scratch_, ip, nbuf);
    reader_->Skip(peeked_);  // All peeked bytes are used up
    peeked_ = 0;
    ip_ = scratch_;
    ip_limit_ = scratch_ + nbuf;
  } else {
    // Pass pointer to buffer returned by reader_.
    ip_ = ip;
  }
  return true;
}

template <typename Writer>
static bool InternalUncompress(Source* r,
                               Writer* writer,
                               uint32 max_len) {
  // Read the uncompressed length from the front of the compressed input
  SnappyDecompressor decompressor(r);
  uint32 uncompressed_len = 0;
  if (!decompressor.ReadUncompressedLength(&uncompressed_len)) return false;
  return InternalUncompressAllTags(
      &decompressor, writer, uncompressed_len, max_len);
}

template <typename Writer>
static bool InternalUncompressAllTags(SnappyDecompressor* decompressor,
                                      Writer* writer,
                                      uint32 uncompressed_len,
                                      uint32 max_len) {
  // Protect against possible DoS attack
  if (static_cast<uint64>(uncompressed_len) > max_len) {
    return false;
  }

  writer->SetExpectedLength(uncompressed_len);

  // Process the entire input
  decompressor->DecompressAllTags(writer);
  return (decompressor->eof() && writer->CheckLength());
}

bool GetUncompressedLength(Source* source, uint32* result) {
  SnappyDecompressor decompressor(source);
  return decompressor.ReadUncompressedLength(result);
}

size_t Compress(Source* reader, Sink* writer) {
  size_t written = 0;
  size_t N = reader->Available();
  char ulength[Varint::kMax32];
  char* p = Varint::Encode32(ulength, N);
  writer->Append(ulength, p-ulength);
  written += (p - ulength);

  internal::WorkingMemory wmem;
  char* scratch = NULL;
  char* scratch_output = NULL;

  while (N > 0) {
    // Get next block to compress (without copying if possible)
    size_t fragment_size;
    const char* fragment = reader->Peek(&fragment_size);
    assert(fragment_size != 0);  // premature end of input
    const size_t num_to_read = min(N, kBlockSize);
    size_t bytes_read = fragment_size;

    size_t pending_advance = 0;
    if (bytes_read >= num_to_read) {
      // Buffer returned by reader is large enough
      pending_advance = num_to_read;
      fragment_size = num_to_read;
    } else {
      // Read into scratch buffer
      if (scratch == NULL) {
        // If this is the last iteration, we want to allocate N bytes
        // of space, otherwise the max possible kBlockSize space.
        // num_to_read contains exactly the correct value
        scratch = new char[num_to_read];
      }
      memcpy(scratch, fragment, bytes_read);
      reader->Skip(bytes_read);

      while (bytes_read < num_to_read) {
        fragment = reader->Peek(&fragment_size);
        size_t n = min<size_t>(fragment_size, num_to_read - bytes_read);
        memcpy(scratch + bytes_read, fragment, n);
        bytes_read += n;
        reader->Skip(n);
      }
      assert(bytes_read == num_to_read);
      fragment = scratch;
      fragment_size = num_to_read;
    }
    assert(fragment_size == num_to_read);

    // Get encoding table for compression
    int table_size;
    uint16* table = wmem.GetHashTable(num_to_read, &table_size);

    // Compress input_fragment and append to dest
    const int max_output = MaxCompressedLength(num_to_read);

    // Need a scratch buffer for the output, in case the byte sink doesn't
    // have room for us directly.
    if (scratch_output == NULL) {
      scratch_output = new char[max_output];
    } else {
      // Since we encode kBlockSize regions followed by a region
      // which is <= kBlockSize in length, a previously allocated
      // scratch_output[] region is big enough for this iteration.
    }
    char* dest = writer->GetAppendBuffer(max_output, scratch_output);
    char* end = internal::CompressFragment(fragment, fragment_size,
                                           dest, table, table_size);
    writer->Append(dest, end - dest);
    written += (end - dest);

    N -= num_to_read;
    reader->Skip(pending_advance);
  }

  delete[] scratch;
  delete[] scratch_output;

  return written;
}

// -----------------------------------------------------------------------
// Flat array interfaces
// -----------------------------------------------------------------------

// A type that writes to a flat array.
// Note that this is not a "ByteSink", but a type that matches the
// Writer template argument to SnappyDecompressor::DecompressAllTags().
class SnappyArrayWriter {
 private:
  char* base_;
  char* op_;
  char* op_limit_;

 public:
  inline explicit SnappyArrayWriter(char* dst)
      : base_(dst),
        op_(dst) {
  }

  inline void SetExpectedLength(size_t len) {
    op_limit_ = op_ + len;
  }

  inline bool CheckLength() const {
    return op_ == op_limit_;
  }

  inline bool Append(const char* ip, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (space_left < len) {
      return false;
    }
    memcpy(op, ip, len);
    op_ = op + len;
    return true;
  }

  inline bool TryFastAppend(const char* ip, size_t available, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;
    if (len <= 16 && available >= 16 && space_left >= 16) {
      // Fast path, used for the majority (about 95%) of invocations.
      UnalignedCopy64(ip, op);
      UnalignedCopy64(ip + 8, op + 8);
      op_ = op + len;
      return true;
    } else {
      return false;
    }
  }

  inline bool AppendFromSelf(size_t offset, size_t len) {
    char* op = op_;
    const size_t space_left = op_limit_ - op;

    if (op - base_ <= offset - 1u) {  // -1u catches offset==0
      return false;
    }
    if (len <= 16 && offset >= 8 && space_left >= 16) {
      // Fast path, used for the majority (70-80%) of dynamic invocations.
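      // (Illustrative: with offset == 8 and len == 16, the first copy below
      // duplicates the previous eight bytes at op, and the second copy
      // re-reads those just-written bytes, extending the repeated pattern to
      // sixteen bytes, which is the intended overlapping-copy behaviour.)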
      UnalignedCopy64(op - offset, op);
      UnalignedCopy64(op - offset + 8, op + 8);
    } else {
      if (space_left >= len + kMaxIncrementCopyOverflow) {
        IncrementalCopyFastPath(op - offset, op, len);
      } else {
        if (space_left < len) {
          return false;
        }
        IncrementalCopy(op - offset, op, len);
      }
    }

    op_ = op + len;
    return true;
  }
};

bool RawUncompress(const char* compressed, size_t n, char* uncompressed) {
  ByteArraySource reader(compressed, n);
  return RawUncompress(&reader, uncompressed);
}

bool RawUncompress(Source* compressed, char* uncompressed) {
  SnappyArrayWriter output(uncompressed);
  return InternalUncompress(compressed, &output, kuint32max);
}

bool Uncompress(const char* compressed, size_t n, string* uncompressed) {
  size_t ulength;
  if (!GetUncompressedLength(compressed, n, &ulength)) {
    return false;
  }
  // Protect against possible DoS attack
  if ((static_cast<uint64>(ulength) + uncompressed->size()) >
      uncompressed->max_size()) {
    return false;
  }
  STLStringResizeUninitialized(uncompressed, ulength);
  return RawUncompress(compressed, n, string_as_array(uncompressed));
}


// A Writer that drops everything on the floor and just does validation
class SnappyDecompressionValidator {
 private:
  size_t expected_;
  size_t produced_;

 public:
  inline SnappyDecompressionValidator() : produced_(0) { }
  inline void SetExpectedLength(size_t len) {
    expected_ = len;
  }
  inline bool CheckLength() const {
    return expected_ == produced_;
  }
  inline bool Append(const char* ip, size_t len) {
    produced_ += len;
    return produced_ <= expected_;
  }
  inline bool TryFastAppend(const char* ip, size_t available, size_t length) {
    return false;
  }
  inline bool AppendFromSelf(size_t offset, size_t len) {
    if (produced_ <= offset - 1u) return false;  // -1u catches offset==0
    produced_ += len;
    return produced_ <= expected_;
  }
};

bool IsValidCompressedBuffer(const char* compressed, size_t n) {
  ByteArraySource reader(compressed, n);
  SnappyDecompressionValidator writer;
  return InternalUncompress(&reader, &writer, kuint32max);
}

void RawCompress(const char* input,
                 size_t input_length,
                 char* compressed,
                 size_t* compressed_length) {
  ByteArraySource reader(input, input_length);
  UncheckedByteArraySink writer(compressed);
  Compress(&reader, &writer);

  // Compute how many bytes were added
  *compressed_length = (writer.CurrentDestination() - compressed);
}

size_t Compress(const char* input, size_t input_length, string* compressed) {
  // Pre-grow the buffer to the max length of the compressed output
  compressed->resize(MaxCompressedLength(input_length));

  size_t compressed_length;
  RawCompress(input, input_length, string_as_array(compressed),
              &compressed_length);
  compressed->resize(compressed_length);
  return compressed_length;
}


}  // end namespace snappy
}  // end namespace rawp
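
// ---------------------------------------------------------------------------
// Usage sketch (illustrative only, not part of the library). A minimal
// round-trip through the string/flat-array interfaces defined above, assuming
// "rawp-snappy.h" declares these functions with std::string and that the
// sketch is built and linked as its own program. Kept inside "#if 0" so it
// never affects the build of this file.
// ---------------------------------------------------------------------------
#if 0
#include <cassert>
#include <cstdio>
#include <string>

#include "rawp-snappy.h"

int main() {
  const std::string original(10000, 'a');  // Highly compressible input.

  // Compress() grows "compressed" to MaxCompressedLength() internally and
  // then shrinks it to the actual compressed size.
  std::string compressed;
  rawp::snappy::Compress(original.data(), original.size(), &compressed);

  // Validate without allocating output, then decompress for real.
  assert(rawp::snappy::IsValidCompressedBuffer(compressed.data(),
                                               compressed.size()));
  std::string uncompressed;
  if (!rawp::snappy::Uncompress(compressed.data(), compressed.size(),
                                &uncompressed)) {
    std::fprintf(stderr, "corrupt input\n");
    return 1;
  }
  assert(uncompressed == original);
  std::printf("%zu -> %zu bytes\n", original.size(), compressed.size());
  return 0;
}
#endif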