github.com/olivere/camlistore@v0.0.0-20140121221811-1b7ac2da0199/third_party/code.google.com/p/snappy-go/snappy/encode.go (about) 1 // Copyright 2011 The Snappy-Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package snappy 6 7 import ( 8 "encoding/binary" 9 ) 10 11 // We limit how far copy back-references can go, the same as the C++ code. 12 const maxOffset = 1 << 15 13 14 // equal4 returns whether b[i:i+4] equals b[j:j+4]. 15 func equal4(b []byte, i, j int) bool { 16 return b[i] == b[j] && 17 b[i+1] == b[j+1] && 18 b[i+2] == b[j+2] && 19 b[i+3] == b[j+3] 20 } 21 22 // emitLiteral writes a literal chunk and returns the number of bytes written. 23 func emitLiteral(dst, lit []byte) int { 24 i, n := 0, uint(len(lit)-1) 25 switch { 26 case n < 60: 27 dst[0] = uint8(n)<<2 | tagLiteral 28 i = 1 29 case n < 1<<8: 30 dst[0] = 60<<2 | tagLiteral 31 dst[1] = uint8(n) 32 i = 2 33 case n < 1<<16: 34 dst[0] = 61<<2 | tagLiteral 35 dst[1] = uint8(n) 36 dst[2] = uint8(n >> 8) 37 i = 3 38 case n < 1<<24: 39 dst[0] = 62<<2 | tagLiteral 40 dst[1] = uint8(n) 41 dst[2] = uint8(n >> 8) 42 dst[3] = uint8(n >> 16) 43 i = 4 44 case int64(n) < 1<<32: 45 dst[0] = 63<<2 | tagLiteral 46 dst[1] = uint8(n) 47 dst[2] = uint8(n >> 8) 48 dst[3] = uint8(n >> 16) 49 dst[4] = uint8(n >> 24) 50 i = 5 51 default: 52 panic("snappy: source buffer is too long") 53 } 54 if copy(dst[i:], lit) != len(lit) { 55 panic("snappy: destination buffer is too short") 56 } 57 return i + len(lit) 58 } 59 60 // emitCopy writes a copy chunk and returns the number of bytes written. 61 func emitCopy(dst []byte, offset, length int) int { 62 i := 0 63 for length > 0 { 64 x := length - 4 65 if 0 <= x && x < 1<<3 && offset < 1<<11 { 66 dst[i+0] = uint8(offset>>8)&0x07<<5 | uint8(x)<<2 | tagCopy1 67 dst[i+1] = uint8(offset) 68 i += 2 69 break 70 } 71 72 x = length 73 if x > 1<<6 { 74 x = 1 << 6 75 } 76 dst[i+0] = uint8(x-1)<<2 | tagCopy2 77 dst[i+1] = uint8(offset) 78 dst[i+2] = uint8(offset >> 8) 79 i += 3 80 length -= x 81 } 82 return i 83 } 84 85 // Encode returns the encoded form of src. The returned slice may be a sub- 86 // slice of dst if dst was large enough to hold the entire encoded block. 87 // Otherwise, a newly allocated slice will be returned. 88 // It is valid to pass a nil dst. 89 func Encode(dst, src []byte) ([]byte, error) { 90 if n := MaxEncodedLen(len(src)); len(dst) < n { 91 dst = make([]byte, n) 92 } 93 94 // The block starts with the varint-encoded length of the decompressed bytes. 95 d := binary.PutUvarint(dst, uint64(len(src))) 96 97 // Return early if src is short. 98 if len(src) <= 4 { 99 d += emitLiteral(dst[d:], src) 100 return dst[:d], nil 101 } 102 103 // Initialize the hash table. Its size ranges from 1<<8 to 1<<14 inclusive. 104 const maxTableSize = 1 << 14 105 shift, tableSize := uint(32-8), 1<<8 106 for tableSize < maxTableSize && tableSize < len(src) { 107 shift-- 108 tableSize *= 2 109 } 110 var table [maxTableSize]int 111 for i := 0; i < tableSize; i++ { 112 table[i] = -1 113 } 114 115 // Iterate over the source bytes. 116 var ( 117 s int // The iterator position. 118 t int // The last position with the same hash as s. 119 lit int // The start position of any pending literal bytes. 120 ) 121 for s+3 < len(src) { 122 // Update the hash table. 123 h := uint32(src[s]) | uint32(src[s+1])<<8 | uint32(src[s+2])<<16 | uint32(src[s+3])<<24 124 h = (h * 0x1e35a7bd) >> shift 125 t, table[h] = table[h], s 126 // If t is invalid or src[s:s+4] differs from src[t:t+4], accumulate a literal byte. 127 if t < 0 || s-t >= maxOffset || !equal4(src, t, s) { 128 s++ 129 continue 130 } 131 // Otherwise, we have a match. First, emit any pending literal bytes. 132 if lit != s { 133 d += emitLiteral(dst[d:], src[lit:s]) 134 } 135 // Extend the match to be as long as possible. 136 s0 := s 137 s, t = s+4, t+4 138 for s < len(src) && src[s] == src[t] { 139 s++ 140 t++ 141 } 142 // Emit the copied bytes. 143 d += emitCopy(dst[d:], s-t, s-s0) 144 lit = s 145 } 146 147 // Emit any final pending literal bytes and return. 148 if lit != len(src) { 149 d += emitLiteral(dst[d:], src[lit:]) 150 } 151 return dst[:d], nil 152 } 153 154 // MaxEncodedLen returns the maximum length of a snappy block, given its 155 // uncompressed length. 156 func MaxEncodedLen(srcLen int) int { 157 // Compressed data can be defined as: 158 // compressed := item* literal* 159 // item := literal* copy 160 // 161 // The trailing literal sequence has a space blowup of at most 62/60 162 // since a literal of length 60 needs one tag byte + one extra byte 163 // for length information. 164 // 165 // Item blowup is trickier to measure. Suppose the "copy" op copies 166 // 4 bytes of data. Because of a special check in the encoding code, 167 // we produce a 4-byte copy only if the offset is < 65536. Therefore 168 // the copy op takes 3 bytes to encode, and this type of item leads 169 // to at most the 62/60 blowup for representing literals. 170 // 171 // Suppose the "copy" op copies 5 bytes of data. If the offset is big 172 // enough, it will take 5 bytes to encode the copy op. Therefore the 173 // worst case here is a one-byte literal followed by a five-byte copy. 174 // That is, 6 bytes of input turn into 7 bytes of "compressed" data. 175 // 176 // This last factor dominates the blowup, so the final estimate is: 177 return 32 + srcLen + srcLen/6 178 }