github.com/sbinet/go@v0.0.0-20160827155028-54d7de7dd62b/src/compress/flate/deflatefast.go

// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

// This encoding algorithm, which prioritizes speed over output size, is
// based on Snappy's LZ77-style encoder: github.com/golang/snappy

const (
	tableBits  = 14             // Bits used in the table.
	tableSize  = 1 << tableBits // Size of the table.
	tableMask  = tableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
)

func load32(b []byte, i int) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load64(b []byte, i int) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}

func hash(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> tableShift
}

// These constants are defined by the Snappy implementation so that its
// assembly implementation can fast-path some 16-bytes-at-a-time copies. They
// aren't necessary in the pure Go implementation, as we don't use those same
// optimizations, but using the same thresholds doesn't really hurt.
const (
	inputMargin            = 16 - 1
	minNonLiteralBlockSize = 1 + 1 + inputMargin
)

func encodeBestSpeed(dst []token, src []byte) []token {
	// This check isn't in the Snappy implementation, but there, the caller
	// instead of the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		return emitLiteral(dst, src)
	}

	// Initialize the hash table.
	//
	// The table element type is uint16, as s < sLimit and sLimit < len(src)
	// and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535.
	var table [tableSize]uint16

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	nextHash := hash(load32(src, s))

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (i.e. right-shifting by five)
		// gives the number of bytes to move ahead for each iteration.
		skip := 32

		nextS := s
		candidate := 0
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = int(table[nextHash&tableMask])
			table[nextHash&tableMask] = uint16(s)
			nextHash = hash(load32(src, nextS))
			// TODO: < should be <=, and add a test for that.
			if s-candidate < maxMatchOffset && load32(src, s) == load32(src, candidate) {
				break
			}
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		dst = emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s

			// Extend the 4-byte match as long as possible.
			//
			// This is an inlined version of Snappy's:
			//	s = extendMatch(src, candidate+4, s+4)
			s += 4
			s1 := base + maxMatchLength
			if s1 > len(src) {
				s1 = len(src)
			}
			for i := candidate + 4; s < s1 && src[i] == src[s]; i, s = i+1, s+1 {
			}

			// matchToken is flate's equivalent of Snappy's emitCopy.
			dst = append(dst, matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset)))
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
			x := load64(src, s-1)
			prevHash := hash(uint32(x >> 0))
			table[prevHash&tableMask] = uint16(s - 1)
			currHash := hash(uint32(x >> 8))
			candidate = int(table[currHash&tableMask])
			table[currHash&tableMask] = uint16(s)
			// TODO: >= should be >, and add a test for that.
			if s-candidate >= maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
				nextHash = hash(uint32(x >> 16))
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		dst = emitLiteral(dst, src[nextEmit:])
	}
	return dst
}
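// The sketch below is illustrative and not part of the original file: it
// isolates the heuristic match-skipping arithmetic from the inner loop of
// encodeBestSpeed so the probe schedule can be read, or tested, on its own.
// probeStep is a hypothetical helper; its name is an assumption.
func probeStep(s, skip int) (nextS, nextSkip int) {
	// While skip is in [32, 64) the step is 1, so every byte is hashed.
	// After 32 fruitless probes skip reaches 64 and the step becomes 2;
	// after 32 more scanned-or-skipped bytes it becomes 3, and so on.
	step := skip >> 5
	return s + step, skip + step
}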
func emitLiteral(dst []token, lit []byte) []token {
	for _, v := range lit {
		dst = append(dst, token(v))
	}
	return dst
}
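// An illustrative sketch, not part of the original file: how a caller inside
// this package might drive encodeBestSpeed over one block. The name
// encodeBlockSketch is hypothetical; in the real compressor the token buffer
// is reused across blocks rather than allocated per call.
func encodeBlockSketch(src []byte) []token {
	// Blocks are at most maxStoreBlockSize (65535) bytes, which is what
	// keeps the uint16 hash-table entries in encodeBestSpeed valid.
	tokens := make([]token, 0, len(src))
	return encodeBestSpeed(tokens, src)
}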