// Copyright 2016 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package flate

// This encoding algorithm, which prioritizes speed over output size, is
// based on Snappy's LZ77-style encoder: github.com/golang/snappy

const (
	tableBits  = 14             // Bits used in the table.
	tableSize  = 1 << tableBits // Size of the table.
	tableMask  = tableSize - 1  // Mask for table indices. Redundant, but can eliminate bounds checks.
	tableShift = 32 - tableBits // Right-shift to get the tableBits most significant bits of a uint32.
)
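
// As a worked note (illustrative, not in the original source): with
// tableBits = 14, tableSize is 16384 and tableShift is 18, so the hash
// function below always yields a value in [0, 16383]. Masking with
// tableMask therefore never changes the value; it only lets the compiler
// prove that a table index is in range.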

func load32(b []byte, i int) uint32 {
	b = b[i : i+4 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint32(b[0]) | uint32(b[1])<<8 | uint32(b[2])<<16 | uint32(b[3])<<24
}

func load64(b []byte, i int) uint64 {
	b = b[i : i+8 : len(b)] // Help the compiler eliminate bounds checks on the next line.
	return uint64(b[0]) | uint64(b[1])<<8 | uint64(b[2])<<16 | uint64(b[3])<<24 |
		uint64(b[4])<<32 | uint64(b[5])<<40 | uint64(b[6])<<48 | uint64(b[7])<<56
}
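
// A clarifying note (added, not in the original): in both load32 and
// load64 above, the three-index reslice fixes len(b) to exactly 4 or 8
// bytes, so the compiler can prove that every index that follows is in
// bounds and can drop the per-index checks.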

func hash(u uint32) uint32 {
	return (u * 0x1e35a7bd) >> tableShift
}
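
// An added observation (not part of the original comment): 0x1e35a7bd
// appears to be the same multiplicative-hash constant Snappy uses. The
// multiply mixes all four input bytes into the high bits of the product,
// and the shift then keeps the top tableBits bits as the table index.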

// These constants are defined by the Snappy implementation so that its
// assembly implementation can fast-path some 16-bytes-at-a-time copies. They
// aren't necessary in the pure Go implementation, as we don't use those same
// optimizations, but using the same thresholds doesn't really hurt.
const (
	inputMargin            = 16 - 1
	minNonLiteralBlockSize = 1 + 1 + inputMargin
)
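
// Worked out (an added note): inputMargin is 15 and minNonLiteralBlockSize
// is 17, so any src shorter than 17 bytes is emitted entirely as literals
// by the length check at the top of encodeBestSpeed.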

func encodeBestSpeed(dst []token, src []byte) []token {
	// This check isn't in the Snappy implementation; there, the caller
	// rather than the callee handles this case.
	if len(src) < minNonLiteralBlockSize {
		return emitLiteral(dst, src)
	}

	// Initialize the hash table.
	//
	// The table element type is uint16, as s < sLimit and sLimit < len(src)
	// and len(src) <= maxStoreBlockSize and maxStoreBlockSize == 65535.
	var table [tableSize]uint16

	// sLimit is when to stop looking for offset/length copies. The inputMargin
	// lets us use a fast path for emitLiteral in the main loop, while we are
	// looking for copies.
	sLimit := len(src) - inputMargin

	// nextEmit is where in src the next emitLiteral should start from.
	nextEmit := 0

	// The encoded form must start with a literal, as there are no previous
	// bytes to copy, so we start looking for hash matches at s == 1.
	s := 1
	nextHash := hash(load32(src, s))

	for {
		// Copied from the C++ snappy implementation:
		//
		// Heuristic match skipping: If 32 bytes are scanned with no matches
		// found, start looking only at every other byte. If 32 more bytes are
		// scanned (or skipped), look at every third byte, etc. When a match
		// is found, immediately go back to looking at every byte. This is a
		// small loss (~5% performance, ~0.1% density) for compressible data
		// due to more bookkeeping, but for non-compressible data (such as
		// JPEG) it's a huge win since the compressor quickly "realizes" the
		// data is incompressible and doesn't bother looking for matches
		// everywhere.
		//
		// The "skip" variable keeps track of how many bytes there are since
		// the last match; dividing it by 32 (i.e. right-shifting by five) gives
		// the number of bytes to move ahead for each iteration.
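		//
		// Concretely (an added illustration): skip starts at 32, so skip>>5
		// is 1 for the first 32 probes, 2 for the next 16 probes, 3 for
		// roughly the next 11, and so on. After about 64 bytes with no
		// match, the scan is already advancing 3 bytes per probe.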
		skip := 32

		nextS := s
		candidate := 0
		for {
			s = nextS
			bytesBetweenHashLookups := skip >> 5
			nextS = s + bytesBetweenHashLookups
			skip += bytesBetweenHashLookups
			if nextS > sLimit {
				goto emitRemainder
			}
			candidate = int(table[nextHash&tableMask])
			table[nextHash&tableMask] = uint16(s)
			nextHash = hash(load32(src, nextS))
			// TODO: < should be <=, and add a test for that.
			if s-candidate < maxMatchOffset && load32(src, s) == load32(src, candidate) {
				break
			}
		}

		// A 4-byte match has been found. We'll later see if more than 4 bytes
		// match. But, prior to the match, src[nextEmit:s] are unmatched. Emit
		// them as literal bytes.
		dst = emitLiteral(dst, src[nextEmit:s])

		// Call emitCopy, and then see if another emitCopy could be our next
		// move. Repeat until we find no match for the input immediately after
		// what was consumed by the last emitCopy call.
		//
		// If we exit this loop normally then we need to call emitLiteral next,
		// though we don't yet know how big the literal will be. We handle that
		// by proceeding to the next iteration of the main loop. We also can
		// exit this loop via goto if we get close to exhausting the input.
		for {
			// Invariant: we have a 4-byte match at s, and no need to emit any
			// literal bytes prior to s.
			base := s

			// Extend the 4-byte match as long as possible.
			//
			// This is an inlined version of Snappy's:
			//	s = extendMatch(src, candidate+4, s+4)
			s += 4
			s1 := base + maxMatchLength
			if s1 > len(src) {
				s1 = len(src)
			}
			for i := candidate + 4; s < s1 && src[i] == src[s]; i, s = i+1, s+1 {
			}

			// matchToken is flate's equivalent of Snappy's emitCopy.
			dst = append(dst, matchToken(uint32(s-base-baseMatchLength), uint32(base-candidate-baseMatchOffset)))
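			// A worked example (added; it assumes flate's usual constants,
			// baseMatchLength == 3 and baseMatchOffset == 1): a 7-byte match
			// whose copy source starts 10 bytes back is encoded as
			// matchToken(7-3, 10-1), i.e. matchToken(4, 9).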
			nextEmit = s
			if s >= sLimit {
				goto emitRemainder
			}

			// We could immediately start working at s now, but to improve
			// compression we first update the hash table at s-1 and at s. If
			// another emitCopy is not our next move, also calculate nextHash
			// at s+1. At least on GOARCH=amd64, these three hash calculations
			// are faster as one load64 call (with some shifts) instead of
			// three load32 calls.
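			//
			// To spell that out (an added note): load64 assembles bytes
			// little-endian, so uint32(x>>0) equals load32(src, s-1),
			// uint32(x>>8) equals load32(src, s), and uint32(x>>16) equals
			// load32(src, s+1).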
			x := load64(src, s-1)
			prevHash := hash(uint32(x >> 0))
			table[prevHash&tableMask] = uint16(s - 1)
			currHash := hash(uint32(x >> 8))
			candidate = int(table[currHash&tableMask])
			table[currHash&tableMask] = uint16(s)
			// TODO: >= should be >, and add a test for that.
			if s-candidate >= maxMatchOffset || uint32(x>>8) != load32(src, candidate) {
				nextHash = hash(uint32(x >> 16))
				s++
				break
			}
		}
	}

emitRemainder:
	if nextEmit < len(src) {
		dst = emitLiteral(dst, src[nextEmit:])
	}
	return dst
}

func emitLiteral(dst []token, lit []byte) []token {
	for _, v := range lit {
		dst = append(dst, token(v))
	}
	return dst
}
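
// Usage sketch (added for illustration; it assumes the token-slice calling
// convention used elsewhere in this package):
//
//	var tok []token
//	tok = encodeBestSpeed(tok[:0], src) // src is at most maxStoreBlockSize bytes
//
// The returned slice holds literal and match tokens in input order; the
// caller is then expected to Huffman-encode them into the output stream.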