github.com/creachadair/ffs@v0.17.3/file/blobs.go

github.com/creachadair/ffs@v0.17.3/file/blobs.go (about)

     1  // Copyright 2021 Michael J. Fromberger. All Rights Reserved.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package file
    16  
    17  import (
    18  	"io"
    19  
    20  	"github.com/creachadair/mds/mbits"
    21  )
    22  
    23  // splitExtent splits ext into possibly-multiple extents by removing
    24  // zero-valued data blocks. If there are no zero blocks, the return slice
    25  // contains just the original extent.
    26  func splitExtent(ext *extent) []*extent {
    27  	var chunks [][]cblock
    28  	var bases []int64
    29  	var sizes []int64
    30  
    31  	// Do a two-finger walk of the blocks. The left finger (lo) scans for the
    32  	// next non-zero block, and the right finger (hi) scans forward from there
    33  	// to find the end of the non-zero range. Along the way, we keep track of
    34  	// the base and size of each non-zero range, to pack into extents.
    35  
    36  	base := ext.base
    37  	lo := 0
    38  
    39  nextChunk:
    40  	for lo < len(ext.blocks) {
    41  		// Scan for a nonzero block.
    42  		if ext.blocks[lo].key == "" {
    43  			base += ext.blocks[lo].bytes
    44  			lo++
    45  			continue
    46  		}
    47  
    48  		// Scan forward for a zero block.
    49  		nextBase := base + ext.blocks[lo].bytes
    50  		for hi := lo + 1; hi < len(ext.blocks); hi++ {
    51  			blk := ext.blocks[hi]
    52  
    53  			// If we found a zero-value block, the non-zero blocks since the last
    54  			// marker are an extent.
    55  			if blk.key == "" {
    56  				chunks = append(chunks, ext.blocks[lo:hi])
    57  				bases = append(bases, base)
    58  				sizes = append(sizes, nextBase-base)
    59  				base = nextBase
    60  				lo = hi
    61  				continue nextChunk
    62  			}
    63  			nextBase += blk.bytes
    64  		}
    65  
    66  		// If we get here, hi reached the end of the blocks without finding
    67  		// another zero-value block, so the rest of the blocks are an extent.
    68  		// In the typical case where nothing happened, return without packing.
    69  		if lo == 0 {
    70  			return []*extent{ext}
    71  		}
    72  		chunks = append(chunks, ext.blocks[lo:])
    73  		bases = append(bases, base)
    74  		sizes = append(sizes, nextBase-base)
    75  		break
    76  	}
    77  
    78  	exts := make([]*extent, len(chunks))
    79  	for i, chunk := range chunks {
    80  		exts[i] = &extent{
    81  			base:   bases[i],
    82  			bytes:  sizes[i],
    83  			blocks: chunk,
    84  		}
    85  	}
    86  	return exts
    87  }
    88  
    89  // A blockReader implements io.Reader for the concatenation of a slice of byte
    90  // slices. This avoids the overhead of constructing a bytes.Reader for each
    91  // blob plus an io.MultiReader to concatenate them.
    92  type blockReader struct {
    93  	cur    int
    94  	blocks [][]byte
    95  }
    96  
    97  func newBlockReader(blocks [][]byte) *blockReader {
    98  	return &blockReader{blocks: blocks}
    99  }
   100  
   101  func (r *blockReader) Read(data []byte) (int, error) {
   102  	var nr int
   103  	for nr < len(data) && r.cur < len(r.blocks) {
   104  		curBlock := r.blocks[r.cur]
   105  		cp := copy(data[nr:], curBlock)
   106  		if cp == len(curBlock) {
   107  			r.blocks[r.cur] = nil
   108  			r.cur++
   109  		} else {
   110  			r.blocks[r.cur] = curBlock[cp:]
   111  		}
   112  		nr += cp
   113  	}
   114  	if nr == 0 && r.cur >= len(r.blocks) {
   115  		return 0, io.EOF
   116  	}
   117  	return nr, nil
   118  }
   119  
   120  // zeroCheck returns the length of the longest prefix and suffix of data that
   121  // comprise all zeroes, along with the length of data. If data consists of all
   122  // zero bytes, zeroCheck returns len(data), 0, len(data).
   123  //
   124  // Otherwise, zhead is the count of zero bytes prior to the first non-zero
   125  // byte, and ztail is the count of zero bytes after the last non-zero byte.
   126  func zeroCheck(data []byte) (zhead, ztail, n int) {
   127  	// Benchmarks for this implementation vs. naive loop.
   128  	// Sizes in bytes, times in ns/op (from go test -bench).
   129  	//
   130  	//   Size     Unsafe  Naive  Speedup
   131  	//   103      11      41     2.72x
   132  	//   1007     73      267    2.66x
   133  	//   10007    646     2529   2.91x
   134  	//   100007   6320    25248  2.99x
   135  	//
   136  	n = len(data)
   137  	zhead = mbits.LeadingZeroes(data)
   138  	ztail = mbits.TrailingZeroes(data[zhead:])
   139  	return
   140  }
   141  
   142  func min(z0 int, zs ...int) int {
   143  	for _, z := range zs {
   144  		if z < z0 {
   145  			z0 = z
   146  		}
   147  	}
   148  	return z0
   149  }