github.com/creachadair/ffs@v0.17.3/file/blobs.go (about) 1 // Copyright 2021 Michael J. Fromberger. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package file 16 17 import ( 18 "io" 19 20 "github.com/creachadair/mds/mbits" 21 ) 22 23 // splitExtent splits ext into possibly-multiple extents by removing 24 // zero-valued data blocks. If there are no zero blocks, the return slice 25 // contains just the original extent. 26 func splitExtent(ext *extent) []*extent { 27 var chunks [][]cblock 28 var bases []int64 29 var sizes []int64 30 31 // Do a two-finger walk of the blocks. The left finger (lo) scans for the 32 // next non-zero block, and the right finger (hi) scans forward from there 33 // to find the end of the non-zero range. Along the way, we keep track of 34 // the base and size of each non-zero range, to pack into extents. 35 36 base := ext.base 37 lo := 0 38 39 nextChunk: 40 for lo < len(ext.blocks) { 41 // Scan for a nonzero block. 42 if ext.blocks[lo].key == "" { 43 base += ext.blocks[lo].bytes 44 lo++ 45 continue 46 } 47 48 // Scan forward for a zero block. 49 nextBase := base + ext.blocks[lo].bytes 50 for hi := lo + 1; hi < len(ext.blocks); hi++ { 51 blk := ext.blocks[hi] 52 53 // If we found a zero-value block, the non-zero blocks since the last 54 // marker are an extent. 55 if blk.key == "" { 56 chunks = append(chunks, ext.blocks[lo:hi]) 57 bases = append(bases, base) 58 sizes = append(sizes, nextBase-base) 59 base = nextBase 60 lo = hi 61 continue nextChunk 62 } 63 nextBase += blk.bytes 64 } 65 66 // If we get here, hi reached the end of the blocks without finding 67 // another zero-value block, so the rest of the blocks are an extent. 68 // In the typical case where nothing happened, return without packing. 69 if lo == 0 { 70 return []*extent{ext} 71 } 72 chunks = append(chunks, ext.blocks[lo:]) 73 bases = append(bases, base) 74 sizes = append(sizes, nextBase-base) 75 break 76 } 77 78 exts := make([]*extent, len(chunks)) 79 for i, chunk := range chunks { 80 exts[i] = &extent{ 81 base: bases[i], 82 bytes: sizes[i], 83 blocks: chunk, 84 } 85 } 86 return exts 87 } 88 89 // A blockReader implements io.Reader for the concatenation of a slice of byte 90 // slices. This avoids the overhead of constructing a bytes.Reader for each 91 // blob plus an io.MultiReader to concatenate them. 92 type blockReader struct { 93 cur int 94 blocks [][]byte 95 } 96 97 func newBlockReader(blocks [][]byte) *blockReader { 98 return &blockReader{blocks: blocks} 99 } 100 101 func (r *blockReader) Read(data []byte) (int, error) { 102 var nr int 103 for nr < len(data) && r.cur < len(r.blocks) { 104 curBlock := r.blocks[r.cur] 105 cp := copy(data[nr:], curBlock) 106 if cp == len(curBlock) { 107 r.blocks[r.cur] = nil 108 r.cur++ 109 } else { 110 r.blocks[r.cur] = curBlock[cp:] 111 } 112 nr += cp 113 } 114 if nr == 0 && r.cur >= len(r.blocks) { 115 return 0, io.EOF 116 } 117 return nr, nil 118 } 119 120 // zeroCheck returns the length of the longest prefix and suffix of data that 121 // comprise all zeroes, along with the length of data. If data consists of all 122 // zero bytes, zeroCheck returns len(data), 0, len(data). 123 // 124 // Otherwise, zhead is the count of zero bytes prior to the first non-zero 125 // byte, and ztail is the count of zero bytes after the last non-zero byte. 126 func zeroCheck(data []byte) (zhead, ztail, n int) { 127 // Benchmarks for this implementation vs. naive loop. 128 // Sizes in bytes, times in ns/op (from go test -bench). 129 // 130 // Size Unsafe Naive Speedup 131 // 103 11 41 2.72x 132 // 1007 73 267 2.66x 133 // 10007 646 2529 2.91x 134 // 100007 6320 25248 2.99x 135 // 136 n = len(data) 137 zhead = mbits.LeadingZeroes(data) 138 ztail = mbits.TrailingZeroes(data[zhead:]) 139 return 140 } 141 142 func min(z0 int, zs ...int) int { 143 for _, z := range zs { 144 if z < z0 { 145 z0 = z 146 } 147 } 148 return z0 149 }