github.com/keybase/client/go@v0.0.0-20241007131713-f10651d043c8/kbfs/data/bsplitter_simple.go (about) 1 // Copyright 2016 Keybase Inc. All rights reserved. 2 // Use of this source code is governed by a BSD 3 // license that can be found in the LICENSE file. 4 5 package data 6 7 import ( 8 "fmt" 9 "os" 10 "sort" 11 "strconv" 12 "strings" 13 14 "github.com/keybase/client/go/kbfs/kbfscodec" 15 ) 16 17 // BlockSplitterSimple implements the BlockSplitter interface by using 18 // a simple max-size algorithm to determine when to split blocks. 19 type BlockSplitterSimple struct { 20 maxSize int64 21 maxPtrsPerBlock int 22 blockChangeEmbedMaxSize uint64 23 maxDirEntriesPerBlock int 24 } 25 26 func getMaxDirEntriesPerBlock() (int, error) { 27 dirEnv := os.Getenv("KEYBASE_BSPLIT_MAX_DIR_ENTRIES") 28 if len(dirEnv) > 0 { 29 maxDirEntriesPerBlock, err := strconv.Atoi(dirEnv) 30 if err != nil { 31 return 0, err 32 } 33 return maxDirEntriesPerBlock, nil 34 } 35 return 0, nil // disabled by default 36 } 37 38 // NewBlockSplitterSimple creates a new BlockSplittleSimple and 39 // adjusts the max size to try to match the desired size for file 40 // blocks, given the overhead of encoding a file block and the 41 // round-up padding we do. 42 func NewBlockSplitterSimple(desiredBlockSize int64, 43 blockChangeEmbedMaxSize uint64, codec kbfscodec.Codec) ( 44 *BlockSplitterSimple, error) { 45 // If the desired block size is exactly a power of 2, subtract one 46 // from it to account for the padding we will do, which rounds up 47 // when the encoded size is exactly a power of 2. 48 if desiredBlockSize&(desiredBlockSize-1) == 0 { 49 desiredBlockSize-- 50 } 51 52 // Make a FileBlock of the expected size to see what the encoded 53 // overhead is. 54 block := NewFileBlock().(*FileBlock) 55 fullData := make([]byte, desiredBlockSize) 56 // Fill in the block with varying data to make sure not to trigger 57 // any encoding optimizations. 58 for i := range fullData { 59 fullData[i] = byte(i) 60 } 61 62 maxSize := desiredBlockSize 63 var encodedLen int64 64 // Iterate until we find the right size (up to a maximum number of 65 // attempts), because the overhead is not constant across 66 // different Contents lengths (probably due to variable length 67 // encoding of the buffer size). 68 for i := 0; i < 10; i++ { 69 block.Contents = fullData[:maxSize] 70 encodedBlock, err := codec.Encode(block) 71 if err != nil { 72 return nil, err 73 } 74 75 encodedLen = int64(len(encodedBlock)) 76 if encodedLen >= 2*desiredBlockSize { 77 return nil, fmt.Errorf("Encoded block of %d bytes is more than "+ 78 "twice as big as the desired block size %d", 79 encodedLen, desiredBlockSize) 80 } 81 82 if encodedLen == desiredBlockSize { 83 break 84 } 85 86 maxSize += (desiredBlockSize - encodedLen) 87 } 88 89 if encodedLen != desiredBlockSize { 90 return nil, fmt.Errorf("Couldn't converge on a max block size for a "+ 91 "desired size of %d", desiredBlockSize) 92 } 93 94 // Trial and error shows that this magic 75% constant maximizes 95 // the number of realistic indirect pointers you can fit into the 96 // default block size. TODO: calculate this number more exactly 97 // during initialization for a given `maxSize`. 98 maxPtrs := int(.75 * float64(maxSize/int64(BPSize))) 99 if maxPtrs < 2 { 100 maxPtrs = 2 101 } 102 103 maxDirEntriesPerBlock, err := getMaxDirEntriesPerBlock() 104 if err != nil { 105 return nil, err 106 } 107 108 return &BlockSplitterSimple{ 109 maxSize: maxSize, 110 maxPtrsPerBlock: maxPtrs, 111 blockChangeEmbedMaxSize: blockChangeEmbedMaxSize, 112 maxDirEntriesPerBlock: maxDirEntriesPerBlock, 113 }, nil 114 } 115 116 // NewBlockSplitterSimpleExact returns a BlockSplitterSimple with the 117 // max block size set to an exact value. 118 func NewBlockSplitterSimpleExact( 119 maxSize int64, maxPtrsPerBlock int, blockChangeEmbedMaxSize uint64) ( 120 *BlockSplitterSimple, error) { 121 maxDirEntriesPerBlock, err := getMaxDirEntriesPerBlock() 122 if err != nil { 123 return nil, err 124 } 125 return &BlockSplitterSimple{ 126 maxSize: maxSize, 127 maxPtrsPerBlock: maxPtrsPerBlock, 128 blockChangeEmbedMaxSize: blockChangeEmbedMaxSize, 129 maxDirEntriesPerBlock: maxDirEntriesPerBlock, 130 }, nil 131 } 132 133 // SetMaxDirEntriesByBlockSize sets the maximum number of directory 134 // entries per directory block, based on the maximum block size. If 135 // the `KEYBASE_BSPLIT_MAX_DIR_ENTRIES` is set, this function does 136 // nothing. 137 func (b *BlockSplitterSimple) SetMaxDirEntriesByBlockSize( 138 codec kbfscodec.Codec) error { 139 dirEnv := os.Getenv("KEYBASE_BSPLIT_MAX_DIR_ENTRIES") 140 if len(dirEnv) > 0 { 141 // Don't override the environment variable. 142 return nil 143 } 144 145 block := NewDirBlock().(*DirBlock) 146 bigName := strings.Repeat("a", MaxNameBytesDefault) 147 // Make "typical" DirEntry, though the max dir entry is a bit 148 // bigger than this (can contain a variable-length symlink path, 149 // for example). 150 de := DirEntry{ 151 BlockInfo: BlockInfo{ 152 BlockPointer: BlockPointer{ 153 DirectType: DirectBlock, 154 }, 155 }, 156 EntryInfo: EntryInfo{ 157 PrevRevisions: PrevRevisions{ 158 {Revision: 0, Count: 0}, 159 {Revision: 1, Count: 1}, 160 {Revision: 2, Count: 2}, 161 {Revision: 3, Count: 3}, 162 {Revision: 4, Count: 4}, 163 }, 164 }, 165 } 166 block.Children[bigName] = de 167 encodedBlock, err := codec.Encode(block) 168 if err != nil { 169 return err 170 } 171 oneEntrySize := int64(len(encodedBlock)) 172 b.maxDirEntriesPerBlock = int(b.maxSize / oneEntrySize) 173 if b.maxDirEntriesPerBlock == 0 { 174 b.maxDirEntriesPerBlock = 1 175 } 176 return nil 177 } 178 179 // CopyUntilSplit implements the BlockSplitter interface for 180 // BlockSplitterSimple. 181 func (b *BlockSplitterSimple) CopyUntilSplit( 182 block *FileBlock, lastBlock bool, data []byte, off int64) int64 { 183 n := int64(len(data)) 184 currLen := int64(len(block.Contents)) 185 // lastBlock is irrelevant since we only copy fixed sizes 186 187 toCopy := n 188 if currLen < (off + n) { 189 moreNeeded := (n + off) - currLen 190 // Reduce the number of additional bytes if it will take this block 191 // over maxSize. 192 if moreNeeded+currLen > b.maxSize { 193 moreNeeded = b.maxSize - currLen 194 if moreNeeded < 0 { 195 // If it is already over maxSize w/o any added bytes, 196 // just give up. 197 return 0 198 } 199 // only copy to the end of the block 200 toCopy = b.maxSize - off 201 } 202 203 if moreNeeded > 0 { 204 block.Contents = append(block.Contents, make([]byte, moreNeeded)...) 205 } 206 } 207 208 // we may have filled out the block above, but we still can't copy anything 209 if off > int64(len(block.Contents)) { 210 return 0 211 } 212 213 copy(block.Contents[off:off+toCopy], data[:toCopy]) 214 return toCopy 215 } 216 217 // CheckSplit implements the BlockSplitter interface for 218 // BlockSplitterSimple. 219 func (b *BlockSplitterSimple) CheckSplit(block *FileBlock) int64 { 220 // The split will always be right 221 return 0 222 } 223 224 // MaxPtrsPerBlock implements the BlockSplitter interface for 225 // BlockSplitterSimple. 226 func (b *BlockSplitterSimple) MaxPtrsPerBlock() int { 227 return b.maxPtrsPerBlock 228 } 229 230 // ShouldEmbedData implements the BlockSplitter interface for 231 // BlockSplitterSimple. 232 func (b *BlockSplitterSimple) ShouldEmbedData(size uint64) bool { 233 return size <= b.blockChangeEmbedMaxSize 234 } 235 236 // SplitDirIfNeeded implements the BlockSplitter interface for 237 // BlockSplitterSimple. 238 func (b *BlockSplitterSimple) SplitDirIfNeeded(block *DirBlock) ( 239 []*DirBlock, *StringOffset) { 240 if block.IsIndirect() { 241 panic("SplitDirIfNeeded must be given only a direct block") 242 } 243 244 if b.maxDirEntriesPerBlock == 0 || 245 len(block.Children) <= b.maxDirEntriesPerBlock { 246 return []*DirBlock{block}, nil 247 } 248 249 // Sort the entries and split them down the middle. 250 names := make([]string, 0, len(block.Children)) 251 for name := range block.Children { 252 names = append(names, name) 253 } 254 255 sort.Strings(names) 256 // Delete the second half of the names from the original block, 257 // and add to the new block. 258 newBlock := NewDirBlock().(*DirBlock) 259 startOff := len(names) / 2 260 for _, name := range names[len(names)/2:] { 261 newBlock.Children[name] = block.Children[name] 262 delete(block.Children, name) 263 } 264 newOffset := StringOffset(names[startOff]) 265 return []*DirBlock{block, newBlock}, &newOffset 266 } 267 268 // MaxSize returns the max block size. 269 func (b *BlockSplitterSimple) MaxSize() int64 { 270 return b.maxSize 271 } 272 273 // SetBlockChangeEmbedMaxSizeForTesting sets the max size for block 274 // change embeds, which is useful for testing. It is not 275 // goroutine-safe. 276 func (b *BlockSplitterSimple) SetBlockChangeEmbedMaxSizeForTesting( 277 newSize uint64) { 278 b.blockChangeEmbedMaxSize = newSize 279 } 280 281 // SetMaxDirEntriesPerBlockForTesting sets the max dir entries for a 282 // block, which is useful for testing. It is not goroutine-safe. 283 func (b *BlockSplitterSimple) SetMaxDirEntriesPerBlockForTesting(newMax int) { 284 b.maxDirEntriesPerBlock = newMax 285 }