github.com/keybase/client/go@v0.0.0-20240309051027-028f7c731f8b/kbfs/data/bsplitter_simple.go (about)

     1  // Copyright 2016 Keybase Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD
     3  // license that can be found in the LICENSE file.
     4  
     5  package data
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  
    14  	"github.com/keybase/client/go/kbfs/kbfscodec"
    15  )
    16  
// BlockSplitterSimple implements the BlockSplitter interface by using
// a simple max-size algorithm to determine when to split blocks.
//
// Fields:
//   - maxSize: upper bound, in bytes, that CopyUntilSplit will grow
//     a file block's contents to; also reported by MaxSize.
//   - maxPtrsPerBlock: the value reported by MaxPtrsPerBlock.
//   - blockChangeEmbedMaxSize: the largest size for which
//     ShouldEmbedData returns true.
//   - maxDirEntriesPerBlock: the child-count threshold above which
//     SplitDirIfNeeded splits a directory block; zero disables
//     directory splitting.
type BlockSplitterSimple struct {
	maxSize                 int64
	maxPtrsPerBlock         int
	blockChangeEmbedMaxSize uint64
	maxDirEntriesPerBlock   int
}
    25  
    26  func getMaxDirEntriesPerBlock() (int, error) {
    27  	dirEnv := os.Getenv("KEYBASE_BSPLIT_MAX_DIR_ENTRIES")
    28  	if len(dirEnv) > 0 {
    29  		maxDirEntriesPerBlock, err := strconv.Atoi(dirEnv)
    30  		if err != nil {
    31  			return 0, err
    32  		}
    33  		return maxDirEntriesPerBlock, nil
    34  	}
    35  	return 0, nil // disabled by default
    36  }
    37  
    38  // NewBlockSplitterSimple creates a new BlockSplittleSimple and
    39  // adjusts the max size to try to match the desired size for file
    40  // blocks, given the overhead of encoding a file block and the
    41  // round-up padding we do.
    42  func NewBlockSplitterSimple(desiredBlockSize int64,
    43  	blockChangeEmbedMaxSize uint64, codec kbfscodec.Codec) (
    44  	*BlockSplitterSimple, error) {
    45  	// If the desired block size is exactly a power of 2, subtract one
    46  	// from it to account for the padding we will do, which rounds up
    47  	// when the encoded size is exactly a power of 2.
    48  	if desiredBlockSize&(desiredBlockSize-1) == 0 {
    49  		desiredBlockSize--
    50  	}
    51  
    52  	// Make a FileBlock of the expected size to see what the encoded
    53  	// overhead is.
    54  	block := NewFileBlock().(*FileBlock)
    55  	fullData := make([]byte, desiredBlockSize)
    56  	// Fill in the block with varying data to make sure not to trigger
    57  	// any encoding optimizations.
    58  	for i := range fullData {
    59  		fullData[i] = byte(i)
    60  	}
    61  
    62  	maxSize := desiredBlockSize
    63  	var encodedLen int64
    64  	// Iterate until we find the right size (up to a maximum number of
    65  	// attempts), because the overhead is not constant across
    66  	// different Contents lengths (probably due to variable length
    67  	// encoding of the buffer size).
    68  	for i := 0; i < 10; i++ {
    69  		block.Contents = fullData[:maxSize]
    70  		encodedBlock, err := codec.Encode(block)
    71  		if err != nil {
    72  			return nil, err
    73  		}
    74  
    75  		encodedLen = int64(len(encodedBlock))
    76  		if encodedLen >= 2*desiredBlockSize {
    77  			return nil, fmt.Errorf("Encoded block of %d bytes is more than "+
    78  				"twice as big as the desired block size %d",
    79  				encodedLen, desiredBlockSize)
    80  		}
    81  
    82  		if encodedLen == desiredBlockSize {
    83  			break
    84  		}
    85  
    86  		maxSize += (desiredBlockSize - encodedLen)
    87  	}
    88  
    89  	if encodedLen != desiredBlockSize {
    90  		return nil, fmt.Errorf("Couldn't converge on a max block size for a "+
    91  			"desired size of %d", desiredBlockSize)
    92  	}
    93  
    94  	// Trial and error shows that this magic 75% constant maximizes
    95  	// the number of realistic indirect pointers you can fit into the
    96  	// default block size.  TODO: calculate this number more exactly
    97  	// during initialization for a given `maxSize`.
    98  	maxPtrs := int(.75 * float64(maxSize/int64(BPSize)))
    99  	if maxPtrs < 2 {
   100  		maxPtrs = 2
   101  	}
   102  
   103  	maxDirEntriesPerBlock, err := getMaxDirEntriesPerBlock()
   104  	if err != nil {
   105  		return nil, err
   106  	}
   107  
   108  	return &BlockSplitterSimple{
   109  		maxSize:                 maxSize,
   110  		maxPtrsPerBlock:         maxPtrs,
   111  		blockChangeEmbedMaxSize: blockChangeEmbedMaxSize,
   112  		maxDirEntriesPerBlock:   maxDirEntriesPerBlock,
   113  	}, nil
   114  }
   115  
   116  // NewBlockSplitterSimpleExact returns a BlockSplitterSimple with the
   117  // max block size set to an exact value.
   118  func NewBlockSplitterSimpleExact(
   119  	maxSize int64, maxPtrsPerBlock int, blockChangeEmbedMaxSize uint64) (
   120  	*BlockSplitterSimple, error) {
   121  	maxDirEntriesPerBlock, err := getMaxDirEntriesPerBlock()
   122  	if err != nil {
   123  		return nil, err
   124  	}
   125  	return &BlockSplitterSimple{
   126  		maxSize:                 maxSize,
   127  		maxPtrsPerBlock:         maxPtrsPerBlock,
   128  		blockChangeEmbedMaxSize: blockChangeEmbedMaxSize,
   129  		maxDirEntriesPerBlock:   maxDirEntriesPerBlock,
   130  	}, nil
   131  }
   132  
   133  // SetMaxDirEntriesByBlockSize sets the maximum number of directory
   134  // entries per directory block, based on the maximum block size.  If
   135  // the `KEYBASE_BSPLIT_MAX_DIR_ENTRIES` is set, this function does
   136  // nothing.
   137  func (b *BlockSplitterSimple) SetMaxDirEntriesByBlockSize(
   138  	codec kbfscodec.Codec) error {
   139  	dirEnv := os.Getenv("KEYBASE_BSPLIT_MAX_DIR_ENTRIES")
   140  	if len(dirEnv) > 0 {
   141  		// Don't override the environment variable.
   142  		return nil
   143  	}
   144  
   145  	block := NewDirBlock().(*DirBlock)
   146  	bigName := strings.Repeat("a", MaxNameBytesDefault)
   147  	// Make "typical" DirEntry, though the max dir entry is a bit
   148  	// bigger than this (can contain a variable-length symlink path,
   149  	// for example).
   150  	de := DirEntry{
   151  		BlockInfo: BlockInfo{
   152  			BlockPointer: BlockPointer{
   153  				DirectType: DirectBlock,
   154  			},
   155  		},
   156  		EntryInfo: EntryInfo{
   157  			PrevRevisions: PrevRevisions{
   158  				{Revision: 0, Count: 0},
   159  				{Revision: 1, Count: 1},
   160  				{Revision: 2, Count: 2},
   161  				{Revision: 3, Count: 3},
   162  				{Revision: 4, Count: 4},
   163  			},
   164  		},
   165  	}
   166  	block.Children[bigName] = de
   167  	encodedBlock, err := codec.Encode(block)
   168  	if err != nil {
   169  		return err
   170  	}
   171  	oneEntrySize := int64(len(encodedBlock))
   172  	b.maxDirEntriesPerBlock = int(b.maxSize / oneEntrySize)
   173  	if b.maxDirEntriesPerBlock == 0 {
   174  		b.maxDirEntriesPerBlock = 1
   175  	}
   176  	return nil
   177  }
   178  
   179  // CopyUntilSplit implements the BlockSplitter interface for
   180  // BlockSplitterSimple.
   181  func (b *BlockSplitterSimple) CopyUntilSplit(
   182  	block *FileBlock, lastBlock bool, data []byte, off int64) int64 {
   183  	n := int64(len(data))
   184  	currLen := int64(len(block.Contents))
   185  	// lastBlock is irrelevant since we only copy fixed sizes
   186  
   187  	toCopy := n
   188  	if currLen < (off + n) {
   189  		moreNeeded := (n + off) - currLen
   190  		// Reduce the number of additional bytes if it will take this block
   191  		// over maxSize.
   192  		if moreNeeded+currLen > b.maxSize {
   193  			moreNeeded = b.maxSize - currLen
   194  			if moreNeeded < 0 {
   195  				// If it is already over maxSize w/o any added bytes,
   196  				// just give up.
   197  				return 0
   198  			}
   199  			// only copy to the end of the block
   200  			toCopy = b.maxSize - off
   201  		}
   202  
   203  		if moreNeeded > 0 {
   204  			block.Contents = append(block.Contents, make([]byte, moreNeeded)...)
   205  		}
   206  	}
   207  
   208  	// we may have filled out the block above, but we still can't copy anything
   209  	if off > int64(len(block.Contents)) {
   210  		return 0
   211  	}
   212  
   213  	copy(block.Contents[off:off+toCopy], data[:toCopy])
   214  	return toCopy
   215  }
   216  
   217  // CheckSplit implements the BlockSplitter interface for
   218  // BlockSplitterSimple.
   219  func (b *BlockSplitterSimple) CheckSplit(block *FileBlock) int64 {
   220  	// The split will always be right
   221  	return 0
   222  }
   223  
   224  // MaxPtrsPerBlock implements the BlockSplitter interface for
   225  // BlockSplitterSimple.
   226  func (b *BlockSplitterSimple) MaxPtrsPerBlock() int {
   227  	return b.maxPtrsPerBlock
   228  }
   229  
   230  // ShouldEmbedData implements the BlockSplitter interface for
   231  // BlockSplitterSimple.
   232  func (b *BlockSplitterSimple) ShouldEmbedData(size uint64) bool {
   233  	return size <= b.blockChangeEmbedMaxSize
   234  }
   235  
   236  // SplitDirIfNeeded implements the BlockSplitter interface for
   237  // BlockSplitterSimple.
   238  func (b *BlockSplitterSimple) SplitDirIfNeeded(block *DirBlock) (
   239  	[]*DirBlock, *StringOffset) {
   240  	if block.IsIndirect() {
   241  		panic("SplitDirIfNeeded must be given only a direct block")
   242  	}
   243  
   244  	if b.maxDirEntriesPerBlock == 0 ||
   245  		len(block.Children) <= b.maxDirEntriesPerBlock {
   246  		return []*DirBlock{block}, nil
   247  	}
   248  
   249  	// Sort the entries and split them down the middle.
   250  	names := make([]string, 0, len(block.Children))
   251  	for name := range block.Children {
   252  		names = append(names, name)
   253  	}
   254  
   255  	sort.Strings(names)
   256  	// Delete the second half of the names from the original block,
   257  	// and add to the new block.
   258  	newBlock := NewDirBlock().(*DirBlock)
   259  	startOff := len(names) / 2
   260  	for _, name := range names[len(names)/2:] {
   261  		newBlock.Children[name] = block.Children[name]
   262  		delete(block.Children, name)
   263  	}
   264  	newOffset := StringOffset(names[startOff])
   265  	return []*DirBlock{block, newBlock}, &newOffset
   266  }
   267  
   268  // MaxSize returns the max block size.
   269  func (b *BlockSplitterSimple) MaxSize() int64 {
   270  	return b.maxSize
   271  }
   272  
// SetBlockChangeEmbedMaxSizeForTesting sets the max size for block
// change embeds, which is useful for testing.  It is not
// goroutine-safe.  The new value is what later ShouldEmbedData calls
// on this splitter compare against.
func (b *BlockSplitterSimple) SetBlockChangeEmbedMaxSizeForTesting(
	newSize uint64) {
	// Plain field write with no synchronization.
	b.blockChangeEmbedMaxSize = newSize
}
   280  
// SetMaxDirEntriesPerBlockForTesting sets the max dir entries for a
// block, which is useful for testing.  It is not goroutine-safe.
// The value is stored as given, overriding whatever limit was set at
// construction time; a value of zero disables directory splitting in
// SplitDirIfNeeded.
func (b *BlockSplitterSimple) SetMaxDirEntriesPerBlockForTesting(newMax int) {
	// Plain field write with no synchronization.
	b.maxDirEntriesPerBlock = newMax
}