github.com/grailbio/base@v0.0.11/recordio/estimate.go (about)

     1  package recordio
     2  
     3  import (
     4  	"encoding/binary"
     5  
     6  	"github.com/grailbio/base/recordio/internal"
     7  )
     8  
     9  // RequiredSpaceUpperBound returns an upper bound on the space required to
    10  // store n items of itemSizes for a specified record size.
    11  func RequiredSpaceUpperBound(itemSizes []int64, recordSize int64) int64 {
    12  
    13  	// Max number of chunks required per record.
    14  	// reqChunksPerRecord is Ceil(recordSize / internal.MaxChunkPayloadSize)
    15  	reqChunksPerRecord := recordSize / internal.MaxChunkPayloadSize
    16  	if (recordSize % internal.MaxChunkPayloadSize) != 0 {
    17  		reqChunksPerRecord++
    18  	}
    19  
    20  	// Max payload = UpperBound(header) + payload.
    21  	// 1 varint for # items, n for the size of each of n items.
    22  	// Using binary.MaxVarintLen64 since we want an upper bound.
    23  	hdrSizeUBound := (len(itemSizes) + 1) * binary.MaxVarintLen64
    24  	maxPayload := int64(hdrSizeUBound)
    25  	for _, s := range itemSizes {
    26  		maxPayload += s
    27  	}
    28  
    29  	// Max number of records required for payload.
    30  	// reqRecordsForPayload is Ceil(maxPayload / recordSize)
    31  	reqRecordsForPayload := maxPayload / recordSize
    32  	if (maxPayload % recordSize) != 0 {
    33  		reqRecordsForPayload++
    34  	}
    35  
    36  	// Max number of chunks required = chunks for payload + 2 chunks for header and trailer.
    37  	reqChunksForPayload := (reqChunksPerRecord * reqRecordsForPayload) + int64(2)
    38  
    39  	// Upper bound on the space required.
    40  	return reqChunksForPayload * internal.ChunkSize
    41  }