github.com/grailbio/base@v0.0.11/recordio/estimate.go (about) 1 package recordio 2 3 import ( 4 "encoding/binary" 5 6 "github.com/grailbio/base/recordio/internal" 7 ) 8 9 // RequiredSpaceUpperBound returns an upper bound on the space required to 10 // store n items of itemSizes for a specified record size. 11 func RequiredSpaceUpperBound(itemSizes []int64, recordSize int64) int64 { 12 13 // Max number of chunks required per record. 14 // reqChunksPerRecord is Ceil(recordSize / internal.MaxChunkPayloadSize) 15 reqChunksPerRecord := recordSize / internal.MaxChunkPayloadSize 16 if (recordSize % internal.MaxChunkPayloadSize) != 0 { 17 reqChunksPerRecord++ 18 } 19 20 // Max payload = UpperBound(header) + payload. 21 // 1 varint for # items, n for the size of each of n items. 22 // Using binary.MaxVarintLen64 since we want an upper bound. 23 hdrSizeUBound := (len(itemSizes) + 1) * binary.MaxVarintLen64 24 maxPayload := int64(hdrSizeUBound) 25 for _, s := range itemSizes { 26 maxPayload += s 27 } 28 29 // Max number of records required for payload. 30 // reqRecordsForPayload is Ceil(maxPayload / recordSize) 31 reqRecordsForPayload := maxPayload / recordSize 32 if (maxPayload % recordSize) != 0 { 33 reqRecordsForPayload++ 34 } 35 36 // Max number of chunks required = chunks for payload + 2 chunks for header and trailer. 37 reqChunksForPayload := (reqChunksPerRecord * reqRecordsForPayload) + int64(2) 38 39 // Upper bound on the space required. 40 return reqChunksForPayload * internal.ChunkSize 41 }