github.com/Schaudge/grailbase@v0.0.0-20240223061707-44c758a471c0/recordio/internal/chunk.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache-2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package internal
     6  
     7  import (
     8  	"encoding/binary"
     9  	"fmt"
    10  	"hash"
    11  	"hash/crc32"
    12  	"io"
    13  	"math"
    14  
    15  	"github.com/Schaudge/grailbase/errors"
    16  )
    17  
// chunkFlag holds the per-chunk flag bits stored in the chunk header.
// The writer currently always emits zero; the field is reserved.
type chunkFlag uint32

const (
	// ChunkHeaderSize is the fixed header size for a chunk.
	ChunkHeaderSize = 28

	// ChunkSize is the fixed size of a chunk, including its header.
	ChunkSize = 32 << 10

	// MaxChunkPayloadSize is the maximum size of payload a chunk can carry.
	MaxChunkPayloadSize = ChunkSize - ChunkHeaderSize
)
    30  
// Chunk layout:
//
//   magic [8B]
//   crc   [4B LE]
//   flag  [4B LE]
//   size  [4B LE]
//   total [4B LE]
//   index [4B LE]
//   data [size]
//   padding [32768 - 28 - size]
//
// magic: one of MagicHeader, MagicPacked, MagicTrailer.
// size: size of the chunk payload (data). size <= (32<<10) - 28
// padding: garbage data added to make the chunk size exactly 32768B.
//
// total: the total # of chunks in the block.
// index: the index of the chunk within the block.  Index is 0 for the first
// chunk, 1 for the 2nd chunk, and so on.
// flag: unused now.
//
// crc: IEEE CRC32 of the succeeding fields: flag, size, total, index, and
// data. Note: padding is not included in the CRC.
type chunkHeader [ChunkHeaderSize]byte
    54  
    55  func (h *chunkHeader) TotalChunks() int {
    56  	return int(binary.LittleEndian.Uint32(h[20:]))
    57  }
    58  
    59  func (h *chunkHeader) Index() int {
    60  	return int(binary.LittleEndian.Uint32(h[24:]))
    61  }
    62  
    63  var chunkPadding [MaxChunkPayloadSize]byte
    64  
    65  // Seek to "off". Returns nil iff the seek ptr moves to "off".
    66  func Seek(r io.ReadSeeker, off int64) error {
    67  	n, err := r.Seek(off, io.SeekStart)
    68  	if err != nil {
    69  		return err
    70  	}
    71  	if n != off {
    72  		return fmt.Errorf("seek: got %v, expect %v", n, off)
    73  	}
    74  	return nil
    75  }
    76  
    77  func init() {
    78  	temp := [4]byte{0xde, 0xad, 0xbe, 0xef}
    79  	for i := range chunkPadding {
    80  		chunkPadding[i] = temp[i%len(temp)]
    81  	}
    82  }
    83  
// ChunkWriter implements low-level block-write operations. It takes logical
// block and stores it as a sequence of chunks. Thread compatible.
type ChunkWriter struct {
	nWritten int64        // bytes written so far; see Len for validity caveats
	w        io.Writer    // underlying destination
	err      *errors.Once // sticky error sink shared with the caller
	crc      hash.Hash32  // reused IEEE CRC32 hasher for chunk checksums
}
    92  
// Len returns the number of bytes successfully written so far.
// The value is meaningful only when err.Err()==nil; after a write error the
// counter may not reflect what actually reached the underlying writer.
func (w *ChunkWriter) Len() int64 {
	return w.nWritten
}
    98  
    99  // Write one block. An error is reported through w.err.
   100  func (w *ChunkWriter) Write(magic MagicBytes, payload []byte) {
   101  	var header chunkHeader
   102  	copy(header[:], magic[:])
   103  
   104  	chunkIndex := 0
   105  	totalChunks := (len(payload)-1)/MaxChunkPayloadSize + 1
   106  	for {
   107  		var chunkPayload []byte
   108  		lastChunk := false
   109  		if len(payload) <= MaxChunkPayloadSize {
   110  			lastChunk = true
   111  			chunkPayload = payload
   112  			payload = nil
   113  		} else {
   114  			chunkPayload = payload[:MaxChunkPayloadSize]
   115  			payload = payload[MaxChunkPayloadSize:]
   116  		}
   117  		binary.LittleEndian.PutUint32(header[12:], uint32(0))
   118  		binary.LittleEndian.PutUint32(header[16:], uint32(len(chunkPayload)))
   119  		binary.LittleEndian.PutUint32(header[20:], uint32(totalChunks))
   120  		binary.LittleEndian.PutUint32(header[24:], uint32(chunkIndex))
   121  
   122  		w.crc.Reset()
   123  		w.crc.Write(header[12:])
   124  		w.crc.Write(chunkPayload)
   125  		csum := w.crc.Sum32()
   126  		binary.LittleEndian.PutUint32(header[8:], csum)
   127  		w.doWrite(header[:])
   128  		w.doWrite(chunkPayload)
   129  		chunkIndex++
   130  		if lastChunk {
   131  			paddingSize := MaxChunkPayloadSize - len(chunkPayload)
   132  			if paddingSize > 0 {
   133  				w.doWrite(chunkPadding[:paddingSize])
   134  			}
   135  			break
   136  		}
   137  	}
   138  	if chunkIndex != totalChunks {
   139  		panic(fmt.Sprintf("nchunks %d, total %d", chunkIndex, totalChunks))
   140  	}
   141  }
   142  
   143  func (w *ChunkWriter) doWrite(data []byte) {
   144  	n, err := w.w.Write(data)
   145  	if err != nil {
   146  		w.err.Set(err)
   147  		return
   148  	}
   149  	w.nWritten += int64(len(data))
   150  	if n != len(data) {
   151  		w.err.Set(fmt.Errorf("Failed to write %d bytes (got %d)", len(data), n))
   152  	}
   153  }
   154  
   155  // NewChunkWriter creates a new chunk writer. Any error is reported through
   156  // "err".
   157  func NewChunkWriter(w io.Writer, err *errors.Once) *ChunkWriter {
   158  	return &ChunkWriter{w: w, err: err, crc: crc32.New(IEEECRC)}
   159  }
   160  
// ChunkScanner reads a sequence of chunks and reconstructs a logical
// block. Thread compatible.
type ChunkScanner struct {
	r   io.ReadSeeker // underlying source
	err *errors.Once  // sticky error sink shared with the caller

	fileSize int64 // total file size, computed at construction
	off      int64 // current read offset within the file
	limit    int64 // offset at or after which a new block must not start

	magic  MagicBytes // magic of the block read by the last Scan
	chunks [][]byte   // chunk payloads of the block read by the last Scan

	pool                 [][]byte // freepool of ChunkSize scratch buffers
	unused               int      // the first unused buf in pool.
	approxChunksPerBlock float64  // moving average used to trim the pool
}
   178  
   179  // NewChunkScanner creates a new chunk scanner. Any error is reported through "err".
   180  func NewChunkScanner(r io.ReadSeeker, err *errors.Once) *ChunkScanner {
   181  	rx := &ChunkScanner{r: r, err: err}
   182  	// Compute the file size.
   183  	var e error
   184  	if rx.fileSize, e = r.Seek(0, io.SeekEnd); e != nil {
   185  		rx.err.Set(e)
   186  	}
   187  	rx.err.Set(Seek(r, 0))
   188  	rx.limit = math.MaxInt64
   189  	return rx
   190  }
   191  
// LimitShard limits this scanner to scan the blocks belonging to a shard range
// [start,limit) out of [0, nshard). The shard range begins at the scanner's
// current offset, which must be on a block boundary. The file (beginning at the
// current scanner offset) is divided into n shards. Each shard scans blocks
// until the next segment. If a shard begins in the middle of a block, that
// block belongs to the previous shard.
func (r *ChunkScanner) LimitShard(start, limit, nshard int) {
	// Compute the offset and limit for shard-of-nshard.
	// Invariant: limit is the offset at or after which a new block
	// should not be scanned.
	numChunks := (r.fileSize - r.off) / ChunkSize
	chunksPerShard := float64(numChunks) / float64(nshard)
	startOff := r.off
	r.off = startOff + int64(float64(start)*chunksPerShard)*ChunkSize
	r.limit = startOff + int64(float64(limit)*chunksPerShard)*ChunkSize
	if start == 0 {
		// No more work to do. We assume LimitShard is called on a block boundary.
		return
	}
	r.err.Set(Seek(r.r, r.off))
	if r.err.Err() != nil {
		return
	}
	// Peek at the header of the chunk at the shard's nominal start; the
	// read pointer is rewound to the chunk start by readChunkHeader.
	var header chunkHeader
	if !r.readChunkHeader(&header) {
		return
	}
	if r.err.Err() != nil {
		return
	}
	index := header.Index()
	if index == 0 {
		// No more work to do: we're already on a block boundary.
		return
	}
	// We're in the middle of a block. The current block belongs to the
	// previous shard, so we forward to the next block boundary.
	total := header.TotalChunks()
	if total <= index {
		// A chunk's index must be strictly less than the block's total
		// chunk count; anything else indicates a corrupt header.
		r.err.Set(errors.New("invalid chunk header"))
		return
	}
	r.off += ChunkSize * int64(total-index)
	r.err.Set(Seek(r.r, r.off))
}
   237  
// Tell returns the file offset of the next block to be read.
// Any error is reported in r.Err()
func (r *ChunkScanner) Tell() int64 {
	return r.off
}
   243  
// Seek moves the read pointer so that next Scan() will move to the block at the
// given file offset. Any error is reported in r.Err()
func (r *ChunkScanner) Seek(off int64) { // "go vet" complaint expected
	r.off = off
	r.err.Set(Seek(r.r, off))
}
   250  
   251  // Scan reads the next block. It returns false on EOF or any error.
   252  // AnB error is reported in r.Err()
   253  func (r *ChunkScanner) Scan() bool {
   254  	r.resetChunks()
   255  	r.magic = MagicInvalid
   256  	if r.err.Err() != nil {
   257  		return false
   258  	}
   259  	if r.off >= r.limit {
   260  		r.err.Set(io.EOF)
   261  		return false
   262  	}
   263  	totalChunks := -1
   264  	for {
   265  		chunkMagic, _, nchunks, index, chunkPayload := r.readChunk()
   266  		if chunkMagic == MagicInvalid || r.err.Err() != nil {
   267  			return false
   268  		}
   269  		if len(r.chunks) == 0 {
   270  			r.magic = chunkMagic
   271  			totalChunks = nchunks
   272  		}
   273  		if chunkMagic != r.magic {
   274  			r.err.Set(fmt.Errorf("Magic number changed in the middle of a chunk sequence, got %v, expect %v",
   275  				r.magic, chunkMagic))
   276  			return false
   277  		}
   278  		if len(r.chunks) != index {
   279  			r.err.Set(fmt.Errorf("Chunk index mismatch, got %v, expect %v for magic %x",
   280  				index, len(r.chunks), r.magic))
   281  			return false
   282  		}
   283  		if nchunks != totalChunks {
   284  			r.err.Set(fmt.Errorf("Chunk nchunk mismatch, got %v, expect %v for magic %x",
   285  				nchunks, totalChunks, r.magic))
   286  			return false
   287  		}
   288  		r.chunks = append(r.chunks, chunkPayload)
   289  		if index == totalChunks-1 {
   290  			break
   291  		}
   292  	}
   293  	return true
   294  }
   295  
// Block returns the current block contents: its magic number and the list of
// chunk payloads in order. The returned slices alias the scanner's internal
// buffers and are valid only until the next Scan.
//
// REQUIRES: Last Scan() call returned true.
func (r *ChunkScanner) Block() (MagicBytes, [][]byte) {
	return r.magic, r.chunks
}
   302  
// readChunkHeader reads the next chunk header into *header and then rewinds
// the read pointer to the start of the header, so that a following readChunk
// sees the full chunk. Returns false (with r.err set) if the header could not
// be read. On success r.off is updated to the rewound position.
func (r *ChunkScanner) readChunkHeader(header *chunkHeader) bool {
	_, err := io.ReadFull(r.r, header[:])
	if err != nil {
		r.err.Set(err)
		return false
	}
	// Seek back so the header is re-read as part of the whole chunk.
	r.off, err = r.r.Seek(-ChunkHeaderSize, io.SeekCurrent)
	r.err.Set(err)
	return true
}
   313  
// Read one chunk. On Error or EOF, returns MagicInvalid. The caller should
// check r.err.Err() to distinguish EOF and a real error.
//
// Returns (magic, flag, total chunk count, chunk index, payload). On a
// checksum mismatch r.err is set but the decoded values are still returned.
// The payload aliases a pooled buffer valid until the next resetChunks.
func (r *ChunkScanner) readChunk() (MagicBytes, chunkFlag, int, int, []byte) {
	chunkBuf := r.allocChunk()
	n, err := io.ReadFull(r.r, chunkBuf)
	r.off += int64(n)
	if err != nil {
		r.err.Set(err)
		return MagicInvalid, chunkFlag(0), 0, 0, nil
	}
	header := chunkBuf[:ChunkHeaderSize]

	// Decode the fixed header: magic[0:8], crc[8:12], flag[12:16],
	// size[16:20], total[20:24], index[24:28]; integers are little-endian.
	var magic MagicBytes
	copy(magic[:], header[:])
	expectedCsum := binary.LittleEndian.Uint32(header[8:])
	flag := chunkFlag(binary.LittleEndian.Uint32(header[12:]))
	size := binary.LittleEndian.Uint32(header[16:])
	totalChunks := int(binary.LittleEndian.Uint32(header[20:]))
	index := int(binary.LittleEndian.Uint32(header[24:]))
	if size > MaxChunkPayloadSize {
		r.err.Set(fmt.Errorf("Invalid chunk size %d", size))
		return MagicInvalid, chunkFlag(0), 0, 0, nil
	}

	chunkPayload := chunkBuf[ChunkHeaderSize : ChunkHeaderSize+size]
	// The CRC covers flag..index plus the payload — not the magic, the CRC
	// field itself, or the padding.
	actualCsum := crc32.Checksum(chunkBuf[12:ChunkHeaderSize+size], IEEECRC)
	if expectedCsum != actualCsum {
		r.err.Set(fmt.Errorf("Chunk checksum mismatch, expect %d, got %d",
			actualCsum, expectedCsum))
	}
	return magic, flag, totalChunks, index, chunkPayload
}
   346  
   347  func (r *ChunkScanner) resetChunks() {
   348  	// Avoid keeping too much data in the freepool.  If the pool size exceeds 2x
   349  	// the avg size of recent blocks, trim it down.
   350  	nChunks := float64(len(r.chunks))
   351  	if r.approxChunksPerBlock == 0 {
   352  		r.approxChunksPerBlock = nChunks
   353  	} else {
   354  		r.approxChunksPerBlock =
   355  			r.approxChunksPerBlock*0.9 + nChunks*0.1
   356  	}
   357  	max := int(r.approxChunksPerBlock*2) + 1
   358  	if len(r.pool) > max {
   359  		r.pool = r.pool[:max]
   360  	}
   361  	r.unused = 0
   362  	r.chunks = r.chunks[:0]
   363  }
   364  
   365  func (r *ChunkScanner) allocChunk() []byte {
   366  	for len(r.pool) <= r.unused {
   367  		r.pool = append(r.pool, make([]byte, ChunkSize))
   368  	}
   369  	b := r.pool[r.unused]
   370  	r.unused++
   371  	if len(b) != ChunkSize {
   372  		panic(r)
   373  	}
   374  	return b
   375  }
   376  
// ReadLastBlock reads the trailer. Sets err if the trailer does not exist, or
// is corrupt. After the call, the read pointer is at an undefined position so
// the user must call Seek() explicitly.
func (r *ChunkScanner) ReadLastBlock() (MagicBytes, [][]byte) {
	// The trailer's final chunk occupies the last ChunkSize bytes of the
	// file; read it first to learn the trailer's shape.
	var err error
	r.off, err = r.r.Seek(-ChunkSize, io.SeekEnd)
	if err != nil {
		r.err.Set(err)
		return MagicInvalid, nil
	}
	magic, _, totalChunks, index, payload := r.readChunk()
	if magic != MagicTrailer {
		r.err.Set(fmt.Errorf("Missing magic trailer; found %v", magic))
		return MagicInvalid, nil
	}
	if index == 0 && totalChunks == 1 {
		// Fast path for a single-chunk trailer.
		return magic, [][]byte{payload}
	}
	// Seek to the beginning of the block.
	// The last chunk has index "index", so the block starts (index+1)
	// chunks before the end of the file.
	r.off, err = r.r.Seek(-int64(index+1)*ChunkSize, io.SeekEnd)
	if err != nil {
		r.err.Set(err)
		return MagicInvalid, nil
	}
	if !r.Scan() {
		r.err.Set(fmt.Errorf("Failed to read trailer"))
		return MagicInvalid, nil
	}
	return r.magic, r.chunks
}