github.com/grailbio/base@v0.0.11/mapio/block.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package mapio
     6  
     7  import (
     8  	"bytes"
     9  	"encoding/binary"
    10  	"errors"
    11  	"fmt"
    12  	"hash/crc32"
    13  	"sort"
    14  )
    15  
    16  const (
    17  	maxBlockHeaderSize = binary.MaxVarintLen32 + // sharedSize
    18  		binary.MaxVarintLen32 + // unsharedSize
    19  		binary.MaxVarintLen32 // valueSize
    20  
    21  	blockMinTrailerSize = 4 + // restart count
    22  		1 + // block type
    23  		4 // crc32 (IEEE) checksum of contents
    24  )
    25  
    26  var order = binary.LittleEndian
    27  
    28  // A blockBuffer is a writable block buffer.
    29  type blockBuffer struct {
    30  	bytes.Buffer
    31  
    32  	lastKey []byte
    33  
    34  	restartInterval int
    35  	restarts        []int
    36  	restartCount    int
    37  }
    38  
    39  // Append appends the provided entry to the block. Must be called
    40  // in lexicographic order of keys, or else Append panics.
    41  func (b *blockBuffer) Append(key, value []byte) {
    42  	if bytes.Compare(key, b.lastKey) < 0 {
    43  		panic("keys added out of order")
    44  	}
    45  	var shared int
    46  	if b.restartCount < b.restartInterval {
    47  		n := len(b.lastKey)
    48  		if len(key) < n {
    49  			n = len(key)
    50  		}
    51  		for shared = 0; shared < n; shared++ {
    52  			if key[shared] != b.lastKey[shared] {
    53  				break
    54  			}
    55  		}
    56  		b.restartCount++
    57  	} else {
    58  		b.restartCount = 0
    59  		b.restarts = append(b.restarts, b.Len())
    60  	}
    61  
    62  	if b.lastKey == nil || cap(b.lastKey) < len(key) {
    63  		b.lastKey = make([]byte, len(key))
    64  	} else {
    65  		b.lastKey = b.lastKey[:len(key)]
    66  	}
    67  	copy(b.lastKey[shared:], key[shared:])
    68  
    69  	var hd [maxBlockHeaderSize]byte
    70  	var pos int
    71  	pos += binary.PutUvarint(hd[pos:], uint64(shared))
    72  	pos += binary.PutUvarint(hd[pos:], uint64(len(key)-shared))
    73  	pos += binary.PutUvarint(hd[pos:], uint64(len(value)))
    74  
    75  	b.Write(hd[:pos])
    76  	b.Write(key[shared:])
    77  	b.Write(value)
    78  }
    79  
    80  // Finish completes the block by adding the block trailer.
    81  func (b *blockBuffer) Finish() {
    82  	b.Grow(4*(len(b.restarts)+1) + 1 + 4 + 4)
    83  	var (
    84  		pback [4]byte
    85  		p     = pback[:]
    86  	)
    87  	if b.Buffer.Len() > 0 {
    88  		// Add restart points. Zero is always a restart point (if block is nonempty).
    89  		order.PutUint32(p, 0)
    90  		b.Write(p)
    91  		for _, off := range b.restarts {
    92  			order.PutUint32(p, uint32(off))
    93  			b.Write(p)
    94  		}
    95  		order.PutUint32(p, uint32(len(b.restarts)+1))
    96  	} else {
    97  		order.PutUint32(p, 0)
    98  	}
    99  	b.Write(p)
   100  	b.WriteByte(0) // zero type. reserved.
   101  	order.PutUint32(p, crc32.ChecksumIEEE(b.Bytes()))
   102  	b.Write(p)
   103  }
   104  
   105  // Reset resets the contents of this block. After a call to reset,
   106  // the blockBuffer instance may be used to write a new block.
   107  func (b *blockBuffer) Reset() {
   108  	b.lastKey = nil
   109  	b.restarts = nil
   110  	b.restartCount = 0
   111  	b.Buffer.Reset()
   112  }
   113  
   114  // A block is an in-memory representation of a single block. Blocks
   115  // maintain a current offset from which entries are scanned.
   116  type block struct {
   117  	p        []byte
   118  	nrestart int
   119  	restarts []byte
   120  
   121  	key, value   []byte
   122  	off, prevOff int
   123  }
   124  
   125  // Init initializes the block from the block contents stored at b.p.
   126  // Init returns an error if the block is malformed or corrupted.
   127  func (b *block) init() error {
   128  	if len(b.p) < blockMinTrailerSize {
   129  		return errors.New("invalid block: too small")
   130  	}
   131  	if got, want := crc32.ChecksumIEEE(b.p[:len(b.p)-4]), order.Uint32(b.p[len(b.p)-4:]); got != want {
   132  		return fmt.Errorf("invalid checksum: expected %x, got %v", want, got)
   133  	}
   134  	off := len(b.p) - blockMinTrailerSize
   135  	b.nrestart = int(order.Uint32(b.p[off:]))
   136  	if b.nrestart*4 > off {
   137  		return errors.New("corrupt block")
   138  	}
   139  	b.restarts = b.p[off-4*b.nrestart : off]
   140  	if btype := b.p[off+4]; btype != 0 {
   141  		return fmt.Errorf("invalid block type %d", btype)
   142  	}
   143  	b.p = b.p[:off-4*b.nrestart]
   144  	b.key = nil
   145  	b.value = nil
   146  	b.off = 0
   147  	b.prevOff = 0
   148  	return nil
   149  }
   150  
   151  // Seek sets the block to the first position for which key <= b.Key().
   152  func (b *block) Seek(key []byte) {
   153  	restart := sort.Search(b.nrestart, func(i int) bool {
   154  		b.off = int(order.Uint32(b.restarts[i*4:]))
   155  		if !b.Scan() {
   156  			panic("corrupt block")
   157  		}
   158  		return bytes.Compare(key, b.Key()) <= 0
   159  	})
   160  	if restart == 0 {
   161  		// No more work needed. key <= the first key in the block.
   162  		b.off = 0
   163  		return
   164  	}
   165  	b.off = int(order.Uint32(b.restarts[(restart-1)*4:]))
   166  	for b.Scan() {
   167  		if bytes.Compare(key, b.Key()) <= 0 {
   168  			b.unscan()
   169  			break
   170  		}
   171  	}
   172  }
   173  
   174  // Scan reads the entry at the current position and then advanced the
   175  // block's position to the next entry. Scan returns false when the
   176  // position is at or beyond the end of the block.
   177  func (b *block) Scan() bool {
   178  	if b.off >= len(b.p) {
   179  		return false
   180  	}
   181  	b.prevOff = b.off
   182  	nshared, n := binary.Uvarint(b.p[b.off:])
   183  	b.off += n
   184  	nunshared, n := binary.Uvarint(b.p[b.off:])
   185  	b.off += n
   186  	nvalue, n := binary.Uvarint(b.p[b.off:])
   187  	b.off += n
   188  	b.key = append(b.key[:nshared], b.p[b.off:b.off+int(nunshared)]...)
   189  	b.off += int(nunshared)
   190  	b.value = b.p[b.off : b.off+int(nvalue)]
   191  	b.off += int(nvalue)
   192  	return true
   193  }
   194  
   195  func (b *block) unscan() {
   196  	b.off = b.prevOff
   197  }
   198  
   199  // Key returns the key for the last scanned entry of the block.
   200  func (b *block) Key() []byte {
   201  	return b.key
   202  }
   203  
   204  // Value returns the value for the last scanned entry of the block.
   205  func (b *block) Value() []byte {
   206  	return b.value
   207  }
   208  
   209  func readBlock(p []byte) (*block, error) {
   210  	b := &block{p: p}
   211  	return b, b.init()
   212  }