// Copyright 2018 GRAIL, Inc. All rights reserved.
// Use of this source code is governed by the Apache 2.0
// license that can be found in the LICENSE file.

const (
	// maxBlockHeaderSize is the space reserved for one entry header:
	// three uvarints holding the shared key length, the unshared key
	// length, and the value length.
	maxBlockHeaderSize = binary.MaxVarintLen32 + // sharedSize
		binary.MaxVarintLen32 + // unsharedSize
		binary.MaxVarintLen32 // valueSize

	// blockMinTrailerSize is the size of the fixed part of the block
	// trailer, i.e. the trailer of a block without restart points.
	blockMinTrailerSize = 4 + // restart count
		1 + // block type
		4 // crc32 (IEEE) checksum of contents
)

// order is the byte order used for every fixed-width integer in a block.
var order = binary.LittleEndian

// A blockBuffer is a writable block buffer.
//
// Entries are written as a header of three uvarints (shared key length,
// unshared key length, value length) followed by the unshared key
// suffix and the value. Finish appends the trailer.
type blockBuffer struct {
	bytes.Buffer

	// lastKey holds the most recently appended key. It is used both to
	// enforce key order and to compute the prefix shared with the next
	// key.
	lastKey []byte

	// restartInterval is the number of prefix-compressed entries that
	// may be appended before the next entry is stored as a restart
	// point (an entry whose key is written in full).
	restartInterval int
	// restarts holds the buffer offsets of the restart points recorded
	// by Append. Finish additionally writes offset zero ahead of them.
	restarts []int
	// restartCount counts the prefix-compressed entries appended since
	// the last restart point.
	restartCount int
}

// Append appends the provided entry to the block. Must be called
// in lexicographic order of keys, or else Append panics. Equal keys
// are accepted.
func (b *blockBuffer) Append(key, value []byte) {
	if bytes.Compare(key, b.lastKey) < 0 {
		panic("keys added out of order")
	}
	var shared int
	if b.restartCount < b.restartInterval {
		n := len(b.lastKey)
		if len(key) < n {
			n = len(key)
		}
		for shared < n && key[shared] == b.lastKey[shared] {
			shared++
		}
		b.restartCount++
	} else {
		// Restart point: the key is stored in full (shared stays zero)
		// and the entry's offset is remembered for the trailer.
		b.restartCount = 0
		b.restarts = append(b.restarts, b.Len())
	}

	// Keep the shared prefix and overwrite the rest. Using append
	// preserves lastKey[:shared] even when the slice has to grow;
	// allocating a fresh slice and copying only key[shared:] would
	// leave the prefix zeroed and corrupt the next ordering check.
	b.lastKey = append(b.lastKey[:shared], key[shared:]...)

	var hd [maxBlockHeaderSize]byte
	var pos int
	pos += binary.PutUvarint(hd[pos:], uint64(shared))
	pos += binary.PutUvarint(hd[pos:], uint64(len(key)-shared))
	pos += binary.PutUvarint(hd[pos:], uint64(len(value)))

	b.Write(hd[:pos])
	b.Write(key[shared:])
	b.Write(value)
}

// Finish completes the block by adding the block trailer: the restart
// offsets, the restart count, the block type byte, and a CRC32 (IEEE)
// checksum of everything written before the checksum.
func (b *blockBuffer) Finish() {
	b.Grow(4*(len(b.restarts)+1) + 1 + 4 + 4)
	var (
		pback [4]byte
		p     = pback[:]
	)
	if b.Buffer.Len() > 0 {
		// Add restart points. Zero is always a restart point (if block is nonempty).
		order.PutUint32(p, 0)
		b.Write(p)
		for _, off := range b.restarts {
			order.PutUint32(p, uint32(off))
			b.Write(p)
		}
		order.PutUint32(p, uint32(len(b.restarts)+1))
	} else {
		order.PutUint32(p, 0)
	}
	b.Write(p)     // restart count
	b.WriteByte(0) // zero type. reserved.
	order.PutUint32(p, crc32.ChecksumIEEE(b.Bytes()))
	b.Write(p)
}

// Reset resets the contents of this block. After a call to reset,
// the blockBuffer instance may be used to write a new block.
func (b *blockBuffer) Reset() {
	b.lastKey = nil
	b.restarts = nil
	b.restartCount = 0
	b.Buffer.Reset()
}

// A block is an in-memory representation of a single block. Blocks
// maintain a current offset from which entries are scanned.
type block struct {
	// p holds the raw block on entry to init; init trims it to the
	// entry data, dropping the restart array and the trailer.
	p []byte
	// nrestart is the number of restart points and restarts their
	// encoded offsets, four bytes each.
	nrestart int
	restarts []byte

	// key and value belong to the most recently scanned entry.
	key, value []byte
	// off is the offset of the next entry to scan; prevOff is the
	// offset of the entry scanned last, used by unscan.
	off, prevOff int
}

// init initializes the block from the block contents stored at b.p.
// init returns an error if the block is malformed or corrupted.
func (b *block) init() error {
	if len(b.p) < blockMinTrailerSize {
		return errors.New("invalid block: too small")
	}
	if got, want := crc32.ChecksumIEEE(b.p[:len(b.p)-4]), order.Uint32(b.p[len(b.p)-4:]); got != want {
		return fmt.Errorf("invalid checksum: expected %x, got %x", want, got)
	}
	off := len(b.p) - blockMinTrailerSize
	nrestart := order.Uint32(b.p[off:])
	// Compare in uint64 so that a huge restart count cannot wrap around
	// or turn negative where int is 32 bits wide.
	if uint64(nrestart)*4 > uint64(off) {
		return errors.New("corrupt block")
	}
	if btype := b.p[off+4]; btype != 0 {
		return fmt.Errorf("invalid block type %d", btype)
	}
	b.nrestart = int(nrestart)
	b.restarts = b.p[off-4*b.nrestart : off]
	b.p = b.p[:off-4*b.nrestart]
	b.key = nil
	b.value = nil
	b.off = 0
	b.prevOff = 0
	return nil
}

// Seek positions the block so that the next call to Scan returns the
// first entry whose key is greater than or equal to the provided key.
// If there is no such entry, the block is positioned at its end. Seek
// panics if the block is corrupt.
func (b *block) Seek(key []byte) {
	// Binary search for the first restart point whose key is >= key.
	restart := sort.Search(b.nrestart, func(i int) bool {
		b.off = int(order.Uint32(b.restarts[i*4:]))
		if !b.Scan() {
			panic("corrupt block")
		}
		return bytes.Compare(key, b.Key()) <= 0
	})
	if restart == 0 {
		// No more work needed. key <= the first key in the block.
		b.off = 0
		return
	}
	// The target lies at or after the preceding restart point; scan
	// forward from there and step back onto the first matching entry.
	b.off = int(order.Uint32(b.restarts[(restart-1)*4:]))
	for b.Scan() {
		if bytes.Compare(key, b.Key()) <= 0 {
			b.unscan()
			break
		}
	}
}

// Scan reads the entry at the current position and then advances the
// block's position to the next entry. Scan returns false when the
// position is at or beyond the end of the block. Scan panics if the
// entry at the current position is malformed.
func (b *block) Scan() bool {
	if b.off >= len(b.p) {
		return false
	}
	b.prevOff = b.off
	rest := b.p[b.off:]

	// binary.Uvarint reports a truncated or overflowing varint through a
	// non-positive byte count; advancing by it would stall or rewind the
	// scan, so treat it as corruption.
	nshared, n := binary.Uvarint(rest)
	if n <= 0 {
		panic("corrupt block")
	}
	rest = rest[n:]
	nunshared, n := binary.Uvarint(rest)
	if n <= 0 {
		panic("corrupt block")
	}
	rest = rest[n:]
	nvalue, n := binary.Uvarint(rest)
	if n <= 0 {
		panic("corrupt block")
	}
	rest = rest[n:]

	// The shared prefix is taken from the previous key, and the suffix
	// and value must both fit in what remains of the block.
	if nshared > uint64(cap(b.key)) ||
		nunshared > uint64(len(rest)) ||
		nvalue > uint64(len(rest))-nunshared {
		panic("corrupt block")
	}
	b.key = append(b.key[:nshared], rest[:nunshared]...)
	b.value = rest[nunshared : nunshared+nvalue]
	b.off = len(b.p) - len(rest) + int(nunshared+nvalue)
	return true
}

// unscan moves the position back to the start of the entry returned by
// the last call to Scan, so that the next Scan returns it again.
func (b *block) unscan() {
	b.off = b.prevOff
}

// Key returns the key for the last scanned entry of the block.
func (b *block) Key() []byte {
	return b.key
}

// Value returns the value for the last scanned entry of the block.
// The returned slice aliases the block's underlying buffer.
func (b *block) Value() []byte {
	return b.value
}

// readBlock returns a block backed by p together with the result of
// validating it. The block must not be used if the error is non-nil.
func readBlock(p []byte) (*block, error) {
	b := &block{p: p}
	return b, b.init()
}