github.com/cockroachdb/pebble@v1.1.1-0.20240513155919-3622ade60459/sstable/raw_block.go (about) 1 // Copyright 2018 The LevelDB-Go and Pebble Authors. All rights reserved. Use 2 // of this source code is governed by a BSD-style license that can be found in 3 // the LICENSE file. 4 5 package sstable 6 7 import ( 8 "encoding/binary" 9 "sort" 10 "unsafe" 11 12 "github.com/cockroachdb/pebble/internal/base" 13 ) 14 15 type rawBlockWriter struct { 16 blockWriter 17 } 18 19 func (w *rawBlockWriter) add(key InternalKey, value []byte) { 20 w.curKey, w.prevKey = w.prevKey, w.curKey 21 22 size := len(key.UserKey) 23 if cap(w.curKey) < size { 24 w.curKey = make([]byte, 0, size*2) 25 } 26 w.curKey = w.curKey[:size] 27 copy(w.curKey, key.UserKey) 28 29 w.storeWithOptionalValuePrefix( 30 size, value, len(key.UserKey), false, 0, false) 31 } 32 33 // rawBlockIter is an iterator over a single block of data. Unlike blockIter, 34 // keys are stored in "raw" format (i.e. not as internal keys). Note that there 35 // is significant similarity between this code and the code in blockIter. Yet 36 // reducing duplication is difficult due to the blockIter being performance 37 // critical. rawBlockIter must only be used for blocks where the value is 38 // stored together with the key. 39 type rawBlockIter struct { 40 cmp Compare 41 offset int32 42 nextOffset int32 43 restarts int32 44 numRestarts int32 45 ptr unsafe.Pointer 46 data []byte 47 key, val []byte 48 ikey InternalKey 49 cached []blockEntry 50 cachedBuf []byte 51 } 52 53 func newRawBlockIter(cmp Compare, block block) (*rawBlockIter, error) { 54 i := &rawBlockIter{} 55 return i, i.init(cmp, block) 56 } 57 58 func (i *rawBlockIter) init(cmp Compare, block block) error { 59 numRestarts := int32(binary.LittleEndian.Uint32(block[len(block)-4:])) 60 if numRestarts == 0 { 61 return base.CorruptionErrorf("pebble/table: invalid table (block has no restart points)") 62 } 63 i.cmp = cmp 64 i.restarts = int32(len(block)) - 4*(1+numRestarts) 65 i.numRestarts = numRestarts 66 i.ptr = unsafe.Pointer(&block[0]) 67 i.data = block 68 if i.key == nil { 69 i.key = make([]byte, 0, 256) 70 } else { 71 i.key = i.key[:0] 72 } 73 i.val = nil 74 i.clearCache() 75 return nil 76 } 77 78 func (i *rawBlockIter) readEntry() { 79 ptr := unsafe.Pointer(uintptr(i.ptr) + uintptr(i.offset)) 80 shared, ptr := decodeVarint(ptr) 81 unshared, ptr := decodeVarint(ptr) 82 value, ptr := decodeVarint(ptr) 83 i.key = append(i.key[:shared], getBytes(ptr, int(unshared))...) 84 i.key = i.key[:len(i.key):len(i.key)] 85 ptr = unsafe.Pointer(uintptr(ptr) + uintptr(unshared)) 86 i.val = getBytes(ptr, int(value)) 87 i.nextOffset = int32(uintptr(ptr)-uintptr(i.ptr)) + int32(value) 88 } 89 90 func (i *rawBlockIter) loadEntry() { 91 i.readEntry() 92 i.ikey.UserKey = i.key 93 } 94 95 func (i *rawBlockIter) clearCache() { 96 i.cached = i.cached[:0] 97 i.cachedBuf = i.cachedBuf[:0] 98 } 99 100 func (i *rawBlockIter) cacheEntry() { 101 var valStart int32 102 valSize := int32(len(i.val)) 103 if valSize > 0 { 104 valStart = int32(uintptr(unsafe.Pointer(&i.val[0])) - uintptr(i.ptr)) 105 } 106 107 i.cached = append(i.cached, blockEntry{ 108 offset: i.offset, 109 keyStart: int32(len(i.cachedBuf)), 110 keyEnd: int32(len(i.cachedBuf) + len(i.key)), 111 valStart: valStart, 112 valSize: valSize, 113 }) 114 i.cachedBuf = append(i.cachedBuf, i.key...) 115 } 116 117 // SeekGE implements internalIterator.SeekGE, as documented in the pebble 118 // package. 119 func (i *rawBlockIter) SeekGE(key []byte) bool { 120 // Find the index of the smallest restart point whose key is > the key 121 // sought; index will be numRestarts if there is no such restart point. 122 i.offset = 0 123 index := sort.Search(int(i.numRestarts), func(j int) bool { 124 offset := int32(binary.LittleEndian.Uint32(i.data[int(i.restarts)+4*j:])) 125 // For a restart point, there are 0 bytes shared with the previous key. 126 // The varint encoding of 0 occupies 1 byte. 127 ptr := unsafe.Pointer(uintptr(i.ptr) + uintptr(offset+1)) 128 // Decode the key at that restart point, and compare it to the key sought. 129 v1, ptr := decodeVarint(ptr) 130 _, ptr = decodeVarint(ptr) 131 s := getBytes(ptr, int(v1)) 132 return i.cmp(key, s) < 0 133 }) 134 135 // Since keys are strictly increasing, if index > 0 then the restart point at 136 // index-1 will be the largest whose key is <= the key sought. If index == 137 // 0, then all keys in this block are larger than the key sought, and offset 138 // remains at zero. 139 if index > 0 { 140 i.offset = int32(binary.LittleEndian.Uint32(i.data[int(i.restarts)+4*(index-1):])) 141 } 142 i.loadEntry() 143 144 // Iterate from that restart point to somewhere >= the key sought. 145 for valid := i.Valid(); valid; valid = i.Next() { 146 if i.cmp(key, i.key) <= 0 { 147 break 148 } 149 } 150 return i.Valid() 151 } 152 153 // First implements internalIterator.First, as documented in the pebble 154 // package. 155 func (i *rawBlockIter) First() bool { 156 i.offset = 0 157 i.loadEntry() 158 return i.Valid() 159 } 160 161 // Last implements internalIterator.Last, as documented in the pebble package. 162 func (i *rawBlockIter) Last() bool { 163 // Seek forward from the last restart point. 164 i.offset = int32(binary.LittleEndian.Uint32(i.data[i.restarts+4*(i.numRestarts-1):])) 165 166 i.readEntry() 167 i.clearCache() 168 i.cacheEntry() 169 170 for i.nextOffset < i.restarts { 171 i.offset = i.nextOffset 172 i.readEntry() 173 i.cacheEntry() 174 } 175 176 i.ikey.UserKey = i.key 177 return i.Valid() 178 } 179 180 // Next implements internalIterator.Next, as documented in the pebble 181 // package. 182 func (i *rawBlockIter) Next() bool { 183 i.offset = i.nextOffset 184 if !i.Valid() { 185 return false 186 } 187 i.loadEntry() 188 return true 189 } 190 191 // Prev implements internalIterator.Prev, as documented in the pebble 192 // package. 193 func (i *rawBlockIter) Prev() bool { 194 if n := len(i.cached) - 1; n > 0 && i.cached[n].offset == i.offset { 195 i.nextOffset = i.offset 196 e := &i.cached[n-1] 197 i.offset = e.offset 198 i.val = getBytes(unsafe.Pointer(uintptr(i.ptr)+uintptr(e.valStart)), int(e.valSize)) 199 i.ikey.UserKey = i.cachedBuf[e.keyStart:e.keyEnd] 200 i.cached = i.cached[:n] 201 return true 202 } 203 204 if i.offset == 0 { 205 i.offset = -1 206 i.nextOffset = 0 207 return false 208 } 209 210 targetOffset := i.offset 211 index := sort.Search(int(i.numRestarts), func(j int) bool { 212 offset := int32(binary.LittleEndian.Uint32(i.data[int(i.restarts)+4*j:])) 213 return offset >= targetOffset 214 }) 215 i.offset = 0 216 if index > 0 { 217 i.offset = int32(binary.LittleEndian.Uint32(i.data[int(i.restarts)+4*(index-1):])) 218 } 219 220 i.readEntry() 221 i.clearCache() 222 i.cacheEntry() 223 224 for i.nextOffset < targetOffset { 225 i.offset = i.nextOffset 226 i.readEntry() 227 i.cacheEntry() 228 } 229 230 i.ikey.UserKey = i.key 231 return true 232 } 233 234 // Key implements internalIterator.Key, as documented in the pebble package. 235 func (i *rawBlockIter) Key() InternalKey { 236 return i.ikey 237 } 238 239 // Value implements internalIterator.Value, as documented in the pebble 240 // package. 241 func (i *rawBlockIter) Value() []byte { 242 return i.val 243 } 244 245 // Valid implements internalIterator.Valid, as documented in the pebble 246 // package. 247 func (i *rawBlockIter) Valid() bool { 248 return i.offset >= 0 && i.offset < i.restarts 249 } 250 251 // Error implements internalIterator.Error, as documented in the pebble 252 // package. 253 func (i *rawBlockIter) Error() error { 254 return nil 255 } 256 257 // Close implements internalIterator.Close, as documented in the pebble 258 // package. 259 func (i *rawBlockIter) Close() error { 260 i.val = nil 261 return nil 262 }