github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/lsmkv/memtable.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package lsmkv 13 14 import ( 15 "path/filepath" 16 "sync" 17 "time" 18 19 "github.com/pkg/errors" 20 "github.com/weaviate/weaviate/adapters/repos/db/roaringset" 21 "github.com/weaviate/weaviate/entities/lsmkv" 22 ) 23 24 type Memtable struct { 25 sync.RWMutex 26 key *binarySearchTree 27 keyMulti *binarySearchTreeMulti 28 keyMap *binarySearchTreeMap 29 primaryIndex *binarySearchTree 30 roaringSet *roaringset.BinarySearchTree 31 commitlog *commitLogger 32 size uint64 33 path string 34 strategy string 35 secondaryIndices uint16 36 secondaryToPrimary []map[string][]byte 37 // stores time memtable got dirty to determine when flush is needed 38 dirtyAt time.Time 39 createdAt time.Time 40 metrics *memtableMetrics 41 } 42 43 func newMemtable(path string, strategy string, 44 secondaryIndices uint16, cl *commitLogger, metrics *Metrics, 45 ) (*Memtable, error) { 46 m := &Memtable{ 47 key: &binarySearchTree{}, 48 keyMulti: &binarySearchTreeMulti{}, 49 keyMap: &binarySearchTreeMap{}, 50 primaryIndex: &binarySearchTree{}, // todo, sort upfront 51 roaringSet: &roaringset.BinarySearchTree{}, 52 commitlog: cl, 53 path: path, 54 strategy: strategy, 55 secondaryIndices: secondaryIndices, 56 dirtyAt: time.Time{}, 57 createdAt: time.Now(), 58 metrics: newMemtableMetrics(metrics, filepath.Dir(path), strategy), 59 } 60 61 if m.secondaryIndices > 0 { 62 m.secondaryToPrimary = make([]map[string][]byte, m.secondaryIndices) 63 for i := range m.secondaryToPrimary { 64 m.secondaryToPrimary[i] = map[string][]byte{} 65 } 66 } 67 68 m.metrics.size(m.size) 69 70 return m, nil 71 } 72 73 func (m *Memtable) get(key []byte) ([]byte, error) { 74 start := time.Now() 75 defer m.metrics.get(start.UnixNano()) 76 77 if m.strategy != StrategyReplace { 78 return nil, errors.Errorf("get only possible with strategy 'replace'") 79 } 80 81 m.RLock() 82 defer m.RUnlock() 83 84 v, err := m.key.get(key) 85 if err != nil { 86 return nil, err 87 } 88 89 return v, nil 90 } 91 92 func (m *Memtable) getBySecondary(pos int, key []byte) ([]byte, error) { 93 start := time.Now() 94 defer m.metrics.getBySecondary(start.UnixNano()) 95 96 if m.strategy != StrategyReplace { 97 return nil, errors.Errorf("get only possible with strategy 'replace'") 98 } 99 100 m.RLock() 101 defer m.RUnlock() 102 103 primary := m.secondaryToPrimary[pos][string(key)] 104 if primary == nil { 105 return nil, lsmkv.NotFound 106 } 107 108 v, err := m.key.get(primary) 109 if err != nil { 110 return nil, err 111 } 112 113 return v, nil 114 } 115 116 func (m *Memtable) put(key, value []byte, opts ...SecondaryKeyOption) error { 117 start := time.Now() 118 defer m.metrics.put(start.UnixNano()) 119 120 if m.strategy != StrategyReplace { 121 return errors.Errorf("put only possible with strategy 'replace'") 122 } 123 124 m.Lock() 125 defer m.Unlock() 126 127 var secondaryKeys [][]byte 128 if m.secondaryIndices > 0 { 129 secondaryKeys = make([][]byte, m.secondaryIndices) 130 for _, opt := range opts { 131 if err := opt(secondaryKeys); err != nil { 132 return err 133 } 134 } 135 } 136 137 if err := m.commitlog.put(segmentReplaceNode{ 138 primaryKey: key, 139 value: value, 140 secondaryIndexCount: m.secondaryIndices, 141 secondaryKeys: secondaryKeys, 142 tombstone: false, 143 }); err != nil { 144 return errors.Wrap(err, "write into commit log") 145 } 146 147 netAdditions, previousKeys := m.key.insert(key, value, secondaryKeys) 148 149 for i, sec := range previousKeys { 150 m.secondaryToPrimary[i][string(sec)] = nil 151 } 152 153 for i, sec := range secondaryKeys { 154 m.secondaryToPrimary[i][string(sec)] = key 155 } 156 157 m.size += uint64(netAdditions) 158 m.metrics.size(m.size) 159 m.updateDirtyAt() 160 161 return nil 162 } 163 164 func (m *Memtable) setTombstone(key []byte, opts ...SecondaryKeyOption) error { 165 start := time.Now() 166 defer m.metrics.setTombstone(start.UnixNano()) 167 168 if m.strategy != "replace" { 169 return errors.Errorf("setTombstone only possible with strategy 'replace'") 170 } 171 172 m.Lock() 173 defer m.Unlock() 174 175 var secondaryKeys [][]byte 176 if m.secondaryIndices > 0 { 177 secondaryKeys = make([][]byte, m.secondaryIndices) 178 for _, opt := range opts { 179 if err := opt(secondaryKeys); err != nil { 180 return err 181 } 182 } 183 } 184 185 if err := m.commitlog.put(segmentReplaceNode{ 186 primaryKey: key, 187 value: nil, 188 secondaryIndexCount: m.secondaryIndices, 189 secondaryKeys: secondaryKeys, 190 tombstone: true, 191 }); err != nil { 192 return errors.Wrap(err, "write into commit log") 193 } 194 195 m.key.setTombstone(key, secondaryKeys) 196 m.size += uint64(len(key)) + 1 // 1 byte for tombstone 197 m.metrics.size(m.size) 198 m.updateDirtyAt() 199 200 return nil 201 } 202 203 func (m *Memtable) getCollection(key []byte) ([]value, error) { 204 start := time.Now() 205 defer m.metrics.getCollection(start.UnixNano()) 206 207 if m.strategy != StrategySetCollection && m.strategy != StrategyMapCollection { 208 return nil, errors.Errorf("getCollection only possible with strategies %q, %q", 209 StrategySetCollection, StrategyMapCollection) 210 } 211 212 m.RLock() 213 defer m.RUnlock() 214 215 v, err := m.keyMulti.get(key) 216 if err != nil { 217 return nil, err 218 } 219 220 return v, nil 221 } 222 223 func (m *Memtable) getMap(key []byte) ([]MapPair, error) { 224 start := time.Now() 225 defer m.metrics.getMap(start.UnixNano()) 226 227 if m.strategy != StrategyMapCollection { 228 return nil, errors.Errorf("getCollection only possible with strategy %q", 229 StrategyMapCollection) 230 } 231 232 m.RLock() 233 defer m.RUnlock() 234 235 v, err := m.keyMap.get(key) 236 if err != nil { 237 return nil, err 238 } 239 240 return v, nil 241 } 242 243 func (m *Memtable) append(key []byte, values []value) error { 244 start := time.Now() 245 defer m.metrics.append(start.UnixNano()) 246 247 if m.strategy != StrategySetCollection && m.strategy != StrategyMapCollection { 248 return errors.Errorf("append only possible with strategies %q, %q", 249 StrategySetCollection, StrategyMapCollection) 250 } 251 252 m.Lock() 253 defer m.Unlock() 254 if err := m.commitlog.append(segmentCollectionNode{ 255 primaryKey: key, 256 values: values, 257 }); err != nil { 258 return errors.Wrap(err, "write into commit log") 259 } 260 261 m.keyMulti.insert(key, values) 262 m.size += uint64(len(key)) 263 for _, value := range values { 264 m.size += uint64(len(value.value)) 265 } 266 m.metrics.size(m.size) 267 m.updateDirtyAt() 268 269 return nil 270 } 271 272 func (m *Memtable) appendMapSorted(key []byte, pair MapPair) error { 273 start := time.Now() 274 defer m.metrics.appendMapSorted(start.UnixNano()) 275 276 if m.strategy != StrategyMapCollection { 277 return errors.Errorf("append only possible with strategy %q", 278 StrategyMapCollection) 279 } 280 281 m.Lock() 282 defer m.Unlock() 283 284 valuesForCommitLog, err := pair.Bytes() 285 if err != nil { 286 return err 287 } 288 289 if err := m.commitlog.append(segmentCollectionNode{ 290 primaryKey: key, 291 values: []value{ 292 { 293 value: valuesForCommitLog, 294 tombstone: pair.Tombstone, 295 }, 296 }, 297 }); err != nil { 298 return errors.Wrap(err, "write into commit log") 299 } 300 301 m.keyMap.insert(key, pair) 302 m.size += uint64(len(key) + len(valuesForCommitLog)) 303 m.metrics.size(m.size) 304 m.updateDirtyAt() 305 306 return nil 307 } 308 309 func (m *Memtable) Size() uint64 { 310 m.RLock() 311 defer m.RUnlock() 312 313 return m.size 314 } 315 316 func (m *Memtable) ActiveDuration() time.Duration { 317 m.RLock() 318 defer m.RUnlock() 319 320 return time.Since(m.createdAt) 321 } 322 323 func (m *Memtable) updateDirtyAt() { 324 if m.dirtyAt.IsZero() { 325 m.dirtyAt = time.Now() 326 } 327 } 328 329 // returns time memtable got dirty (1st write occurred) 330 // (0 if clean) 331 func (m *Memtable) DirtyDuration() time.Duration { 332 m.RLock() 333 defer m.RUnlock() 334 335 if m.dirtyAt.IsZero() { 336 return 0 337 } 338 return time.Since(m.dirtyAt) 339 } 340 341 func (m *Memtable) countStats() *countStats { 342 m.RLock() 343 defer m.RUnlock() 344 return m.key.countStats() 345 } 346 347 // the WAL uses a buffer and isn't written until the buffer size is crossed or 348 // this function explicitly called. This allows to safge unnecessary disk 349 // writes in larger operations, such as batches. It is sufficient to call write 350 // on the WAL just once. This does not make a batch atomic, but it guarantees 351 // that the WAL is written before a successful response is returned to the 352 // user. 353 func (m *Memtable) writeWAL() error { 354 m.Lock() 355 defer m.Unlock() 356 357 return m.commitlog.flushBuffers() 358 }