github.com/df-mc/goleveldb@v1.1.9/leveldb/table.go (about) 1 // Copyright (c) 2012, Suryandaru Triandana <syndtr@gmail.com> 2 // All rights reserved. 3 // 4 // Use of this source code is governed by a BSD-style license that can be 5 // found in the LICENSE file. 6 7 package leveldb 8 9 import ( 10 "bytes" 11 "fmt" 12 "sort" 13 "sync/atomic" 14 15 "github.com/df-mc/goleveldb/leveldb/cache" 16 "github.com/df-mc/goleveldb/leveldb/iterator" 17 "github.com/df-mc/goleveldb/leveldb/opt" 18 "github.com/df-mc/goleveldb/leveldb/storage" 19 "github.com/df-mc/goleveldb/leveldb/table" 20 "github.com/df-mc/goleveldb/leveldb/util" 21 ) 22 23 // tFile holds basic information about a table. 24 type tFile struct { 25 fd storage.FileDesc 26 seekLeft int32 27 size int64 28 imin, imax internalKey 29 } 30 31 // Returns true if given key is after largest key of this table. 32 func (t *tFile) after(icmp *iComparer, ukey []byte) bool { 33 return ukey != nil && icmp.uCompare(ukey, t.imax.ukey()) > 0 34 } 35 36 // Returns true if given key is before smallest key of this table. 37 func (t *tFile) before(icmp *iComparer, ukey []byte) bool { 38 return ukey != nil && icmp.uCompare(ukey, t.imin.ukey()) < 0 39 } 40 41 // Returns true if given key range overlaps with this table key range. 42 func (t *tFile) overlaps(icmp *iComparer, umin, umax []byte) bool { 43 return !t.after(icmp, umin) && !t.before(icmp, umax) 44 } 45 46 // Cosumes one seek and return current seeks left. 47 func (t *tFile) consumeSeek() int32 { 48 return atomic.AddInt32(&t.seekLeft, -1) 49 } 50 51 // Creates new tFile. 52 func newTableFile(fd storage.FileDesc, size int64, imin, imax internalKey) *tFile { 53 f := &tFile{ 54 fd: fd, 55 size: size, 56 imin: imin, 57 imax: imax, 58 } 59 60 // We arrange to automatically compact this file after 61 // a certain number of seeks. 
Let's assume: 62 // (1) One seek costs 10ms 63 // (2) Writing or reading 1MB costs 10ms (100MB/s) 64 // (3) A compaction of 1MB does 25MB of IO: 65 // 1MB read from this level 66 // 10-12MB read from next level (boundaries may be misaligned) 67 // 10-12MB written to next level 68 // This implies that 25 seeks cost the same as the compaction 69 // of 1MB of data. I.e., one seek costs approximately the 70 // same as the compaction of 40KB of data. We are a little 71 // conservative and allow approximately one seek for every 16KB 72 // of data before triggering a compaction. 73 f.seekLeft = int32(size / 16384) 74 if f.seekLeft < 100 { 75 f.seekLeft = 100 76 } 77 78 return f 79 } 80 81 func tableFileFromRecord(r atRecord) *tFile { 82 return newTableFile(storage.FileDesc{Type: storage.TypeTable, Num: r.num}, r.size, r.imin, r.imax) 83 } 84 85 // tFiles hold multiple tFile. 86 type tFiles []*tFile 87 88 func (tf tFiles) Len() int { return len(tf) } 89 func (tf tFiles) Swap(i, j int) { tf[i], tf[j] = tf[j], tf[i] } 90 91 func (tf tFiles) nums() string { 92 x := "[ " 93 for i, f := range tf { 94 if i != 0 { 95 x += ", " 96 } 97 x += fmt.Sprint(f.fd.Num) 98 } 99 x += " ]" 100 return x 101 } 102 103 // Returns true if i smallest key is less than j. 104 // This used for sort by key in ascending order. 105 func (tf tFiles) lessByKey(icmp *iComparer, i, j int) bool { 106 a, b := tf[i], tf[j] 107 n := icmp.Compare(a.imin, b.imin) 108 if n == 0 { 109 return a.fd.Num < b.fd.Num 110 } 111 return n < 0 112 } 113 114 // Returns true if i file number is greater than j. 115 // This used for sort by file number in descending order. 116 func (tf tFiles) lessByNum(i, j int) bool { 117 return tf[i].fd.Num > tf[j].fd.Num 118 } 119 120 // Sorts tables by key in ascending order. 121 func (tf tFiles) sortByKey(icmp *iComparer) { 122 sort.Sort(&tFilesSortByKey{tFiles: tf, icmp: icmp}) 123 } 124 125 // Sorts tables by file number in descending order. 
func (tf tFiles) sortByNum() {
	sort.Sort(&tFilesSortByNum{tFiles: tf})
}

// size returns the sum of all table sizes, in bytes.
func (tf tFiles) size() (sum int64) {
	for _, t := range tf {
		sum += t.size
	}
	return sum
}

// searchMin returns the smallest index of the tables whose smallest key is
// after or equal to the given internal key. Assumes tf is sorted by key.
func (tf tFiles) searchMin(icmp *iComparer, ikey internalKey) int {
	return sort.Search(len(tf), func(i int) bool {
		return icmp.Compare(tf[i].imin, ikey) >= 0
	})
}

// searchMax returns the smallest index of the tables whose largest key is
// after or equal to the given internal key. Assumes tf is sorted by key.
func (tf tFiles) searchMax(icmp *iComparer, ikey internalKey) int {
	return sort.Search(len(tf), func(i int) bool {
		return icmp.Compare(tf[i].imax, ikey) >= 0
	})
}

// searchNumLess returns the smallest index of the tables whose file number is
// smaller than the given number. Assumes tf is sorted by file number in
// descending order (see sortByNum).
func (tf tFiles) searchNumLess(num int64) int {
	return sort.Search(len(tf), func(i int) bool {
		return tf[i].fd.Num < num
	})
}

// searchMinUkey returns the smallest index of the tables whose smallest user
// key is strictly after the given user key.
func (tf tFiles) searchMinUkey(icmp *iComparer, umin []byte) int {
	return sort.Search(len(tf), func(i int) bool {
		return icmp.ucmp.Compare(tf[i].imin.ukey(), umin) > 0
	})
}

// searchMaxUkey returns the smallest index of the tables whose largest user
// key is strictly after the given user key.
func (tf tFiles) searchMaxUkey(icmp *iComparer, umax []byte) int {
	return sort.Search(len(tf), func(i int) bool {
		return icmp.ucmp.Compare(tf[i].imax.ukey(), umax) > 0
	})
}

// overlaps returns true if the given user-key range overlaps with one or more
// tables' key range. If unsorted is true then binary search will not be used
// and every table is checked.
func (tf tFiles) overlaps(icmp *iComparer, umin, umax []byte, unsorted bool) bool {
	if unsorted {
		// Check against all files.
		for _, t := range tf {
			if t.overlaps(icmp, umin, umax) {
				return true
			}
		}
		return false
	}

	i := 0
	if len(umin) > 0 {
		// Find the earliest possible internal key for min.
		i = tf.searchMax(icmp, makeInternalKey(nil, umin, keyMaxSeq, keyTypeSeek))
	}
	if i >= len(tf) {
		// Beginning of range is after all files, so no overlap.
		return false
	}
	return !tf[i].before(icmp, umax)
}

// getOverlaps returns the tables whose key range overlaps with the given
// user-key range. The range will be expanded if a ukey is found to hop across
// tables. If overlapped is true then the search will be restarted whenever
// umin/umax is expanded (level-0 behavior). The dst content will be
// overwritten.
func (tf tFiles) getOverlaps(dst tFiles, icmp *iComparer, umin, umax []byte, overlapped bool) tFiles {
	// Short circuit if tf is empty
	if len(tf) == 0 {
		return nil
	}
	// For non-zero levels, there is no ukey hop across at all.
	// And what's more, the files in these levels are strictly sorted,
	// so use binary search instead of heavy traverse.
	if !overlapped {
		var begin, end int
		// Determine the begin index of the overlapped file
		if umin != nil {
			index := tf.searchMinUkey(icmp, umin)
			if index == 0 {
				begin = 0
			} else if bytes.Compare(tf[index-1].imax.ukey(), umin) >= 0 {
				// The min ukey overlaps with the index-1 file, expand it.
				begin = index - 1
			} else {
				begin = index
			}
		}
		// Determine the end index of the overlapped file
		if umax != nil {
			index := tf.searchMaxUkey(icmp, umax)
			if index == len(tf) {
				end = len(tf)
			} else if bytes.Compare(tf[index].imin.ukey(), umax) <= 0 {
				// The max ukey overlaps with the index file, expand it.
				end = index + 1
			} else {
				end = index
			}
		} else {
			end = len(tf)
		}
		// Ensure the overlapped file indexes are valid.
		if begin >= end {
			return nil
		}
		dst = make([]*tFile, end-begin)
		copy(dst, tf[begin:end])
		return dst
	}

	// Level-0 path: tables may overlap each other, so scan linearly and
	// restart from the beginning whenever the search range is expanded.
	dst = dst[:0]
	for i := 0; i < len(tf); {
		t := tf[i]
		if t.overlaps(icmp, umin, umax) {
			if umin != nil && icmp.uCompare(t.imin.ukey(), umin) < 0 {
				umin = t.imin.ukey()
				// Restart search with the expanded lower bound.
				dst = dst[:0]
				i = 0
				continue
			} else if umax != nil && icmp.uCompare(t.imax.ukey(), umax) > 0 {
				umax = t.imax.ukey()
				// Restart search if it is overlapped.
				dst = dst[:0]
				i = 0
				continue
			}

			dst = append(dst, t)
		}
		i++
	}

	return dst
}

// getRange returns the internal-key range spanned by all tables combined.
func (tf tFiles) getRange(icmp *iComparer) (imin, imax internalKey) {
	for i, t := range tf {
		if i == 0 {
			imin, imax = t.imin, t.imax
			continue
		}
		if icmp.Compare(t.imin, imin) < 0 {
			imin = t.imin
		}
		if icmp.Compare(t.imax, imax) > 0 {
			imax = t.imax
		}
	}

	return
}

// newIndexIterator creates an iterator index from the tables, optionally
// restricted to the given key range. Assumes tf is sorted by key.
func (tf tFiles) newIndexIterator(tops *tOps, icmp *iComparer, slice *util.Range, ro *opt.ReadOptions) iterator.IteratorIndexer {
	if slice != nil {
		var start, limit int
		if slice.Start != nil {
			start = tf.searchMax(icmp, internalKey(slice.Start))
		}
		if slice.Limit != nil {
			limit = tf.searchMin(icmp, internalKey(slice.Limit))
		} else {
			limit = tf.Len()
		}
		tf = tf[start:limit]
	}
	return iterator.NewArrayIndexer(&tFilesArrayIndexer{
		tFiles: tf,
		tops:   tops,
		icmp:   icmp,
		slice:  slice,
		ro:     ro,
	})
}

// Tables iterator index.
type tFilesArrayIndexer struct {
	tFiles
	tops  *tOps
	icmp  *iComparer
	slice *util.Range
	ro    *opt.ReadOptions
}

// Search returns the index of the first table whose largest key is greater
// than or equal to the given internal key.
func (a *tFilesArrayIndexer) Search(key []byte) int {
	return a.searchMax(a.icmp, internalKey(key))
}

// Get returns an iterator over the i-th table. Only the first and last tables
// can partially overlap the slice bounds; interior tables need no range
// restriction.
func (a *tFilesArrayIndexer) Get(i int) iterator.Iterator {
	if i == 0 || i == a.Len()-1 {
		return a.tops.newIterator(a.tFiles[i], a.slice, a.ro)
	}
	return a.tops.newIterator(a.tFiles[i], nil, a.ro)
}

// tFilesSortByKey is a helper type for sortByKey.
type tFilesSortByKey struct {
	tFiles
	icmp *iComparer
}

func (x *tFilesSortByKey) Less(i, j int) bool {
	return x.lessByKey(x.icmp, i, j)
}

// tFilesSortByNum is a helper type for sortByNum.
type tFilesSortByNum struct {
	tFiles
}

func (x *tFilesSortByNum) Less(i, j int) bool {
	return x.lessByNum(i, j)
}

// tOps provides table operations.
type tOps struct {
	s            *session
	noSync       bool         // if true, skip fsync when finishing a table
	evictRemoved bool         // if true, evict removed tables' blocks from the block cache
	cache        *cache.Cache // cache of open table readers, keyed by file number
	bcache       *cache.Cache // block cache, may be nil
	bpool        *util.BufferPool
}

// create creates an empty table and returns a table writer for it.
func (t *tOps) create() (*tWriter, error) {
	fd := storage.FileDesc{Type: storage.TypeTable, Num: t.s.allocFileNum()}
	fw, err := t.s.stor.Create(fd)
	if err != nil {
		return nil, err
	}
	return &tWriter{
		t:  t,
		fd: fd,
		w:  fw,
		tw: table.NewWriter(fw, t.s.o.Options),
	}, nil
}

// createFrom builds a table from the src iterator, returning the resulting
// table file and the number of entries written. On error the partially
// written table is dropped.
func (t *tOps) createFrom(src iterator.Iterator) (f *tFile, n int, err error) {
	w, err := t.create()
	if err != nil {
		return
	}

	// Drop the partially written table if anything below fails; relies on
	// the named return value err being set before returning.
	defer func() {
		if err != nil {
			w.drop()
		}
	}()

	for src.Next() {
		err = w.append(src.Key(), src.Value())
		if err != nil {
			return
		}
	}
	err = src.Error()
	if err != nil {
		return
	}

	n = w.tw.EntriesLen()
	f, err = w.finish()
	return
}

// open opens the table. It returns a cache handle, which should
// be released after use.
func (t *tOps) open(f *tFile) (ch *cache.Handle, err error) {
	// The setter closure runs only on a cache miss; it assigns the named
	// return err so failures inside it propagate to the caller.
	ch = t.cache.Get(0, uint64(f.fd.Num), func() (size int, value cache.Value) {
		var r storage.Reader
		r, err = t.s.stor.Open(f.fd)
		if err != nil {
			return 0, nil
		}

		var bcache *cache.NamespaceGetter
		if t.bcache != nil {
			// Namespace block-cache entries by this table's file number.
			bcache = &cache.NamespaceGetter{Cache: t.bcache, NS: uint64(f.fd.Num)}
		}

		var tr *table.Reader
		tr, err = table.NewReader(r, f.size, f.fd, bcache, t.bpool, t.s.o.Options)
		if err != nil {
			r.Close()
			return 0, nil
		}
		return 1, tr
	})
	// A nil handle without an error means the cache has been closed.
	if ch == nil && err == nil {
		err = ErrClosed
	}
	return
}

// find finds the key/value pair whose key is greater than or equal to the
// given key.
func (t *tOps) find(f *tFile, key []byte, ro *opt.ReadOptions) (rkey, rvalue []byte, err error) {
	ch, err := t.open(f)
	if err != nil {
		return nil, nil, err
	}
	defer ch.Release()
	return ch.Value().(*table.Reader).Find(key, true, ro)
}

// findKey finds the key that is greater than or equal to the given key.
func (t *tOps) findKey(f *tFile, key []byte, ro *opt.ReadOptions) (rkey []byte, err error) {
	ch, err := t.open(f)
	if err != nil {
		return nil, err
	}
	defer ch.Release()
	return ch.Value().(*table.Reader).FindKey(key, true, ro)
}

// offsetOf returns the approximate offset of the given key within the table.
func (t *tOps) offsetOf(f *tFile, key []byte) (offset int64, err error) {
	ch, err := t.open(f)
	if err != nil {
		return
	}
	defer ch.Release()
	return ch.Value().(*table.Reader).OffsetOf(key)
}

// Creates an iterator from the given table.
func (t *tOps) newIterator(f *tFile, slice *util.Range, ro *opt.ReadOptions) iterator.Iterator {
	ch, err := t.open(f)
	if err != nil {
		return iterator.NewEmptyIterator(err)
	}
	iter := ch.Value().(*table.Reader).NewIterator(slice, ro)
	// The cache handle is released when the iterator is released.
	iter.SetReleaser(ch)
	return iter
}

// remove removes the table from persistent storage. It waits until
// no one uses the table.
func (t *tOps) remove(fd storage.FileDesc) {
	// The deletion callback runs once the cache entry is no longer in use.
	t.cache.Delete(0, uint64(fd.Num), func() {
		if err := t.s.stor.Remove(fd); err != nil {
			t.s.logf("table@remove removing @%d %q", fd.Num, err)
		} else {
			t.s.logf("table@remove removed @%d", fd.Num)
		}
		if t.evictRemoved && t.bcache != nil {
			t.bcache.EvictNS(uint64(fd.Num))
		}
		// Try to reuse file num, useful for discarded transaction.
		t.s.reuseFileNum(fd.Num)
	})
}

// close closes the table ops instance. It will close all tables,
// regardless of whether they are still used or not.
func (t *tOps) close() {
	t.bpool.Close()
	t.cache.Close()
	if t.bcache != nil {
		t.bcache.CloseWeak()
	}
}

// newTableOps creates a new initialized table ops instance for the session.
func newTableOps(s *session) *tOps {
	var (
		cacher cache.Cacher
		bcache *cache.Cache
		bpool  *util.BufferPool
	)
	if s.o.GetOpenFilesCacheCapacity() > 0 {
		cacher = cache.NewLRU(s.o.GetOpenFilesCacheCapacity())
	}
	if !s.o.GetDisableBlockCache() {
		var bcacher cache.Cacher
		if s.o.GetBlockCacheCapacity() > 0 {
			bcacher = s.o.GetBlockCacher().New(s.o.GetBlockCacheCapacity())
		}
		bcache = cache.NewCache(bcacher)
	}
	if !s.o.GetDisableBufferPool() {
		// Extra 5 bytes leave room for the per-block trailer.
		bpool = util.NewBufferPool(s.o.GetBlockSize() + 5)
	}
	return &tOps{
		s:            s,
		noSync:       s.o.GetNoSync(),
		evictRemoved: s.o.GetBlockCacheEvictRemoved(),
		cache:        cache.NewCache(cacher),
		bcache:       bcache,
		bpool:        bpool,
	}
}

// tWriter wraps the table writer. It keeps track of the file descriptor
// and the added key range.
type tWriter struct {
	t *tOps

	fd storage.FileDesc
	w  storage.Writer
	tw *table.Writer

	// first and last are the smallest and largest keys appended so far.
	first, last []byte
}

// append appends a key/value pair to the table. Keys must be appended in
// ascending order; first/last track the resulting key range.
func (w *tWriter) append(key, value []byte) error {
	if w.first == nil {
		w.first = append([]byte{}, key...)
	}
	w.last = append(w.last[:0], key...)
	return w.tw.Append(key, value)
}

// empty returns true if the table is empty (nothing appended yet).
func (w *tWriter) empty() bool {
	return w.first == nil
}

// close closes the underlying storage.Writer, if still open.
func (w *tWriter) close() {
	if w.w != nil {
		w.w.Close()
		w.w = nil
	}
}

// finish finalizes the table and returns the resulting table file.
// Unless tOps.noSync is set, the file is synced before returning.
func (w *tWriter) finish() (f *tFile, err error) {
	defer w.close()
	err = w.tw.Close()
	if err != nil {
		return
	}
	if !w.t.noSync {
		err = w.w.Sync()
		if err != nil {
			return
		}
	}
	f = newTableFile(w.fd, int64(w.tw.BytesLen()), internalKey(w.first), internalKey(w.last))
	return
}

// drop discards the table: closes the writer, removes the file from storage
// and allows the file number to be reused.
func (w *tWriter) drop() {
	w.close()
	// Removal failure is intentionally ignored; the file number is recycled
	// regardless.
	w.t.s.stor.Remove(w.fd)
	w.t.s.reuseFileNum(w.fd.Num)
	w.tw = nil
	w.first = nil
	w.last = nil
}