github.com/scottcagno/storage@v1.8.0/pkg/lsmt/sstable/_ss-table-manager.go (about) 1 package sstable 2 3 import ( 4 "bytes" 5 "fmt" 6 "github.com/scottcagno/storage/pkg/_junk/_memtable" 7 "github.com/scottcagno/storage/pkg/lsmt/binary" 8 "github.com/scottcagno/storage/pkg/lsmt/trees/rbtree" 9 "log" 10 "os" 11 "path/filepath" 12 "sort" 13 "strings" 14 "sync" 15 ) 16 17 type KeyRange struct { 18 index int64 19 first string 20 last string 21 } 22 23 func (kr *KeyRange) InKeyRange(k string) bool { 24 return kr.first <= k && k <= kr.last 25 } 26 27 func (kr *KeyRange) String() string { 28 return fmt.Sprintf("kr.gindex=%d, kr.first=%q, kr.last=%q", kr.index, kr.first, kr.last) 29 } 30 31 type KeyRangeSlice []*KeyRange 32 33 func (krs KeyRangeSlice) Len() int { 34 return len(krs) 35 } 36 37 func (krs KeyRangeSlice) Less(i, j int) bool { 38 return krs[i].first < krs[j].first 39 } 40 41 func (krs KeyRangeSlice) Swap(i, j int) { 42 krs[i], krs[j] = krs[j], krs[i] 43 } 44 45 type SSTManager1 struct { 46 lock sync.RWMutex 47 base string 48 inrange []*KeyRange 49 //sparse map[int64]*SparseIndex 50 gindex int64 51 cachedSST *SSTable 52 keyIndex *rbtree.RBTree 53 } 54 55 // https://play.golang.org/p/m_cJtw4wWMc 56 57 // OpenSSTManager opens and returns a SSTManager, which allows you to 58 // perform operations across all the ss-table and ss-table-indexes, 59 // hopefully without too much hassle 60 func OpenSSTManager1(base string) (*SSTManager1, error) { 61 // make sure we are working with absolute paths 62 base, err := filepath.Abs(base) 63 if err != nil { 64 return nil, err 65 } 66 // sanitize any path separators 67 base = filepath.ToSlash(base) 68 // create any directories if they are not there 69 err = os.MkdirAll(base, os.ModeDir) 70 if err != nil { 71 return nil, err 72 } 73 // create ss-table-manager instance 74 sstm := &SSTManager1{ 75 base: base, 76 inrange: make([]*KeyRange, 0), 77 //sparse: make(map[int64]*SparseIndex, 0), 78 keyIndex: rbtree.NewRBTree(), 79 } 80 // read the ss-table directory 81 files, err := os.ReadDir(base) 82 if err != nil { 83 return nil, err 84 } 85 // lock 86 sstm.lock.RLock() 87 defer sstm.lock.RUnlock() 88 // go over all the files 89 for _, file := range files { 90 // skip all non ss-tables 91 if file.IsDir() || !strings.HasSuffix(file.Name(), dataFileSuffix) { 92 continue 93 } 94 // get ss-table id from file name 95 index, err := IndexFromDataFileName(file.Name()) 96 if err != nil { 97 return nil, err 98 } 99 // open the ss-table-gindex 100 ssi, err := OpenSSTIndex(sstm.base, index) 101 if err != nil { 102 return nil, err 103 } 104 // create a new key-range "gindex" 105 kr := &KeyRange{ 106 index: index, // gindex of the ss-table 107 first: ssi.first, // first key in the ss-table 108 last: ssi.last, // last key in the ss-table 109 } 110 // add to keyIndex 111 sstm.keyIndex.Put(sparseIndexEntry{ 112 LastKey: ssi.last, 113 SSTIndex: index, 114 }) 115 116 fmt.Println(file.Name(), ssi.file.Name(), ssi.first, ssi.last) 117 118 // add it to our key in-range index 119 sstm.inrange = append(sstm.inrange, kr) 120 // populate sparse index 121 //sstm.sparse[index] = makeNewSparseIndex(index, ssi) 122 // close gindex 123 err = ssi.Close() 124 if err != nil { 125 return nil, err 126 } 127 } 128 // update the last global gindex 129 //sstm.gindex = sstm.getLastGIndex() 130 e, ok := sstm.keyIndex.Max() 131 if !ok { 132 sstm.gindex = 0 133 } else { 134 sstm.gindex = e.(sparseIndexEntry).SSTIndex 135 } 136 //log.Println(sstm.inrange, len(sstm.inrange), sstm.gindex) 137 138 fmt.Printf("KeyIndex: %s\n", sstm.keyIndex) 139 140 return sstm, nil 141 } 142 143 func (sstm *SSTManager1) GetLastKey() (string, error) { 144 e, ok := sstm.keyIndex.Max() 145 if !ok { 146 return "", ErrSSTIndexNotFound 147 } 148 return e.(sparseIndexEntry).LastKey, nil 149 } 150 151 func (sstm *SSTManager1) getLastGIndex() int64 { 152 if len(sstm.inrange) == 0 { 153 return 0 154 } 155 return sstm.inrange[len(sstm.inrange)-1].index 156 } 157 158 func (sstm *SSTManager1) addKeyRange(first, last string) { 159 kr := &KeyRange{index: sstm.gindex, first: first, last: last} 160 sstm.inrange = append(sstm.inrange, kr) 161 } 162 163 // FlushMemtableToSSTable takes a pointer to a memtable and writes it to disk as an ss-table 164 func (sstm *SSTManager1) FlushMemtableToSSTable(memt *memtable.Memtable) error { 165 // lock 166 sstm.lock.Lock() 167 defer sstm.lock.Unlock() 168 // make new batch 169 batch := sstm.NewBatch() 170 // iterate mem-table entries 171 memt.Scan(func(me rbtree.RBEntry) bool { 172 // and write each entry to the batch 173 batch.WriteEntry(me.(memtable.MemtableEntry).Entry) 174 return true 175 }) 176 // reset memtable asap 177 err := memt.Reset() 178 if err != nil { 179 return err 180 } 181 // open new ss-table 182 sst, err := OpenSSTable(sstm.base, sstm.gindex+1) 183 if err != nil { 184 return err 185 } 186 // write batch to ss-table 187 err = sst.WriteBatch(batch) 188 if err != nil { 189 return err 190 } 191 // save for later 192 first, last := sst.index.first, sst.index.last 193 // flush and close ss-table 194 err = sst.Close() 195 if err != nil { 196 return err 197 } 198 // in the clear, increment gindex 199 sstm.gindex++ 200 // add new entry to sparse index 201 sstm.addKeyRange(first, last) 202 // return 203 return nil 204 } 205 206 func (sstm *SSTManager1) NewBatch() *binary.Batch { 207 return new(binary.Batch) 208 } 209 210 func (sstm *SSTManager1) FlushBatchToSSTable(batch *binary.Batch) error { 211 // lock 212 sstm.lock.Lock() 213 defer sstm.lock.Unlock() 214 // open new ss-table 215 sst, err := OpenSSTable(sstm.base, sstm.gindex+1) 216 if err != nil { 217 return err 218 } 219 // write batch to ss-table 220 err = sst.WriteBatch(batch) 221 if err != nil { 222 return err 223 } 224 // save for later 225 first, last := sst.index.first, sst.index.last 226 // add new sparse index 227 //sstm.sparse[sstm.gindex+1] = makeNewSparseIndex(sstm.gindex+1, sst.index) 228 // flush and close ss-table 229 err = sst.Close() 230 if err != nil { 231 return err 232 } 233 // in the clear, increment gindex 234 sstm.gindex++ 235 // add new entry to key in-range index 236 sstm.addKeyRange(first, last) 237 return nil 238 } 239 240 func (sstm *SSTManager1) isInRange(k string) (int64, error) { //(*SparseIndex, error) { 241 if len(sstm.inrange) == 1 { 242 return sstm.getLastGIndex(), nil 243 } 244 keys := KeyRangeSlice(sstm.inrange) 245 sort.Sort(keys) 246 n := sort.Search(keys.Len(), 247 func(i int) bool { 248 return sstm.inrange[i].first <= k && k <= sstm.inrange[i].last 249 }) 250 log.Println("DEBUG >> N=", n, len(sstm.inrange)) 251 if n < 0 { 252 return -1, ErrSSTIndexNotFound 253 } 254 255 // if i < len(data) && data[i] == x { 256 // // x is present at data[i] 257 // } else { 258 // // x is not present in data, 259 // // but i is the index where it would be inserted. 260 // } 261 262 //for _, kr := range sstm.inrange { 263 // if !kr.InKeyRange(k) { 264 // continue 265 // } 266 // return kr.index, nil 267 //spi, ok := sstm.sparse[kr.index] 268 //if !ok { 269 // continue 270 //} 271 //return spi, nil 272 //} 273 return int64(n), nil 274 } 275 276 func (sstm *SSTManager1) Get(k string) (*binary.Entry, error) { 277 // read lock 278 sstm.lock.RLock() 279 defer sstm.lock.RUnlock() 280 // search "sparse index" 281 e, ok := sstm.keyIndex.GetNearMin(sparseIndexEntry{LastKey: k}) 282 if !ok { 283 if e.(sparseIndexEntry).LastKey < k { 284 log.Panicf("[HMMMM] >>> %s, searching key: %q\n", e, k) 285 return nil, ErrSSTIndexNotFound 286 } 287 } 288 // get the table path index 289 sstIndex := e.(sparseIndexEntry).SSTIndex 290 // open ss-table for reading 291 sst, err := OpenSSTable(sstm.base, sstIndex) 292 if err != nil { 293 return nil, err 294 } 295 // read data by key (performs search using ssi inside read) 296 de, err := sst.Read(k) 297 if err != nil { 298 return nil, err 299 } 300 // close ss-table 301 err = sst.Close() 302 if err != nil { 303 return nil, err 304 } 305 // return entry 306 return de, nil 307 } 308 309 func (sstm *SSTManager1) GetOLD(k string) (*binary.Entry, error) { 310 // read lock 311 sstm.lock.RLock() 312 defer sstm.lock.RUnlock() 313 // search sparse index 314 index, err := sstm.isInRange(k) 315 if err != nil { 316 return nil, err 317 } 318 if index == -1 { 319 return nil, ErrSSTIndexNotFound 320 } 321 // get table path index, and relative offset 322 //index, offset := spi.Search(k) 323 // open ss-table for reading 324 sst, err := OpenSSTable(sstm.base, index) 325 if err != nil { 326 return nil, err 327 } 328 // scan starting at location until we find match 329 //var de *binary.Entry 330 //err = sst.ScanAt(offset, func(e *binary.Entry) bool { 331 // if string(e.Key) == k { 332 // de = e 333 // // got match, lets break 334 // return false 335 // } 336 // return true 337 //}) 338 de, err := sst.Read(k) 339 if err != nil { 340 return nil, err 341 } 342 // close ss-table 343 err = sst.Close() 344 if err != nil { 345 return nil, err 346 } 347 // return entry 348 return de, nil 349 } 350 351 func (sstm *SSTManager1) GetEntryIndex(k string) (*binary.Index, error) { 352 // read lock 353 sstm.lock.RLock() 354 defer sstm.lock.RUnlock() 355 // search sparse index 356 index, err := sstm.isInRange(k) 357 if err != nil { 358 return nil, err 359 } 360 if index == -1 { 361 return nil, ErrSSTIndexNotFound 362 } 363 // open ss-table-index for reading 364 sst, err := OpenSSTable(sstm.base, index) 365 if err != nil { 366 return nil, err 367 } 368 // read index data 369 di, err := sst.ReadIndex(k) 370 if err != nil { 371 return nil, err 372 } 373 // close ss-table 374 err = sst.Close() 375 if err != nil { 376 return nil, err 377 } 378 // return entry 379 return di, nil 380 } 381 382 func (sstm *SSTManager1) ListSSTables() []string { 383 // read lock 384 sstm.lock.RLock() 385 defer sstm.lock.RUnlock() 386 files, err := os.ReadDir(sstm.base) 387 if err != nil { 388 return nil 389 } 390 var ssts []string 391 for _, file := range files { 392 if file.IsDir() || !strings.HasSuffix(file.Name(), dataFileSuffix) { 393 continue 394 } 395 ssts = append(ssts, file.Name()) 396 } 397 398 return ssts 399 } 400 401 func (sstm *SSTManager1) ListSSTIndexes() []string { 402 // read lock 403 sstm.lock.RLock() 404 defer sstm.lock.RUnlock() 405 files, err := os.ReadDir(sstm.base) 406 if err != nil { 407 return nil 408 } 409 var ssti []string 410 for _, file := range files { 411 if file.IsDir() || !strings.HasSuffix(file.Name(), indexFileSuffix) { 412 continue 413 } 414 ssti = append(ssti, file.Name()) 415 } 416 return ssti 417 } 418 419 func (sstm *SSTManager1) CompactSSTables(index int64) error { 420 // lock 421 sstm.lock.Lock() 422 defer sstm.lock.Unlock() 423 // load sstable 424 sst, err := OpenSSTable(sstm.base, index) 425 if err != nil { 426 return err 427 } 428 // make batch 429 batch := binary.NewBatch() 430 // iterate 431 err = sst.Scan(func(e *binary.Entry) bool { 432 // add any data entries that are not tombstones to batch 433 if e.Value != nil && !bytes.Equal(e.Value, Tombstone) { 434 batch.WriteEntry(e) 435 } 436 return true 437 }) 438 if err != nil { 439 return err 440 } 441 // get path 442 tpath, ipath := sst.path, sst.index.path 443 // close sstable 444 err = sst.Close() 445 if err != nil { 446 return err 447 } 448 // remove old table 449 err = os.Remove(tpath) 450 if err != nil { 451 return err 452 } 453 // remove old gindex 454 err = os.Remove(ipath) 455 if err != nil { 456 return err 457 } 458 // open new sstable to write to 459 sst, err = OpenSSTable(sstm.base, index) 460 if err != nil { 461 return err 462 } 463 // write batch to table 464 err = sst.WriteBatch(batch) 465 // flush and close sstable 466 err = sst.Close() 467 if err != nil { 468 return err 469 } 470 return nil 471 } 472 473 func (sstm *SSTManager1) MergeSSTables(iA, iB int64) error { 474 // lock 475 sstm.lock.Lock() 476 defer sstm.lock.Unlock() 477 // load sstable A 478 sstA, err := OpenSSTable(sstm.base, iA) 479 if err != nil { 480 return err 481 } 482 // and sstable B 483 sstB, err := OpenSSTable(sstm.base, iB) 484 if err != nil { 485 return err 486 } 487 // make batch to write data to 488 batch := binary.NewBatch() 489 // pass tables to the merge writer 490 err = mergeWriter(sstA, sstB, batch) 491 if err != nil { 492 return err 493 } 494 // close table A 495 err = sstA.Close() 496 if err != nil { 497 return err 498 } 499 // close table B 500 err = sstB.Close() 501 if err != nil { 502 return err 503 } 504 // open new sstable to write to 505 sstC, err := OpenSSTable(sstm.base, iB+1) 506 if err != nil { 507 return err 508 } 509 // write batch to table 510 err = sstC.WriteBatch(batch) 511 // flush and close sstable 512 err = sstC.Close() 513 if err != nil { 514 return err 515 } 516 return nil 517 } 518 519 func (sstm *SSTManager1) Close() error { 520 521 return nil 522 } 523 524 func mergeWriter(sstA, sstB *SSTable, batch *binary.Batch) error { 525 526 i, j := 0, 0 527 n1, n2 := sstA.index.Len(), sstB.index.Len() 528 529 var err error 530 var de *binary.Entry 531 for i < n1 && j < n2 { 532 if bytes.Compare(sstA.index.data[i].Key, sstB.index.data[j].Key) == 0 { 533 // read entry from sstB 534 de, err = sstB.ReadAt(sstB.index.data[j].Offset) 535 if err != nil { 536 return err 537 } 538 // write entry to batch 539 batch.WriteEntry(de) 540 i++ 541 j++ 542 continue 543 } 544 if bytes.Compare(sstA.index.data[i].Key, sstB.index.data[j].Key) == -1 { 545 // read entry from sstA 546 de, err = sstA.ReadAt(sstA.index.data[i].Offset) 547 if err != nil { 548 return err 549 } 550 // write entry to batch 551 batch.WriteEntry(de) 552 i++ 553 continue 554 } 555 if bytes.Compare(sstB.index.data[j].Key, sstA.index.data[i].Key) == -1 { 556 // read entry from sstB 557 de, err = sstB.ReadAt(sstB.index.data[j].Offset) 558 if err != nil { 559 return err 560 } 561 // write entry to batch 562 batch.WriteEntry(de) 563 j++ 564 continue 565 } 566 } 567 568 // print remaining 569 for i < n1 { 570 // read entry from sstA 571 de, err = sstA.ReadAt(sstA.index.data[i].Offset) 572 if err != nil { 573 return err 574 } 575 // write entry to batch 576 batch.WriteEntry(de) 577 i++ 578 } 579 580 // print remaining 581 for j < n2 { 582 // read entry from sstB 583 de, err = sstB.ReadAt(sstB.index.data[j].Offset) 584 if err != nil { 585 return err 586 } 587 // write entry to batch 588 batch.WriteEntry(de) 589 j++ 590 } 591 592 // return error free 593 return nil 594 }