github.com/scottcagno/storage@v1.8.0/pkg/lsmtree/sstable.go (about) 1 package lsmtree 2 3 import ( 4 "bytes" 5 "fmt" 6 "io" 7 "io/fs" 8 "os" 9 "path/filepath" 10 "strings" 11 ) 12 13 type ssTableIndex struct { 14 first []byte 15 last []byte 16 count int 17 data []*Index 18 } 19 20 func newSSTableIndex(index []*Index) *ssTableIndex { 21 if index == nil || len(index) < 1 { 22 return &ssTableIndex{ 23 first: nil, 24 last: nil, 25 count: 0, 26 data: make([]*Index, 0), 27 } 28 } 29 return &ssTableIndex{ 30 first: index[0].Key, 31 last: index[len(index)-1].Key, 32 count: len(index), 33 data: index, 34 } 35 } 36 37 func (ssti *ssTableIndex) Len() int { 38 return len(ssti.data) 39 } 40 41 func (ssti *ssTableIndex) close() { 42 ssti.first = nil 43 ssti.last = nil 44 ssti.count = 0 45 for i := range ssti.data { 46 ssti.data[i] = nil 47 } 48 ssti.data = nil 49 } 50 51 type ssTable struct { 52 path string 53 fd *os.File 54 index *ssTableIndex 55 } 56 57 func createSSTable(dir string, memt *rbTree) error { 58 // create level-0 path for newly flushed ss-tables 59 path := filepath.Join(dir, levelToDir(0)) 60 // read the base dir for this level 61 files, err := os.ReadDir(path) 62 if err != nil { 63 return err 64 } 65 // init seq 66 var seq int64 67 // count the files to get the sequence number 68 for _, file := range files { 69 // if the file is a sst-table data file, increment 70 if !file.IsDir() && strings.HasSuffix(file.Name(), dataFileSuffix) { 71 seq++ 72 } 73 } 74 // create a new data file 75 dataFile, err := openDataFile(path, seq, os.O_CREATE|os.O_WRONLY) 76 // get data file name 77 //dataFileName := filepath.Join(path, toDataFileName(seq)) 78 // open data file 79 //dataFile, err := os.OpenFile(dataFileName, os.O_CREATE|os.O_RDWR, 0666) 80 if err != nil { 81 return err 82 } 83 // remember to close 84 defer func(dataFile *os.File) { 85 err := dataFile.Close() 86 if err != nil { 87 panic("closing dataFile: " + err.Error()) 88 } 89 }(dataFile) 90 91 // create a new index file 92 indexFile, err := openIndexFile(path, seq, os.O_CREATE|os.O_WRONLY) 93 // get index file name 94 //indexFileName := filepath.Join(path, toIndexFileName(seq)) 95 // open index file 96 //indexFile, err := os.OpenFile(indexFileName, os.O_CREATE|os.O_RDWR, 0666) 97 if err != nil { 98 return err 99 } 100 // remember to close 101 defer func(indexFile *os.File) { 102 err := indexFile.Close() 103 if err != nil { 104 panic("closing indexFile: " + err.Error()) 105 } 106 }(indexFile) 107 // range mem-table and write entries and indexes 108 memt.rangeFront(func(e *Entry) bool { 109 // write entry to data file 110 offset, err := writeEntry(dataFile, e) 111 if err != nil { 112 // for now, just panic 113 panic(err) 114 } 115 // write index to index file 116 _, err = writeIndex(indexFile, &Index{ 117 Key: e.Key, 118 Offset: offset, 119 }) 120 if err != nil { 121 // for now, just panic 122 panic(err) 123 } 124 return true 125 }) 126 // sync data file 127 err = dataFile.Sync() 128 if err != nil { 129 return err 130 } 131 // sync index file 132 err = indexFile.Sync() 133 if err != nil { 134 return err 135 } 136 return nil 137 } 138 139 func openSSTable(path string, seq int64) (*ssTable, error) { 140 // open index file 141 indexFile, err := openIndexFile(path, seq, os.O_RDONLY) 142 if err != nil { 143 return nil, err 144 } 145 // create an index set 146 var index []*Index 147 // load up the ss-table-index entries 148 for { 149 // read index entry from the index file 150 i, err := readIndex(indexFile) 151 if err != nil { 152 if err == io.EOF || err == io.ErrUnexpectedEOF { 153 break 154 } 155 // make sure we close! 156 err = indexFile.Close() 157 if err != nil { 158 return nil, err 159 } 160 return nil, err 161 } 162 // add index to the index set 163 index = append(index, i) 164 } 165 // close index file 166 err = indexFile.Close() 167 if err != nil { 168 return nil, err 169 } 170 // make ss-table instance to return 171 sst := &ssTable{ 172 path: toDataFileName(seq), 173 fd: nil, 174 index: newSSTableIndex(index), 175 } 176 // return ss-table instance 177 return sst, nil 178 } 179 180 func (sst *ssTable) keyInRange(key []byte) bool { 181 // error check 182 if key == nil { 183 return false 184 } 185 // return boolean reporting key being between the lo and hi values 186 return isBetween(sst.index.first, key, sst.index.last) 187 } 188 189 func isBetween(lo, key, hi []byte) bool { 190 return bytes.Compare(lo, key) <= 0 && bytes.Compare(hi, key) >= 0 191 } 192 193 func locateSSTable(base string, key []byte) (string, error) { 194 // initialize vars for return 195 var sstPath string 196 // start walking the directory tree from the supplied base 197 err := filepath.WalkDir(base, func(path string, de fs.DirEntry, err error) error { 198 // handle path error 199 if err != nil { 200 fmt.Fprintf(os.Stderr, "prevent panic by handling failure accessing a path %q: %v\n", path, err) 201 return err 202 } 203 // we found a ss-table index file 204 if !de.IsDir() && strings.HasPrefix(de.Name(), dataFileSuffix) { 205 // open index file 206 dataFile, err := os.OpenFile(path, os.O_RDONLY, 0666) 207 if err != nil { 208 return err 209 } 210 // read through the index file entries 211 for { 212 // read index entry from the index file 213 e, err := readEntry(dataFile) 214 if err != nil { 215 if err == io.EOF || err == io.ErrUnexpectedEOF { 216 break 217 } 218 // make sure we close! 219 err = dataFile.Close() 220 if err != nil { 221 return err 222 } 223 return err 224 } 225 // see if we have a match 226 if bytes.Contains(e.Key, key) { 227 sstPath = path 228 break 229 } 230 } 231 // close index file 232 err = dataFile.Close() 233 if err != nil { 234 return err 235 } 236 } 237 return nil 238 }) 239 if err != nil { 240 return "", err 241 } 242 // got one? 243 return sstPath, nil 244 } 245 246 /* 247 func searchInSSTablesOLD(base string, key []byte) (*Entry, error) { 248 // read the base dir for this level 249 dirs, err := os.ReadDir(base) 250 if err != nil { 251 return nil, err 252 } 253 // iterate dirs 254 for _, dir := range dirs { 255 // skip anything that is not a directory 256 if !dir.IsDir() { 257 continue 258 } 259 // now let us read the files within this level 260 files, err := os.ReadDir(dir.Name()) 261 if err != nil { 262 return nil, err 263 } 264 // visit each file 265 for _, file := range files { 266 // if the file is not a ss-table data file, continue 267 if file.IsDir() || !strings.HasSuffix(file.Name(), dataFileSuffix) { 268 continue // skip to the next file 269 } 270 // get the sequence from the data file name 271 seq, err := fromDataFileName(file.Name()) 272 if err != nil { 273 return nil, err 274 } 275 // if the file is a ss-table, open it 276 sst, err := openSSTable(dir.Name(), seq) 277 if err != nil { 278 return nil, err 279 } 280 // perform prelim check to see if the provided 281 // key may fall in the range of this table 282 if ok := sst.keyInRange(key); !ok { 283 // if the key is not in the range, we can 284 // skip to the next table straight away 285 continue 286 } 287 // if the key does fall in the range than there 288 // is a very high chance that it will be found 289 // within this table. perform a search on the 290 // ss-table for the provided key and return 291 e, err := searchSSTable(sst.path, key) 292 if err != nil { 293 return nil, err 294 } 295 // check and return found entry 296 if e != nil && !e.hasTombstone() { 297 return e, nil 298 } 299 } 300 } 301 return nil, ErrNotFound 302 } 303 304 func searchSSTableOLD(dir string, key []byte) (*Entry, error) { 305 // read the base dir for this level 306 dirs, err := os.ReadDir(sstm.baseDir) 307 if err != nil { 308 return err 309 } 310 // iterate dirs 311 for _, dir := range dirs { 312 // skip anything that is not a directory 313 if !dir.IsDir() { 314 continue 315 } 316 // get level 317 level, err := dirToLevel(dir.Name()) 318 if err != nil { 319 return err 320 } 321 // add level to levels 322 if _, ok := sstm.level[level]; !ok { 323 sstm.level[level] = 0 324 } 325 // now let us add the file count within those levels 326 files, err := os.ReadDir(dir.Name()) 327 if err != nil { 328 return err 329 } 330 // count the files 331 for _, file := range files { 332 // if the file is a sst-table data file, increment 333 if !file.IsDir() && strings.HasSuffix(file.Name(), dataFileSuffix) { 334 sstm.level[level]++ 335 sstm.sstcount++ 336 } 337 } 338 } 339 return nil 340 } 341 */ 342 343 func (sst *ssTable) ReadAt(offset int64) (*Entry, error) { 344 // error check 345 if sst.fd == nil { 346 return nil, ErrFileClosed 347 } 348 // use offset to read entry 349 e, err := readEntryAt(sst.fd, offset) 350 if err != nil { 351 return nil, err 352 } 353 // make sure entry checksum is good 354 err = checkCRC(e, checksum(append(e.Key, e.Value...))) 355 if err != nil { 356 return nil, err 357 } 358 // return entry 359 return e, nil 360 } 361 362 /* 363 func getLevelFromSize(size int64) int { 364 switch { 365 case size > 0<<20 && size < 1<<21: // level-0 (2 MB) max=4 366 return 0 367 case size > 1<<22 && size < 1<<23: // level-1 (8 MB) max=4 368 return 1 369 case size > 1<<24 && size < 1<<25: // level-2 (32 MB) max=4 370 return 2 371 case size > 1<<26 && size < 1<<27: // level-3 (128 MB) max=4 372 return 3 373 default: 374 return 4 // oddballs that will need gc for sure 375 } 376 } 377 */