github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/super_table.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bitpage 16 17 import ( 18 "bytes" 19 "encoding/binary" 20 "fmt" 21 "os" 22 "sort" 23 "sync/atomic" 24 25 "github.com/cockroachdb/errors" 26 "github.com/zuoyebang/bitalosdb/internal/utils" 27 28 "github.com/zuoyebang/bitalosdb/internal/base" 29 "github.com/zuoyebang/bitalosdb/internal/consts" 30 ) 31 32 const ( 33 stVersionDefault uint16 = 1 + iota 34 ) 35 36 const ( 37 stHeaderSize = 8 38 stHeaderVersionOffset = 0 39 stDataOffset = stHeaderSize 40 stItemKeySize = 2 41 stItemValueSize = 4 42 stItemHeaderSize = stItemKeySize + stItemValueSize 43 ) 44 45 const ( 46 stiVersionDefault uint16 = 1 + iota 47 ) 48 49 const ( 50 stiHeaderSize = 14 51 stiHeaderVersionOffset = 0 52 stiHeaderFileSizeOffset = 2 53 stiHeaderDataSizeOffset = 6 54 stiHeaderIdxNumOffset = 10 55 stiIndexesOffset = stiHeaderSize 56 ) 57 58 type stIndexes []uint32 59 60 type superTable struct { 61 p *page 62 tbl *table 63 writer *tableWriter 64 version uint16 65 fn FileNum 66 totalCount float64 67 delCount float64 68 filename string 69 idxPath string 70 indexModified bool 71 reading atomic.Pointer[stIndexes] 72 pending stIndexes 73 } 74 75 func checkSuperTable(obj interface{}) { 76 s := obj.(*superTable) 77 if s.tbl != nil { 78 fmt.Fprintf(os.Stderr, "superTable(%s) buffer was not freed\n", s.path()) 79 os.Exit(1) 80 } 81 } 82 83 func newSuperTable(p *page, path string, fn FileNum, exist bool) (*superTable, error) { 84 tableOpts := &tableOptions{ 85 openType: tableWriteDisk, 86 initMmapSize: consts.BitpageInitMmapSize, 87 } 88 tbl, err := openTable(path, tableOpts) 89 if err != nil { 90 return nil, err 91 } 92 93 st := &superTable{ 94 p: p, 95 tbl: tbl, 96 fn: fn, 97 filename: base.GetFilePathBase(path), 98 pending: make(stIndexes, 0, 1<<10), 99 writer: newTableWriter(tbl), 100 indexModified: false, 101 } 102 st.idxPath = st.getIdxFilePath() 103 if err = st.writer.reset(tbl.filesz); err != nil { 104 return nil, err 105 } 106 107 if exist { 108 if err = st.getHeader(); err != nil { 109 return nil, err 110 } 111 112 err = st.loadIdxFromFile() 113 } else { 114 err = st.setHeader() 115 } 116 if err != nil { 117 return nil, err 118 } 119 120 return st, nil 121 } 122 123 func (s *superTable) getHeader() error { 124 var header [stHeaderSize]byte 125 n, err := s.tbl.file.ReadAt(header[:], 0) 126 if err != nil { 127 return err 128 } 129 if n != stHeaderSize { 130 return errors.Errorf("bitpage: superTable read header err n:%d", n) 131 } 132 133 s.version = binary.BigEndian.Uint16(header[stHeaderVersionOffset:]) 134 return nil 135 } 136 137 func (s *superTable) setHeader() error { 138 version := stVersionDefault 139 140 var header [stHeaderSize]byte 141 for i := range header { 142 header[i] = 0 143 } 144 binary.BigEndian.PutUint16(header[stHeaderVersionOffset:], version) 145 n, err := s.writer.writer.Write(header[:]) 146 if err != nil { 147 return err 148 } 149 if n != stHeaderSize { 150 return errors.Errorf("bitpage: superTable write header err n:%d", n) 151 } 152 if err = s.writer.fdatasync(); err != nil { 153 return err 154 } 155 156 s.version = version 157 s.tbl.offset.Add(uint32(n)) 158 s.grow(n) 159 return nil 160 } 161 162 func (s *superTable) set(key internalKey, value []byte) error { 163 offset, err := s.writer.set(key, value) 164 if err != nil { 165 return err 166 } 167 168 s.pending = append(s.pending, offset) 169 return nil 170 } 171 172 func (s *superTable) get(key []byte, _ uint32) ([]byte, bool, internalKeyKind, func()) { 173 indexes := s.readIndexes() 174 pos := s.findKeyIndexPos(indexes, key) 175 if pos < 0 || pos >= len(indexes) { 176 return nil, false, internalKeyKindInvalid, nil 177 } 178 179 ikey, value := s.getItem(indexes[pos]) 180 if !bytes.Equal(ikey.UserKey, key) { 181 return nil, false, internalKeyKindInvalid, nil 182 } 183 184 return value, true, ikey.Kind(), nil 185 } 186 187 func (s *superTable) getKeyByPos(indexes stIndexes, pos int) internalKey { 188 if pos < 0 || pos >= len(indexes) { 189 return internalKey{} 190 } 191 return s.getKey(indexes[pos]) 192 } 193 194 func (s *superTable) getKey(offset uint32) internalKey { 195 keySize := uint32(binary.BigEndian.Uint16(s.tbl.getBytes(offset, stItemKeySize))) 196 key := s.tbl.getBytes(offset+stItemHeaderSize, keySize) 197 return base.DecodeInternalKey(key) 198 } 199 200 func (s *superTable) getValue(offset uint32) []byte { 201 keySize := uint32(binary.BigEndian.Uint16(s.tbl.getBytes(offset, stItemKeySize))) 202 valueSize := binary.BigEndian.Uint32(s.tbl.getBytes(offset+stItemKeySize, stItemValueSize)) 203 value := s.tbl.getBytes(offset+stItemHeaderSize+keySize, valueSize) 204 return value 205 } 206 207 func (s *superTable) getItem(offset uint32) (internalKey, []byte) { 208 keySize := uint32(binary.BigEndian.Uint16(s.tbl.getBytes(offset, stItemKeySize))) 209 key := s.tbl.getBytes(offset+stItemHeaderSize, keySize) 210 valueSize := binary.BigEndian.Uint32(s.tbl.getBytes(offset+stItemKeySize, stItemValueSize)) 211 value := s.tbl.getBytes(offset+stItemHeaderSize+keySize, valueSize) 212 return base.DecodeInternalKey(key), value 213 } 214 215 func (s *superTable) findKeyIndexPos(indexes stIndexes, key []byte) int { 216 num := len(indexes) 217 if num == 0 { 218 return -1 219 } 220 221 return sort.Search(num, func(i int) bool { 222 ikey := s.getKeyByPos(indexes, i) 223 return bytes.Compare(ikey.UserKey, key) != -1 224 }) 225 } 226 227 func (s *superTable) newIter(o *iterOptions) internalIterator { 228 iter := &superTableIterator{ 229 st: s, 230 indexes: s.readIndexes(), 231 } 232 return iter 233 } 234 235 func (s *superTable) kindStatis(kind internalKeyKind) { 236 s.totalCount++ 237 if kind == internalKeyKindDelete { 238 s.delCount++ 239 } 240 } 241 242 func (s *superTable) delPercent() float64 { 243 if s.delCount == 0 { 244 return 0 245 } 246 return s.delCount / s.totalCount 247 } 248 249 func (s *superTable) itemCount() int { 250 return len(s.readIndexes()) 251 } 252 253 func (s *superTable) readyForFlush() bool { 254 return true 255 } 256 257 func (s *superTable) inuseBytes() uint64 { 258 return uint64(s.tbl.Size()) 259 } 260 261 func (s *superTable) dataBytes() uint64 { 262 return uint64(s.tbl.Size()) 263 } 264 265 func (s *superTable) empty() bool { 266 return s.tbl.Size() == stHeaderSize 267 } 268 269 func (s *superTable) close() error { 270 if err := s.writeIdxToFile(); err != nil { 271 return err 272 } 273 274 if err := s.tbl.close(); err != nil { 275 return err 276 } 277 278 s.tbl = nil 279 return nil 280 } 281 282 func (s *superTable) path() string { 283 if s.tbl == nil { 284 return "" 285 } 286 return s.tbl.path 287 } 288 289 func (s *superTable) idxFilePath() string { 290 return s.idxPath 291 } 292 293 func (s *superTable) mmapRLock() { 294 s.tbl.mmaplock.RLock() 295 } 296 297 func (s *superTable) mmapRUnLock() { 298 s.tbl.mmaplock.RUnlock() 299 } 300 301 func (s *superTable) grow(sz int) { 302 if sz > s.tbl.filesz { 303 s.tbl.filesz = sz 304 } 305 } 306 307 func (s *superTable) mergeIndexes() error { 308 if len(s.pending) == 0 { 309 return nil 310 } 311 312 if err := s.writer.fdatasync(); err != nil { 313 return err 314 } 315 316 if _, err := s.tbl.mmapReadExpand(); err != nil { 317 return err 318 } 319 320 oldIndexes := s.readIndexes() 321 oldEnd := len(oldIndexes) 322 pendingEnd := len(s.pending) 323 pendingCurrent := 0 324 pendingNextKey := s.getKey(s.pending[0]) 325 oldCurrent := s.findKeyIndexPos(oldIndexes, pendingNextKey.UserKey) 326 327 newIndexes := make(stIndexes, 0, oldEnd+pendingEnd) 328 addIndexes := func(index uint32) { 329 newIndexes = append(newIndexes, index) 330 } 331 332 for i := 0; i <= oldCurrent-1; i++ { 333 addIndexes(oldIndexes[i]) 334 } 335 336 if oldCurrent >= 0 && oldCurrent < oldEnd { 337 oldNextKey := s.getKey(oldIndexes[oldCurrent]) 338 for { 339 cmp := bytes.Compare(oldNextKey.UserKey, pendingNextKey.UserKey) 340 if cmp < 0 { 341 addIndexes(oldIndexes[oldCurrent]) 342 oldCurrent++ 343 if oldCurrent >= oldEnd { 344 break 345 } 346 oldNextKey = s.getKey(oldIndexes[oldCurrent]) 347 } else if cmp > 0 { 348 addIndexes(s.pending[pendingCurrent]) 349 pendingCurrent++ 350 if pendingCurrent >= pendingEnd { 351 break 352 } 353 pendingNextKey = s.getKey(s.pending[pendingCurrent]) 354 } else { 355 addIndexes(s.pending[pendingCurrent]) 356 357 if s.p != nil && s.p.bp != nil { 358 s.p.bp.deleteBithashKey(s.getValue(oldIndexes[oldCurrent])) 359 } 360 361 oldCurrent++ 362 pendingCurrent++ 363 if oldCurrent >= oldEnd || pendingCurrent >= pendingEnd { 364 break 365 } 366 367 oldNextKey = s.getKey(oldIndexes[oldCurrent]) 368 pendingNextKey = s.getKey(s.pending[pendingCurrent]) 369 } 370 } 371 } 372 373 for oldCurrent >= 0 && oldCurrent < oldEnd { 374 addIndexes(oldIndexes[oldCurrent]) 375 oldCurrent++ 376 } 377 378 for pendingCurrent < pendingEnd { 379 addIndexes(s.pending[pendingCurrent]) 380 pendingCurrent++ 381 } 382 383 s.indexModified = true 384 s.reading.Store(&newIndexes) 385 s.pending = s.pending[:0] 386 s.grow(int(s.tbl.Size())) 387 388 return nil 389 } 390 391 func (s *superTable) readIndexes() stIndexes { 392 ptr := s.reading.Load() 393 if ptr == nil { 394 return nil 395 } 396 return *ptr 397 } 398 399 func (s *superTable) getIdxFilePath() string { 400 return s.p.bp.makeFilePath(fileTypeSuperTableIndex, s.p.pn, s.fn) 401 } 402 403 func (s *superTable) loadIdxFromFile() error { 404 if utils.IsFileNotExist(s.idxPath) { 405 return s.rebuildIndexes() 406 } 407 408 err := func() error { 409 idxFile, err := os.OpenFile(s.idxPath, os.O_CREATE|os.O_RDONLY, consts.FileMode) 410 if err != nil { 411 return err 412 } 413 414 fstat, err := idxFile.Stat() 415 if err != nil { 416 return err 417 } 418 idxFileSize := fstat.Size() 419 if idxFileSize < stiHeaderSize { 420 return errors.Errorf("bitpage: superTable header size small size:%d", idxFileSize) 421 } 422 423 var header [stiHeaderSize]byte 424 n, err := idxFile.ReadAt(header[:], 0) 425 if err != nil { 426 return err 427 } 428 if n != stiHeaderSize { 429 return errors.Errorf("bitpage: superTable header readAt fail n:%d", n) 430 } 431 432 fileSize := binary.BigEndian.Uint32(header[stiHeaderFileSizeOffset:]) 433 if idxFileSize != int64(fileSize) { 434 return errors.Errorf("bitpage: superTable file size not eq fstat:%d rsize:%d", idxFileSize, fileSize) 435 } 436 437 stSize := s.tbl.Size() 438 dataSize := binary.BigEndian.Uint32(header[stiHeaderDataSizeOffset:]) 439 if stSize != dataSize { 440 return errors.Errorf("bitpage: superTable data size not eq filesz:%d dsize:%d", stSize, dataSize) 441 } 442 443 idxNum := int(binary.BigEndian.Uint32(header[stiHeaderIdxNumOffset:])) 444 if idxNum > 0 { 445 idxSize := idxNum * 4 446 idxBuf := make([]byte, idxSize) 447 n, err = idxFile.ReadAt(idxBuf, stiIndexesOffset) 448 if err != nil { 449 return err 450 } 451 if n != idxSize { 452 return errors.Errorf("bitpage: superTable idx readAt fail n:%d exp:%d", n, idxSize) 453 } 454 455 indexes := make(stIndexes, idxNum) 456 pos := 0 457 for i := 0; i < idxNum; i++ { 458 indexes[i] = binary.BigEndian.Uint32(idxBuf[pos : pos+4]) 459 pos += 4 460 } 461 462 s.reading.Store(&indexes) 463 } else { 464 s.reading.Store(nil) 465 } 466 467 s.p.bp.opts.Logger.Infof("superTable read indexes success file:%s idxNum:%d", s.filename, idxNum) 468 return idxFile.Close() 469 }() 470 if err == nil { 471 return nil 472 } 473 474 s.p.bp.opts.Logger.Errorf("superTable load indexes file fail file:%s err:%v", s.filename, err) 475 return s.rebuildIndexes() 476 } 477 478 func (s *superTable) writeIdxToFile() error { 479 if !s.indexModified { 480 return nil 481 } 482 483 idxFile, err := os.OpenFile(s.idxPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, consts.FileMode) 484 if err != nil { 485 return err 486 } 487 488 indexes := s.readIndexes() 489 idxNum := len(indexes) 490 fileSize := stiHeaderSize + idxNum*4 491 dataSize := s.tbl.Size() 492 if dataSize > 0 && idxNum == 0 { 493 dataSize = 0 494 } 495 496 buf := make([]byte, fileSize) 497 binary.BigEndian.PutUint16(buf[stiHeaderVersionOffset:], stiVersionDefault) 498 binary.BigEndian.PutUint32(buf[stiHeaderFileSizeOffset:], uint32(fileSize)) 499 binary.BigEndian.PutUint32(buf[stiHeaderDataSizeOffset:], dataSize) 500 binary.BigEndian.PutUint32(buf[stiHeaderIdxNumOffset:], uint32(idxNum)) 501 pos := stiIndexesOffset 502 for i := 0; i < idxNum; i++ { 503 binary.BigEndian.PutUint32(buf[pos:pos+4], indexes[i]) 504 pos += 4 505 } 506 507 if _, err = idxFile.Write(buf); err != nil { 508 return err 509 } 510 if err = idxFile.Sync(); err != nil { 511 return err 512 } 513 if err = idxFile.Close(); err != nil { 514 return err 515 } 516 517 s.indexModified = false 518 s.p.bp.opts.Logger.Infof("superTable write indexes finish file:%s filesz:%d dsize:%d idxNum:%d", s.filename, fileSize, dataSize, idxNum) 519 return nil 520 }