github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bithash/compact.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bithash 16 17 import ( 18 "bufio" 19 "bytes" 20 "encoding/binary" 21 "errors" 22 "io" 23 "io/fs" 24 "os" 25 26 "github.com/cockroachdb/errors/oserror" 27 "github.com/zuoyebang/bitalosdb/internal/base" 28 "github.com/zuoyebang/bitalosdb/internal/utils" 29 ) 30 31 const ( 32 fileNumMapMagicLen = 8 33 fileNumMapFooterLen = 4 + fileNumMapMagicLen 34 fileNumMapMagic = "\xf7\xcf\xf4\x85\xb7\x41\xe2\x88" 35 fileNumMapMagicOffset = fileNumMapFooterLen - fileNumMapMagicLen 36 fileNumMapVersion = 1 37 fileNumMapRecordLen = 8 38 ) 39 40 const ( 41 compactLogRecordLen = 10 42 compactLogHeaderLen = 16 43 compactLogWriteOffset = 0 44 compactLogReadOffset = 8 45 compactLogDataOffset = compactLogHeaderLen 46 ) 47 48 const ( 49 compactLogKindSet uint16 = 1 + iota 50 compactLogKindDelete 51 ) 52 53 const ( 54 compactMaxFileNum = 8 55 compactMaxMiniSize = 50 << 20 56 ) 57 58 type CompactFiles struct { 59 FileNum FileNum 60 DelPercent float64 61 Size int64 62 } 63 64 func (b *Bithash) CheckFilesDelPercent(cfgPercent float64) []CompactFiles { 65 var compactFiles []CompactFiles 66 var findNum int 67 68 b.meta.mu.RLock() 69 defer b.meta.mu.RUnlock() 70 71 for fn, fileMeta := range b.meta.mu.filesMeta { 72 if fileMeta.state != fileMetaStateImmutable || fileMeta.keyNum == 0 || (cfgPercent > 0.0 && fileMeta.delKeyNum == 0) { 73 continue 74 } 75 76 delPercent := float64(fileMeta.delKeyNum) / float64(fileMeta.keyNum) 77 if delPercent >= cfgPercent { 78 b.logger.Infof("[COMPACTBITHASH %d] checkFilesDelPercent %s delPercent:%.4f cfgPercent:%.2f", b.index, fileMeta, delPercent, cfgPercent) 79 compactFiles = append(compactFiles, CompactFiles{ 80 FileNum: fn, 81 DelPercent: delPercent, 82 }) 83 findNum++ 84 if findNum >= compactMaxFileNum { 85 break 86 } 87 } 88 } 89 90 return compactFiles 91 } 92 93 func (b *Bithash) CheckFilesMiniSize() []CompactFiles { 94 var compactFiles []CompactFiles 95 96 b.meta.mu.RLock() 97 defer b.meta.mu.RUnlock() 98 99 for fn, fileMeta := range b.meta.mu.filesMeta { 100 if fileMeta.state != fileMetaStateImmutable { 101 continue 102 } 103 104 fileSize := b.fileSize(fn) 105 if fileSize <= compactMaxMiniSize { 106 b.logger.Infof("[COMPACTBITHASH %d] checkFilesMiniSize %s fileSize:%s", b.index, fileMeta, utils.FmtSize(uint64(fileSize))) 107 compactFiles = append(compactFiles, CompactFiles{ 108 FileNum: fn, 109 Size: fileSize, 110 }) 111 } 112 } 113 114 return compactFiles 115 } 116 117 type compactLogWriter struct { 118 b *Bithash 119 file *os.File 120 filename string 121 writeOffset uint64 122 readOffset uint64 123 recordBuf [compactLogRecordLen]byte 124 headerBuf [compactLogHeaderLen]byte 125 } 126 127 func initCompactLog(b *Bithash) (err error) { 128 var file *os.File 129 var isNewFile bool 130 filename := MakeFilepath(b.fs, b.dirname, fileTypeCompactLog, 0) 131 _, err = b.fs.Stat(filename) 132 if errors.Is(err, fs.ErrNotExist) { 133 isNewFile = true 134 } 135 file, err = os.OpenFile(filename, os.O_RDWR|os.O_CREATE, 0666) 136 if err != nil { 137 return err 138 } 139 140 defer func() { 141 if err != nil { 142 err = file.Close() 143 } 144 }() 145 146 w := &compactLogWriter{ 147 b: b, 148 file: file, 149 filename: filename, 150 writeOffset: compactLogDataOffset, 151 readOffset: compactLogDataOffset, 152 } 153 154 if isNewFile { 155 err = w.setHeader() 156 } else { 157 err = w.readHeader() 158 } 159 if err != nil { 160 return err 161 } 162 163 b.logger.Infof("[BITHASH %d] open compactLog file filename:%s writeOffset:%d readOffset:%d", 164 b.index, base.GetFilePathBase(filename), w.writeOffset, w.readOffset) 165 166 b.cLogWriter = w 167 168 // 重放未读的log日志 169 if err = b.cLogWriter.replayLog(); err != nil { 170 return err 171 } 172 173 return nil 174 } 175 176 func (w *compactLogWriter) readHeader() error { 177 var buf [16]byte 178 if _, err := w.file.ReadAt(buf[:], compactLogWriteOffset); err != nil { 179 return err 180 } 181 w.writeOffset = binary.LittleEndian.Uint64(buf[0:8]) 182 w.readOffset = binary.LittleEndian.Uint64(buf[8:16]) 183 return nil 184 } 185 186 func (w *compactLogWriter) setHeader() error { 187 binary.LittleEndian.PutUint64(w.headerBuf[0:8], w.writeOffset) 188 binary.LittleEndian.PutUint64(w.headerBuf[8:16], w.readOffset) 189 _, err := w.file.WriteAt(w.headerBuf[:], compactLogWriteOffset) 190 return err 191 } 192 193 func (w *compactLogWriter) reset() error { 194 w.writeOffset = compactLogDataOffset 195 w.readOffset = compactLogDataOffset 196 return w.setHeader() 197 } 198 199 func (w *compactLogWriter) writeRecord(kind uint16, srcFn, dstFn FileNum) error { 200 binary.LittleEndian.PutUint16(w.recordBuf[0:2], kind) 201 binary.LittleEndian.PutUint32(w.recordBuf[2:6], uint32(srcFn)) 202 binary.LittleEndian.PutUint32(w.recordBuf[6:10], uint32(dstFn)) 203 _, err := w.file.WriteAt(w.recordBuf[:], int64(w.writeOffset)) 204 if err != nil { 205 return err 206 } 207 208 binary.LittleEndian.PutUint64(w.headerBuf[:8], w.writeOffset+compactLogRecordLen) 209 if _, err = w.file.WriteAt(w.headerBuf[:8], compactLogWriteOffset); err != nil { 210 return err 211 } 212 213 w.writeOffset += compactLogRecordLen 214 return nil 215 } 216 217 func (w *compactLogWriter) replayLog() (err error) { 218 if w.readOffset == w.writeOffset { 219 return nil 220 } 221 222 w.b.mufn.Lock() 223 defer w.b.mufn.Unlock() 224 225 var buf [compactLogRecordLen]byte 226 var srcFn, dstFn FileNum 227 var kind uint16 228 num := 0 229 for w.readOffset < w.writeOffset { 230 if _, err = w.file.ReadAt(buf[:], int64(w.readOffset)); err != nil { 231 break 232 } 233 234 kind = binary.LittleEndian.Uint16(buf[0:2]) 235 srcFn = FileNum(binary.LittleEndian.Uint32(buf[2:6])) 236 dstFn = FileNum(binary.LittleEndian.Uint32(buf[6:10])) 237 if kind == compactLogKindSet { 238 w.b.mufn.fnMap[srcFn] = dstFn 239 } else if kind == compactLogKindDelete { 240 delete(w.b.mufn.fnMap, srcFn) 241 } 242 243 w.readOffset += compactLogRecordLen 244 num++ 245 } 246 247 if err != nil && err != io.EOF { 248 return err 249 } 250 251 if err = w.setHeader(); err != nil { 252 return err 253 } 254 255 w.b.cLogUpdate = true 256 w.b.logger.Infof("bithash replay end logNum:%d", num) 257 return nil 258 } 259 260 func (w *compactLogWriter) close() (err error) { 261 if err = w.file.Sync(); err != nil { 262 return 263 } 264 if err = w.file.Close(); err != nil { 265 return 266 } 267 return 268 } 269 270 func initFileNumMap(b *Bithash) error { 271 filename := MakeFilepath(b.fs, b.dirname, fileTypeFileNumMap, 0) 272 _, err := b.fs.Stat(filename) 273 if oserror.IsNotExist(err) { 274 if err = createFileNumMapFile(b, filename); err != nil { 275 return err 276 } 277 } 278 279 if err = readFileNumMapFile(b, filename); err != nil { 280 return err 281 } 282 283 if err = initCompactLog(b); err != nil { 284 return err 285 } 286 287 if err = writeFileNumMapFile(b); err != nil { 288 return err 289 } 290 291 if err = b.cLogWriter.reset(); err != nil { 292 return err 293 } 294 295 b.logger.Infof("[BITHASH %d] initFileNumMap success compactLog readOffset:%d writeOffset:%d", 296 b.index, b.cLogWriter.readOffset, b.cLogWriter.writeOffset) 297 298 return nil 299 } 300 301 func encodeFileNumMapFooter() []byte { 302 buf := make([]byte, fileNumMapFooterLen) 303 binary.LittleEndian.PutUint32(buf[0:4], fileNumMapVersion) 304 copy(buf[4:fileNumMapFooterLen], fileNumMapMagic) 305 buf = buf[:fileNumMapFooterLen] 306 return buf 307 } 308 309 func createFileNumMapFile(b *Bithash, filename string) (err error) { 310 var file File 311 file, err = b.fs.Create(filename) 312 if err != nil { 313 return err 314 } 315 316 defer func() { 317 if file != nil { 318 err = file.Close() 319 } 320 if err != nil { 321 err = b.fs.Remove(filename) 322 } 323 }() 324 325 if _, err = file.Write(encodeFileNumMapFooter()); err != nil { 326 return err 327 } 328 if err = file.Sync(); err != nil { 329 return err 330 } 331 return nil 332 } 333 334 func readFileNumMapFile(b *Bithash, filename string) (err error) { 335 var dataSize int64 336 var file File 337 file, err = b.fs.Open(filename) 338 if err != nil { 339 return err 340 } 341 defer file.Close() 342 343 checkFooter := func(f ReadableFile) bool { 344 stat, err := f.Stat() 345 if err != nil { 346 return false 347 } 348 dataSize = stat.Size() - fileNumMapFooterLen 349 if dataSize < 0 { 350 return false 351 } 352 buf := [fileNumMapMagicLen]byte{} 353 n, err := f.ReadAt(buf[:], dataSize+fileNumMapMagicOffset) 354 if err != nil && err != io.EOF { 355 return false 356 } 357 return bytes.Equal(buf[:n], []byte(fileNumMapMagic)) 358 } 359 if !checkFooter(file) { 360 return ErrBhFileNumMapCheckFail 361 } 362 363 if dataSize == 0 { 364 return nil 365 } 366 367 var readBuf [fileNumMapRecordLen]byte 368 var srcFn, dstFn FileNum 369 var offset int64 370 r := bufio.NewReaderSize(file, int(dataSize)) 371 372 b.mufn.Lock() 373 defer b.mufn.Unlock() 374 375 for offset < dataSize { 376 n, err := r.Read(readBuf[:]) 377 if err != nil { 378 if err != io.EOF { 379 return err 380 } 381 break 382 } 383 if n != fileNumMapRecordLen { 384 return errors.New("read FILENUMMAP incomplete data") 385 } 386 387 offset += fileNumMapRecordLen 388 srcFn = FileNum(binary.LittleEndian.Uint32(readBuf[0:4])) 389 dstFn = FileNum(binary.LittleEndian.Uint32(readBuf[4:8])) 390 b.mufn.fnMap[srcFn] = dstFn 391 } 392 393 return nil 394 } 395 396 func writeFileNumMapFile(b *Bithash) (err error) { 397 if b.cLogUpdate == false { 398 return nil 399 } 400 401 var file File 402 fileNumMapTmp := MakeFilepath(b.fs, b.dirname, fileTypeFileNumMapTmp, 0) 403 fileNumMap := MakeFilepath(b.fs, b.dirname, fileTypeFileNumMap, 0) 404 405 b.logger.Infof("bithash write FileNumMap file start filename:%s", fileNumMap) 406 407 file, err = b.fs.Create(fileNumMapTmp) 408 if err != nil { 409 return err 410 } 411 412 defer func() { 413 if _, e := b.fs.Stat(fileNumMapTmp); e == nil { 414 err = b.fs.Remove(fileNumMapTmp) 415 } 416 }() 417 418 b.mufn.RLock() 419 defer b.mufn.RUnlock() 420 421 var buf [fileNumMapRecordLen]byte 422 fnNum := len(b.mufn.fnMap) 423 size := fnNum*fileNumMapRecordLen + fileNumMapFooterLen 424 w := bufio.NewWriterSize(file, size) 425 426 for srcFn, dstFn := range b.mufn.fnMap { 427 binary.LittleEndian.PutUint32(buf[0:4], uint32(srcFn)) 428 binary.LittleEndian.PutUint32(buf[4:8], uint32(dstFn)) 429 if _, err = w.Write(buf[:]); err != nil { 430 return err 431 } 432 } 433 434 if _, err = w.Write(encodeFileNumMapFooter()); err != nil { 435 return err 436 } 437 if err = w.Flush(); err != nil { 438 return err 439 } 440 if err = file.Sync(); err != nil { 441 return err 442 } 443 if err = file.Close(); err != nil { 444 return err 445 } 446 447 if err = b.fs.Rename(fileNumMapTmp, fileNumMap); err != nil { 448 return err 449 } 450 451 b.logger.Infof("bithash write FileNumMap file end filename:%s writeFnNum:%d", fileNumMap, fnNum) 452 453 return nil 454 }