github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/table.go (about) 1 // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package bitpage 16 17 import ( 18 "bufio" 19 "encoding/binary" 20 "io" 21 "os" 22 "sync" 23 "sync/atomic" 24 "syscall" 25 "unsafe" 26 27 "github.com/cockroachdb/errors" 28 29 "github.com/zuoyebang/bitalosdb/internal/consts" 30 "github.com/zuoyebang/bitalosdb/internal/mmap" 31 "golang.org/x/sys/unix" 32 ) 33 34 const ( 35 maxMapSize = 0xFFFFFFFFFFFF 36 maxExpandStep = 128 << 20 37 ) 38 39 const ( 40 align4 = 3 41 tableHeaderOffset = 0 42 tableHeaderSize = 4 43 tableDataOffset = 4 44 ) 45 46 const ( 47 tableWriteMmap = 1 48 tableReadMmap = 2 49 tableWriteDisk = 3 50 ) 51 52 type tableOptions struct { 53 openType int 54 initMmapSize int 55 } 56 57 var defaultTableOptions = &tableOptions{ 58 openType: tableWriteMmap, 59 initMmapSize: consts.BitpageInitMmapSize, 60 } 61 62 type table struct { 63 path string 64 file *os.File 65 offset atomic.Uint32 66 filesz int 67 data []byte 68 datasz int 69 opened bool 70 openType int 71 mmaplock sync.RWMutex 72 } 73 74 func openTable(path string, opts *tableOptions) (*table, error) { 75 var err error 76 77 t := &table{ 78 opened: true, 79 } 80 81 defer func() { 82 if err != nil { 83 _ = t.close() 84 } 85 }() 86 87 t.file, err = os.OpenFile(path, os.O_CREATE|os.O_RDWR, consts.FileMode) 88 if err != nil { 89 return nil, err 90 } 91 92 t.path = t.file.Name() 93 t.filesz = int(t.fileStatSize()) 94 95 switch opts.openType { 96 case tableWriteMmap: 97 sz := opts.initMmapSize 98 if sz == 0 { 99 sz = consts.BitpageInitMmapSize 100 } 101 if t.filesz > sz { 102 sz = t.filesz 103 } 104 if err = t.mmapWrite(sz); err != nil { 105 return nil, err 106 } 107 if err = t.initHeader(); err != nil { 108 return nil, err 109 } 110 t.offset.Store(t.getOffset()) 111 case tableReadMmap: 112 if err = t.mmapRead(t.filesz); err != nil { 113 return nil, err 114 } 115 t.offset.Store(t.getOffset()) 116 case tableWriteDisk: 117 if err = t.mmapRead(opts.initMmapSize); err != nil { 118 return nil, err 119 } 120 t.offset.Store(uint32(t.filesz)) 121 default: 122 return nil, ErrTableOpenType 123 } 124 125 return t, nil 126 } 127 128 func (t *table) close() error { 129 if !t.opened { 130 return nil 131 } 132 133 t.opened = false 134 135 if err := t.munmap(); err != nil { 136 return err 137 } 138 139 if t.file != nil { 140 if err := t.file.Sync(); err != nil { 141 return err 142 } 143 if err := t.file.Close(); err != nil { 144 return err 145 } 146 t.file = nil 147 } 148 return nil 149 } 150 151 func (t *table) Size() uint32 { 152 return t.offset.Load() 153 } 154 155 func (t *table) Capacity() int { 156 return t.datasz 157 } 158 159 func (t *table) calcExpandSize(size int) (int, error) { 160 for i := uint(15); i <= 30; i++ { 161 if size <= 1<<i { 162 return 1 << i, nil 163 } 164 } 165 166 if size > maxMapSize { 167 return 0, errors.New("bitpage: table too large") 168 } 169 170 sz := int64(size) 171 if remainder := sz % int64(maxExpandStep); remainder > 0 { 172 sz += int64(maxExpandStep) - remainder 173 } 174 175 if sz > maxMapSize { 176 sz = maxMapSize 177 } 178 179 return int(sz), nil 180 } 181 182 func (t *table) expandFileSize(size int) error { 183 if size > t.filesz { 184 sz, err := t.calcExpandSize(size) 185 if err != nil { 186 return err 187 } 188 if err = t.fileTruncate(sz); err != nil { 189 return errors.Wrapf(err, "bitpage: table truncate fail file:%s", t.path) 190 } 191 } 192 return nil 193 } 194 195 func (t *table) expandMmapSize(size int) error { 196 if size > t.datasz { 197 if err := t.mmapWrite(size); err != nil { 198 return errors.Wrapf(err, "bitpage: table mmapWrite fail file:%s", t.path) 199 } 200 } 201 return nil 202 } 203 204 func (t *table) checkTableFull(size int) error { 205 if size+int(t.Size()) > t.datasz { 206 return ErrTableFull 207 } 208 return nil 209 } 210 211 func (t *table) allocAlign(size, align, overflow uint32) (uint32, uint32, error) { 212 padded := size + align 213 newSize := t.offset.Add(padded) 214 sz := int(newSize) + int(overflow) 215 if sz > t.datasz { 216 return 0, 0, ErrTableFull 217 } 218 if err := t.expandFileSize(sz); err != nil { 219 return 0, 0, err 220 } 221 222 t.setOffset(newSize) 223 offset := (newSize - padded + align) & ^align 224 return offset, padded, nil 225 } 226 227 func (t *table) alloc(size uint32) (uint32, error) { 228 newSize := t.offset.Add(size) 229 sz := int(newSize) 230 if err := t.expandFileSize(sz); err != nil { 231 return 0, err 232 } 233 if err := t.expandMmapSize(sz); err != nil { 234 return 0, err 235 } 236 237 t.setOffset(newSize) 238 offset := newSize - size 239 return offset, nil 240 } 241 242 func (t *table) initHeader() error { 243 if t.filesz == 0 { 244 if _, err := t.alloc(tableHeaderSize); err != nil { 245 return err 246 } 247 t.setOffset(tableHeaderSize) 248 } 249 return nil 250 } 251 252 func (t *table) getOffset() uint32 { 253 return t.readAtUInt32(tableHeaderOffset) 254 } 255 256 func (t *table) setOffset(val uint32) { 257 t.writeAtUInt32(val, tableHeaderOffset) 258 } 259 260 func (t *table) writeAt(b []byte, offset uint32) (int, error) { 261 size := uint32(len(b)) 262 n := copy(t.data[offset:offset+size], b) 263 return n, nil 264 } 265 266 func (t *table) readAtUInt16(offset uint16) uint16 { 267 return binary.BigEndian.Uint16(t.data[offset : offset+2]) 268 } 269 270 func (t *table) writeAtUInt16(val uint16, offset uint32) { 271 binary.BigEndian.PutUint16(t.data[offset:offset+2], val) 272 } 273 274 func (t *table) readAtUInt32(offset uint32) uint32 { 275 return binary.BigEndian.Uint32(t.data[offset : offset+4]) 276 } 277 278 func (t *table) writeAtUInt32(val uint32, offset uint32) { 279 binary.BigEndian.PutUint32(t.data[offset:offset+4], val) 280 } 281 282 func (t *table) getBytes(offset uint32, size uint32) []byte { 283 return t.data[offset : offset+size : offset+size] 284 } 285 286 func (t *table) getPointer(offset uint32) unsafe.Pointer { 287 return unsafe.Pointer(&t.data[offset]) 288 } 289 290 func (t *table) getData() []byte { 291 return t.data[:] 292 } 293 294 func (t *table) getPointerOffset(ptr unsafe.Pointer) uint32 { 295 if ptr == nil { 296 return 0 297 } 298 return uint32(uintptr(ptr) - uintptr(unsafe.Pointer(&t.data[0]))) 299 } 300 301 func (t *table) fileTruncate(size int) error { 302 if err := t.file.Truncate(int64(size)); err != nil { 303 return err 304 } 305 if err := t.file.Sync(); err != nil { 306 return err 307 } 308 t.filesz = size 309 return nil 310 } 311 312 func (t *table) fileStatSize() int64 { 313 info, err := t.file.Stat() 314 if err != nil { 315 return 0 316 } 317 return info.Size() 318 } 319 320 func (t *table) mmapWrite(sz int) error { 321 size, err := t.calcExpandSize(sz) 322 if err != nil { 323 return err 324 } 325 326 if err = t.munmap(); err != nil { 327 return err 328 } 329 330 if err = mmapFile(t, mmap.RDWR, size); err != nil { 331 return err 332 } 333 334 return nil 335 } 336 337 func (t *table) mmapRead(sz int) error { 338 if err := t.munmap(); err != nil { 339 return err 340 } 341 342 return mmapFile(t, mmap.RDONLY, sz) 343 } 344 345 func (t *table) mmapReadExpand() (bool, error) { 346 if t.filesz <= t.datasz { 347 return false, nil 348 } 349 350 sz := t.datasz * 2 351 352 t.mmaplock.Lock() 353 defer t.mmaplock.Unlock() 354 355 return true, t.mmapRead(sz) 356 } 357 358 func (t *table) mmapReadTruncate(sz int) error { 359 fileSize := int(t.fileStatSize()) 360 if fileSize != sz { 361 if err := t.fileTruncate(sz); err != nil { 362 return err 363 } 364 } 365 366 return t.mmapRead(sz) 367 } 368 369 func (t *table) munmap() error { 370 if t.data == nil { 371 return nil 372 } 373 374 if t.openType == tableWriteMmap { 375 _ = unix.Msync(t.data, unix.MS_SYNC) 376 } 377 378 err := unix.Munmap(t.data) 379 t.data = nil 380 t.datasz = 0 381 if err != nil { 382 return errors.Wrapf(err, "bitpage: munmap fail") 383 } 384 return nil 385 } 386 387 func mmapFile(t *table, prot, length int) error { 388 b, err := mmap.Map(t.file, prot, length) 389 if err != nil { 390 return err 391 } 392 393 err = unix.Madvise(b, syscall.MADV_RANDOM) 394 if err != nil && err != syscall.ENOSYS { 395 return errors.Wrapf(err, "bitpage: madvise fail") 396 } 397 398 t.data = b 399 t.datasz = length 400 return nil 401 } 402 403 type tableWriter struct { 404 *table 405 wbuf []byte 406 writer io.Writer 407 bufWriter *bufio.Writer 408 } 409 410 func newTableWriter(t *table) *tableWriter { 411 return &tableWriter{table: t} 412 } 413 414 func (w *tableWriter) reset(offset int) error { 415 if _, err := w.file.Seek(int64(offset), io.SeekStart); err != nil { 416 return err 417 } 418 419 w.writer = nil 420 w.bufWriter = nil 421 w.bufWriter = bufio.NewWriterSize(w.file, consts.BufioWriterBufSize) 422 w.writer = w.bufWriter 423 w.filesz = offset 424 w.offset.Store(uint32(offset)) 425 return nil 426 } 427 428 func (w *tableWriter) encodeHeader(buf []byte, keySize uint16, valueSize uint32) { 429 binary.BigEndian.PutUint16(buf[0:2], keySize) 430 binary.BigEndian.PutUint32(buf[2:6], valueSize) 431 } 432 433 func (w *tableWriter) decodeHeader(buf []byte) (uint16, uint32) { 434 return binary.BigEndian.Uint16(buf[0:2]), binary.BigEndian.Uint32(buf[2:6]) 435 } 436 437 func (w *tableWriter) set(key internalKey, value []byte) (uint32, error) { 438 keySize := key.Size() 439 valueSize := len(value) 440 preSize := keySize + stItemHeaderSize 441 wrn := 0 442 443 if cap(w.wbuf) < preSize { 444 w.wbuf = make([]byte, 0, preSize*2) 445 } 446 447 w.wbuf = w.wbuf[:preSize] 448 w.encodeHeader(w.wbuf[:stItemHeaderSize], uint16(keySize), uint32(valueSize)) 449 key.Encode(w.wbuf[stItemHeaderSize:]) 450 n, err := w.writer.Write(w.wbuf) 451 if err != nil { 452 return 0, err 453 } 454 wrn += n 455 456 if valueSize > 0 { 457 n, err = w.writer.Write(value) 458 if err != nil { 459 return 0, err 460 } 461 wrn += n 462 } 463 464 addSize := uint32(wrn) 465 w.wbuf = w.wbuf[:0] 466 offset := w.offset.Load() 467 w.offset.Add(addSize) 468 return offset, nil 469 } 470 471 func (w *tableWriter) close() error { 472 w.bufWriter = nil 473 w.wbuf = nil 474 w.writer = nil 475 return nil 476 } 477 478 func (w *tableWriter) fdatasync() error { 479 if err := w.bufWriter.Flush(); err != nil { 480 return err 481 } 482 483 return w.file.Sync() 484 }