github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/table.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bitpage
    16  
    17  import (
    18  	"bufio"
    19  	"encoding/binary"
    20  	"io"
    21  	"os"
    22  	"sync"
    23  	"sync/atomic"
    24  	"syscall"
    25  	"unsafe"
    26  
    27  	"github.com/cockroachdb/errors"
    28  
    29  	"github.com/zuoyebang/bitalosdb/internal/consts"
    30  	"github.com/zuoyebang/bitalosdb/internal/mmap"
    31  	"golang.org/x/sys/unix"
    32  )
    33  
const (
	// maxMapSize is the largest size, in bytes, that calcExpandSize will
	// accept or return.
	maxMapSize    = 0xFFFFFFFFFFFF
	// maxExpandStep (128 MiB) is the multiple that calcExpandSize rounds up to
	// once the requested size exceeds 1<<30 bytes.
	maxExpandStep = 128 << 20
)
    38  
const (
	// align4 is not referenced in this file; presumably it is the low-bit
	// mask passed as the align argument of allocAlign for 4-byte alignment —
	// TODO confirm against callers.
	align4            = 3
	// tableHeaderOffset is the position of the header, which getOffset and
	// setOffset read and write as a big-endian uint32.
	tableHeaderOffset = 0
	// tableHeaderSize is the number of bytes initHeader reserves for the header.
	tableHeaderSize   = 4
	// tableDataOffset is not referenced in this file; it equals the first
	// byte after the header.
	tableDataOffset   = 4
)
    45  
// Open modes accepted by openTable through tableOptions.openType.
const (
	// tableWriteMmap maps the file read-write and initialises the header of
	// an empty file.
	tableWriteMmap = 1
	// tableReadMmap maps the file read-only using the current file size.
	tableReadMmap  = 2
	// tableWriteDisk maps the file read-only with the configured initial
	// mapping size and starts the write offset at the current file size.
	tableWriteDisk = 3
)
    51  
// tableOptions configures how openTable opens and maps a table file.
type tableOptions struct {
	// openType selects the open mode (tableWriteMmap, tableReadMmap or
	// tableWriteDisk).
	openType     int
	// initMmapSize is the requested initial mapping size in bytes. In
	// tableWriteMmap mode a zero value is replaced by
	// consts.BitpageInitMmapSize.
	initMmapSize int
}
    56  
// defaultTableOptions opens a table in tableWriteMmap mode with the package's
// default initial mapping size.
var defaultTableOptions = &tableOptions{
	openType:     tableWriteMmap,
	initMmapSize: consts.BitpageInitMmapSize,
}
    61  
// table is a file together with the memory mapping through which its contents
// are read and written.
type table struct {
	// path is the file name as reported by the opened file.
	path     string
	// file is the underlying file handle; close sets it to nil.
	file     *os.File
	// offset is the in-memory allocation position; Size returns it.
	offset   atomic.Uint32
	// filesz is the file size recorded at open time and after each
	// fileTruncate.
	filesz   int
	// data is the current mapping, or nil when nothing is mapped.
	data     []byte
	// datasz is the length that was requested for the current mapping.
	datasz   int
	// opened is true until close has run.
	opened   bool
	// openType is compared against tableWriteMmap by munmap to decide whether
	// the mapping is msync'ed before being released.
	openType int
	// mmaplock is taken exclusively by mmapReadExpand while it remaps.
	mmaplock sync.RWMutex
}
    73  
    74  func openTable(path string, opts *tableOptions) (*table, error) {
    75  	var err error
    76  
    77  	t := &table{
    78  		opened: true,
    79  	}
    80  
    81  	defer func() {
    82  		if err != nil {
    83  			_ = t.close()
    84  		}
    85  	}()
    86  
    87  	t.file, err = os.OpenFile(path, os.O_CREATE|os.O_RDWR, consts.FileMode)
    88  	if err != nil {
    89  		return nil, err
    90  	}
    91  
    92  	t.path = t.file.Name()
    93  	t.filesz = int(t.fileStatSize())
    94  
    95  	switch opts.openType {
    96  	case tableWriteMmap:
    97  		sz := opts.initMmapSize
    98  		if sz == 0 {
    99  			sz = consts.BitpageInitMmapSize
   100  		}
   101  		if t.filesz > sz {
   102  			sz = t.filesz
   103  		}
   104  		if err = t.mmapWrite(sz); err != nil {
   105  			return nil, err
   106  		}
   107  		if err = t.initHeader(); err != nil {
   108  			return nil, err
   109  		}
   110  		t.offset.Store(t.getOffset())
   111  	case tableReadMmap:
   112  		if err = t.mmapRead(t.filesz); err != nil {
   113  			return nil, err
   114  		}
   115  		t.offset.Store(t.getOffset())
   116  	case tableWriteDisk:
   117  		if err = t.mmapRead(opts.initMmapSize); err != nil {
   118  			return nil, err
   119  		}
   120  		t.offset.Store(uint32(t.filesz))
   121  	default:
   122  		return nil, ErrTableOpenType
   123  	}
   124  
   125  	return t, nil
   126  }
   127  
   128  func (t *table) close() error {
   129  	if !t.opened {
   130  		return nil
   131  	}
   132  
   133  	t.opened = false
   134  
   135  	if err := t.munmap(); err != nil {
   136  		return err
   137  	}
   138  
   139  	if t.file != nil {
   140  		if err := t.file.Sync(); err != nil {
   141  			return err
   142  		}
   143  		if err := t.file.Close(); err != nil {
   144  			return err
   145  		}
   146  		t.file = nil
   147  	}
   148  	return nil
   149  }
   150  
   151  func (t *table) Size() uint32 {
   152  	return t.offset.Load()
   153  }
   154  
   155  func (t *table) Capacity() int {
   156  	return t.datasz
   157  }
   158  
   159  func (t *table) calcExpandSize(size int) (int, error) {
   160  	for i := uint(15); i <= 30; i++ {
   161  		if size <= 1<<i {
   162  			return 1 << i, nil
   163  		}
   164  	}
   165  
   166  	if size > maxMapSize {
   167  		return 0, errors.New("bitpage: table too large")
   168  	}
   169  
   170  	sz := int64(size)
   171  	if remainder := sz % int64(maxExpandStep); remainder > 0 {
   172  		sz += int64(maxExpandStep) - remainder
   173  	}
   174  
   175  	if sz > maxMapSize {
   176  		sz = maxMapSize
   177  	}
   178  
   179  	return int(sz), nil
   180  }
   181  
   182  func (t *table) expandFileSize(size int) error {
   183  	if size > t.filesz {
   184  		sz, err := t.calcExpandSize(size)
   185  		if err != nil {
   186  			return err
   187  		}
   188  		if err = t.fileTruncate(sz); err != nil {
   189  			return errors.Wrapf(err, "bitpage: table truncate fail file:%s", t.path)
   190  		}
   191  	}
   192  	return nil
   193  }
   194  
   195  func (t *table) expandMmapSize(size int) error {
   196  	if size > t.datasz {
   197  		if err := t.mmapWrite(size); err != nil {
   198  			return errors.Wrapf(err, "bitpage: table mmapWrite fail file:%s", t.path)
   199  		}
   200  	}
   201  	return nil
   202  }
   203  
   204  func (t *table) checkTableFull(size int) error {
   205  	if size+int(t.Size()) > t.datasz {
   206  		return ErrTableFull
   207  	}
   208  	return nil
   209  }
   210  
   211  func (t *table) allocAlign(size, align, overflow uint32) (uint32, uint32, error) {
   212  	padded := size + align
   213  	newSize := t.offset.Add(padded)
   214  	sz := int(newSize) + int(overflow)
   215  	if sz > t.datasz {
   216  		return 0, 0, ErrTableFull
   217  	}
   218  	if err := t.expandFileSize(sz); err != nil {
   219  		return 0, 0, err
   220  	}
   221  
   222  	t.setOffset(newSize)
   223  	offset := (newSize - padded + align) & ^align
   224  	return offset, padded, nil
   225  }
   226  
   227  func (t *table) alloc(size uint32) (uint32, error) {
   228  	newSize := t.offset.Add(size)
   229  	sz := int(newSize)
   230  	if err := t.expandFileSize(sz); err != nil {
   231  		return 0, err
   232  	}
   233  	if err := t.expandMmapSize(sz); err != nil {
   234  		return 0, err
   235  	}
   236  
   237  	t.setOffset(newSize)
   238  	offset := newSize - size
   239  	return offset, nil
   240  }
   241  
   242  func (t *table) initHeader() error {
   243  	if t.filesz == 0 {
   244  		if _, err := t.alloc(tableHeaderSize); err != nil {
   245  			return err
   246  		}
   247  		t.setOffset(tableHeaderSize)
   248  	}
   249  	return nil
   250  }
   251  
   252  func (t *table) getOffset() uint32 {
   253  	return t.readAtUInt32(tableHeaderOffset)
   254  }
   255  
   256  func (t *table) setOffset(val uint32) {
   257  	t.writeAtUInt32(val, tableHeaderOffset)
   258  }
   259  
   260  func (t *table) writeAt(b []byte, offset uint32) (int, error) {
   261  	size := uint32(len(b))
   262  	n := copy(t.data[offset:offset+size], b)
   263  	return n, nil
   264  }
   265  
   266  func (t *table) readAtUInt16(offset uint16) uint16 {
   267  	return binary.BigEndian.Uint16(t.data[offset : offset+2])
   268  }
   269  
   270  func (t *table) writeAtUInt16(val uint16, offset uint32) {
   271  	binary.BigEndian.PutUint16(t.data[offset:offset+2], val)
   272  }
   273  
   274  func (t *table) readAtUInt32(offset uint32) uint32 {
   275  	return binary.BigEndian.Uint32(t.data[offset : offset+4])
   276  }
   277  
   278  func (t *table) writeAtUInt32(val uint32, offset uint32) {
   279  	binary.BigEndian.PutUint32(t.data[offset:offset+4], val)
   280  }
   281  
   282  func (t *table) getBytes(offset uint32, size uint32) []byte {
   283  	return t.data[offset : offset+size : offset+size]
   284  }
   285  
   286  func (t *table) getPointer(offset uint32) unsafe.Pointer {
   287  	return unsafe.Pointer(&t.data[offset])
   288  }
   289  
   290  func (t *table) getData() []byte {
   291  	return t.data[:]
   292  }
   293  
   294  func (t *table) getPointerOffset(ptr unsafe.Pointer) uint32 {
   295  	if ptr == nil {
   296  		return 0
   297  	}
   298  	return uint32(uintptr(ptr) - uintptr(unsafe.Pointer(&t.data[0])))
   299  }
   300  
   301  func (t *table) fileTruncate(size int) error {
   302  	if err := t.file.Truncate(int64(size)); err != nil {
   303  		return err
   304  	}
   305  	if err := t.file.Sync(); err != nil {
   306  		return err
   307  	}
   308  	t.filesz = size
   309  	return nil
   310  }
   311  
   312  func (t *table) fileStatSize() int64 {
   313  	info, err := t.file.Stat()
   314  	if err != nil {
   315  		return 0
   316  	}
   317  	return info.Size()
   318  }
   319  
   320  func (t *table) mmapWrite(sz int) error {
   321  	size, err := t.calcExpandSize(sz)
   322  	if err != nil {
   323  		return err
   324  	}
   325  
   326  	if err = t.munmap(); err != nil {
   327  		return err
   328  	}
   329  
   330  	if err = mmapFile(t, mmap.RDWR, size); err != nil {
   331  		return err
   332  	}
   333  
   334  	return nil
   335  }
   336  
   337  func (t *table) mmapRead(sz int) error {
   338  	if err := t.munmap(); err != nil {
   339  		return err
   340  	}
   341  
   342  	return mmapFile(t, mmap.RDONLY, sz)
   343  }
   344  
   345  func (t *table) mmapReadExpand() (bool, error) {
   346  	if t.filesz <= t.datasz {
   347  		return false, nil
   348  	}
   349  
   350  	sz := t.datasz * 2
   351  
   352  	t.mmaplock.Lock()
   353  	defer t.mmaplock.Unlock()
   354  
   355  	return true, t.mmapRead(sz)
   356  }
   357  
   358  func (t *table) mmapReadTruncate(sz int) error {
   359  	fileSize := int(t.fileStatSize())
   360  	if fileSize != sz {
   361  		if err := t.fileTruncate(sz); err != nil {
   362  			return err
   363  		}
   364  	}
   365  
   366  	return t.mmapRead(sz)
   367  }
   368  
   369  func (t *table) munmap() error {
   370  	if t.data == nil {
   371  		return nil
   372  	}
   373  
   374  	if t.openType == tableWriteMmap {
   375  		_ = unix.Msync(t.data, unix.MS_SYNC)
   376  	}
   377  
   378  	err := unix.Munmap(t.data)
   379  	t.data = nil
   380  	t.datasz = 0
   381  	if err != nil {
   382  		return errors.Wrapf(err, "bitpage: munmap fail")
   383  	}
   384  	return nil
   385  }
   386  
   387  func mmapFile(t *table, prot, length int) error {
   388  	b, err := mmap.Map(t.file, prot, length)
   389  	if err != nil {
   390  		return err
   391  	}
   392  
   393  	err = unix.Madvise(b, syscall.MADV_RANDOM)
   394  	if err != nil && err != syscall.ENOSYS {
   395  		return errors.Wrapf(err, "bitpage: madvise fail")
   396  	}
   397  
   398  	t.data = b
   399  	t.datasz = length
   400  	return nil
   401  }
   402  
// tableWriter appends key/value records to a table's file through a buffered
// writer.
type tableWriter struct {
	*table
	// wbuf is a scratch buffer reused by set to encode the record header and
	// key.
	wbuf      []byte
	// writer is the destination set writes to; reset points it at bufWriter.
	writer    io.Writer
	// bufWriter buffers writes to the file; fdatasync flushes it.
	bufWriter *bufio.Writer
}
   409  
   410  func newTableWriter(t *table) *tableWriter {
   411  	return &tableWriter{table: t}
   412  }
   413  
   414  func (w *tableWriter) reset(offset int) error {
   415  	if _, err := w.file.Seek(int64(offset), io.SeekStart); err != nil {
   416  		return err
   417  	}
   418  
   419  	w.writer = nil
   420  	w.bufWriter = nil
   421  	w.bufWriter = bufio.NewWriterSize(w.file, consts.BufioWriterBufSize)
   422  	w.writer = w.bufWriter
   423  	w.filesz = offset
   424  	w.offset.Store(uint32(offset))
   425  	return nil
   426  }
   427  
   428  func (w *tableWriter) encodeHeader(buf []byte, keySize uint16, valueSize uint32) {
   429  	binary.BigEndian.PutUint16(buf[0:2], keySize)
   430  	binary.BigEndian.PutUint32(buf[2:6], valueSize)
   431  }
   432  
   433  func (w *tableWriter) decodeHeader(buf []byte) (uint16, uint32) {
   434  	return binary.BigEndian.Uint16(buf[0:2]), binary.BigEndian.Uint32(buf[2:6])
   435  }
   436  
   437  func (w *tableWriter) set(key internalKey, value []byte) (uint32, error) {
   438  	keySize := key.Size()
   439  	valueSize := len(value)
   440  	preSize := keySize + stItemHeaderSize
   441  	wrn := 0
   442  
   443  	if cap(w.wbuf) < preSize {
   444  		w.wbuf = make([]byte, 0, preSize*2)
   445  	}
   446  
   447  	w.wbuf = w.wbuf[:preSize]
   448  	w.encodeHeader(w.wbuf[:stItemHeaderSize], uint16(keySize), uint32(valueSize))
   449  	key.Encode(w.wbuf[stItemHeaderSize:])
   450  	n, err := w.writer.Write(w.wbuf)
   451  	if err != nil {
   452  		return 0, err
   453  	}
   454  	wrn += n
   455  
   456  	if valueSize > 0 {
   457  		n, err = w.writer.Write(value)
   458  		if err != nil {
   459  			return 0, err
   460  		}
   461  		wrn += n
   462  	}
   463  
   464  	addSize := uint32(wrn)
   465  	w.wbuf = w.wbuf[:0]
   466  	offset := w.offset.Load()
   467  	w.offset.Add(addSize)
   468  	return offset, nil
   469  }
   470  
   471  func (w *tableWriter) close() error {
   472  	w.bufWriter = nil
   473  	w.wbuf = nil
   474  	w.writer = nil
   475  	return nil
   476  }
   477  
   478  func (w *tableWriter) fdatasync() error {
   479  	if err := w.bufWriter.Flush(); err != nil {
   480  		return err
   481  	}
   482  
   483  	return w.file.Sync()
   484  }