github.com/zuoyebang/bitalosdb@v1.1.1-0.20240516111551-79a8c4d8ce20/bitpage/super_table.go (about)

     1  // Copyright 2021 The Bitalosdb author(hustxrb@163.com) and other contributors.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bitpage
    16  
    17  import (
    18  	"bytes"
    19  	"encoding/binary"
    20  	"fmt"
    21  	"os"
    22  	"sort"
    23  	"sync/atomic"
    24  
    25  	"github.com/cockroachdb/errors"
    26  	"github.com/zuoyebang/bitalosdb/internal/utils"
    27  
    28  	"github.com/zuoyebang/bitalosdb/internal/base"
    29  	"github.com/zuoyebang/bitalosdb/internal/consts"
    30  )
    31  
// stVersionDefault is the format version that setHeader writes into the
// header of a newly created super table data file.
const (
	stVersionDefault uint16 = 1 + iota
)
    35  
// Layout of the super table data file.
//
// The file starts with a header of stHeaderSize bytes; the big-endian uint16
// format version sits at stHeaderVersionOffset and stDataOffset is the first
// byte after the header.
//
// Each item, as decoded by getKey/getValue/getItem, is laid out as:
//
//	keySize (stItemKeySize bytes, big-endian uint16)
//	valueSize (stItemValueSize bytes, big-endian uint32)
//	key (keySize bytes)
//	value (valueSize bytes)
//
// stItemHeaderSize is the combined width of the two size fields.
const (
	stHeaderSize          = 8
	stHeaderVersionOffset = 0
	stDataOffset          = stHeaderSize
	stItemKeySize         = 2
	stItemValueSize       = 4
	stItemHeaderSize      = stItemKeySize + stItemValueSize
)
    44  
// stiVersionDefault is the format version that writeIdxToFile stores in the
// header of the super table index file.
const (
	stiVersionDefault uint16 = 1 + iota
)
    48  
// Layout of the super table index file, as produced by writeIdxToFile and
// consumed by loadIdxFromFile. All integers are big-endian.
//
//	offset 0  (stiHeaderVersionOffset):  uint16 format version
//	offset 2  (stiHeaderFileSizeOffset): uint32 total size of the index file
//	offset 6  (stiHeaderDataSizeOffset): uint32 size of the data file the index describes
//	offset 10 (stiHeaderIdxNumOffset):   uint32 number of index entries
//	offset 14 (stiIndexesOffset):        the entries, one uint32 item offset each
const (
	stiHeaderSize           = 14
	stiHeaderVersionOffset  = 0
	stiHeaderFileSizeOffset = 2
	stiHeaderDataSizeOffset = 6
	stiHeaderIdxNumOffset   = 10
	stiIndexesOffset        = stiHeaderSize
)
    57  
// stIndexes is a list of item offsets into the super table data file. Lookups
// binary-search it by user key (see findKeyIndexPos), so a published list is
// expected to be ordered by the user key of the items it points at.
type stIndexes []uint32
    59  
// superTable couples an on-disk item table with an in-memory list of item
// offsets. New items are appended through writer and their offsets collected
// in pending; mergeIndexes folds pending into the list published via reading,
// and writeIdxToFile persists that list to the index file at idxPath.
type superTable struct {
	p             *page                     // owning page; supplies p.bp (file paths, logger, bithash deletes) and p.pn
	tbl           *table                    // underlying data table; set to nil by close
	writer        *tableWriter              // appends items and the header to tbl
	version       uint16                    // data file format version read by getHeader or written by setHeader
	fn            FileNum                   // file number used when deriving the index file path
	totalCount    float64                   // number of kinds seen by kindStatis
	delCount      float64                   // number of delete kinds seen by kindStatis
	filename      string                    // base name of the data file path, used in log messages
	idxPath       string                    // path of the index file
	indexModified bool                      // set by mergeIndexes, cleared once writeIdxToFile has persisted the indexes
	reading       atomic.Pointer[stIndexes] // currently published index list; nil pointer means no entries
	pending       stIndexes                 // offsets of items written since the last mergeIndexes
}
    74  
    75  func checkSuperTable(obj interface{}) {
    76  	s := obj.(*superTable)
    77  	if s.tbl != nil {
    78  		fmt.Fprintf(os.Stderr, "superTable(%s) buffer was not freed\n", s.path())
    79  		os.Exit(1)
    80  	}
    81  }
    82  
    83  func newSuperTable(p *page, path string, fn FileNum, exist bool) (*superTable, error) {
    84  	tableOpts := &tableOptions{
    85  		openType:     tableWriteDisk,
    86  		initMmapSize: consts.BitpageInitMmapSize,
    87  	}
    88  	tbl, err := openTable(path, tableOpts)
    89  	if err != nil {
    90  		return nil, err
    91  	}
    92  
    93  	st := &superTable{
    94  		p:             p,
    95  		tbl:           tbl,
    96  		fn:            fn,
    97  		filename:      base.GetFilePathBase(path),
    98  		pending:       make(stIndexes, 0, 1<<10),
    99  		writer:        newTableWriter(tbl),
   100  		indexModified: false,
   101  	}
   102  	st.idxPath = st.getIdxFilePath()
   103  	if err = st.writer.reset(tbl.filesz); err != nil {
   104  		return nil, err
   105  	}
   106  
   107  	if exist {
   108  		if err = st.getHeader(); err != nil {
   109  			return nil, err
   110  		}
   111  
   112  		err = st.loadIdxFromFile()
   113  	} else {
   114  		err = st.setHeader()
   115  	}
   116  	if err != nil {
   117  		return nil, err
   118  	}
   119  
   120  	return st, nil
   121  }
   122  
   123  func (s *superTable) getHeader() error {
   124  	var header [stHeaderSize]byte
   125  	n, err := s.tbl.file.ReadAt(header[:], 0)
   126  	if err != nil {
   127  		return err
   128  	}
   129  	if n != stHeaderSize {
   130  		return errors.Errorf("bitpage: superTable read header err n:%d", n)
   131  	}
   132  
   133  	s.version = binary.BigEndian.Uint16(header[stHeaderVersionOffset:])
   134  	return nil
   135  }
   136  
   137  func (s *superTable) setHeader() error {
   138  	version := stVersionDefault
   139  
   140  	var header [stHeaderSize]byte
   141  	for i := range header {
   142  		header[i] = 0
   143  	}
   144  	binary.BigEndian.PutUint16(header[stHeaderVersionOffset:], version)
   145  	n, err := s.writer.writer.Write(header[:])
   146  	if err != nil {
   147  		return err
   148  	}
   149  	if n != stHeaderSize {
   150  		return errors.Errorf("bitpage: superTable write header err n:%d", n)
   151  	}
   152  	if err = s.writer.fdatasync(); err != nil {
   153  		return err
   154  	}
   155  
   156  	s.version = version
   157  	s.tbl.offset.Add(uint32(n))
   158  	s.grow(n)
   159  	return nil
   160  }
   161  
   162  func (s *superTable) set(key internalKey, value []byte) error {
   163  	offset, err := s.writer.set(key, value)
   164  	if err != nil {
   165  		return err
   166  	}
   167  
   168  	s.pending = append(s.pending, offset)
   169  	return nil
   170  }
   171  
   172  func (s *superTable) get(key []byte, _ uint32) ([]byte, bool, internalKeyKind, func()) {
   173  	indexes := s.readIndexes()
   174  	pos := s.findKeyIndexPos(indexes, key)
   175  	if pos < 0 || pos >= len(indexes) {
   176  		return nil, false, internalKeyKindInvalid, nil
   177  	}
   178  
   179  	ikey, value := s.getItem(indexes[pos])
   180  	if !bytes.Equal(ikey.UserKey, key) {
   181  		return nil, false, internalKeyKindInvalid, nil
   182  	}
   183  
   184  	return value, true, ikey.Kind(), nil
   185  }
   186  
   187  func (s *superTable) getKeyByPos(indexes stIndexes, pos int) internalKey {
   188  	if pos < 0 || pos >= len(indexes) {
   189  		return internalKey{}
   190  	}
   191  	return s.getKey(indexes[pos])
   192  }
   193  
   194  func (s *superTable) getKey(offset uint32) internalKey {
   195  	keySize := uint32(binary.BigEndian.Uint16(s.tbl.getBytes(offset, stItemKeySize)))
   196  	key := s.tbl.getBytes(offset+stItemHeaderSize, keySize)
   197  	return base.DecodeInternalKey(key)
   198  }
   199  
   200  func (s *superTable) getValue(offset uint32) []byte {
   201  	keySize := uint32(binary.BigEndian.Uint16(s.tbl.getBytes(offset, stItemKeySize)))
   202  	valueSize := binary.BigEndian.Uint32(s.tbl.getBytes(offset+stItemKeySize, stItemValueSize))
   203  	value := s.tbl.getBytes(offset+stItemHeaderSize+keySize, valueSize)
   204  	return value
   205  }
   206  
   207  func (s *superTable) getItem(offset uint32) (internalKey, []byte) {
   208  	keySize := uint32(binary.BigEndian.Uint16(s.tbl.getBytes(offset, stItemKeySize)))
   209  	key := s.tbl.getBytes(offset+stItemHeaderSize, keySize)
   210  	valueSize := binary.BigEndian.Uint32(s.tbl.getBytes(offset+stItemKeySize, stItemValueSize))
   211  	value := s.tbl.getBytes(offset+stItemHeaderSize+keySize, valueSize)
   212  	return base.DecodeInternalKey(key), value
   213  }
   214  
   215  func (s *superTable) findKeyIndexPos(indexes stIndexes, key []byte) int {
   216  	num := len(indexes)
   217  	if num == 0 {
   218  		return -1
   219  	}
   220  
   221  	return sort.Search(num, func(i int) bool {
   222  		ikey := s.getKeyByPos(indexes, i)
   223  		return bytes.Compare(ikey.UserKey, key) != -1
   224  	})
   225  }
   226  
   227  func (s *superTable) newIter(o *iterOptions) internalIterator {
   228  	iter := &superTableIterator{
   229  		st:      s,
   230  		indexes: s.readIndexes(),
   231  	}
   232  	return iter
   233  }
   234  
   235  func (s *superTable) kindStatis(kind internalKeyKind) {
   236  	s.totalCount++
   237  	if kind == internalKeyKindDelete {
   238  		s.delCount++
   239  	}
   240  }
   241  
   242  func (s *superTable) delPercent() float64 {
   243  	if s.delCount == 0 {
   244  		return 0
   245  	}
   246  	return s.delCount / s.totalCount
   247  }
   248  
   249  func (s *superTable) itemCount() int {
   250  	return len(s.readIndexes())
   251  }
   252  
// readyForFlush always reports true.
func (s *superTable) readyForFlush() bool {
	return true
}
   256  
   257  func (s *superTable) inuseBytes() uint64 {
   258  	return uint64(s.tbl.Size())
   259  }
   260  
   261  func (s *superTable) dataBytes() uint64 {
   262  	return uint64(s.tbl.Size())
   263  }
   264  
   265  func (s *superTable) empty() bool {
   266  	return s.tbl.Size() == stHeaderSize
   267  }
   268  
   269  func (s *superTable) close() error {
   270  	if err := s.writeIdxToFile(); err != nil {
   271  		return err
   272  	}
   273  
   274  	if err := s.tbl.close(); err != nil {
   275  		return err
   276  	}
   277  
   278  	s.tbl = nil
   279  	return nil
   280  }
   281  
   282  func (s *superTable) path() string {
   283  	if s.tbl == nil {
   284  		return ""
   285  	}
   286  	return s.tbl.path
   287  }
   288  
// idxFilePath returns the path of the index file, as computed once in
// newSuperTable.
func (s *superTable) idxFilePath() string {
	return s.idxPath
}
   292  
// mmapRLock takes the read side of the underlying table's mmaplock. Pair every
// call with mmapRUnLock.
func (s *superTable) mmapRLock() {
	s.tbl.mmaplock.RLock()
}
   296  
// mmapRUnLock releases the read lock on the underlying table's mmaplock that
// was taken by mmapRLock.
func (s *superTable) mmapRUnLock() {
	s.tbl.mmaplock.RUnlock()
}
   300  
   301  func (s *superTable) grow(sz int) {
   302  	if sz > s.tbl.filesz {
   303  		s.tbl.filesz = sz
   304  	}
   305  }
   306  
   307  func (s *superTable) mergeIndexes() error {
   308  	if len(s.pending) == 0 {
   309  		return nil
   310  	}
   311  
   312  	if err := s.writer.fdatasync(); err != nil {
   313  		return err
   314  	}
   315  
   316  	if _, err := s.tbl.mmapReadExpand(); err != nil {
   317  		return err
   318  	}
   319  
   320  	oldIndexes := s.readIndexes()
   321  	oldEnd := len(oldIndexes)
   322  	pendingEnd := len(s.pending)
   323  	pendingCurrent := 0
   324  	pendingNextKey := s.getKey(s.pending[0])
   325  	oldCurrent := s.findKeyIndexPos(oldIndexes, pendingNextKey.UserKey)
   326  
   327  	newIndexes := make(stIndexes, 0, oldEnd+pendingEnd)
   328  	addIndexes := func(index uint32) {
   329  		newIndexes = append(newIndexes, index)
   330  	}
   331  
   332  	for i := 0; i <= oldCurrent-1; i++ {
   333  		addIndexes(oldIndexes[i])
   334  	}
   335  
   336  	if oldCurrent >= 0 && oldCurrent < oldEnd {
   337  		oldNextKey := s.getKey(oldIndexes[oldCurrent])
   338  		for {
   339  			cmp := bytes.Compare(oldNextKey.UserKey, pendingNextKey.UserKey)
   340  			if cmp < 0 {
   341  				addIndexes(oldIndexes[oldCurrent])
   342  				oldCurrent++
   343  				if oldCurrent >= oldEnd {
   344  					break
   345  				}
   346  				oldNextKey = s.getKey(oldIndexes[oldCurrent])
   347  			} else if cmp > 0 {
   348  				addIndexes(s.pending[pendingCurrent])
   349  				pendingCurrent++
   350  				if pendingCurrent >= pendingEnd {
   351  					break
   352  				}
   353  				pendingNextKey = s.getKey(s.pending[pendingCurrent])
   354  			} else {
   355  				addIndexes(s.pending[pendingCurrent])
   356  
   357  				if s.p != nil && s.p.bp != nil {
   358  					s.p.bp.deleteBithashKey(s.getValue(oldIndexes[oldCurrent]))
   359  				}
   360  
   361  				oldCurrent++
   362  				pendingCurrent++
   363  				if oldCurrent >= oldEnd || pendingCurrent >= pendingEnd {
   364  					break
   365  				}
   366  
   367  				oldNextKey = s.getKey(oldIndexes[oldCurrent])
   368  				pendingNextKey = s.getKey(s.pending[pendingCurrent])
   369  			}
   370  		}
   371  	}
   372  
   373  	for oldCurrent >= 0 && oldCurrent < oldEnd {
   374  		addIndexes(oldIndexes[oldCurrent])
   375  		oldCurrent++
   376  	}
   377  
   378  	for pendingCurrent < pendingEnd {
   379  		addIndexes(s.pending[pendingCurrent])
   380  		pendingCurrent++
   381  	}
   382  
   383  	s.indexModified = true
   384  	s.reading.Store(&newIndexes)
   385  	s.pending = s.pending[:0]
   386  	s.grow(int(s.tbl.Size()))
   387  
   388  	return nil
   389  }
   390  
   391  func (s *superTable) readIndexes() stIndexes {
   392  	ptr := s.reading.Load()
   393  	if ptr == nil {
   394  		return nil
   395  	}
   396  	return *ptr
   397  }
   398  
   399  func (s *superTable) getIdxFilePath() string {
   400  	return s.p.bp.makeFilePath(fileTypeSuperTableIndex, s.p.pn, s.fn)
   401  }
   402  
   403  func (s *superTable) loadIdxFromFile() error {
   404  	if utils.IsFileNotExist(s.idxPath) {
   405  		return s.rebuildIndexes()
   406  	}
   407  
   408  	err := func() error {
   409  		idxFile, err := os.OpenFile(s.idxPath, os.O_CREATE|os.O_RDONLY, consts.FileMode)
   410  		if err != nil {
   411  			return err
   412  		}
   413  
   414  		fstat, err := idxFile.Stat()
   415  		if err != nil {
   416  			return err
   417  		}
   418  		idxFileSize := fstat.Size()
   419  		if idxFileSize < stiHeaderSize {
   420  			return errors.Errorf("bitpage: superTable header size small size:%d", idxFileSize)
   421  		}
   422  
   423  		var header [stiHeaderSize]byte
   424  		n, err := idxFile.ReadAt(header[:], 0)
   425  		if err != nil {
   426  			return err
   427  		}
   428  		if n != stiHeaderSize {
   429  			return errors.Errorf("bitpage: superTable header readAt fail n:%d", n)
   430  		}
   431  
   432  		fileSize := binary.BigEndian.Uint32(header[stiHeaderFileSizeOffset:])
   433  		if idxFileSize != int64(fileSize) {
   434  			return errors.Errorf("bitpage: superTable file size not eq fstat:%d rsize:%d", idxFileSize, fileSize)
   435  		}
   436  
   437  		stSize := s.tbl.Size()
   438  		dataSize := binary.BigEndian.Uint32(header[stiHeaderDataSizeOffset:])
   439  		if stSize != dataSize {
   440  			return errors.Errorf("bitpage: superTable data size not eq filesz:%d dsize:%d", stSize, dataSize)
   441  		}
   442  
   443  		idxNum := int(binary.BigEndian.Uint32(header[stiHeaderIdxNumOffset:]))
   444  		if idxNum > 0 {
   445  			idxSize := idxNum * 4
   446  			idxBuf := make([]byte, idxSize)
   447  			n, err = idxFile.ReadAt(idxBuf, stiIndexesOffset)
   448  			if err != nil {
   449  				return err
   450  			}
   451  			if n != idxSize {
   452  				return errors.Errorf("bitpage: superTable idx readAt fail n:%d exp:%d", n, idxSize)
   453  			}
   454  
   455  			indexes := make(stIndexes, idxNum)
   456  			pos := 0
   457  			for i := 0; i < idxNum; i++ {
   458  				indexes[i] = binary.BigEndian.Uint32(idxBuf[pos : pos+4])
   459  				pos += 4
   460  			}
   461  
   462  			s.reading.Store(&indexes)
   463  		} else {
   464  			s.reading.Store(nil)
   465  		}
   466  
   467  		s.p.bp.opts.Logger.Infof("superTable read indexes success file:%s idxNum:%d", s.filename, idxNum)
   468  		return idxFile.Close()
   469  	}()
   470  	if err == nil {
   471  		return nil
   472  	}
   473  
   474  	s.p.bp.opts.Logger.Errorf("superTable load indexes file fail file:%s err:%v", s.filename, err)
   475  	return s.rebuildIndexes()
   476  }
   477  
   478  func (s *superTable) writeIdxToFile() error {
   479  	if !s.indexModified {
   480  		return nil
   481  	}
   482  
   483  	idxFile, err := os.OpenFile(s.idxPath, os.O_CREATE|os.O_WRONLY|os.O_TRUNC, consts.FileMode)
   484  	if err != nil {
   485  		return err
   486  	}
   487  
   488  	indexes := s.readIndexes()
   489  	idxNum := len(indexes)
   490  	fileSize := stiHeaderSize + idxNum*4
   491  	dataSize := s.tbl.Size()
   492  	if dataSize > 0 && idxNum == 0 {
   493  		dataSize = 0
   494  	}
   495  
   496  	buf := make([]byte, fileSize)
   497  	binary.BigEndian.PutUint16(buf[stiHeaderVersionOffset:], stiVersionDefault)
   498  	binary.BigEndian.PutUint32(buf[stiHeaderFileSizeOffset:], uint32(fileSize))
   499  	binary.BigEndian.PutUint32(buf[stiHeaderDataSizeOffset:], dataSize)
   500  	binary.BigEndian.PutUint32(buf[stiHeaderIdxNumOffset:], uint32(idxNum))
   501  	pos := stiIndexesOffset
   502  	for i := 0; i < idxNum; i++ {
   503  		binary.BigEndian.PutUint32(buf[pos:pos+4], indexes[i])
   504  		pos += 4
   505  	}
   506  
   507  	if _, err = idxFile.Write(buf); err != nil {
   508  		return err
   509  	}
   510  	if err = idxFile.Sync(); err != nil {
   511  		return err
   512  	}
   513  	if err = idxFile.Close(); err != nil {
   514  		return err
   515  	}
   516  
   517  	s.indexModified = false
   518  	s.p.bp.opts.Logger.Infof("superTable write indexes finish file:%s filesz:%d dsize:%d idxNum:%d", s.filename, fileSize, dataSize, idxNum)
   519  	return nil
   520  }