github.com/matrixorigin/matrixone@v1.2.0/pkg/container/hashtable/int64_hash_map.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashtable
    16  
    17  import (
    18  	"unsafe"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    21  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    22  )
    23  
    24  type Int64HashMapCell struct {
    25  	Key    uint64
    26  	Mapped uint64
    27  }
    28  
    29  type Int64HashMap struct {
    30  	blockCellCnt    uint64
    31  	blockMaxElemCnt uint64
    32  	cellCntMask     uint64
    33  
    34  	cellCnt uint64
    35  	elemCnt uint64
    36  	rawData [][]byte
    37  	cells   [][]Int64HashMapCell
    38  }
    39  
    40  var (
    41  	intCellSize           uint64
    42  	maxIntCellCntPerBlock uint64
    43  )
    44  
    45  func init() {
    46  	intCellSize = uint64(unsafe.Sizeof(Int64HashMapCell{}))
    47  	maxIntCellCntPerBlock = maxBlockSize / intCellSize
    48  }
    49  
    50  func (ht *Int64HashMap) Dup() *Int64HashMap {
    51  	val := &Int64HashMap{
    52  		blockCellCnt:    ht.blockCellCnt,
    53  		blockMaxElemCnt: ht.blockMaxElemCnt,
    54  		cellCntMask:     ht.cellCntMask,
    55  
    56  		cellCnt: ht.cellCnt,
    57  		elemCnt: ht.elemCnt,
    58  
    59  		rawData: make([][]byte, len(ht.rawData)),
    60  		cells:   make([][]Int64HashMapCell, len(ht.cells)),
    61  	}
    62  
    63  	for i, raw := range ht.rawData {
    64  		val.rawData[i] = make([]byte, len(raw))
    65  		copy(val.rawData[i], raw)
    66  	}
    67  
    68  	for i, cell := range ht.cells {
    69  		val.cells[i] = make([]Int64HashMapCell, len(cell))
    70  		copy(val.cells[i], cell)
    71  	}
    72  
    73  	return val
    74  }
    75  
    76  func (ht *Int64HashMap) Free(m *mpool.MPool) {
    77  	for i := range ht.rawData {
    78  		if len(ht.rawData[i]) > 0 {
    79  			m.Free(ht.rawData[i])
    80  		}
    81  		ht.rawData[i], ht.cells[i] = nil, nil
    82  	}
    83  	ht.rawData, ht.cells = nil, nil
    84  }
    85  
    86  func (ht *Int64HashMap) Init(m *mpool.MPool) (err error) {
    87  	ht.blockCellCnt = kInitialCellCnt
    88  	ht.blockMaxElemCnt = maxElemCnt(kInitialCellCnt, intCellSize)
    89  	ht.cellCntMask = kInitialCellCnt - 1
    90  	ht.elemCnt = 0
    91  	ht.cellCnt = kInitialCellCnt
    92  
    93  	ht.rawData = make([][]byte, 1)
    94  	ht.cells = make([][]Int64HashMapCell, 1)
    95  	if ht.rawData[0], err = m.Alloc(int(ht.blockCellCnt * intCellSize)); err == nil {
    96  		ht.cells[0] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt)
    97  	}
    98  	return
    99  }
   100  
   101  func (ht *Int64HashMap) InsertBatch(n int, hashes []uint64, keysPtr unsafe.Pointer, values []uint64, m *mpool.MPool) error {
   102  	if err := ht.ResizeOnDemand(n, m); err != nil {
   103  		return err
   104  	}
   105  
   106  	if hashes[0] == 0 {
   107  		Int64BatchHash(keysPtr, &hashes[0], n)
   108  	}
   109  
   110  	for i, hash := range hashes {
   111  		cell := ht.findCell(hash)
   112  		if cell.Mapped == 0 {
   113  			ht.elemCnt++
   114  			cell.Key = hash
   115  			cell.Mapped = ht.elemCnt
   116  		}
   117  		values[i] = cell.Mapped
   118  	}
   119  	return nil
   120  }
   121  
   122  func (ht *Int64HashMap) InsertBatchWithRing(n int, zValues []int64, hashes []uint64, keysPtr unsafe.Pointer, values []uint64, m *mpool.MPool) error {
   123  	if err := ht.ResizeOnDemand(n, m); err != nil {
   124  		return err
   125  	}
   126  
   127  	if hashes[0] == 0 {
   128  		Int64BatchHash(keysPtr, &hashes[0], n)
   129  	}
   130  
   131  	for i, hash := range hashes {
   132  		if zValues[i] == 0 {
   133  			continue
   134  		}
   135  		cell := ht.findCell(hash)
   136  		if cell.Mapped == 0 {
   137  			ht.elemCnt++
   138  			cell.Key = hash
   139  			cell.Mapped = ht.elemCnt
   140  		}
   141  		values[i] = cell.Mapped
   142  	}
   143  	return nil
   144  }
   145  
   146  func (ht *Int64HashMap) FindBatch(n int, hashes []uint64, keysPtr unsafe.Pointer, values []uint64) {
   147  	if hashes[0] == 0 {
   148  		Int64BatchHash(keysPtr, &hashes[0], n)
   149  	}
   150  
   151  	for i, hash := range hashes {
   152  		cell := ht.findCell(hash)
   153  		values[i] = cell.Mapped
   154  	}
   155  }
   156  
   157  func (ht *Int64HashMap) FindBatchWithRing(n int, zValues []int64, hashes []uint64, keysPtr unsafe.Pointer, values []uint64) {
   158  	if hashes[0] == 0 {
   159  		Int64BatchHash(keysPtr, &hashes[0], n)
   160  	}
   161  
   162  	for i, hash := range hashes {
   163  		if zValues[i] == 0 {
   164  			values[i] = 0
   165  			continue
   166  		}
   167  		cell := ht.findCell(hash)
   168  		values[i] = cell.Mapped
   169  	}
   170  }
   171  
   172  func (ht *Int64HashMap) findCell(hash uint64) *Int64HashMapCell {
   173  	for idx := hash & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask {
   174  		blockId := idx / ht.blockCellCnt
   175  		cellId := idx % ht.blockCellCnt
   176  		cell := &ht.cells[blockId][cellId]
   177  		if cell.Key == hash || cell.Mapped == 0 {
   178  			return cell
   179  		}
   180  	}
   181  	return nil
   182  }
   183  
   184  func (ht *Int64HashMap) findEmptyCell(hash uint64) *Int64HashMapCell {
   185  	for idx := hash & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask {
   186  		blockId := idx / ht.blockCellCnt
   187  		cellId := idx % ht.blockCellCnt
   188  		cell := &ht.cells[blockId][cellId]
   189  		if cell.Mapped == 0 {
   190  			return cell
   191  		}
   192  	}
   193  	return nil
   194  }
   195  
   196  func (ht *Int64HashMap) ResizeOnDemand(n int, m *mpool.MPool) error {
   197  	var err error
   198  
   199  	targetCnt := ht.elemCnt + uint64(n)
   200  	if targetCnt <= uint64(len(ht.rawData))*ht.blockMaxElemCnt {
   201  		return nil
   202  	}
   203  
   204  	newCellCnt := ht.cellCnt << 1
   205  	newMaxElemCnt := maxElemCnt(newCellCnt, intCellSize)
   206  	for newMaxElemCnt < targetCnt {
   207  		newCellCnt <<= 1
   208  		newMaxElemCnt = maxElemCnt(newCellCnt, intCellSize)
   209  	}
   210  
   211  	newAlloc := int(newCellCnt * intCellSize)
   212  	if ht.blockCellCnt == maxIntCellCntPerBlock {
   213  		// double the blocks
   214  		oldBlockNum := len(ht.rawData)
   215  		newBlockNum := newAlloc / maxBlockSize
   216  
   217  		ht.rawData = append(ht.rawData, make([][]byte, newBlockNum-oldBlockNum)...)
   218  		ht.cells = append(ht.cells, make([][]Int64HashMapCell, newBlockNum-oldBlockNum)...)
   219  		ht.cellCnt = ht.blockCellCnt * uint64(newBlockNum)
   220  		ht.cellCntMask = ht.cellCnt - 1
   221  
   222  		for i := oldBlockNum; i < newBlockNum; i++ {
   223  			ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt * intCellSize))
   224  			if err != nil {
   225  				return err
   226  			}
   227  			ht.cells[i] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[i][0])), ht.blockCellCnt)
   228  		}
   229  
   230  		// rearrange the cells
   231  		var block []Int64HashMapCell
   232  		var emptyCell Int64HashMapCell
   233  
   234  		for i := 0; i < oldBlockNum; i++ {
   235  			block = ht.cells[i]
   236  			for j := uint64(0); j < ht.blockCellCnt; j++ {
   237  				cell := &block[j]
   238  				if cell.Mapped == 0 {
   239  					continue
   240  				}
   241  				newCell := ht.findCell(cell.Key)
   242  				if newCell != cell {
   243  					*newCell = *cell
   244  					*cell = emptyCell
   245  				}
   246  			}
   247  		}
   248  
   249  		block = ht.cells[oldBlockNum]
   250  		for j := uint64(0); j < ht.blockCellCnt; j++ {
   251  			cell := &block[j]
   252  			if cell.Mapped == 0 {
   253  				break
   254  			}
   255  			newCell := ht.findCell(cell.Key)
   256  			if newCell != cell {
   257  				*newCell = *cell
   258  				*cell = emptyCell
   259  			}
   260  		}
   261  	} else {
   262  		oldCells0 := ht.cells[0]
   263  		oldData0 := ht.rawData[0]
   264  		ht.cellCnt = newCellCnt
   265  		ht.cellCntMask = newCellCnt - 1
   266  
   267  		if newAlloc <= maxBlockSize {
   268  			ht.blockCellCnt = newCellCnt
   269  			ht.blockMaxElemCnt = newMaxElemCnt
   270  
   271  			ht.rawData[0], err = m.Alloc(newAlloc)
   272  			if err != nil {
   273  				return err
   274  			}
   275  			ht.cells[0] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt)
   276  		} else {
   277  			ht.blockCellCnt = maxIntCellCntPerBlock
   278  			ht.blockMaxElemCnt = maxElemCnt(ht.blockCellCnt, intCellSize)
   279  
   280  			newBlockNum := newAlloc / maxBlockSize
   281  			ht.rawData = make([][]byte, newBlockNum)
   282  			ht.cells = make([][]Int64HashMapCell, newBlockNum)
   283  			ht.cellCnt = ht.blockCellCnt * uint64(newBlockNum)
   284  			ht.cellCntMask = ht.cellCnt - 1
   285  
   286  			for i := 0; i < newBlockNum; i++ {
   287  				ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt * intCellSize))
   288  				if err != nil {
   289  					return err
   290  				}
   291  				ht.cells[i] = unsafe.Slice((*Int64HashMapCell)(unsafe.Pointer(&ht.rawData[i][0])), ht.blockCellCnt)
   292  			}
   293  		}
   294  
   295  		// rearrange the cells
   296  		for i := range oldCells0 {
   297  			cell := &oldCells0[i]
   298  			if cell.Mapped != 0 {
   299  				newCell := ht.findEmptyCell(cell.Key)
   300  				*newCell = *cell
   301  			}
   302  		}
   303  
   304  		m.Free(oldData0)
   305  	}
   306  
   307  	return nil
   308  }
   309  
   310  func (ht *Int64HashMap) Cardinality() uint64 {
   311  	return ht.elemCnt
   312  }
   313  
   314  func (ht *Int64HashMap) Size() int64 {
   315  	// 41 is the fixed size of Int64HashMap
   316  	ret := int64(41)
   317  	for i := range ht.rawData {
   318  		ret += int64(len(ht.rawData[i]))
   319  		// 16 is the len of ht.cells[i]
   320  		ret += 16
   321  	}
   322  	return ret
   323  }
   324  
   325  type Int64HashMapIterator struct {
   326  	table *Int64HashMap
   327  	pos   uint64
   328  }
   329  
   330  func (it *Int64HashMapIterator) Init(ht *Int64HashMap) {
   331  	it.table = ht
   332  }
   333  
   334  func (it *Int64HashMapIterator) Next() (cell *Int64HashMapCell, err error) {
   335  	for it.pos < it.table.cellCnt {
   336  		blockId := it.pos / it.table.blockCellCnt
   337  		cellId := it.pos % it.table.blockCellCnt
   338  		cell = &it.table.cells[blockId][cellId]
   339  		if cell.Mapped != 0 {
   340  			break
   341  		}
   342  		it.pos++
   343  	}
   344  
   345  	if it.pos >= it.table.cellCnt {
   346  		err = moerr.NewInternalErrorNoCtx("out of range")
   347  		return
   348  	}
   349  
   350  	it.pos++
   351  
   352  	return
   353  }