github.com/matrixorigin/matrixone@v1.2.0/pkg/container/hashtable/string_hash_map.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package hashtable
    16  
    17  import (
    18  	"unsafe"
    19  
    20  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    21  	"github.com/matrixorigin/matrixone/pkg/common/mpool"
    22  )
    23  
// StringRef pairs a pointer to a byte with a length.
//
// NOTE(review): presumably a non-owning view of a byte string whose first
// byte is *Ptr and whose size is Len; nothing in this file uses the type, so
// confirm against its callers.
type StringRef struct {
	// Ptr points at the first byte.
	Ptr *byte
	// Len is the number of bytes.
	Len int
}
    28  
// StringHashMapCell is one slot of a StringHashMap.
//
// The table reinterprets its raw byte buffers as slices of this struct, so
// the field order and sizes determine the in-memory layout of the table.
type StringHashMapCell struct {
	// HashState is the three-word hash state of the key. Lookups compare the
	// whole array for equality and use HashState[0] to pick the starting slot.
	HashState [3]uint64
	// Mapped is the value stored for the key. Zero marks an empty slot; the
	// insert methods assign values starting at 1.
	Mapped uint64
}
    33  
// StrKeyPadding is a zero-initialised 16-byte array.
//
// NOTE(review): it is not referenced in this file; presumably callers use it
// to pad short keys before hashing. Confirm against its users.
var StrKeyPadding [16]byte
    35  
// StringHashMap is an open-addressing hash table with linear probing whose
// keys are three-word hash states and whose values are non-zero uint64s.
//
// Storage is split into equally sized blocks. A global cell index idx lives
// in block idx / blockCellCnt at offset idx % blockCellCnt.
type StringHashMap struct {
	// blockCellCnt is the number of cells in each block.
	blockCellCnt    uint64
	// blockMaxElemCnt is the per-block element budget, as computed by
	// maxElemCnt; ResizeOnDemand grows the table once it would be exceeded.
	blockMaxElemCnt uint64
	// cellCntMask is cellCnt - 1 and is used to wrap probe indexes.
	cellCntMask     uint64
	//confCnt     uint64

	// cellCnt is the total number of cells over all blocks.
	cellCnt uint64
	// elemCnt is the number of keys inserted so far; it is also the value
	// assigned to the most recently inserted key.
	elemCnt uint64
	// rawData holds the byte buffers obtained from the memory pool, one per
	// block.
	rawData [][]byte
	// cells holds, per block, the cell slice that probing and iteration use.
	// Init and ResizeOnDemand build each entry as a typed view over the
	// matching rawData buffer.
	cells   [][]StringHashMapCell
}
    47  
// Package-level sizing values, both assigned in init.
var (
	// strCellSize is the size in bytes of one StringHashMapCell.
	strCellSize           uint64
	// maxStrCellCntPerBlock is how many cells fit in a block of maxBlockSize
	// bytes.
	maxStrCellCntPerBlock uint64
)
    52  
    53  func init() {
    54  	strCellSize = uint64(unsafe.Sizeof(StringHashMapCell{}))
    55  	maxStrCellCntPerBlock = maxBlockSize / strCellSize
    56  }
    57  
    58  func (ht *StringHashMap) Free(m *mpool.MPool) {
    59  	for i := range ht.rawData {
    60  		if len(ht.rawData[i]) > 0 {
    61  			m.Free(ht.rawData[i])
    62  		}
    63  		ht.rawData[i], ht.cells[i] = nil, nil
    64  	}
    65  	ht.rawData, ht.cells = nil, nil
    66  }
    67  
    68  func (ht *StringHashMap) Init(m *mpool.MPool) (err error) {
    69  	ht.blockCellCnt = kInitialCellCnt
    70  	ht.blockMaxElemCnt = maxElemCnt(kInitialCellCnt, strCellSize)
    71  	ht.elemCnt = 0
    72  	ht.cellCnt = kInitialCellCnt
    73  	ht.cellCntMask = kInitialCellCnt - 1
    74  
    75  	ht.rawData = make([][]byte, 1)
    76  	ht.cells = make([][]StringHashMapCell, 1)
    77  	if ht.rawData[0], err = m.Alloc(int(ht.blockCellCnt * strCellSize)); err == nil {
    78  		ht.cells[0] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt)
    79  	}
    80  	return
    81  }
    82  
    83  func (ht *StringHashMap) Dup() *StringHashMap {
    84  	val := &StringHashMap{
    85  		blockCellCnt:    ht.blockCellCnt,
    86  		blockMaxElemCnt: ht.blockMaxElemCnt,
    87  		cellCntMask:     ht.cellCntMask,
    88  
    89  		cellCnt: ht.cellCnt,
    90  		elemCnt: ht.elemCnt,
    91  
    92  		rawData: make([][]byte, len(ht.rawData)),
    93  		cells:   make([][]StringHashMapCell, len(ht.cells)),
    94  	}
    95  
    96  	for i, raw := range ht.rawData {
    97  		val.rawData[i] = make([]byte, len(raw))
    98  		copy(val.rawData[i], raw)
    99  	}
   100  
   101  	for i, cell := range ht.cells {
   102  		val.cells[i] = make([]StringHashMapCell, len(cell))
   103  		copy(val.cells[i], cell)
   104  	}
   105  
   106  	return val
   107  }
   108  
   109  func (ht *StringHashMap) InsertStringBatch(states [][3]uint64, keys [][]byte, values []uint64, m *mpool.MPool) error {
   110  	if err := ht.ResizeOnDemand(uint64(len(keys)), m); err != nil {
   111  		return err
   112  	}
   113  
   114  	BytesBatchGenHashStates(&keys[0], &states[0], len(keys))
   115  
   116  	for i := range keys {
   117  		cell := ht.findCell(&states[i])
   118  		if cell.Mapped == 0 {
   119  			ht.elemCnt++
   120  			cell.HashState = states[i]
   121  			cell.Mapped = ht.elemCnt
   122  		}
   123  		values[i] = cell.Mapped
   124  	}
   125  	return nil
   126  }
   127  
   128  func (ht *StringHashMap) InsertStringBatchWithRing(zValues []int64, states [][3]uint64, keys [][]byte, values []uint64, m *mpool.MPool) error {
   129  	if err := ht.ResizeOnDemand(uint64(len(keys)), m); err != nil {
   130  		return err
   131  	}
   132  
   133  	BytesBatchGenHashStates(&keys[0], &states[0], len(keys))
   134  
   135  	for i := range keys {
   136  		if zValues[i] == 0 {
   137  			continue
   138  		}
   139  
   140  		cell := ht.findCell(&states[i])
   141  		if cell.Mapped == 0 {
   142  			ht.elemCnt++
   143  			cell.HashState = states[i]
   144  			cell.Mapped = ht.elemCnt
   145  		}
   146  		values[i] = cell.Mapped
   147  	}
   148  	return nil
   149  }
   150  
   151  func (ht *StringHashMap) FindStringBatch(states [][3]uint64, keys [][]byte, values []uint64) {
   152  	BytesBatchGenHashStates(&keys[0], &states[0], len(keys))
   153  
   154  	for i := range keys {
   155  		cell := ht.findCell(&states[i])
   156  		values[i] = cell.Mapped
   157  	}
   158  }
   159  
   160  func (ht *StringHashMap) FindString24Batch(states [][3]uint64, keys [][3]uint64, values []uint64) {
   161  	Int192BatchGenHashStates(&keys[0], &states[0], len(keys))
   162  
   163  	for i := range keys {
   164  		cell := ht.findCell(&states[i])
   165  		values[i] = cell.Mapped
   166  	}
   167  }
   168  
   169  func (ht *StringHashMap) FindString32Batch(states [][3]uint64, keys [][4]uint64, values []uint64) {
   170  	Int256BatchGenHashStates(&keys[0], &states[0], len(keys))
   171  
   172  	for i := range keys {
   173  		cell := ht.findCell(&states[i])
   174  		values[i] = cell.Mapped
   175  	}
   176  }
   177  
   178  func (ht *StringHashMap) FindString40Batch(states [][3]uint64, keys [][5]uint64, values []uint64) {
   179  	Int320BatchGenHashStates(&keys[0], &states[0], len(keys))
   180  
   181  	for i := range keys {
   182  		cell := ht.findCell(&states[i])
   183  		values[i] = cell.Mapped
   184  	}
   185  }
   186  
// FindStringBatchWithRing is a no-op: it ignores all of its arguments and
// leaves values untouched.
func (ht *StringHashMap) FindStringBatchWithRing(states [][3]uint64, zValues []int64, keys [][]byte, values []uint64) {
	// XXX The original author believed this method is no longer used.
}
   190  
   191  func (ht *StringHashMap) FindHashStateBatch(states [][3]uint64, values []uint64) {
   192  	for i := range states {
   193  		cell := ht.findCell(&states[i])
   194  		values[i] = cell.Mapped
   195  	}
   196  }
   197  
   198  func (ht *StringHashMap) findCell(state *[3]uint64) *StringHashMapCell {
   199  	for idx := state[0] & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask {
   200  		blockId := idx / ht.blockCellCnt
   201  		cellId := idx % ht.blockCellCnt
   202  		cell := &ht.cells[blockId][cellId]
   203  		if cell.Mapped == 0 || cell.HashState == *state {
   204  			return cell
   205  		}
   206  	}
   207  	return nil
   208  }
   209  
   210  func (ht *StringHashMap) findEmptyCell(state *[3]uint64) *StringHashMapCell {
   211  	for idx := state[0] & ht.cellCntMask; true; idx = (idx + 1) & ht.cellCntMask {
   212  		blockId := idx / ht.blockCellCnt
   213  		cellId := idx % ht.blockCellCnt
   214  		cell := &ht.cells[blockId][cellId]
   215  		if cell.Mapped == 0 {
   216  			return cell
   217  		}
   218  	}
   219  	return nil
   220  }
   221  
// ResizeOnDemand grows the table, if necessary, so that n more elements can be
// inserted, and rehashes the existing entries into the enlarged table.
//
// It returns nil without touching the table when elemCnt+n still fits the
// current capacity (number of blocks times blockMaxElemCnt). Otherwise the
// cell count is doubled until maxElemCnt reports enough room, and one of two
// strategies is used:
//
//   - blocks already have the maximum size: more blocks are appended and the
//     entries are rehashed in place;
//   - otherwise: new storage is allocated (one larger block, or several
//     maximum-size blocks), the entries of block 0 are re-inserted into it and
//     the old block 0 is freed.
//
// The only errors returned are those of m.Alloc.
//
// NOTE(review): when m.Alloc fails, fields such as cellCnt, cellCntMask,
// rawData and cells have already been modified and are not rolled back, so
// the table should not be used after an error.
// NOTE(review): the rehash treats Mapped == 0 as "empty" in the newly
// allocated blocks, i.e. it relies on m.Alloc returning zeroed memory —
// confirm against mpool.
func (ht *StringHashMap) ResizeOnDemand(n uint64, m *mpool.MPool) error {
	var err error

	// Fast path: the current blocks can already hold the requested elements.
	targetCnt := ht.elemCnt + n
	if targetCnt <= uint64(len(ht.rawData))*ht.blockMaxElemCnt {
		return nil
	}

	// Keep doubling the cell count until the element budget covers targetCnt.
	newCellCnt := ht.cellCnt << 1
	newMaxElemCnt := maxElemCnt(newCellCnt, strCellSize)
	for newMaxElemCnt < targetCnt {
		newCellCnt <<= 1
		newMaxElemCnt = maxElemCnt(newCellCnt, strCellSize)
	}

	newAlloc := int(newCellCnt * strCellSize)
	if ht.blockCellCnt == maxStrCellCntPerBlock {
		// Blocks are already at their maximum size: grow by appending blocks
		// and keep the existing ones.
		oldBlockNum := len(ht.rawData)
		newBlockNum := newAlloc / maxBlockSize

		ht.rawData = append(ht.rawData, make([][]byte, newBlockNum-oldBlockNum)...)
		ht.cells = append(ht.cells, make([][]StringHashMapCell, newBlockNum-oldBlockNum)...)
		ht.cellCnt = ht.blockCellCnt * uint64(newBlockNum)
		ht.cellCntMask = ht.cellCnt - 1

		for i := oldBlockNum; i < newBlockNum; i++ {
			ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt * strCellSize))
			if err != nil {
				return err
			}
			// View the new buffer as a slice of cells.
			ht.cells[i] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[i][0])), ht.blockCellCnt)
		}

		// Rehash in place: every occupied cell of the old blocks is looked up
		// again under the new mask and moved if its slot changed.
		var block []StringHashMapCell
		var emptyCell StringHashMapCell

		for i := 0; i < oldBlockNum; i++ {
			block = ht.cells[i]
			for j := uint64(0); j < ht.blockCellCnt; j++ {
				cell := &block[j]
				if cell.Mapped == 0 {
					continue
				}
				newCell := ht.findCell(&cell.HashState)
				if newCell != cell {
					*newCell = *cell
					*cell = emptyCell
				}
			}
		}

		// Second pass over the leading occupied run of the first new block,
		// stopping at the first empty cell.
		// NOTE(review): presumably this re-places entries that the first pass
		// pushed past the end of the old blocks by probing — confirm.
		block = ht.cells[oldBlockNum]
		for j := uint64(0); j < ht.blockCellCnt; j++ {
			cell := &block[j]
			if cell.Mapped == 0 {
				break
			}
			newCell := ht.findCell(&cell.HashState)
			if newCell != cell {
				*newCell = *cell
				*cell = emptyCell
			}
		}
	} else {
		// Blocks are still below the maximum size. Only block 0 is read and
		// freed here, i.e. this branch assumes a single-block table.
		oldCells0 := ht.cells[0]
		oldData0 := ht.rawData[0]
		ht.cellCnt = newCellCnt
		ht.cellCntMask = ht.cellCnt - 1

		if newAlloc <= maxBlockSize {
			// The enlarged table still fits into one block.
			ht.blockCellCnt = newCellCnt
			ht.blockMaxElemCnt = newMaxElemCnt

			ht.rawData[0], err = m.Alloc(newAlloc)
			if err != nil {
				return err
			}
			ht.cells[0] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[0][0])), ht.blockCellCnt)
		} else {
			// The enlarged table needs several maximum-size blocks.
			ht.blockCellCnt = maxStrCellCntPerBlock
			ht.blockMaxElemCnt = maxElemCnt(ht.blockCellCnt, strCellSize)

			newBlockNum := newAlloc / maxBlockSize
			ht.rawData = make([][]byte, newBlockNum)
			ht.cells = make([][]StringHashMapCell, newBlockNum)
			ht.cellCnt = ht.blockCellCnt * uint64(newBlockNum)
			ht.cellCntMask = ht.cellCnt - 1

			for i := 0; i < newBlockNum; i++ {
				ht.rawData[i], err = m.Alloc(int(ht.blockCellCnt * strCellSize))
				if err != nil {
					return err
				}
				ht.cells[i] = unsafe.Slice((*StringHashMapCell)(unsafe.Pointer(&ht.rawData[i][0])), ht.blockCellCnt)
			}
		}

		// Re-insert every occupied cell of the old block into the new storage.
		// The new storage holds no entries yet, so searching for an empty
		// cell is sufficient.
		for i := range oldCells0 {
			cell := &oldCells0[i]
			if cell.Mapped != 0 {
				newCell := ht.findEmptyCell(&cell.HashState)
				*newCell = *cell
			}
		}

		// The old block is no longer referenced by the table.
		m.Free(oldData0)
	}

	return nil
}
   335  
   336  func (ht *StringHashMap) Cardinality() uint64 {
   337  	return ht.elemCnt
   338  }
   339  
   340  func (ht *StringHashMap) Size() int64 {
   341  	// 33 is the origin size of StringHashMaps
   342  	ret := int64(33)
   343  	for i := range ht.rawData {
   344  		ret += int64(len(ht.rawData[i]))
   345  		// 32 is the len of ht.cells[i]
   346  		ret += 32
   347  	}
   348  	return ret
   349  }
   350  
// StringHashMapIterator walks the occupied cells of a StringHashMap in cell
// index order.
type StringHashMapIterator struct {
	// table is the map being iterated.
	table *StringHashMap
	// pos is the global index of the next cell to examine.
	pos   uint64
}
   355  
   356  func (it *StringHashMapIterator) Init(ht *StringHashMap) {
   357  	it.table = ht
   358  }
   359  
   360  func (it *StringHashMapIterator) Next() (cell *StringHashMapCell, err error) {
   361  	for it.pos < it.table.cellCnt {
   362  		blockId := it.pos / it.table.blockCellCnt
   363  		cellId := it.pos % it.table.blockCellCnt
   364  		cell = &it.table.cells[blockId][cellId]
   365  		if cell.Mapped != 0 {
   366  			break
   367  		}
   368  		it.pos++
   369  	}
   370  
   371  	if it.pos >= it.table.cellCnt {
   372  		err = moerr.NewInternalErrorNoCtx("out of range")
   373  		return
   374  	}
   375  	it.pos++
   376  
   377  	return
   378  }