github.com/matrixorigin/matrixone@v0.7.0/pkg/common/bitmap/bitmap.go (about)

     1  // Copyright 2021 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package bitmap
    16  
    17  import (
    18  	"bytes"
    19  	"encoding"
    20  	"fmt"
    21  	"math/bits"
    22  
    23  	"github.com/matrixorigin/matrixone/pkg/container/types"
    24  )
    25  
//
// When len is not a multiple of 64, much of the code below assumes that the
// unused trailing bits of the last uint64 word are zero. This appears to hold
// for all current usage, so it is left as is for now.
//

// bitmask is an alias for the 64-bit word type used when building bit masks.
type bitmask = uint64
    33  
    34  /*
    35   * Array giving the position of the right-most set bit for each possible
    36   * byte value. count the right-most position as the 0th bit, and the
    37   * left-most the 7th bit.  The 0th entry of the array should not be used.
    38   * e.g. 2 = 0x10 ==> rightmost_one_pos_8[2] = 1, 3 = 0x11 ==> rightmost_one_pos_8[3] = 0
    39   */
    40  var rightmost_one_pos_8 = [256]uint8{
    41  	0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    42  	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    43  	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    44  	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    45  	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    46  	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    47  	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    48  	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    49  	7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    50  	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    51  	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    52  	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    53  	6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    54  	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    55  	5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    56  	4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
    57  }
    58  
    59  func New(n int) *Bitmap {
    60  	return &Bitmap{
    61  		len:  int64(n),
    62  		data: make([]uint64, (n-1)/64+1),
    63  	}
    64  }
    65  
    66  func (n *Bitmap) Clone() *Bitmap {
    67  	var ret Bitmap
    68  	ret.len = n.len
    69  	ret.emptyFlag = n.emptyFlag
    70  	ret.data = make([]uint64, len(n.data))
    71  	copy(ret.data, n.data)
    72  	return &ret
    73  }
    74  
    75  func (n *Bitmap) Iterator() Iterator {
    76  	// When initialization, the itr.i is set to the first rightmost_one position.
    77  	itr := BitmapIterator{i: 0, bm: n}
    78  	if first_1_pos, has_next := itr.hasNext(0); has_next {
    79  		itr.i = first_1_pos
    80  		itr.has_next = true
    81  		return &itr
    82  	}
    83  	itr.has_next = false
    84  	return &itr
    85  }
    86  
    87  func rightmost_one_pos_64(word uint64) uint64 {
    88  	// find out the rightmost_one position.
    89  	// Firstly, use eight bits as a group to quickly determine whether there is a 1 in it.
    90  	// if not, then rightmost_one exists in next group, add up the distance with result and shift the word
    91  	// if rightmost_one exists in this group, get the distance directly from a pre-made hash table
    92  	var result uint64
    93  	for {
    94  		if (word & 0xFF) == 0 {
    95  			word >>= 8
    96  			result += 8
    97  		} else {
    98  			break
    99  		}
   100  	}
   101  	result += uint64(rightmost_one_pos_8[word&255])
   102  	return result
   103  }
   104  
   105  func (itr *BitmapIterator) hasNext(i uint64) (uint64, bool) {
   106  	// if the uint64 is 0, move forward to next word
   107  	// if the uint64 is not 0, then calculate the rightest_one position in a word, add up prev result and return.
   108  	// when there is 1 in bitmap, return true, otherwise bitmap is empty and return false.
   109  	// either case loop over words not bits
   110  	nwords := (itr.bm.len-1)/64 + 1
   111  	current_word := i >> 6
   112  	mask := (^(bitmask)(0)) << (i & 0x3F) // ignore bits check before
   113  	var result uint64
   114  
   115  	for ; current_word < uint64(nwords); current_word++ {
   116  		word := itr.bm.data[current_word]
   117  		word &= mask
   118  
   119  		if word != 0 {
   120  			result = rightmost_one_pos_64(word) + current_word*64
   121  			return result, true
   122  		}
   123  		mask = (^(bitmask)(0)) // in subsequent words, consider all bits
   124  	}
   125  	return result, false
   126  }
   127  
// HasNext reports whether the iterator still has a set-bit position to
// return.
func (itr *BitmapIterator) HasNext() bool {
	// The answer is cached in has_next (maintained by Iterator and Next) to
	// avoid rescanning the bitmap on every call.
	return itr.has_next
}
   132  
   133  func (itr *BitmapIterator) PeekNext() uint64 {
   134  	if itr.has_next {
   135  		return itr.i
   136  	}
   137  	return 0
   138  }
   139  
   140  func (itr *BitmapIterator) Next() uint64 {
   141  	// When a iterator is initialized, the itr.i is set to the first rightmost_one pos.
   142  	// so current itr.i is a rightmost_one pos, cal the next one pos and return current pos.
   143  	pos := itr.i
   144  	if next, has_next := itr.hasNext(itr.i + 1); has_next { // itr.i + 1 to ignore bits check before
   145  		itr.i = next
   146  		itr.has_next = true
   147  		return pos
   148  	}
   149  	itr.has_next = false
   150  	return pos
   151  }
   152  
   153  func (n *Bitmap) Clear() {
   154  	n.data = make([]uint64, (n.len-1)/64+1)
   155  	n.emptyFlag = 1
   156  }
   157  
// Len returns the logical length of the Bitmap in bits.
func (n *Bitmap) Len() int {
	return int(n.len)
}
   161  
   162  func (n *Bitmap) Size() int {
   163  	return len(n.data) * 8
   164  }
   165  
   166  func (n *Bitmap) Ptr() *uint64 {
   167  	if n == nil {
   168  		return nil
   169  	}
   170  	return &n.data[0]
   171  }
   172  
   173  // IsEmpty returns true if no bit in the Bitmap is set, otherwise it will return false.
   174  func (n *Bitmap) IsEmpty() bool {
   175  	if n.emptyFlag == 1 {
   176  		return true
   177  	} else if n.emptyFlag == -1 {
   178  		return false
   179  	}
   180  	for i := 0; i < len(n.data); i++ {
   181  		if n.data[i] != 0 {
   182  			n.emptyFlag = -1
   183  			return false
   184  		}
   185  	}
   186  	n.emptyFlag = 1
   187  	return true
   188  }
   189  
   190  func (n *Bitmap) Add(row uint64) {
   191  	n.data[row>>6] |= 1 << (row & 0x3F)
   192  	n.emptyFlag = -1 //after add operation, must be not empty
   193  
   194  }
   195  
   196  func (n *Bitmap) AddMany(rows []uint64) {
   197  	for _, row := range rows {
   198  		n.data[row>>6] |= 1 << (row & 0x3F)
   199  	}
   200  	n.emptyFlag = -1 //after add operation, must be not empty
   201  
   202  }
   203  
   204  func (n *Bitmap) Remove(row uint64) {
   205  	if row >= uint64(n.len) {
   206  		return
   207  	}
   208  	n.data[row>>6] &^= (uint64(1) << (row & 0x3F))
   209  	if n.emptyFlag == -1 {
   210  		n.emptyFlag = 0 //after remove operation, not sure
   211  	}
   212  }
   213  
   214  // Contains returns true if the row is contained in the Bitmap
   215  func (n *Bitmap) Contains(row uint64) bool {
   216  	if row >= uint64(n.len) {
   217  		return false
   218  	}
   219  	idx := row >> 6
   220  	return (n.data[idx] & (1 << (row & 0x3F))) != 0
   221  }
   222  
   223  func (n *Bitmap) AddRange(start, end uint64) {
   224  	if start >= end {
   225  		return
   226  	}
   227  	i, j := start>>6, (end-1)>>6
   228  	if i == j {
   229  		n.data[i] |= (^uint64(0) << uint(start&0x3F)) & (^uint64(0) >> (uint(-end) & 0x3F))
   230  		return
   231  	}
   232  	n.data[i] |= (^uint64(0) << uint(start&0x3F))
   233  	for k := i + 1; k < j; k++ {
   234  		n.data[k] = ^uint64(0)
   235  	}
   236  	n.data[j] |= (^uint64(0) >> (uint(-end) & 0x3F))
   237  
   238  	n.emptyFlag = -1 //after addRange operation, must be not empty
   239  
   240  }
   241  
   242  func (n *Bitmap) RemoveRange(start, end uint64) {
   243  	if end > uint64(n.len) {
   244  		end = uint64(n.len)
   245  	}
   246  	if start >= end {
   247  		return
   248  	}
   249  	i, j := start>>6, (end-1)>>6
   250  	if i == j {
   251  		n.data[i] &= ^((^uint64(0) << uint(start&0x3F)) & (^uint64(0) >> (uint(-end) % 0x3F)))
   252  		return
   253  	}
   254  	n.data[i] &= ^(^uint64(0) << uint(start&0x3F))
   255  	for k := i + 1; k < j; k++ {
   256  		n.data[k] = 0
   257  	}
   258  	n.data[j] &= ^(^uint64(0) >> (uint(-end) & 0x3F))
   259  	if n.emptyFlag == -1 {
   260  		n.emptyFlag = 0 //after removeRange operation, not sure
   261  	}
   262  }
   263  
   264  func (n *Bitmap) IsSame(m *Bitmap) bool {
   265  	if n.len != m.len || len(m.data) != len(n.data) {
   266  		return false
   267  	}
   268  	for i := 0; i < len(n.data); i++ {
   269  		if n.data[i] != m.data[i] {
   270  			return false
   271  		}
   272  	}
   273  	return true
   274  }
   275  
   276  func (n *Bitmap) Or(m *Bitmap) {
   277  	n.TryExpand(m)
   278  	size := (int(m.len) + 63) / 64
   279  	for i := 0; i < size; i++ {
   280  		n.data[i] |= m.data[i]
   281  	}
   282  	if n.emptyFlag == 1 {
   283  		n.emptyFlag = 0 //after or operation, not sure
   284  	}
   285  }
   286  
   287  func (n *Bitmap) And(m *Bitmap) {
   288  	n.TryExpand(m)
   289  	size := (int(m.len) + 63) / 64
   290  	for i := 0; i < size; i++ {
   291  		n.data[i] &= m.data[i]
   292  	}
   293  	for i := size; i < len(n.data); i++ {
   294  		n.data[i] = 0
   295  	}
   296  	if n.emptyFlag == -1 {
   297  		n.emptyFlag = 0 //after and operation, not sure
   298  	}
   299  }
   300  
// TryExpand grows n, if needed, so that its length is at least the length of
// m. It delegates to TryExpandWithSize with m's length.
func (n *Bitmap) TryExpand(m *Bitmap) {
	n.TryExpandWithSize(int(m.len))
}
   304  
   305  func (n *Bitmap) TryExpandWithSize(size int) {
   306  	if int(n.len) >= size {
   307  		return
   308  	}
   309  	newCap := (size + 63) / 64
   310  	if newCap > cap(n.data) {
   311  		data := make([]uint64, newCap)
   312  		copy(data, n.data)
   313  		n.data = data
   314  	}
   315  	n.len = int64(size)
   316  }
   317  
   318  func (n *Bitmap) Filter(sels []int64) *Bitmap {
   319  	m := New(int(n.len))
   320  	for i, sel := range sels {
   321  		if n.Contains(uint64(sel)) {
   322  			m.Add(uint64(i))
   323  		}
   324  	}
   325  	return m
   326  }
   327  
   328  func (n *Bitmap) Count() int {
   329  	var cnt int
   330  	if n.emptyFlag == 1 { //must be empty
   331  		return 0
   332  	}
   333  	for i := 0; i < len(n.data); i++ {
   334  		cnt += bits.OnesCount64(n.data[i])
   335  	}
   336  	if cnt > 0 {
   337  		n.emptyFlag = -1 //must be not empty
   338  	} else {
   339  		n.emptyFlag = 1 //must be empty
   340  	}
   341  	return cnt
   342  }
   343  
   344  func (n *Bitmap) ToArray() []uint64 {
   345  	var rows []uint64
   346  	itr := n.Iterator()
   347  	for itr.HasNext() {
   348  		r := itr.Next()
   349  		rows = append(rows, r)
   350  	}
   351  	return rows
   352  }
   353  
   354  func (n *Bitmap) Marshal() []byte {
   355  	var buf bytes.Buffer
   356  
   357  	u1 := uint64(n.len)
   358  	u2 := uint64(len(n.data) * 8)
   359  	buf.Write(types.EncodeInt32(&n.emptyFlag))
   360  	buf.Write(types.EncodeUint64(&u1))
   361  	buf.Write(types.EncodeUint64(&u2))
   362  	buf.Write(types.EncodeSlice(n.data))
   363  	return buf.Bytes()
   364  }
   365  
   366  func (n *Bitmap) Unmarshal(data []byte) {
   367  	n.emptyFlag = types.DecodeInt32(data[:4])
   368  	data = data[4:]
   369  	n.len = int64(types.DecodeUint64(data[:8]))
   370  	data = data[8:]
   371  	size := int(types.DecodeUint64(data[:8]))
   372  	data = data[8:]
   373  	n.data = types.DecodeSlice[uint64](data[:size])
   374  }
   375  
   376  func (n *Bitmap) String() string {
   377  	return fmt.Sprintf("%v", n.ToArray())
   378  }
   379  
   380  var _ encoding.BinaryMarshaler = new(Bitmap)
   381  
   382  func (n *Bitmap) MarshalBinary() ([]byte, error) {
   383  	return n.Marshal(), nil
   384  }
   385  
   386  var _ encoding.BinaryUnmarshaler = new(Bitmap)
   387  
   388  func (n *Bitmap) UnmarshalBinary(data []byte) error {
   389  	n.Unmarshal(data)
   390  	return nil
   391  }