github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/col/coldata/nulls.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package coldata
    12  
    13  // zeroedNulls is a zeroed out slice representing a bitmap of size MaxBatchSize.
    14  // This is copied to efficiently set all nulls.
    15  var zeroedNulls [(MaxBatchSize-1)/8 + 1]byte
    16  
    17  // filledNulls is a slice representing a bitmap of size MaxBatchSize with every
    18  // single bit set.
    19  var filledNulls [(MaxBatchSize-1)/8 + 1]byte
    20  
    21  // bitMask[i] is a byte with a single bit set at i.
    22  var bitMask = [8]byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}
    23  
    24  // flippedBitMask[i] is a byte with all bits set except at i.
    25  var flippedBitMask = [8]byte{0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F}
    26  
    27  // onesMask is a byte where every bit is set to 1.
    28  const onesMask = byte(255)
    29  
    30  func init() {
    31  	// Initializes filledNulls to the desired slice.
    32  	for i := range filledNulls {
    33  		filledNulls[i] = onesMask
    34  	}
    35  }
    36  
    37  // Nulls represents a list of potentially nullable values using a bitmap. It is
    38  // intended to be used alongside a slice (e.g. in the Vec interface) -- if the
    39  // ith bit is off, then the ith element in that slice should be treated as NULL.
    40  type Nulls struct {
    41  	nulls []byte
    42  	// maybeHasNulls is a best-effort representation of whether or not the
    43  	// vector has any null values set. If it is false, there definitely will be
    44  	// no null values. If it is true, there may or may not be null values.
    45  	maybeHasNulls bool
    46  }
    47  
    48  // NewNulls returns a new nulls vector, initialized with a length.
    49  func NewNulls(len int) Nulls {
    50  	if len > 0 {
    51  		n := Nulls{
    52  			nulls: make([]byte, (len-1)/8+1),
    53  		}
    54  		n.UnsetNulls()
    55  		return n
    56  	}
    57  	return Nulls{
    58  		nulls: make([]byte, 0),
    59  	}
    60  }
    61  
    62  // MaybeHasNulls returns true if the column possibly has any null values, and
    63  // returns false if the column definitely has no null values.
    64  func (n *Nulls) MaybeHasNulls() bool {
    65  	return n.maybeHasNulls
    66  }
    67  
    68  // SetNullRange sets all the values in [startIdx, endIdx) to null.
    69  func (n *Nulls) SetNullRange(startIdx int, endIdx int) {
    70  	start, end := uint64(startIdx), uint64(endIdx)
    71  	if start >= end {
    72  		return
    73  	}
    74  
    75  	n.maybeHasNulls = true
    76  	sIdx := start / 8
    77  	eIdx := (end - 1) / 8
    78  
    79  	// Case where mask only spans one byte.
    80  	if sIdx == eIdx {
    81  		mask := onesMask >> (8 - (start % 8))
    82  		// Mask the end if needed.
    83  		if end%8 != 0 {
    84  			mask |= onesMask << (end % 8)
    85  		}
    86  		n.nulls[sIdx] &= mask
    87  		return
    88  	}
    89  
    90  	// Case where mask spans at least two bytes.
    91  	mask := onesMask >> (8 - (start % 8))
    92  	n.nulls[sIdx] &= mask
    93  
    94  	if end%8 == 0 {
    95  		n.nulls[eIdx] = 0
    96  	} else {
    97  		mask = onesMask << (end % 8)
    98  		n.nulls[eIdx] &= mask
    99  	}
   100  
   101  	for i := sIdx + 1; i < eIdx; i++ {
   102  		n.nulls[i] = 0
   103  	}
   104  }
   105  
   106  // UnsetNullRange unsets all the nulls in the range [startIdx, endIdx).
   107  // After using UnsetNullRange, n might not contain any null values,
   108  // but maybeHasNulls could still be true.
   109  func (n *Nulls) UnsetNullRange(startIdx, endIdx int) {
   110  	start, end := uint64(startIdx), uint64(endIdx)
   111  	if start >= end {
   112  		return
   113  	}
   114  	if !n.maybeHasNulls {
   115  		return
   116  	}
   117  
   118  	sIdx := start / 8
   119  	eIdx := (end - 1) / 8
   120  
   121  	// Case where mask only spans one byte.
   122  	if sIdx == eIdx {
   123  		mask := onesMask << (start % 8)
   124  		if end%8 != 0 {
   125  			mask = mask & (onesMask >> (8 - (end % 8)))
   126  		}
   127  		n.nulls[sIdx] |= mask
   128  		return
   129  	}
   130  
   131  	// Case where mask spans at least two bytes.
   132  	mask := onesMask << (start % 8)
   133  	n.nulls[sIdx] |= mask
   134  	if end%8 == 0 {
   135  		n.nulls[eIdx] = onesMask
   136  	} else {
   137  		mask = onesMask >> (8 - (end % 8))
   138  		n.nulls[eIdx] |= mask
   139  	}
   140  
   141  	for i := sIdx + 1; i < eIdx; i++ {
   142  		n.nulls[i] = onesMask
   143  	}
   144  }
   145  
   146  // Truncate sets all values with index greater than or equal to start to null.
   147  func (n *Nulls) Truncate(start int) {
   148  	end := len(n.nulls) * 8
   149  	n.SetNullRange(start, end)
   150  }
   151  
   152  // UnsetNulls sets the column to have no null values.
   153  func (n *Nulls) UnsetNulls() {
   154  	n.maybeHasNulls = false
   155  
   156  	startIdx := 0
   157  	for startIdx < len(n.nulls) {
   158  		startIdx += copy(n.nulls[startIdx:], filledNulls[:])
   159  	}
   160  }
   161  
   162  // UnsetNullsAfter sets all values with index greater than or equal to idx to
   163  // non-null.
   164  func (n *Nulls) UnsetNullsAfter(idx int) {
   165  	end := len(n.nulls) * 8
   166  	n.UnsetNullRange(idx, end)
   167  }
   168  
   169  // SetNulls sets the column to have only null values.
   170  func (n *Nulls) SetNulls() {
   171  	n.maybeHasNulls = true
   172  
   173  	startIdx := 0
   174  	for startIdx < len(n.nulls) {
   175  		startIdx += copy(n.nulls[startIdx:], zeroedNulls[:])
   176  	}
   177  }
   178  
   179  // NullAt returns true if the ith value of the column is null.
   180  func (n *Nulls) NullAt(i int) bool {
   181  	return n.nulls[i>>3]&bitMask[i&7] == 0
   182  }
   183  
   184  // SetNull sets the ith value of the column to null.
   185  func (n *Nulls) SetNull(i int) {
   186  	n.maybeHasNulls = true
   187  	n.nulls[i>>3] &= flippedBitMask[i&7]
   188  }
   189  
   190  // UnsetNull unsets the ith values of the column.
   191  func (n *Nulls) UnsetNull(i int) {
   192  	n.nulls[i>>3] |= bitMask[i&7]
   193  }
   194  
   195  // Remove the unused warning.
   196  var (
   197  	n = Nulls{}
   198  	_ = n.swap
   199  )
   200  
   201  // swap swaps the null values at the argument indices. We implement the logic
   202  // directly on the byte array rather than case on the result of NullAt to avoid
   203  // having to take some branches.
   204  func (n *Nulls) swap(iIdx, jIdx int) {
   205  	i, j := uint64(iIdx), uint64(jIdx)
   206  	// Get original null values.
   207  	ni := (n.nulls[i/8] >> (i % 8)) & 0x1
   208  	nj := (n.nulls[j/8] >> (j % 8)) & 0x1
   209  	// Write into the correct positions.
   210  	iMask := bitMask[i%8]
   211  	jMask := bitMask[j%8]
   212  	n.nulls[i/8] = (n.nulls[i/8] & ^iMask) | (nj << (i % 8))
   213  	n.nulls[j/8] = (n.nulls[j/8] & ^jMask) | (ni << (j % 8))
   214  }
   215  
   216  // set copies over a slice [args.SrcStartIdx: args.SrcEndIdx] of
   217  // args.Src.Nulls() and puts it into this nulls starting at args.DestIdx. If
   218  // the length of this nulls is smaller than args.DestIdx, then this nulls is
   219  // extended; otherwise, any overlapping old values are overwritten, and this
   220  // nulls is also extended if necessary.
   221  func (n *Nulls) set(args SliceArgs) {
   222  	if args.SrcStartIdx == args.SrcEndIdx {
   223  		return
   224  	}
   225  	toDuplicate := args.SrcEndIdx - args.SrcStartIdx
   226  	outputLen := args.DestIdx + toDuplicate
   227  	// We will need ceil(outputLen/8) bytes to encode the combined nulls.
   228  	needed := (outputLen-1)/8 + 1
   229  	current := len(n.nulls)
   230  	if current < needed {
   231  		n.nulls = append(n.nulls, filledNulls[:needed-current]...)
   232  	}
   233  	// First, we unset the whole range that is overwritten. If there are any NULL
   234  	// values in the source, those will be copied over below, one at a time.
   235  	n.UnsetNullRange(args.DestIdx, args.DestIdx+toDuplicate)
   236  	if args.Src.MaybeHasNulls() {
   237  		src := args.Src.Nulls()
   238  		if args.Sel != nil {
   239  			for i := 0; i < toDuplicate; i++ {
   240  				if src.NullAt(args.Sel[args.SrcStartIdx+i]) {
   241  					n.SetNull(args.DestIdx + i)
   242  				}
   243  			}
   244  		} else {
   245  			for i := 0; i < toDuplicate; i++ {
   246  				// TODO(yuzefovich): this can be done more efficiently with a bitwise OR:
   247  				// like n.nulls[i] |= vec.nulls[i].
   248  				if src.NullAt(args.SrcStartIdx + i) {
   249  					n.SetNull(args.DestIdx + i)
   250  				}
   251  			}
   252  		}
   253  	}
   254  }
   255  
   256  // Slice returns a new Nulls representing a slice of the current Nulls from
   257  // [start, end).
   258  func (n *Nulls) Slice(start int, end int) Nulls {
   259  	startUnsigned, endUnsigned := uint64(start), uint64(end)
   260  	if !n.maybeHasNulls {
   261  		return NewNulls(end - start)
   262  	}
   263  	if start >= end {
   264  		return NewNulls(0)
   265  	}
   266  	s := NewNulls(end - start)
   267  	s.maybeHasNulls = true
   268  	mod := startUnsigned % 8
   269  	startIdx := start / 8
   270  	if mod == 0 {
   271  		copy(s.nulls, n.nulls[startIdx:])
   272  	} else {
   273  		for i := range s.nulls {
   274  			// If start is not a multiple of 8, we need to shift over the bitmap
   275  			// to have the first index correspond.
   276  			s.nulls[i] = n.nulls[startIdx+i] >> mod
   277  			if startIdx+i+1 < len(n.nulls) {
   278  				// And now bitwise or the remaining bits with the bits we want to
   279  				// bring over from the next index.
   280  				s.nulls[i] |= n.nulls[startIdx+i+1] << (8 - mod)
   281  			}
   282  		}
   283  	}
   284  	// Zero out any trailing bits in the final byte.
   285  	endBits := (endUnsigned - startUnsigned) % 8
   286  	if endBits != 0 {
   287  		mask := onesMask << endBits
   288  		s.nulls[len(s.nulls)-1] |= mask
   289  	}
   290  	return s
   291  }
   292  
   293  // NullBitmap returns the null bitmap.
   294  func (n *Nulls) NullBitmap() []byte {
   295  	return n.nulls
   296  }
   297  
   298  // SetNullBitmap sets the null bitmap. size corresponds to how many elements
   299  // this bitmap represents. The bits past the end of this size will be set to
   300  // valid.
   301  func (n *Nulls) SetNullBitmap(bm []byte, size int) {
   302  	n.nulls = bm
   303  	n.maybeHasNulls = false
   304  	// Set all indices as valid past the last element.
   305  	if len(bm) > 0 && size != 0 {
   306  		// Set the last bits in the last element in which we want to preserve null
   307  		// information. mod, if non-zero, is the number of bits we don't want to
   308  		// overwrite (otherwise all bits are important). Note that we cast size to a
   309  		// uint64 to avoid extra instructions when modding.
   310  		mod := uint64(size) % 8
   311  		endIdx := size - 1
   312  		if mod != 0 {
   313  			bm[endIdx/8] |= onesMask << mod
   314  		}
   315  		// Fill the rest of the bitmap.
   316  		for i := (endIdx / 8) + 1; i < len(bm); {
   317  			i += copy(bm[i:], filledNulls[:])
   318  		}
   319  	}
   320  
   321  	for i := 0; i < len(bm); i++ {
   322  		if bm[i] != onesMask {
   323  			n.maybeHasNulls = true
   324  			return
   325  		}
   326  	}
   327  }
   328  
   329  // Or returns a new Nulls vector where NullAt(i) iff n1.NullAt(i) or
   330  // n2.NullAt(i).
   331  func (n *Nulls) Or(n2 *Nulls) *Nulls {
   332  	// For simplicity, enforce that len(n.nulls) <= len(n2.nulls).
   333  	if len(n.nulls) > len(n2.nulls) {
   334  		n, n2 = n2, n
   335  	}
   336  	nulls := make([]byte, len(n2.nulls))
   337  	if n.maybeHasNulls && n2.maybeHasNulls {
   338  		for i := 0; i < len(n.nulls); i++ {
   339  			nulls[i] = n.nulls[i] & n2.nulls[i]
   340  		}
   341  		// If n2 is longer, we can just copy the remainder.
   342  		copy(nulls[len(n.nulls):], n2.nulls[len(n.nulls):])
   343  	} else if n.maybeHasNulls {
   344  		copy(nulls, n.nulls)
   345  	} else if n2.maybeHasNulls {
   346  		copy(nulls, n2.nulls)
   347  	}
   348  	return &Nulls{
   349  		maybeHasNulls: n.maybeHasNulls || n2.maybeHasNulls,
   350  		nulls:         nulls,
   351  	}
   352  }
   353  
   354  // Copy returns a copy of n which can be modified independently.
   355  func (n *Nulls) Copy() Nulls {
   356  	c := Nulls{
   357  		maybeHasNulls: n.maybeHasNulls,
   358  		nulls:         make([]byte, len(n.nulls)),
   359  	}
   360  	copy(c.nulls, n.nulls)
   361  	return c
   362  }