github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/col/coldata/nulls.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package coldata
    12  
    13  // zeroedNulls is a zeroed out slice representing a bitmap of size MaxBatchSize.
    14  // This is copied to efficiently set all nulls.
    15  var zeroedNulls [(MaxBatchSize-1)/8 + 1]byte
    16  
    17  // filledNulls is a slice representing a bitmap of size MaxBatchSize with every
    18  // single bit set.
    19  var filledNulls [(MaxBatchSize-1)/8 + 1]byte
    20  
    21  // bitMask[i] is a byte with a single bit set at i.
    22  var bitMask = [8]byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80}
    23  
    24  // flippedBitMask[i] is a byte with all bits set except at i.
    25  var flippedBitMask = [8]byte{0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F}
    26  
    27  // onesMask is a byte where every bit is set to 1.
    28  const onesMask = byte(255)
    29  
    30  func init() {
    31  	// Initializes filledNulls to the desired slice.
    32  	for i := range filledNulls {
    33  		filledNulls[i] = onesMask
    34  	}
    35  }
    36  
// Nulls represents a list of potentially nullable values using a bitmap. It is
// intended to be used alongside a slice (e.g. in the Vec interface) -- if the
// ith bit is off (0), then the ith element in that slice should be treated as
// NULL; if the bit is on (1), the element is valid.
type Nulls struct {
	// nulls is the bitmap itself. The bit for element i is stored in byte i/8
	// at bit position i%8, counting from the least-significant bit.
	nulls []byte
	// maybeHasNulls is a best-effort representation of whether or not the
	// vector has any null values set. If it is false, there definitely will be
	// no null values. If it is true, there may or may not be null values.
	maybeHasNulls bool
}
    47  
    48  // NewNulls returns a new nulls vector, initialized with a length.
    49  func NewNulls(len int) Nulls {
    50  	if len > 0 {
    51  		n := Nulls{
    52  			nulls: make([]byte, (len-1)/8+1),
    53  		}
    54  		n.UnsetNulls()
    55  		return n
    56  	}
    57  	return Nulls{
    58  		nulls: make([]byte, 0),
    59  	}
    60  }
    61  
// MaybeHasNulls returns true if the column possibly has any null values, and
// returns false if the column definitely has no null values. It simply reports
// the maybeHasNulls flag and does not inspect the bitmap.
func (n *Nulls) MaybeHasNulls() bool {
	return n.maybeHasNulls
}
    67  
    68  // SetNullRange sets all the values in [startIdx, endIdx) to null.
    69  func (n *Nulls) SetNullRange(startIdx int, endIdx int) {
    70  	start, end := uint64(startIdx), uint64(endIdx)
    71  	if start >= end {
    72  		return
    73  	}
    74  
    75  	n.maybeHasNulls = true
    76  	sIdx := start / 8
    77  	eIdx := (end - 1) / 8
    78  
    79  	// Case where mask only spans one byte.
    80  	if sIdx == eIdx {
    81  		mask := onesMask >> (8 - (start % 8))
    82  		// Mask the end if needed.
    83  		if end%8 != 0 {
    84  			mask |= onesMask << (end % 8)
    85  		}
    86  		n.nulls[sIdx] &= mask
    87  		return
    88  	}
    89  
    90  	// Case where mask spans at least two bytes.
    91  	mask := onesMask >> (8 - (start % 8))
    92  	n.nulls[sIdx] &= mask
    93  
    94  	if end%8 == 0 {
    95  		n.nulls[eIdx] = 0
    96  	} else {
    97  		mask = onesMask << (end % 8)
    98  		n.nulls[eIdx] &= mask
    99  	}
   100  
   101  	for idx := int(sIdx + 1); idx < int(eIdx); {
   102  		idx += copy(n.nulls[idx:eIdx], zeroedNulls[:])
   103  	}
   104  }
   105  
   106  // UnsetNullRange unsets all the nulls in the range [startIdx, endIdx).
   107  // After using UnsetNullRange, n might not contain any null values,
   108  // but maybeHasNulls could still be true.
   109  func (n *Nulls) UnsetNullRange(startIdx, endIdx int) {
   110  	start, end := uint64(startIdx), uint64(endIdx)
   111  	if start >= end {
   112  		return
   113  	}
   114  	if !n.maybeHasNulls {
   115  		return
   116  	}
   117  
   118  	sIdx := start / 8
   119  	eIdx := (end - 1) / 8
   120  
   121  	// Case where mask only spans one byte.
   122  	if sIdx == eIdx {
   123  		mask := onesMask << (start % 8)
   124  		if end%8 != 0 {
   125  			mask = mask & (onesMask >> (8 - (end % 8)))
   126  		}
   127  		n.nulls[sIdx] |= mask
   128  		return
   129  	}
   130  
   131  	// Case where mask spans at least two bytes.
   132  	mask := onesMask << (start % 8)
   133  	n.nulls[sIdx] |= mask
   134  	if end%8 == 0 {
   135  		n.nulls[eIdx] = onesMask
   136  	} else {
   137  		mask = onesMask >> (8 - (end % 8))
   138  		n.nulls[eIdx] |= mask
   139  	}
   140  
   141  	for idx := int(sIdx + 1); idx < int(eIdx); {
   142  		idx += copy(n.nulls[idx:eIdx], filledNulls[:])
   143  	}
   144  }
   145  
   146  // Truncate sets all values with index greater than or equal to start to null.
   147  func (n *Nulls) Truncate(start int) {
   148  	end := len(n.nulls) * 8
   149  	n.SetNullRange(start, end)
   150  }
   151  
   152  // UnsetNulls sets the column to have no null values.
   153  func (n *Nulls) UnsetNulls() {
   154  	n.maybeHasNulls = false
   155  
   156  	startIdx := 0
   157  	for startIdx < len(n.nulls) {
   158  		startIdx += copy(n.nulls[startIdx:], filledNulls[:])
   159  	}
   160  }
   161  
   162  // UnsetNullsAfter sets all values with index greater than or equal to idx to
   163  // non-null.
   164  func (n *Nulls) UnsetNullsAfter(idx int) {
   165  	end := len(n.nulls) * 8
   166  	n.UnsetNullRange(idx, end)
   167  }
   168  
   169  // SetNulls sets the column to have only null values.
   170  func (n *Nulls) SetNulls() {
   171  	n.maybeHasNulls = true
   172  
   173  	startIdx := 0
   174  	for startIdx < len(n.nulls) {
   175  		startIdx += copy(n.nulls[startIdx:], zeroedNulls[:])
   176  	}
   177  }
   178  
   179  // NullAt returns true if the ith value of the column is null.
   180  func (n *Nulls) NullAt(i int) bool {
   181  	return n.nulls[i>>3]&bitMask[i&7] == 0
   182  }
   183  
   184  // NullAtChecked returns true if the ith value of the column is null and allows
   185  // an uninitialized Nulls to represent "no nulls".
   186  func (n *Nulls) NullAtChecked(i int) bool {
   187  	if n.nulls != nil {
   188  		return n.NullAt(i)
   189  	}
   190  	return false
   191  }
   192  
   193  // SetNull sets the ith value of the column to null.
   194  func (n *Nulls) SetNull(i int) {
   195  	n.maybeHasNulls = true
   196  	n.nulls[i>>3] &= flippedBitMask[i&7]
   197  }
   198  
   199  // UnsetNull unsets the ith values of the column.
   200  func (n *Nulls) UnsetNull(i int) {
   201  	n.nulls[i>>3] |= bitMask[i&7]
   202  }
   203  
   204  // setSmallRange is a helper that copies over a slice [startIdx, startIdx+toSet)
   205  // of src and puts it into this nulls starting at destIdx.
   206  func (n *Nulls) setSmallRange(src *Nulls, destIdx, startIdx, toSet int) {
   207  	for i := 0; i < toSet; i++ {
   208  		if src.NullAt(startIdx + i) {
   209  			n.SetNull(destIdx + i)
   210  		} else {
   211  			n.UnsetNull(destIdx + i)
   212  		}
   213  	}
   214  }
   215  
   216  // set copies over a slice [args.SrcStartIdx: args.SrcEndIdx] of
   217  // args.Src.Nulls() and puts it into this nulls starting at args.DestIdx. If
   218  // the length of this nulls is smaller than args.DestIdx, then this nulls is
   219  // extended; otherwise, any overlapping old values are overwritten, and this
   220  // nulls is also extended if necessary.
   221  func (n *Nulls) set(args SliceArgs) {
   222  	if args.SrcStartIdx == args.SrcEndIdx {
   223  		return
   224  	}
   225  	toDuplicate := args.SrcEndIdx - args.SrcStartIdx
   226  	outputLen := args.DestIdx + toDuplicate
   227  	// We will need ceil(outputLen/8) bytes to encode the combined nulls.
   228  	needed := (outputLen-1)/8 + 1
   229  	current := len(n.nulls)
   230  	if current < needed {
   231  		n.nulls = append(n.nulls, filledNulls[:needed-current]...)
   232  	}
   233  	if args.Src.MaybeHasNulls() {
   234  		n.maybeHasNulls = true
   235  		src := args.Src.Nulls()
   236  		if args.Sel != nil {
   237  			// With the selection vector present, we can't do any smarts, so we
   238  			// unset the whole range that is overwritten and then set new null
   239  			// values one at a time.
   240  			n.UnsetNullRange(args.DestIdx, args.DestIdx+toDuplicate)
   241  			for i := 0; i < toDuplicate; i++ {
   242  				if src.NullAt(args.Sel[args.SrcStartIdx+i]) {
   243  					n.SetNull(args.DestIdx + i)
   244  				}
   245  			}
   246  		} else {
   247  			if toDuplicate > 16 && args.DestIdx%8 == args.SrcStartIdx%8 {
   248  				// We have a special (but a very common) case when we're
   249  				// copying a lot of elements, and the shifts within the nulls
   250  				// vectors for the destination and the source ranges are the
   251  				// same, so we can optimize the performance here.
   252  				// The fact that shifts are the same allows us to copy all
   253  				// elements as is (except for the first and the last which are
   254  				// handled separately).
   255  				dstStart := args.DestIdx / 8
   256  				srcStart := args.SrcStartIdx / 8
   257  				srcEnd := (args.SrcEndIdx-1)/8 + 1
   258  				// Since the first and the last elements might not be fully
   259  				// included in the range to be set, we're not touching them.
   260  				copy(n.nulls[dstStart+1:], src.nulls[srcStart+1:srcEnd-1])
   261  				// Handle the first element.
   262  				n.setSmallRange(src, args.DestIdx, args.SrcStartIdx, 8-args.DestIdx%8)
   263  				// Handle the last element.
   264  				toSet := (args.DestIdx + toDuplicate) % 8
   265  				if toSet == 0 {
   266  					toSet = 8
   267  				}
   268  				offset := toDuplicate - toSet
   269  				n.setSmallRange(src, args.DestIdx+offset, args.SrcStartIdx+offset, toSet)
   270  				return
   271  			}
   272  			n.UnsetNullRange(args.DestIdx, args.DestIdx+toDuplicate)
   273  			for i := 0; i < toDuplicate; i++ {
   274  				if src.NullAt(args.SrcStartIdx + i) {
   275  					n.SetNull(args.DestIdx + i)
   276  				}
   277  			}
   278  		}
   279  	} else {
   280  		// No nulls in the source, so we unset the whole range that is
   281  		// overwritten.
   282  		n.UnsetNullRange(args.DestIdx, args.DestIdx+toDuplicate)
   283  	}
   284  }
   285  
   286  // Slice returns a new Nulls representing a slice of the current Nulls from
   287  // [start, end).
   288  func (n *Nulls) Slice(start int, end int) Nulls {
   289  	startUnsigned, endUnsigned := uint64(start), uint64(end)
   290  	if !n.maybeHasNulls {
   291  		return NewNulls(end - start)
   292  	}
   293  	if start >= end {
   294  		return NewNulls(0)
   295  	}
   296  	s := NewNulls(end - start)
   297  	s.maybeHasNulls = true
   298  	mod := startUnsigned % 8
   299  	startIdx := start / 8
   300  	if mod == 0 {
   301  		copy(s.nulls, n.nulls[startIdx:])
   302  	} else {
   303  		for i := range s.nulls {
   304  			// If start is not a multiple of 8, we need to shift over the bitmap
   305  			// to have the first index correspond.
   306  			s.nulls[i] = n.nulls[startIdx+i] >> mod
   307  			if startIdx+i+1 < len(n.nulls) {
   308  				// And now bitwise or the remaining bits with the bits we want to
   309  				// bring over from the next index.
   310  				s.nulls[i] |= n.nulls[startIdx+i+1] << (8 - mod)
   311  			}
   312  		}
   313  	}
   314  	// Zero out any trailing bits in the final byte.
   315  	endBits := (endUnsigned - startUnsigned) % 8
   316  	if endBits != 0 {
   317  		mask := onesMask << endBits
   318  		s.nulls[len(s.nulls)-1] |= mask
   319  	}
   320  	return s
   321  }
   322  
// NullBitmap returns the null bitmap. The returned slice is the internal one,
// not a copy, so writes through it are visible to this Nulls.
func (n *Nulls) NullBitmap() []byte {
	return n.nulls
}
   327  
   328  // SetNullBitmap sets the validity of first size elements in n according to bm.
   329  // The bits past the end of this size will be set to valid. It is assumed that
   330  // n has enough capacity to store size number of elements. If bm is zero length
   331  // or if size is 0, then all elements will be set to valid.
   332  func (n *Nulls) SetNullBitmap(bm []byte, size int) {
   333  	if len(bm) == 0 || size == 0 {
   334  		n.UnsetNulls()
   335  		return
   336  	}
   337  	numBytesToCopy := (size-1)/8 + 1
   338  	copy(n.nulls, bm[:numBytesToCopy])
   339  	n.UnsetNullsAfter(size)
   340  	// Compute precisely whether we have any invalid values or not.
   341  	n.maybeHasNulls = false
   342  	for i := 0; i < numBytesToCopy; i++ {
   343  		if n.nulls[i] != onesMask {
   344  			n.maybeHasNulls = true
   345  			return
   346  		}
   347  	}
   348  }
   349  
   350  // Or returns a new Nulls vector where NullAt(i) iff n1.NullAt(i) or
   351  // n2.NullAt(i).
   352  func (n Nulls) Or(n2 Nulls) Nulls {
   353  	// For simplicity, enforce that len(n.nulls) <= len(n2.nulls).
   354  	if len(n.nulls) > len(n2.nulls) {
   355  		n, n2 = n2, n
   356  	}
   357  	res := Nulls{
   358  		maybeHasNulls: n.maybeHasNulls || n2.maybeHasNulls,
   359  		nulls:         make([]byte, len(n2.nulls)),
   360  	}
   361  	if n.maybeHasNulls && n2.maybeHasNulls {
   362  		for i := 0; i < len(n.nulls); i++ {
   363  			res.nulls[i] = n.nulls[i] & n2.nulls[i]
   364  		}
   365  		// If n2 is longer, we can just copy the remainder.
   366  		copy(res.nulls[len(n.nulls):], n2.nulls[len(n.nulls):])
   367  	} else if n.maybeHasNulls {
   368  		copy(res.nulls, n.nulls)
   369  		// We need to set all positions after len(n.nulls) to valid.
   370  		res.UnsetNullsAfter(8 * len(n.nulls))
   371  	} else if n2.maybeHasNulls {
   372  		// Since n2 is not of a smaller length, we can copy its bitmap without
   373  		// having to do anything extra.
   374  		copy(res.nulls, n2.nulls)
   375  	} else {
   376  		// We need to set the whole bitmap to valid.
   377  		res.UnsetNulls()
   378  	}
   379  	return res
   380  }
   381  
   382  // makeCopy returns a copy of n which can be modified independently.
   383  func (n *Nulls) makeCopy() Nulls {
   384  	c := Nulls{
   385  		maybeHasNulls: n.maybeHasNulls,
   386  		nulls:         make([]byte, len(n.nulls)),
   387  	}
   388  	copy(c.nulls, n.nulls)
   389  	return c
   390  }
   391  
   392  // Copy copies the contents of other into n.
   393  func (n *Nulls) Copy(other *Nulls) {
   394  	n.maybeHasNulls = other.maybeHasNulls
   395  	if cap(n.nulls) < len(other.nulls) {
   396  		n.nulls = make([]byte, len(other.nulls))
   397  	} else {
   398  		n.nulls = n.nulls[:len(other.nulls)]
   399  	}
   400  	copy(n.nulls, other.nulls)
   401  }