github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/col/coldata/nulls.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package coldata 12 13 // zeroedNulls is a zeroed out slice representing a bitmap of size MaxBatchSize. 14 // This is copied to efficiently set all nulls. 15 var zeroedNulls [(MaxBatchSize-1)/8 + 1]byte 16 17 // filledNulls is a slice representing a bitmap of size MaxBatchSize with every 18 // single bit set. 19 var filledNulls [(MaxBatchSize-1)/8 + 1]byte 20 21 // bitMask[i] is a byte with a single bit set at i. 22 var bitMask = [8]byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80} 23 24 // flippedBitMask[i] is a byte with all bits set except at i. 25 var flippedBitMask = [8]byte{0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F} 26 27 // onesMask is a byte where every bit is set to 1. 28 const onesMask = byte(255) 29 30 func init() { 31 // Initializes filledNulls to the desired slice. 32 for i := range filledNulls { 33 filledNulls[i] = onesMask 34 } 35 } 36 37 // Nulls represents a list of potentially nullable values using a bitmap. It is 38 // intended to be used alongside a slice (e.g. in the Vec interface) -- if the 39 // ith bit is off, then the ith element in that slice should be treated as NULL. 40 type Nulls struct { 41 nulls []byte 42 // maybeHasNulls is a best-effort representation of whether or not the 43 // vector has any null values set. If it is false, there definitely will be 44 // no null values. If it is true, there may or may not be null values. 45 maybeHasNulls bool 46 } 47 48 // NewNulls returns a new nulls vector, initialized with a length. 49 func NewNulls(len int) Nulls { 50 if len > 0 { 51 n := Nulls{ 52 nulls: make([]byte, (len-1)/8+1), 53 } 54 n.UnsetNulls() 55 return n 56 } 57 return Nulls{ 58 nulls: make([]byte, 0), 59 } 60 } 61 62 // MaybeHasNulls returns true if the column possibly has any null values, and 63 // returns false if the column definitely has no null values. 64 func (n *Nulls) MaybeHasNulls() bool { 65 return n.maybeHasNulls 66 } 67 68 // SetNullRange sets all the values in [startIdx, endIdx) to null. 69 func (n *Nulls) SetNullRange(startIdx int, endIdx int) { 70 start, end := uint64(startIdx), uint64(endIdx) 71 if start >= end { 72 return 73 } 74 75 n.maybeHasNulls = true 76 sIdx := start / 8 77 eIdx := (end - 1) / 8 78 79 // Case where mask only spans one byte. 80 if sIdx == eIdx { 81 mask := onesMask >> (8 - (start % 8)) 82 // Mask the end if needed. 83 if end%8 != 0 { 84 mask |= onesMask << (end % 8) 85 } 86 n.nulls[sIdx] &= mask 87 return 88 } 89 90 // Case where mask spans at least two bytes. 91 mask := onesMask >> (8 - (start % 8)) 92 n.nulls[sIdx] &= mask 93 94 if end%8 == 0 { 95 n.nulls[eIdx] = 0 96 } else { 97 mask = onesMask << (end % 8) 98 n.nulls[eIdx] &= mask 99 } 100 101 for idx := int(sIdx + 1); idx < int(eIdx); { 102 idx += copy(n.nulls[idx:eIdx], zeroedNulls[:]) 103 } 104 } 105 106 // UnsetNullRange unsets all the nulls in the range [startIdx, endIdx). 107 // After using UnsetNullRange, n might not contain any null values, 108 // but maybeHasNulls could still be true. 109 func (n *Nulls) UnsetNullRange(startIdx, endIdx int) { 110 start, end := uint64(startIdx), uint64(endIdx) 111 if start >= end { 112 return 113 } 114 if !n.maybeHasNulls { 115 return 116 } 117 118 sIdx := start / 8 119 eIdx := (end - 1) / 8 120 121 // Case where mask only spans one byte. 122 if sIdx == eIdx { 123 mask := onesMask << (start % 8) 124 if end%8 != 0 { 125 mask = mask & (onesMask >> (8 - (end % 8))) 126 } 127 n.nulls[sIdx] |= mask 128 return 129 } 130 131 // Case where mask spans at least two bytes. 132 mask := onesMask << (start % 8) 133 n.nulls[sIdx] |= mask 134 if end%8 == 0 { 135 n.nulls[eIdx] = onesMask 136 } else { 137 mask = onesMask >> (8 - (end % 8)) 138 n.nulls[eIdx] |= mask 139 } 140 141 for idx := int(sIdx + 1); idx < int(eIdx); { 142 idx += copy(n.nulls[idx:eIdx], filledNulls[:]) 143 } 144 } 145 146 // Truncate sets all values with index greater than or equal to start to null. 147 func (n *Nulls) Truncate(start int) { 148 end := len(n.nulls) * 8 149 n.SetNullRange(start, end) 150 } 151 152 // UnsetNulls sets the column to have no null values. 153 func (n *Nulls) UnsetNulls() { 154 n.maybeHasNulls = false 155 156 startIdx := 0 157 for startIdx < len(n.nulls) { 158 startIdx += copy(n.nulls[startIdx:], filledNulls[:]) 159 } 160 } 161 162 // UnsetNullsAfter sets all values with index greater than or equal to idx to 163 // non-null. 164 func (n *Nulls) UnsetNullsAfter(idx int) { 165 end := len(n.nulls) * 8 166 n.UnsetNullRange(idx, end) 167 } 168 169 // SetNulls sets the column to have only null values. 170 func (n *Nulls) SetNulls() { 171 n.maybeHasNulls = true 172 173 startIdx := 0 174 for startIdx < len(n.nulls) { 175 startIdx += copy(n.nulls[startIdx:], zeroedNulls[:]) 176 } 177 } 178 179 // NullAt returns true if the ith value of the column is null. 180 func (n *Nulls) NullAt(i int) bool { 181 return n.nulls[i>>3]&bitMask[i&7] == 0 182 } 183 184 // NullAtChecked returns true if the ith value of the column is null and allows 185 // an uninitialized Nulls to represent "no nulls". 186 func (n *Nulls) NullAtChecked(i int) bool { 187 if n.nulls != nil { 188 return n.NullAt(i) 189 } 190 return false 191 } 192 193 // SetNull sets the ith value of the column to null. 194 func (n *Nulls) SetNull(i int) { 195 n.maybeHasNulls = true 196 n.nulls[i>>3] &= flippedBitMask[i&7] 197 } 198 199 // UnsetNull unsets the ith values of the column. 200 func (n *Nulls) UnsetNull(i int) { 201 n.nulls[i>>3] |= bitMask[i&7] 202 } 203 204 // setSmallRange is a helper that copies over a slice [startIdx, startIdx+toSet) 205 // of src and puts it into this nulls starting at destIdx. 206 func (n *Nulls) setSmallRange(src *Nulls, destIdx, startIdx, toSet int) { 207 for i := 0; i < toSet; i++ { 208 if src.NullAt(startIdx + i) { 209 n.SetNull(destIdx + i) 210 } else { 211 n.UnsetNull(destIdx + i) 212 } 213 } 214 } 215 216 // set copies over a slice [args.SrcStartIdx: args.SrcEndIdx] of 217 // args.Src.Nulls() and puts it into this nulls starting at args.DestIdx. If 218 // the length of this nulls is smaller than args.DestIdx, then this nulls is 219 // extended; otherwise, any overlapping old values are overwritten, and this 220 // nulls is also extended if necessary. 221 func (n *Nulls) set(args SliceArgs) { 222 if args.SrcStartIdx == args.SrcEndIdx { 223 return 224 } 225 toDuplicate := args.SrcEndIdx - args.SrcStartIdx 226 outputLen := args.DestIdx + toDuplicate 227 // We will need ceil(outputLen/8) bytes to encode the combined nulls. 228 needed := (outputLen-1)/8 + 1 229 current := len(n.nulls) 230 if current < needed { 231 n.nulls = append(n.nulls, filledNulls[:needed-current]...) 232 } 233 if args.Src.MaybeHasNulls() { 234 n.maybeHasNulls = true 235 src := args.Src.Nulls() 236 if args.Sel != nil { 237 // With the selection vector present, we can't do any smarts, so we 238 // unset the whole range that is overwritten and then set new null 239 // values one at a time. 240 n.UnsetNullRange(args.DestIdx, args.DestIdx+toDuplicate) 241 for i := 0; i < toDuplicate; i++ { 242 if src.NullAt(args.Sel[args.SrcStartIdx+i]) { 243 n.SetNull(args.DestIdx + i) 244 } 245 } 246 } else { 247 if toDuplicate > 16 && args.DestIdx%8 == args.SrcStartIdx%8 { 248 // We have a special (but a very common) case when we're 249 // copying a lot of elements, and the shifts within the nulls 250 // vectors for the destination and the source ranges are the 251 // same, so we can optimize the performance here. 252 // The fact that shifts are the same allows us to copy all 253 // elements as is (except for the first and the last which are 254 // handled separately). 255 dstStart := args.DestIdx / 8 256 srcStart := args.SrcStartIdx / 8 257 srcEnd := (args.SrcEndIdx-1)/8 + 1 258 // Since the first and the last elements might not be fully 259 // included in the range to be set, we're not touching them. 260 copy(n.nulls[dstStart+1:], src.nulls[srcStart+1:srcEnd-1]) 261 // Handle the first element. 262 n.setSmallRange(src, args.DestIdx, args.SrcStartIdx, 8-args.DestIdx%8) 263 // Handle the last element. 264 toSet := (args.DestIdx + toDuplicate) % 8 265 if toSet == 0 { 266 toSet = 8 267 } 268 offset := toDuplicate - toSet 269 n.setSmallRange(src, args.DestIdx+offset, args.SrcStartIdx+offset, toSet) 270 return 271 } 272 n.UnsetNullRange(args.DestIdx, args.DestIdx+toDuplicate) 273 for i := 0; i < toDuplicate; i++ { 274 if src.NullAt(args.SrcStartIdx + i) { 275 n.SetNull(args.DestIdx + i) 276 } 277 } 278 } 279 } else { 280 // No nulls in the source, so we unset the whole range that is 281 // overwritten. 282 n.UnsetNullRange(args.DestIdx, args.DestIdx+toDuplicate) 283 } 284 } 285 286 // Slice returns a new Nulls representing a slice of the current Nulls from 287 // [start, end). 288 func (n *Nulls) Slice(start int, end int) Nulls { 289 startUnsigned, endUnsigned := uint64(start), uint64(end) 290 if !n.maybeHasNulls { 291 return NewNulls(end - start) 292 } 293 if start >= end { 294 return NewNulls(0) 295 } 296 s := NewNulls(end - start) 297 s.maybeHasNulls = true 298 mod := startUnsigned % 8 299 startIdx := start / 8 300 if mod == 0 { 301 copy(s.nulls, n.nulls[startIdx:]) 302 } else { 303 for i := range s.nulls { 304 // If start is not a multiple of 8, we need to shift over the bitmap 305 // to have the first index correspond. 306 s.nulls[i] = n.nulls[startIdx+i] >> mod 307 if startIdx+i+1 < len(n.nulls) { 308 // And now bitwise or the remaining bits with the bits we want to 309 // bring over from the next index. 310 s.nulls[i] |= n.nulls[startIdx+i+1] << (8 - mod) 311 } 312 } 313 } 314 // Zero out any trailing bits in the final byte. 315 endBits := (endUnsigned - startUnsigned) % 8 316 if endBits != 0 { 317 mask := onesMask << endBits 318 s.nulls[len(s.nulls)-1] |= mask 319 } 320 return s 321 } 322 323 // NullBitmap returns the null bitmap. 324 func (n *Nulls) NullBitmap() []byte { 325 return n.nulls 326 } 327 328 // SetNullBitmap sets the validity of first size elements in n according to bm. 329 // The bits past the end of this size will be set to valid. It is assumed that 330 // n has enough capacity to store size number of elements. If bm is zero length 331 // or if size is 0, then all elements will be set to valid. 332 func (n *Nulls) SetNullBitmap(bm []byte, size int) { 333 if len(bm) == 0 || size == 0 { 334 n.UnsetNulls() 335 return 336 } 337 numBytesToCopy := (size-1)/8 + 1 338 copy(n.nulls, bm[:numBytesToCopy]) 339 n.UnsetNullsAfter(size) 340 // Compute precisely whether we have any invalid values or not. 341 n.maybeHasNulls = false 342 for i := 0; i < numBytesToCopy; i++ { 343 if n.nulls[i] != onesMask { 344 n.maybeHasNulls = true 345 return 346 } 347 } 348 } 349 350 // Or returns a new Nulls vector where NullAt(i) iff n1.NullAt(i) or 351 // n2.NullAt(i). 352 func (n Nulls) Or(n2 Nulls) Nulls { 353 // For simplicity, enforce that len(n.nulls) <= len(n2.nulls). 354 if len(n.nulls) > len(n2.nulls) { 355 n, n2 = n2, n 356 } 357 res := Nulls{ 358 maybeHasNulls: n.maybeHasNulls || n2.maybeHasNulls, 359 nulls: make([]byte, len(n2.nulls)), 360 } 361 if n.maybeHasNulls && n2.maybeHasNulls { 362 for i := 0; i < len(n.nulls); i++ { 363 res.nulls[i] = n.nulls[i] & n2.nulls[i] 364 } 365 // If n2 is longer, we can just copy the remainder. 366 copy(res.nulls[len(n.nulls):], n2.nulls[len(n.nulls):]) 367 } else if n.maybeHasNulls { 368 copy(res.nulls, n.nulls) 369 // We need to set all positions after len(n.nulls) to valid. 370 res.UnsetNullsAfter(8 * len(n.nulls)) 371 } else if n2.maybeHasNulls { 372 // Since n2 is not of a smaller length, we can copy its bitmap without 373 // having to do anything extra. 374 copy(res.nulls, n2.nulls) 375 } else { 376 // We need to set the whole bitmap to valid. 377 res.UnsetNulls() 378 } 379 return res 380 } 381 382 // makeCopy returns a copy of n which can be modified independently. 383 func (n *Nulls) makeCopy() Nulls { 384 c := Nulls{ 385 maybeHasNulls: n.maybeHasNulls, 386 nulls: make([]byte, len(n.nulls)), 387 } 388 copy(c.nulls, n.nulls) 389 return c 390 } 391 392 // Copy copies the contents of other into n. 393 func (n *Nulls) Copy(other *Nulls) { 394 n.maybeHasNulls = other.maybeHasNulls 395 if cap(n.nulls) < len(other.nulls) { 396 n.nulls = make([]byte, len(other.nulls)) 397 } else { 398 n.nulls = n.nulls[:len(other.nulls)] 399 } 400 copy(n.nulls, other.nulls) 401 }