github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/col/coldata/nulls.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package coldata 12 13 // zeroedNulls is a zeroed out slice representing a bitmap of size MaxBatchSize. 14 // This is copied to efficiently set all nulls. 15 var zeroedNulls [(MaxBatchSize-1)/8 + 1]byte 16 17 // filledNulls is a slice representing a bitmap of size MaxBatchSize with every 18 // single bit set. 19 var filledNulls [(MaxBatchSize-1)/8 + 1]byte 20 21 // bitMask[i] is a byte with a single bit set at i. 22 var bitMask = [8]byte{0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80} 23 24 // flippedBitMask[i] is a byte with all bits set except at i. 25 var flippedBitMask = [8]byte{0xFE, 0xFD, 0xFB, 0xF7, 0xEF, 0xDF, 0xBF, 0x7F} 26 27 // onesMask is a byte where every bit is set to 1. 28 const onesMask = byte(255) 29 30 func init() { 31 // Initializes filledNulls to the desired slice. 32 for i := range filledNulls { 33 filledNulls[i] = onesMask 34 } 35 } 36 37 // Nulls represents a list of potentially nullable values using a bitmap. It is 38 // intended to be used alongside a slice (e.g. in the Vec interface) -- if the 39 // ith bit is off, then the ith element in that slice should be treated as NULL. 40 type Nulls struct { 41 nulls []byte 42 // maybeHasNulls is a best-effort representation of whether or not the 43 // vector has any null values set. If it is false, there definitely will be 44 // no null values. If it is true, there may or may not be null values. 45 maybeHasNulls bool 46 } 47 48 // NewNulls returns a new nulls vector, initialized with a length. 49 func NewNulls(len int) Nulls { 50 if len > 0 { 51 n := Nulls{ 52 nulls: make([]byte, (len-1)/8+1), 53 } 54 n.UnsetNulls() 55 return n 56 } 57 return Nulls{ 58 nulls: make([]byte, 0), 59 } 60 } 61 62 // MaybeHasNulls returns true if the column possibly has any null values, and 63 // returns false if the column definitely has no null values. 64 func (n *Nulls) MaybeHasNulls() bool { 65 return n.maybeHasNulls 66 } 67 68 // SetNullRange sets all the values in [startIdx, endIdx) to null. 69 func (n *Nulls) SetNullRange(startIdx int, endIdx int) { 70 start, end := uint64(startIdx), uint64(endIdx) 71 if start >= end { 72 return 73 } 74 75 n.maybeHasNulls = true 76 sIdx := start / 8 77 eIdx := (end - 1) / 8 78 79 // Case where mask only spans one byte. 80 if sIdx == eIdx { 81 mask := onesMask >> (8 - (start % 8)) 82 // Mask the end if needed. 83 if end%8 != 0 { 84 mask |= onesMask << (end % 8) 85 } 86 n.nulls[sIdx] &= mask 87 return 88 } 89 90 // Case where mask spans at least two bytes. 91 mask := onesMask >> (8 - (start % 8)) 92 n.nulls[sIdx] &= mask 93 94 if end%8 == 0 { 95 n.nulls[eIdx] = 0 96 } else { 97 mask = onesMask << (end % 8) 98 n.nulls[eIdx] &= mask 99 } 100 101 for i := sIdx + 1; i < eIdx; i++ { 102 n.nulls[i] = 0 103 } 104 } 105 106 // UnsetNullRange unsets all the nulls in the range [startIdx, endIdx). 107 // After using UnsetNullRange, n might not contain any null values, 108 // but maybeHasNulls could still be true. 109 func (n *Nulls) UnsetNullRange(startIdx, endIdx int) { 110 start, end := uint64(startIdx), uint64(endIdx) 111 if start >= end { 112 return 113 } 114 if !n.maybeHasNulls { 115 return 116 } 117 118 sIdx := start / 8 119 eIdx := (end - 1) / 8 120 121 // Case where mask only spans one byte. 122 if sIdx == eIdx { 123 mask := onesMask << (start % 8) 124 if end%8 != 0 { 125 mask = mask & (onesMask >> (8 - (end % 8))) 126 } 127 n.nulls[sIdx] |= mask 128 return 129 } 130 131 // Case where mask spans at least two bytes. 132 mask := onesMask << (start % 8) 133 n.nulls[sIdx] |= mask 134 if end%8 == 0 { 135 n.nulls[eIdx] = onesMask 136 } else { 137 mask = onesMask >> (8 - (end % 8)) 138 n.nulls[eIdx] |= mask 139 } 140 141 for i := sIdx + 1; i < eIdx; i++ { 142 n.nulls[i] = onesMask 143 } 144 } 145 146 // Truncate sets all values with index greater than or equal to start to null. 147 func (n *Nulls) Truncate(start int) { 148 end := len(n.nulls) * 8 149 n.SetNullRange(start, end) 150 } 151 152 // UnsetNulls sets the column to have no null values. 153 func (n *Nulls) UnsetNulls() { 154 n.maybeHasNulls = false 155 156 startIdx := 0 157 for startIdx < len(n.nulls) { 158 startIdx += copy(n.nulls[startIdx:], filledNulls[:]) 159 } 160 } 161 162 // UnsetNullsAfter sets all values with index greater than or equal to idx to 163 // non-null. 164 func (n *Nulls) UnsetNullsAfter(idx int) { 165 end := len(n.nulls) * 8 166 n.UnsetNullRange(idx, end) 167 } 168 169 // SetNulls sets the column to have only null values. 170 func (n *Nulls) SetNulls() { 171 n.maybeHasNulls = true 172 173 startIdx := 0 174 for startIdx < len(n.nulls) { 175 startIdx += copy(n.nulls[startIdx:], zeroedNulls[:]) 176 } 177 } 178 179 // NullAt returns true if the ith value of the column is null. 180 func (n *Nulls) NullAt(i int) bool { 181 return n.nulls[i>>3]&bitMask[i&7] == 0 182 } 183 184 // SetNull sets the ith value of the column to null. 185 func (n *Nulls) SetNull(i int) { 186 n.maybeHasNulls = true 187 n.nulls[i>>3] &= flippedBitMask[i&7] 188 } 189 190 // UnsetNull unsets the ith values of the column. 191 func (n *Nulls) UnsetNull(i int) { 192 n.nulls[i>>3] |= bitMask[i&7] 193 } 194 195 // Remove the unused warning. 196 var ( 197 n = Nulls{} 198 _ = n.swap 199 ) 200 201 // swap swaps the null values at the argument indices. We implement the logic 202 // directly on the byte array rather than case on the result of NullAt to avoid 203 // having to take some branches. 204 func (n *Nulls) swap(iIdx, jIdx int) { 205 i, j := uint64(iIdx), uint64(jIdx) 206 // Get original null values. 207 ni := (n.nulls[i/8] >> (i % 8)) & 0x1 208 nj := (n.nulls[j/8] >> (j % 8)) & 0x1 209 // Write into the correct positions. 210 iMask := bitMask[i%8] 211 jMask := bitMask[j%8] 212 n.nulls[i/8] = (n.nulls[i/8] & ^iMask) | (nj << (i % 8)) 213 n.nulls[j/8] = (n.nulls[j/8] & ^jMask) | (ni << (j % 8)) 214 } 215 216 // set copies over a slice [args.SrcStartIdx: args.SrcEndIdx] of 217 // args.Src.Nulls() and puts it into this nulls starting at args.DestIdx. If 218 // the length of this nulls is smaller than args.DestIdx, then this nulls is 219 // extended; otherwise, any overlapping old values are overwritten, and this 220 // nulls is also extended if necessary. 221 func (n *Nulls) set(args SliceArgs) { 222 if args.SrcStartIdx == args.SrcEndIdx { 223 return 224 } 225 toDuplicate := args.SrcEndIdx - args.SrcStartIdx 226 outputLen := args.DestIdx + toDuplicate 227 // We will need ceil(outputLen/8) bytes to encode the combined nulls. 228 needed := (outputLen-1)/8 + 1 229 current := len(n.nulls) 230 if current < needed { 231 n.nulls = append(n.nulls, filledNulls[:needed-current]...) 232 } 233 // First, we unset the whole range that is overwritten. If there are any NULL 234 // values in the source, those will be copied over below, one at a time. 235 n.UnsetNullRange(args.DestIdx, args.DestIdx+toDuplicate) 236 if args.Src.MaybeHasNulls() { 237 src := args.Src.Nulls() 238 if args.Sel != nil { 239 for i := 0; i < toDuplicate; i++ { 240 if src.NullAt(args.Sel[args.SrcStartIdx+i]) { 241 n.SetNull(args.DestIdx + i) 242 } 243 } 244 } else { 245 for i := 0; i < toDuplicate; i++ { 246 // TODO(yuzefovich): this can be done more efficiently with a bitwise OR: 247 // like n.nulls[i] |= vec.nulls[i]. 248 if src.NullAt(args.SrcStartIdx + i) { 249 n.SetNull(args.DestIdx + i) 250 } 251 } 252 } 253 } 254 } 255 256 // Slice returns a new Nulls representing a slice of the current Nulls from 257 // [start, end). 258 func (n *Nulls) Slice(start int, end int) Nulls { 259 startUnsigned, endUnsigned := uint64(start), uint64(end) 260 if !n.maybeHasNulls { 261 return NewNulls(end - start) 262 } 263 if start >= end { 264 return NewNulls(0) 265 } 266 s := NewNulls(end - start) 267 s.maybeHasNulls = true 268 mod := startUnsigned % 8 269 startIdx := start / 8 270 if mod == 0 { 271 copy(s.nulls, n.nulls[startIdx:]) 272 } else { 273 for i := range s.nulls { 274 // If start is not a multiple of 8, we need to shift over the bitmap 275 // to have the first index correspond. 276 s.nulls[i] = n.nulls[startIdx+i] >> mod 277 if startIdx+i+1 < len(n.nulls) { 278 // And now bitwise or the remaining bits with the bits we want to 279 // bring over from the next index. 280 s.nulls[i] |= n.nulls[startIdx+i+1] << (8 - mod) 281 } 282 } 283 } 284 // Zero out any trailing bits in the final byte. 285 endBits := (endUnsigned - startUnsigned) % 8 286 if endBits != 0 { 287 mask := onesMask << endBits 288 s.nulls[len(s.nulls)-1] |= mask 289 } 290 return s 291 } 292 293 // NullBitmap returns the null bitmap. 294 func (n *Nulls) NullBitmap() []byte { 295 return n.nulls 296 } 297 298 // SetNullBitmap sets the null bitmap. size corresponds to how many elements 299 // this bitmap represents. The bits past the end of this size will be set to 300 // valid. 301 func (n *Nulls) SetNullBitmap(bm []byte, size int) { 302 n.nulls = bm 303 n.maybeHasNulls = false 304 // Set all indices as valid past the last element. 305 if len(bm) > 0 && size != 0 { 306 // Set the last bits in the last element in which we want to preserve null 307 // information. mod, if non-zero, is the number of bits we don't want to 308 // overwrite (otherwise all bits are important). Note that we cast size to a 309 // uint64 to avoid extra instructions when modding. 310 mod := uint64(size) % 8 311 endIdx := size - 1 312 if mod != 0 { 313 bm[endIdx/8] |= onesMask << mod 314 } 315 // Fill the rest of the bitmap. 316 for i := (endIdx / 8) + 1; i < len(bm); { 317 i += copy(bm[i:], filledNulls[:]) 318 } 319 } 320 321 for i := 0; i < len(bm); i++ { 322 if bm[i] != onesMask { 323 n.maybeHasNulls = true 324 return 325 } 326 } 327 } 328 329 // Or returns a new Nulls vector where NullAt(i) iff n1.NullAt(i) or 330 // n2.NullAt(i). 331 func (n *Nulls) Or(n2 *Nulls) *Nulls { 332 // For simplicity, enforce that len(n.nulls) <= len(n2.nulls). 333 if len(n.nulls) > len(n2.nulls) { 334 n, n2 = n2, n 335 } 336 nulls := make([]byte, len(n2.nulls)) 337 if n.maybeHasNulls && n2.maybeHasNulls { 338 for i := 0; i < len(n.nulls); i++ { 339 nulls[i] = n.nulls[i] & n2.nulls[i] 340 } 341 // If n2 is longer, we can just copy the remainder. 342 copy(nulls[len(n.nulls):], n2.nulls[len(n.nulls):]) 343 } else if n.maybeHasNulls { 344 copy(nulls, n.nulls) 345 } else if n2.maybeHasNulls { 346 copy(nulls, n2.nulls) 347 } 348 return &Nulls{ 349 maybeHasNulls: n.maybeHasNulls || n2.maybeHasNulls, 350 nulls: nulls, 351 } 352 } 353 354 // Copy returns a copy of n which can be modified independently. 355 func (n *Nulls) Copy() Nulls { 356 c := Nulls{ 357 maybeHasNulls: n.maybeHasNulls, 358 nulls: make([]byte, len(n.nulls)), 359 } 360 copy(c.nulls, n.nulls) 361 return c 362 }