github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/col/coldata/bytes_test.go (about) 1 // Copyright 2019 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package coldata 12 13 import ( 14 "bytes" 15 "fmt" 16 "math/rand" 17 "strings" 18 "testing" 19 "unsafe" 20 21 "github.com/cockroachdb/cockroach/pkg/sql/types" 22 "github.com/cockroachdb/cockroach/pkg/util/leaktest" 23 "github.com/cockroachdb/cockroach/pkg/util/randutil" 24 "github.com/cockroachdb/errors" 25 "github.com/stretchr/testify/require" 26 ) 27 28 type bytesMethod int 29 30 const ( 31 set bytesMethod = iota 32 window 33 copySlice 34 appendSlice 35 appendVal 36 ) 37 38 func (m bytesMethod) String() string { 39 switch m { 40 case set: 41 return "Set" 42 case window: 43 return "Window" 44 case copySlice: 45 return "CopySlice" 46 case appendSlice: 47 return "AppendSlice" 48 case appendVal: 49 return "AppendVal" 50 default: 51 panic(fmt.Sprintf("unknown bytes method %d", m)) 52 } 53 } 54 55 var bytesMethods = []bytesMethod{set, window, copySlice, appendSlice, appendVal} 56 57 // applyMethodsAndVerify applies the given methods on b1 and a reference 58 // [][]byte implementation and checks if the results are equal. If 59 // selfReferencingSources is true, this is an indication by the caller that we 60 // are testing an edge case where the source for copies/appends refers to the 61 // destination. In cases where *Bytes updates itself under the hood, we also 62 // update the corresponding b2Source to mirror the behavior. 63 func applyMethodsAndVerify( 64 rng *rand.Rand, 65 b1, b1Source *Bytes, 66 b2, b2Source [][]byte, 67 methods []bytesMethod, 68 selfReferencingSources bool, 69 ) error { 70 if err := verifyEqual(b1, b2); err != nil { 71 return errors.Wrap(err, "arguments should start as equal") 72 } 73 if err := verifyEqual(b1Source, b2Source); err != nil { 74 return errors.Wrap(err, "argument sources should start as equal") 75 } 76 debugString := fmt.Sprintf("\ninitial:\n%s\n", b1) 77 for _, m := range methods { 78 n := b1.Len() 79 if n != len(b2) { 80 return errors.Errorf("length mismatch between flat and reference: %d != %d", n, len(b2)) 81 } 82 sourceN := b1Source.Len() 83 if sourceN != len(b2Source) { 84 return errors.Errorf("length mismatch between flat and reference sources: %d != %d", sourceN, len(b2Source)) 85 } 86 debugString += m.String() 87 switch m { 88 case set: 89 // Can only Set starting from maxSetIndex. 90 i := b1.maxSetIndex + rng.Intn(b1.Len()-b1.maxSetIndex) 91 new := make([]byte, rng.Intn(16)) 92 rng.Read(new) 93 debugString += fmt.Sprintf("(%d, %v)", i, new) 94 b1.Set(i, new) 95 b2[i] = new 96 case window: 97 start := rng.Intn(n) 98 end := rng.Intn(n + 1) 99 if start > end { 100 end = start + 1 101 } 102 debugString += fmt.Sprintf("(%d, %d)", start, end) 103 b1Window := b1.Window(start, end) 104 b2Window := b2[start:end] 105 // b1Window is not allowed to be modified, so we check explicitly whether 106 // it equals the reference, and we do not update b1 and b2. 107 b1Window.AssertOffsetsAreNonDecreasing(b1Window.Len()) 108 debugString += fmt.Sprintf("\n%s\n", b1Window) 109 if err := verifyEqual(b1Window, b2Window); err != nil { 110 return errors.Wrapf(err, 111 "\ndebugString:\n%s\nflat:\n%s\nreference:\n%s", 112 debugString, b1Window.String(), prettyByteSlice(b2Window)) 113 } 114 continue 115 case copySlice, appendSlice: 116 // Generate a length-inclusive destIdx. 117 destIdx := rng.Intn(n + 1) 118 srcStartIdx := rng.Intn(sourceN) 119 srcEndIdx := rng.Intn(sourceN) 120 if srcStartIdx > srcEndIdx { 121 srcEndIdx = srcStartIdx + 1 122 } else if srcStartIdx == srcEndIdx { 123 // Avoid whittling down our destination slice. 124 srcStartIdx = 0 125 srcEndIdx = sourceN 126 } 127 debugString += fmt.Sprintf("(%d, %d, %d)", destIdx, srcStartIdx, srcEndIdx) 128 var numNewVals int 129 if m == copySlice { 130 b1.CopySlice(b1Source, destIdx, srcStartIdx, srcEndIdx) 131 numNewVals = copy(b2[destIdx:], b2Source[srcStartIdx:srcEndIdx]) 132 } else { 133 b1.AppendSlice(b1Source, destIdx, srcStartIdx, srcEndIdx) 134 b2 = append(b2[:destIdx], b2Source[srcStartIdx:srcEndIdx]...) 135 if selfReferencingSources { 136 b1Source = b1 137 b2Source = b2 138 } 139 numNewVals = srcEndIdx - srcStartIdx 140 } 141 // Deep copy the copied/appended byte slices. 142 b2Slice := b2[destIdx : destIdx+numNewVals] 143 for i := range b2Slice { 144 b2Slice[i] = append([]byte(nil), b2Slice[i]...) 145 } 146 case appendVal: 147 v := make([]byte, 16) 148 rng.Read(v) 149 debugString += fmt.Sprintf("(%v)", v) 150 b1.AppendVal(v) 151 b2 = append(b2, v) 152 if selfReferencingSources { 153 b1Source = b1 154 b2Source = b2 155 } 156 default: 157 return errors.Errorf("unknown method name: %s", m) 158 } 159 b1.AssertOffsetsAreNonDecreasing(b1.Len()) 160 debugString += fmt.Sprintf("\n%s\n", b1) 161 if err := verifyEqual(b1, b2); err != nil { 162 return errors.Wrapf(err, 163 "\ndebugString:\n%s\nflat (maxSetIdx=%d):\n%s\nreference:\n%s", 164 debugString, b1.maxSetIndex, b1.String(), prettyByteSlice(b2)) 165 } 166 } 167 return nil 168 } 169 170 func verifyEqual(flat *Bytes, b [][]byte) error { 171 if flat.Len() != len(b) { 172 return errors.Errorf("mismatched lengths %d != %d", flat.Len(), len(b)) 173 } 174 for i := range b { 175 if !bytes.Equal(b[i], flat.Get(i)) { 176 return errors.Errorf("mismatch at index %d", i) 177 } 178 } 179 return nil 180 } 181 182 func prettyByteSlice(b [][]byte) string { 183 var builder strings.Builder 184 for i := range b { 185 builder.WriteString( 186 fmt.Sprintf("%d: %v\n", i, b[i]), 187 ) 188 } 189 return builder.String() 190 } 191 192 func TestBytesRefImpl(t *testing.T) { 193 defer leaktest.AfterTest(t)() 194 195 rng, _ := randutil.NewPseudoRand() 196 197 const ( 198 maxNumberOfCalls = 64 199 maxLength = 16 200 nRuns = 100 201 ) 202 203 for nRun := 0; nRun < nRuns; nRun++ { 204 n := 1 + rng.Intn(maxLength) 205 206 flat := NewBytes(n) 207 reference := make([][]byte, n) 208 for i := 0; i < n; i++ { 209 v := make([]byte, rng.Intn(16)) 210 rng.Read(v) 211 flat.Set(i, append([]byte(nil), v...)) 212 reference[i] = append([]byte(nil), v...) 213 } 214 215 // Make a pair of sources to copy/append from. Use the destination variables 216 // with a certain probability. 217 sourceN := n 218 flatSource := flat 219 referenceSource := reference 220 selfReferencingSources := true 221 if rng.Float64() < 0.5 { 222 selfReferencingSources = false 223 sourceN = 1 + rng.Intn(maxLength) 224 flatSource = NewBytes(sourceN) 225 referenceSource = make([][]byte, sourceN) 226 for i := 0; i < sourceN; i++ { 227 v := make([]byte, rng.Intn(16)) 228 rng.Read(v) 229 flatSource.Set(i, append([]byte(nil), v...)) 230 referenceSource[i] = append([]byte(nil), v...) 231 } 232 } 233 234 if err := verifyEqual(flat, reference); err != nil { 235 t.Fatalf("not equal: %v\nflat:\n%sreference:\n%s", err, flat, prettyByteSlice(reference)) 236 } 237 238 numCalls := 1 + rng.Intn(maxNumberOfCalls) 239 methods := make([]bytesMethod, 0, numCalls) 240 for i := 0; i < numCalls; i++ { 241 methods = append(methods, bytesMethods[rng.Intn(len(bytesMethods))]) 242 } 243 if err := applyMethodsAndVerify(rng, flat, flatSource, reference, referenceSource, methods, selfReferencingSources); err != nil { 244 t.Logf("nRun = %d\n", nRun) 245 t.Fatal(err) 246 } 247 } 248 } 249 250 func TestBytes(t *testing.T) { 251 defer leaktest.AfterTest(t)() 252 253 t.Run("Simple", func(t *testing.T) { 254 b1 := NewBytes(0) 255 b1.AppendVal([]byte("hello")) 256 require.Equal(t, "hello", string(b1.Get(0))) 257 b1.AppendVal(nil) 258 require.Equal(t, []byte{}, b1.Get(1)) 259 require.Equal(t, 2, b1.Len()) 260 // Verify that we cannot overwrite a value. 261 require.Panics( 262 t, 263 func() { b1.Set(0, []byte("not allowed")) }, 264 "should be unable to overwrite value", 265 ) 266 267 // However, it is legal to overwrite the last value. 268 b1.Set(1, []byte("ok")) 269 270 // If we Reset the Bytes, we can Set any index. 271 b1.Reset() 272 b1.Set(1, []byte("new usage")) 273 // But not an index before that. 274 require.Panics( 275 t, 276 func() { b1.Set(0, []byte("still not allowed")) }, 277 "should be unable to overwrite value", 278 ) 279 280 // Same with Reset. 281 b1.Reset() 282 b1.Set(1, []byte("reset new usage")) 283 }) 284 285 t.Run("Append", func(t *testing.T) { 286 b1 := NewBytes(0) 287 b2 := NewBytes(0) 288 b2.AppendVal([]byte("source bytes value")) 289 b1.AppendVal([]byte("one")) 290 b1.AppendVal([]byte("two")) 291 // Truncate b1. 292 require.Equal(t, 2, b1.Len()) 293 b1.AppendSlice(b2, 0, 0, 0) 294 require.Equal(t, 0, b1.Len()) 295 296 b1.AppendVal([]byte("hello again")) 297 298 // Try appending b2 3 times. The first time will overwrite the current 299 // present value in b1. 300 for i := 0; i < 3; i++ { 301 b1.AppendSlice(b2, i, 0, b2.Len()) 302 require.Equal(t, i+1, b1.Len()) 303 for j := 0; j <= i; j++ { 304 require.Equal(t, "source bytes value", string(b1.Get(j))) 305 } 306 } 307 308 b2 = NewBytes(0) 309 b2.AppendVal([]byte("hello again")) 310 b2.AppendVal([]byte("hello again")) 311 b2.AppendVal([]byte("hello again")) 312 // Try to append only a subset of the source keeping the first element of 313 // b1 intact. 314 b1.AppendSlice(b2, 1, 1, 2) 315 require.Equal(t, 2, b1.Len()) 316 require.Equal(t, "source bytes value", string(b1.Get(0))) 317 require.Equal(t, "hello again", string(b1.Get(1))) 318 }) 319 320 t.Run("Copy", func(t *testing.T) { 321 b1 := NewBytes(0) 322 b2 := NewBytes(0) 323 b1.AppendVal([]byte("one")) 324 b1.AppendVal([]byte("two")) 325 b1.AppendVal([]byte("three")) 326 327 b2.AppendVal([]byte("source one")) 328 b2.AppendVal([]byte("source two")) 329 330 // Copy "source two" into "two"'s position. This also tests that elements 331 // following the copied element are correctly shifted. 332 b1.CopySlice(b2, 1, 1, 2) 333 require.Equal(t, 3, b1.Len()) 334 require.Equal(t, "one", string(b1.Get(0))) 335 require.Equal(t, "source two", string(b1.Get(1))) 336 require.Equal(t, "three", string(b1.Get(2))) 337 338 // Copy will only copy as many elements as there is capacity for. In this 339 // call, the copy starts at index 2, so there is only capacity for one 340 // element. 341 b1.CopySlice(b2, 2, 0, b2.Len()) 342 require.Equal(t, "one", string(b1.Get(0))) 343 require.Equal(t, "source two", string(b1.Get(1))) 344 require.Equal(t, "source one", string(b1.Get(2))) 345 346 // Set the length to 1 and follow it with testing a full overwrite of only 347 // one element. 348 b1.SetLength(1) 349 require.Equal(t, 1, b1.Len()) 350 b1.CopySlice(b2, 0, 0, b2.Len()) 351 require.Equal(t, 1, b1.Len()) 352 require.Equal(t, "source one", string(b1.Get(0))) 353 354 // Verify a full overwrite with a non-zero source start index. 355 b1.CopySlice(b2, 0, 1, b2.Len()) 356 require.Equal(t, 1, b1.Len()) 357 require.Equal(t, "source two", string(b1.Get(0))) 358 }) 359 360 t.Run("Window", func(t *testing.T) { 361 b1 := NewBytes(0) 362 b1.AppendVal([]byte("one")) 363 b1.AppendVal([]byte("two")) 364 b1.AppendVal([]byte("three")) 365 366 w := b1.Window(0, 3) 367 require.NotEqual(t, unsafe.Pointer(b1), unsafe.Pointer(w), "Bytes.Window should create a new object") 368 b2 := b1.Window(1, 2) 369 require.Equal(t, "one", string(b1.Get(0))) 370 require.Equal(t, "two", string(b1.Get(1))) 371 require.Equal(t, "two", string(b2.Get(0))) 372 373 require.Panics(t, func() { b2.AppendVal([]byte("four")) }, "appending to the window into b1 should have panicked") 374 }) 375 376 t.Run("String", func(t *testing.T) { 377 b1 := NewBytes(0) 378 vals := [][]byte{ 379 []byte("one"), 380 []byte("two"), 381 []byte("three"), 382 } 383 for i := range vals { 384 b1.AppendVal(vals[i]) 385 } 386 387 // The values should be printed using the String function. 388 b1String := b1.String() 389 require.True( 390 t, 391 strings.Contains(b1String, fmt.Sprint(vals[0])) && 392 strings.Contains(b1String, fmt.Sprint(vals[1])) && 393 strings.Contains(b1String, fmt.Sprint(vals[2])), 394 ) 395 396 // A window on the bytes should only print the values included in the 397 // window. 398 b2String := b1.Window(1, 3).String() 399 require.True( 400 t, 401 !strings.Contains(b2String, fmt.Sprint(vals[0])) && 402 strings.Contains(b2String, fmt.Sprint(vals[1])) && 403 strings.Contains(b2String, fmt.Sprint(vals[2])), 404 ) 405 }) 406 407 t.Run("InvariantSimple", func(t *testing.T) { 408 b1 := NewBytes(8) 409 b1.Set(0, []byte("zero")) 410 other := b1.Window(0, 2) 411 other.AssertOffsetsAreNonDecreasing(2) 412 413 b2 := NewBytes(8) 414 b2.Set(0, []byte("zero")) 415 b2.Set(2, []byte("two")) 416 other = b2.Window(0, 4) 417 other.AssertOffsetsAreNonDecreasing(4) 418 }) 419 } 420 421 // TestAppendBytesWithLastNull makes sure that Append handles correctly the 422 // case when the last element of Bytes vector is NULL. 423 func TestAppendBytesWithLastNull(t *testing.T) { 424 src := NewMemColumn(types.Bytes, 4, StandardColumnFactory) 425 sel := []int{0, 2, 3} 426 src.Bytes().Set(0, []byte("zero")) 427 src.Nulls().SetNull(1) 428 src.Bytes().Set(2, []byte("two")) 429 src.Nulls().SetNull(3) 430 sliceArgs := SliceArgs{ 431 Src: src, 432 DestIdx: 0, 433 SrcStartIdx: 0, 434 SrcEndIdx: len(sel), 435 } 436 dest := NewMemColumn(types.Bytes, 3, StandardColumnFactory) 437 expected := NewMemColumn(types.Bytes, 3, StandardColumnFactory) 438 for _, withSel := range []bool{false, true} { 439 t.Run(fmt.Sprintf("AppendBytesWithLastNull/sel=%t", withSel), func(t *testing.T) { 440 expected.Nulls().UnsetNulls() 441 expected.Bytes().Reset() 442 if withSel { 443 sliceArgs.Sel = sel 444 for expIdx, srcIdx := range sel { 445 if src.Nulls().NullAt(srcIdx) { 446 expected.Nulls().SetNull(expIdx) 447 } else { 448 expected.Bytes().Set(expIdx, src.Bytes().Get(srcIdx)) 449 } 450 } 451 } else { 452 sliceArgs.Sel = nil 453 for expIdx := 0; expIdx < 3; expIdx++ { 454 if src.Nulls().NullAt(expIdx) { 455 expected.Nulls().SetNull(expIdx) 456 } else { 457 expected.Bytes().Set(expIdx, src.Bytes().Get(expIdx)) 458 } 459 } 460 } 461 expected.Bytes().UpdateOffsetsToBeNonDecreasing(3) 462 // require.Equal checks the "string-ified" versions of the vectors for 463 // equality. Bytes uses maxSetIndex to print out "truncated" 464 // representation, so we manually update it (Vec.Append will use 465 // AppendVal function that updates maxSetIndex itself). 466 expected.Bytes().maxSetIndex = 2 467 dest.Append(sliceArgs) 468 require.Equal(t, expected, dest) 469 }) 470 } 471 }