github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/col/coldata/bytes_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package coldata
    12  
    13  import (
    14  	"bytes"
    15  	"fmt"
    16  	"math/rand"
    17  	"strings"
    18  	"testing"
    19  	"unsafe"
    20  
    21  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    22  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    23  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    24  	"github.com/cockroachdb/errors"
    25  	"github.com/stretchr/testify/require"
    26  )
    27  
    28  type bytesMethod int
    29  
    30  const (
    31  	set bytesMethod = iota
    32  	window
    33  	copySlice
    34  	appendSlice
    35  	appendVal
    36  )
    37  
    38  func (m bytesMethod) String() string {
    39  	switch m {
    40  	case set:
    41  		return "Set"
    42  	case window:
    43  		return "Window"
    44  	case copySlice:
    45  		return "CopySlice"
    46  	case appendSlice:
    47  		return "AppendSlice"
    48  	case appendVal:
    49  		return "AppendVal"
    50  	default:
    51  		panic(fmt.Sprintf("unknown bytes method %d", m))
    52  	}
    53  }
    54  
    55  var bytesMethods = []bytesMethod{set, window, copySlice, appendSlice, appendVal}
    56  
    57  // applyMethodsAndVerify applies the given methods on b1 and a reference
    58  // [][]byte implementation and checks if the results are equal. If
    59  // selfReferencingSources is true, this is an indication by the caller that we
    60  // are testing an edge case where the source for copies/appends refers to the
    61  // destination. In cases where *Bytes updates itself under the hood, we also
    62  // update the corresponding b2Source to mirror the behavior.
    63  func applyMethodsAndVerify(
    64  	rng *rand.Rand,
    65  	b1, b1Source *Bytes,
    66  	b2, b2Source [][]byte,
    67  	methods []bytesMethod,
    68  	selfReferencingSources bool,
    69  ) error {
    70  	if err := verifyEqual(b1, b2); err != nil {
    71  		return errors.Wrap(err, "arguments should start as equal")
    72  	}
    73  	if err := verifyEqual(b1Source, b2Source); err != nil {
    74  		return errors.Wrap(err, "argument sources should start as equal")
    75  	}
    76  	debugString := fmt.Sprintf("\ninitial:\n%s\n", b1)
    77  	for _, m := range methods {
    78  		n := b1.Len()
    79  		if n != len(b2) {
    80  			return errors.Errorf("length mismatch between flat and reference: %d != %d", n, len(b2))
    81  		}
    82  		sourceN := b1Source.Len()
    83  		if sourceN != len(b2Source) {
    84  			return errors.Errorf("length mismatch between flat and reference sources: %d != %d", sourceN, len(b2Source))
    85  		}
    86  		debugString += m.String()
    87  		switch m {
    88  		case set:
    89  			// Can only Set starting from maxSetIndex.
    90  			i := b1.maxSetIndex + rng.Intn(b1.Len()-b1.maxSetIndex)
    91  			new := make([]byte, rng.Intn(16))
    92  			rng.Read(new)
    93  			debugString += fmt.Sprintf("(%d, %v)", i, new)
    94  			b1.Set(i, new)
    95  			b2[i] = new
    96  		case window:
    97  			start := rng.Intn(n)
    98  			end := rng.Intn(n + 1)
    99  			if start > end {
   100  				end = start + 1
   101  			}
   102  			debugString += fmt.Sprintf("(%d, %d)", start, end)
   103  			b1Window := b1.Window(start, end)
   104  			b2Window := b2[start:end]
   105  			// b1Window is not allowed to be modified, so we check explicitly whether
   106  			// it equals the reference, and we do not update b1 and b2.
   107  			b1Window.AssertOffsetsAreNonDecreasing(b1Window.Len())
   108  			debugString += fmt.Sprintf("\n%s\n", b1Window)
   109  			if err := verifyEqual(b1Window, b2Window); err != nil {
   110  				return errors.Wrapf(err,
   111  					"\ndebugString:\n%s\nflat:\n%s\nreference:\n%s",
   112  					debugString, b1Window.String(), prettyByteSlice(b2Window))
   113  			}
   114  			continue
   115  		case copySlice, appendSlice:
   116  			// Generate a length-inclusive destIdx.
   117  			destIdx := rng.Intn(n + 1)
   118  			srcStartIdx := rng.Intn(sourceN)
   119  			srcEndIdx := rng.Intn(sourceN)
   120  			if srcStartIdx > srcEndIdx {
   121  				srcEndIdx = srcStartIdx + 1
   122  			} else if srcStartIdx == srcEndIdx {
   123  				// Avoid whittling down our destination slice.
   124  				srcStartIdx = 0
   125  				srcEndIdx = sourceN
   126  			}
   127  			debugString += fmt.Sprintf("(%d, %d, %d)", destIdx, srcStartIdx, srcEndIdx)
   128  			var numNewVals int
   129  			if m == copySlice {
   130  				b1.CopySlice(b1Source, destIdx, srcStartIdx, srcEndIdx)
   131  				numNewVals = copy(b2[destIdx:], b2Source[srcStartIdx:srcEndIdx])
   132  			} else {
   133  				b1.AppendSlice(b1Source, destIdx, srcStartIdx, srcEndIdx)
   134  				b2 = append(b2[:destIdx], b2Source[srcStartIdx:srcEndIdx]...)
   135  				if selfReferencingSources {
   136  					b1Source = b1
   137  					b2Source = b2
   138  				}
   139  				numNewVals = srcEndIdx - srcStartIdx
   140  			}
   141  			// Deep copy the copied/appended byte slices.
   142  			b2Slice := b2[destIdx : destIdx+numNewVals]
   143  			for i := range b2Slice {
   144  				b2Slice[i] = append([]byte(nil), b2Slice[i]...)
   145  			}
   146  		case appendVal:
   147  			v := make([]byte, 16)
   148  			rng.Read(v)
   149  			debugString += fmt.Sprintf("(%v)", v)
   150  			b1.AppendVal(v)
   151  			b2 = append(b2, v)
   152  			if selfReferencingSources {
   153  				b1Source = b1
   154  				b2Source = b2
   155  			}
   156  		default:
   157  			return errors.Errorf("unknown method name: %s", m)
   158  		}
   159  		b1.AssertOffsetsAreNonDecreasing(b1.Len())
   160  		debugString += fmt.Sprintf("\n%s\n", b1)
   161  		if err := verifyEqual(b1, b2); err != nil {
   162  			return errors.Wrapf(err,
   163  				"\ndebugString:\n%s\nflat (maxSetIdx=%d):\n%s\nreference:\n%s",
   164  				debugString, b1.maxSetIndex, b1.String(), prettyByteSlice(b2))
   165  		}
   166  	}
   167  	return nil
   168  }
   169  
   170  func verifyEqual(flat *Bytes, b [][]byte) error {
   171  	if flat.Len() != len(b) {
   172  		return errors.Errorf("mismatched lengths %d != %d", flat.Len(), len(b))
   173  	}
   174  	for i := range b {
   175  		if !bytes.Equal(b[i], flat.Get(i)) {
   176  			return errors.Errorf("mismatch at index %d", i)
   177  		}
   178  	}
   179  	return nil
   180  }
   181  
   182  func prettyByteSlice(b [][]byte) string {
   183  	var builder strings.Builder
   184  	for i := range b {
   185  		builder.WriteString(
   186  			fmt.Sprintf("%d: %v\n", i, b[i]),
   187  		)
   188  	}
   189  	return builder.String()
   190  }
   191  
   192  func TestBytesRefImpl(t *testing.T) {
   193  	defer leaktest.AfterTest(t)()
   194  
   195  	rng, _ := randutil.NewPseudoRand()
   196  
   197  	const (
   198  		maxNumberOfCalls = 64
   199  		maxLength        = 16
   200  		nRuns            = 100
   201  	)
   202  
   203  	for nRun := 0; nRun < nRuns; nRun++ {
   204  		n := 1 + rng.Intn(maxLength)
   205  
   206  		flat := NewBytes(n)
   207  		reference := make([][]byte, n)
   208  		for i := 0; i < n; i++ {
   209  			v := make([]byte, rng.Intn(16))
   210  			rng.Read(v)
   211  			flat.Set(i, append([]byte(nil), v...))
   212  			reference[i] = append([]byte(nil), v...)
   213  		}
   214  
   215  		// Make a pair of sources to copy/append from. Use the destination variables
   216  		// with a certain probability.
   217  		sourceN := n
   218  		flatSource := flat
   219  		referenceSource := reference
   220  		selfReferencingSources := true
   221  		if rng.Float64() < 0.5 {
   222  			selfReferencingSources = false
   223  			sourceN = 1 + rng.Intn(maxLength)
   224  			flatSource = NewBytes(sourceN)
   225  			referenceSource = make([][]byte, sourceN)
   226  			for i := 0; i < sourceN; i++ {
   227  				v := make([]byte, rng.Intn(16))
   228  				rng.Read(v)
   229  				flatSource.Set(i, append([]byte(nil), v...))
   230  				referenceSource[i] = append([]byte(nil), v...)
   231  			}
   232  		}
   233  
   234  		if err := verifyEqual(flat, reference); err != nil {
   235  			t.Fatalf("not equal: %v\nflat:\n%sreference:\n%s", err, flat, prettyByteSlice(reference))
   236  		}
   237  
   238  		numCalls := 1 + rng.Intn(maxNumberOfCalls)
   239  		methods := make([]bytesMethod, 0, numCalls)
   240  		for i := 0; i < numCalls; i++ {
   241  			methods = append(methods, bytesMethods[rng.Intn(len(bytesMethods))])
   242  		}
   243  		if err := applyMethodsAndVerify(rng, flat, flatSource, reference, referenceSource, methods, selfReferencingSources); err != nil {
   244  			t.Logf("nRun = %d\n", nRun)
   245  			t.Fatal(err)
   246  		}
   247  	}
   248  }
   249  
   250  func TestBytes(t *testing.T) {
   251  	defer leaktest.AfterTest(t)()
   252  
   253  	t.Run("Simple", func(t *testing.T) {
   254  		b1 := NewBytes(0)
   255  		b1.AppendVal([]byte("hello"))
   256  		require.Equal(t, "hello", string(b1.Get(0)))
   257  		b1.AppendVal(nil)
   258  		require.Equal(t, []byte{}, b1.Get(1))
   259  		require.Equal(t, 2, b1.Len())
   260  		// Verify that we cannot overwrite a value.
   261  		require.Panics(
   262  			t,
   263  			func() { b1.Set(0, []byte("not allowed")) },
   264  			"should be unable to overwrite value",
   265  		)
   266  
   267  		// However, it is legal to overwrite the last value.
   268  		b1.Set(1, []byte("ok"))
   269  
   270  		// If we Reset the Bytes, we can Set any index.
   271  		b1.Reset()
   272  		b1.Set(1, []byte("new usage"))
   273  		// But not an index before that.
   274  		require.Panics(
   275  			t,
   276  			func() { b1.Set(0, []byte("still not allowed")) },
   277  			"should be unable to overwrite value",
   278  		)
   279  
   280  		// Same with Reset.
   281  		b1.Reset()
   282  		b1.Set(1, []byte("reset new usage"))
   283  	})
   284  
   285  	t.Run("Append", func(t *testing.T) {
   286  		b1 := NewBytes(0)
   287  		b2 := NewBytes(0)
   288  		b2.AppendVal([]byte("source bytes value"))
   289  		b1.AppendVal([]byte("one"))
   290  		b1.AppendVal([]byte("two"))
   291  		// Truncate b1.
   292  		require.Equal(t, 2, b1.Len())
   293  		b1.AppendSlice(b2, 0, 0, 0)
   294  		require.Equal(t, 0, b1.Len())
   295  
   296  		b1.AppendVal([]byte("hello again"))
   297  
   298  		// Try appending b2 3 times. The first time will overwrite the current
   299  		// present value in b1.
   300  		for i := 0; i < 3; i++ {
   301  			b1.AppendSlice(b2, i, 0, b2.Len())
   302  			require.Equal(t, i+1, b1.Len())
   303  			for j := 0; j <= i; j++ {
   304  				require.Equal(t, "source bytes value", string(b1.Get(j)))
   305  			}
   306  		}
   307  
   308  		b2 = NewBytes(0)
   309  		b2.AppendVal([]byte("hello again"))
   310  		b2.AppendVal([]byte("hello again"))
   311  		b2.AppendVal([]byte("hello again"))
   312  		// Try to append only a subset of the source keeping the first element of
   313  		// b1 intact.
   314  		b1.AppendSlice(b2, 1, 1, 2)
   315  		require.Equal(t, 2, b1.Len())
   316  		require.Equal(t, "source bytes value", string(b1.Get(0)))
   317  		require.Equal(t, "hello again", string(b1.Get(1)))
   318  	})
   319  
   320  	t.Run("Copy", func(t *testing.T) {
   321  		b1 := NewBytes(0)
   322  		b2 := NewBytes(0)
   323  		b1.AppendVal([]byte("one"))
   324  		b1.AppendVal([]byte("two"))
   325  		b1.AppendVal([]byte("three"))
   326  
   327  		b2.AppendVal([]byte("source one"))
   328  		b2.AppendVal([]byte("source two"))
   329  
   330  		// Copy "source two" into "two"'s position. This also tests that elements
   331  		// following the copied element are correctly shifted.
   332  		b1.CopySlice(b2, 1, 1, 2)
   333  		require.Equal(t, 3, b1.Len())
   334  		require.Equal(t, "one", string(b1.Get(0)))
   335  		require.Equal(t, "source two", string(b1.Get(1)))
   336  		require.Equal(t, "three", string(b1.Get(2)))
   337  
   338  		// Copy will only copy as many elements as there is capacity for. In this
   339  		// call, the copy starts at index 2, so there is only capacity for one
   340  		// element.
   341  		b1.CopySlice(b2, 2, 0, b2.Len())
   342  		require.Equal(t, "one", string(b1.Get(0)))
   343  		require.Equal(t, "source two", string(b1.Get(1)))
   344  		require.Equal(t, "source one", string(b1.Get(2)))
   345  
   346  		// Set the length to 1 and  follow it with testing a full overwrite of only
   347  		// one element.
   348  		b1.SetLength(1)
   349  		require.Equal(t, 1, b1.Len())
   350  		b1.CopySlice(b2, 0, 0, b2.Len())
   351  		require.Equal(t, 1, b1.Len())
   352  		require.Equal(t, "source one", string(b1.Get(0)))
   353  
   354  		// Verify a full overwrite with a non-zero source start index.
   355  		b1.CopySlice(b2, 0, 1, b2.Len())
   356  		require.Equal(t, 1, b1.Len())
   357  		require.Equal(t, "source two", string(b1.Get(0)))
   358  	})
   359  
   360  	t.Run("Window", func(t *testing.T) {
   361  		b1 := NewBytes(0)
   362  		b1.AppendVal([]byte("one"))
   363  		b1.AppendVal([]byte("two"))
   364  		b1.AppendVal([]byte("three"))
   365  
   366  		w := b1.Window(0, 3)
   367  		require.NotEqual(t, unsafe.Pointer(b1), unsafe.Pointer(w), "Bytes.Window should create a new object")
   368  		b2 := b1.Window(1, 2)
   369  		require.Equal(t, "one", string(b1.Get(0)))
   370  		require.Equal(t, "two", string(b1.Get(1)))
   371  		require.Equal(t, "two", string(b2.Get(0)))
   372  
   373  		require.Panics(t, func() { b2.AppendVal([]byte("four")) }, "appending to the window into b1 should have panicked")
   374  	})
   375  
   376  	t.Run("String", func(t *testing.T) {
   377  		b1 := NewBytes(0)
   378  		vals := [][]byte{
   379  			[]byte("one"),
   380  			[]byte("two"),
   381  			[]byte("three"),
   382  		}
   383  		for i := range vals {
   384  			b1.AppendVal(vals[i])
   385  		}
   386  
   387  		// The values should be printed using the String function.
   388  		b1String := b1.String()
   389  		require.True(
   390  			t,
   391  			strings.Contains(b1String, fmt.Sprint(vals[0])) &&
   392  				strings.Contains(b1String, fmt.Sprint(vals[1])) &&
   393  				strings.Contains(b1String, fmt.Sprint(vals[2])),
   394  		)
   395  
   396  		// A window on the bytes should only print the values included in the
   397  		// window.
   398  		b2String := b1.Window(1, 3).String()
   399  		require.True(
   400  			t,
   401  			!strings.Contains(b2String, fmt.Sprint(vals[0])) &&
   402  				strings.Contains(b2String, fmt.Sprint(vals[1])) &&
   403  				strings.Contains(b2String, fmt.Sprint(vals[2])),
   404  		)
   405  	})
   406  
   407  	t.Run("InvariantSimple", func(t *testing.T) {
   408  		b1 := NewBytes(8)
   409  		b1.Set(0, []byte("zero"))
   410  		other := b1.Window(0, 2)
   411  		other.AssertOffsetsAreNonDecreasing(2)
   412  
   413  		b2 := NewBytes(8)
   414  		b2.Set(0, []byte("zero"))
   415  		b2.Set(2, []byte("two"))
   416  		other = b2.Window(0, 4)
   417  		other.AssertOffsetsAreNonDecreasing(4)
   418  	})
   419  }
   420  
   421  // TestAppendBytesWithLastNull makes sure that Append handles correctly the
   422  // case when the last element of Bytes vector is NULL.
   423  func TestAppendBytesWithLastNull(t *testing.T) {
   424  	src := NewMemColumn(types.Bytes, 4, StandardColumnFactory)
   425  	sel := []int{0, 2, 3}
   426  	src.Bytes().Set(0, []byte("zero"))
   427  	src.Nulls().SetNull(1)
   428  	src.Bytes().Set(2, []byte("two"))
   429  	src.Nulls().SetNull(3)
   430  	sliceArgs := SliceArgs{
   431  		Src:         src,
   432  		DestIdx:     0,
   433  		SrcStartIdx: 0,
   434  		SrcEndIdx:   len(sel),
   435  	}
   436  	dest := NewMemColumn(types.Bytes, 3, StandardColumnFactory)
   437  	expected := NewMemColumn(types.Bytes, 3, StandardColumnFactory)
   438  	for _, withSel := range []bool{false, true} {
   439  		t.Run(fmt.Sprintf("AppendBytesWithLastNull/sel=%t", withSel), func(t *testing.T) {
   440  			expected.Nulls().UnsetNulls()
   441  			expected.Bytes().Reset()
   442  			if withSel {
   443  				sliceArgs.Sel = sel
   444  				for expIdx, srcIdx := range sel {
   445  					if src.Nulls().NullAt(srcIdx) {
   446  						expected.Nulls().SetNull(expIdx)
   447  					} else {
   448  						expected.Bytes().Set(expIdx, src.Bytes().Get(srcIdx))
   449  					}
   450  				}
   451  			} else {
   452  				sliceArgs.Sel = nil
   453  				for expIdx := 0; expIdx < 3; expIdx++ {
   454  					if src.Nulls().NullAt(expIdx) {
   455  						expected.Nulls().SetNull(expIdx)
   456  					} else {
   457  						expected.Bytes().Set(expIdx, src.Bytes().Get(expIdx))
   458  					}
   459  				}
   460  			}
   461  			expected.Bytes().UpdateOffsetsToBeNonDecreasing(3)
   462  			// require.Equal checks the "string-ified" versions of the vectors for
   463  			// equality. Bytes uses maxSetIndex to print out "truncated"
   464  			// representation, so we manually update it (Vec.Append will use
   465  			// AppendVal function that updates maxSetIndex itself).
   466  			expected.Bytes().maxSetIndex = 2
   467  			dest.Append(sliceArgs)
   468  			require.Equal(t, expected, dest)
   469  		})
   470  	}
   471  }