github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/colcontainer/diskqueue_test.go (about)

     1  // Copyright 2019 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  package colcontainer_test
    11  
    12  import (
    13  	"context"
    14  	"flag"
    15  	"fmt"
    16  	"testing"
    17  
    18  	"github.com/cockroachdb/cockroach/pkg/col/coldata"
    19  	"github.com/cockroachdb/cockroach/pkg/col/coldatatestutils"
    20  	"github.com/cockroachdb/cockroach/pkg/sql/colcontainer"
    21  	"github.com/cockroachdb/cockroach/pkg/sql/colexecbase"
    22  	"github.com/cockroachdb/cockroach/pkg/sql/types"
    23  	"github.com/cockroachdb/cockroach/pkg/testutils/colcontainerutils"
    24  	"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
    25  	"github.com/cockroachdb/cockroach/pkg/util/leaktest"
    26  	"github.com/cockroachdb/cockroach/pkg/util/randutil"
    27  	"github.com/stretchr/testify/require"
    28  )
    29  
    30  func TestDiskQueue(t *testing.T) {
    31  	defer leaktest.AfterTest(t)()
    32  
    33  	ctx := context.Background()
    34  	queueCfg, cleanup := colcontainerutils.NewTestingDiskQueueCfg(t, true /* inMem */)
    35  	defer cleanup()
    36  
    37  	rng, _ := randutil.NewPseudoRand()
    38  	for _, rewindable := range []bool{false, true} {
    39  		for _, bufferSizeBytes := range []int{0, 16<<10 + rng.Intn(1<<20) /* 16 KiB up to 1 MiB */} {
    40  			for _, maxFileSizeBytes := range []int{10 << 10 /* 10 KiB */, 1<<20 + rng.Intn(64<<20) /* 1 MiB up to 64 MiB */} {
    41  				alwaysCompress := rng.Float64() < 0.5
    42  				diskQueueCacheMode := colcontainer.DiskQueueCacheModeDefault
    43  				// testReuseCache will test the reuse cache modes.
    44  				testReuseCache := rng.Float64() < 0.5
    45  				dequeuedProbabilityBeforeAllEnqueuesAreDone := 0.5
    46  				if testReuseCache {
    47  					dequeuedProbabilityBeforeAllEnqueuesAreDone = 0
    48  					if rng.Float64() < 0.5 {
    49  						diskQueueCacheMode = colcontainer.DiskQueueCacheModeReuseCache
    50  					} else {
    51  						diskQueueCacheMode = colcontainer.DiskQueueCacheModeClearAndReuseCache
    52  					}
    53  				}
    54  				prefix, suffix := "", fmt.Sprintf("/BufferSizeBytes=%s/MaxFileSizeBytes=%s",
    55  					humanizeutil.IBytes(int64(bufferSizeBytes)),
    56  					humanizeutil.IBytes(int64(maxFileSizeBytes)))
    57  				if rewindable {
    58  					dequeuedProbabilityBeforeAllEnqueuesAreDone = 0
    59  					prefix, suffix = "Rewindable/", ""
    60  				}
    61  				numBatches := 1 + rng.Intn(1024)
    62  				t.Run(fmt.Sprintf("%sDiskQueueCacheMode=%d/AlwaysCompress=%t%s/NumBatches=%d",
    63  					prefix, diskQueueCacheMode, alwaysCompress, suffix, numBatches), func(t *testing.T) {
    64  					// Create random input.
    65  					batches := make([]coldata.Batch, 0, numBatches)
    66  					op := coldatatestutils.NewRandomDataOp(testAllocator, rng, coldatatestutils.RandomDataOpArgs{
    67  						NumBatches: cap(batches),
    68  						BatchSize:  1 + rng.Intn(coldata.BatchSize()),
    69  						Nulls:      true,
    70  						BatchAccumulator: func(b coldata.Batch, typs []*types.T) {
    71  							batches = append(batches, coldatatestutils.CopyBatch(b, typs, testColumnFactory))
    72  						},
    73  					})
    74  					typs := op.Typs()
    75  
    76  					queueCfg.CacheMode = diskQueueCacheMode
    77  					queueCfg.SetDefaultBufferSizeBytesForCacheMode()
    78  					if !rewindable {
    79  						if !testReuseCache {
    80  							queueCfg.BufferSizeBytes = bufferSizeBytes
    81  						}
    82  						queueCfg.MaxFileSizeBytes = maxFileSizeBytes
    83  					}
    84  					queueCfg.TestingKnobs.AlwaysCompress = alwaysCompress
    85  
    86  					// Create queue.
    87  					var (
    88  						q   colcontainer.Queue
    89  						err error
    90  					)
    91  					if rewindable {
    92  						q, err = colcontainer.NewRewindableDiskQueue(ctx, typs, queueCfg, testDiskAcc)
    93  					} else {
    94  						q, err = colcontainer.NewDiskQueue(ctx, typs, queueCfg, testDiskAcc)
    95  					}
    96  					require.NoError(t, err)
    97  
    98  					// Verify that a directory was created.
    99  					directories, err := queueCfg.FS.List(queueCfg.Path)
   100  					require.NoError(t, err)
   101  					require.Equal(t, 1, len(directories))
   102  
   103  					// Run verification.
   104  					ctx := context.Background()
   105  					for {
   106  						b := op.Next(ctx)
   107  						require.NoError(t, q.Enqueue(ctx, b))
   108  						if b.Length() == 0 {
   109  							break
   110  						}
   111  						if rng.Float64() < dequeuedProbabilityBeforeAllEnqueuesAreDone {
   112  							if ok, err := q.Dequeue(ctx, b); !ok {
   113  								t.Fatal("queue incorrectly considered empty")
   114  							} else if err != nil {
   115  								t.Fatal(err)
   116  							}
   117  							coldata.AssertEquivalentBatches(t, batches[0], b)
   118  							batches = batches[1:]
   119  						}
   120  					}
   121  					numReadIterations := 1
   122  					if rewindable {
   123  						numReadIterations = 2
   124  					}
   125  					for i := 0; i < numReadIterations; i++ {
   126  						batchIdx := 0
   127  						b := coldata.NewMemBatch(typs, testColumnFactory)
   128  						for batchIdx < len(batches) {
   129  							if ok, err := q.Dequeue(ctx, b); !ok {
   130  								t.Fatal("queue incorrectly considered empty")
   131  							} else if err != nil {
   132  								t.Fatal(err)
   133  							}
   134  							coldata.AssertEquivalentBatches(t, batches[batchIdx], b)
   135  							batchIdx++
   136  						}
   137  
   138  						if testReuseCache {
   139  							// Trying to Enqueue after a Dequeue should return an error in these
   140  							// CacheModes.
   141  							require.Error(t, q.Enqueue(ctx, b))
   142  						}
   143  
   144  						if ok, err := q.Dequeue(ctx, b); ok {
   145  							if b.Length() != 0 {
   146  								t.Fatal("queue should be empty")
   147  							}
   148  						} else if err != nil {
   149  							t.Fatal(err)
   150  						}
   151  
   152  						if rewindable {
   153  							require.NoError(t, q.(colcontainer.RewindableQueue).Rewind())
   154  						}
   155  					}
   156  
   157  					// Close queue.
   158  					require.NoError(t, q.Close(ctx))
   159  
   160  					// Verify no directories are left over.
   161  					directories, err = queueCfg.FS.List(queueCfg.Path)
   162  					require.NoError(t, err)
   163  					require.Equal(t, 0, len(directories))
   164  				})
   165  			}
   166  		}
   167  	}
   168  }
   169  
   170  // Flags for BenchmarkQueue.
   171  var (
   172  	bufferSizeBytes = flag.String("bufsize", "128KiB", "number of bytes to buffer in memory before flushing")
   173  	blockSizeBytes  = flag.String("blocksize", "32MiB", "block size for the number of bytes stored in a block. In pebble, this is the value size, with the flat implementation, this is the file size")
   174  	dataSizeBytes   = flag.String("datasize", "512MiB", "size of data in bytes to sort")
   175  )
   176  
   177  // BenchmarkDiskQueue benchmarks a queue with parameters provided through flags.
   178  func BenchmarkDiskQueue(b *testing.B) {
   179  	if testing.Short() {
   180  		b.Skip("short flag")
   181  	}
   182  
   183  	bufSize, err := humanizeutil.ParseBytes(*bufferSizeBytes)
   184  	if err != nil {
   185  		b.Fatalf("could not parse -bufsize: %s", err)
   186  	}
   187  	blockSize, err := humanizeutil.ParseBytes(*blockSizeBytes)
   188  	if err != nil {
   189  		b.Fatalf("could not parse -blocksize: %s", err)
   190  	}
   191  	dataSize, err := humanizeutil.ParseBytes(*dataSizeBytes)
   192  	if err != nil {
   193  		b.Fatalf("could not pase -datasize: %s", err)
   194  	}
   195  	numBatches := int(dataSize / (8 * int64(coldata.BatchSize())))
   196  
   197  	queueCfg, cleanup := colcontainerutils.NewTestingDiskQueueCfg(b, false /* inMem */)
   198  	defer cleanup()
   199  	queueCfg.BufferSizeBytes = int(bufSize)
   200  	queueCfg.MaxFileSizeBytes = int(blockSize)
   201  
   202  	rng, _ := randutil.NewPseudoRand()
   203  	typs := []*types.T{types.Int}
   204  	batch := coldatatestutils.RandomBatch(testAllocator, rng, typs, coldata.BatchSize(), 0, 0)
   205  	op := colexecbase.NewRepeatableBatchSource(testAllocator, batch, typs)
   206  	ctx := context.Background()
   207  	for i := 0; i < b.N; i++ {
   208  		op.ResetBatchesToReturn(numBatches)
   209  		q, err := colcontainer.NewDiskQueue(ctx, typs, queueCfg, testDiskAcc)
   210  		require.NoError(b, err)
   211  		for {
   212  			batchToEnqueue := op.Next(ctx)
   213  			if err := q.Enqueue(ctx, batchToEnqueue); err != nil {
   214  				b.Fatal(err)
   215  			}
   216  			if batchToEnqueue.Length() == 0 {
   217  				break
   218  			}
   219  		}
   220  		dequeuedBatch := coldata.NewMemBatch(typs, testColumnFactory)
   221  		for dequeuedBatch.Length() != 0 {
   222  			if _, err := q.Dequeue(ctx, dequeuedBatch); err != nil {
   223  				b.Fatal(err)
   224  			}
   225  		}
   226  		if err := q.Close(ctx); err != nil {
   227  			b.Fatal(err)
   228  		}
   229  	}
   230  	// When running this benchmark multiple times, disk throttling might kick in
   231  	// and result in unfair numbers. Uncomment this code to run the benchmark
   232  	// multiple times.
   233  	/*
   234  		b.StopTimer()
   235  		time.Sleep(10 * time.Second)
   236  	*/
   237  }