github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/store_test.go (about)

     1  // Copyright 2020 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package store
    16  
    17  import (
    18  	"context"
    19  	"io"
    20  	"math/rand"
    21  	"os"
    22  	"sync"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/dolthub/go-mysql-server/sql"
    27  	"github.com/google/uuid"
    28  	"github.com/stretchr/testify/require"
    29  
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/schema"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/sqle"
    32  	"github.com/dolthub/dolt/go/store/datas"
    33  	"github.com/dolthub/dolt/go/store/nbs"
    34  	"github.com/dolthub/dolt/go/store/types"
    35  )
    36  
    37  func poe(err error) {
    38  	if err != nil {
    39  		panic(err)
    40  	}
    41  }
    42  
    43  func getDBAtDir(ctx context.Context, dir string) datas.Database {
    44  	cs, err := nbs.NewLocalStore(ctx, types.Format_Default.VersionString(), dir, 1<<28)
    45  	poe(err)
    46  
    47  	return datas.NewDatabase(nbs.NewNBSMetricWrapper(cs))
    48  }
    49  
    50  const (
    51  	simIdxBenchDataset = "simulated_index_benchmark"
    52  	numRows            = 100000
    53  	rangeSize          = 10
    54  )
    55  
    56  var benchmarkTmpDir = os.TempDir()
    57  var genOnce = &sync.Once{}
    58  
    59  func getBenchmarkDB(ctx context.Context) datas.Database {
    60  	return getDBAtDir(ctx, benchmarkTmpDir)
    61  }
    62  
    63  func writeTupleToDB(ctx context.Context, db datas.Database, dsID string, vals ...types.Value) {
    64  	root, err := types.NewTuple(db.Format(), vals...)
    65  	poe(err)
    66  
    67  	ds, err := db.GetDataset(ctx, dsID)
    68  	poe(err)
    69  
    70  	_, err = db.CommitValue(ctx, ds, root)
    71  	poe(err)
    72  }
    73  
    74  func readTupleFromDB(ctx context.Context, t require.TestingT, dsID string) (*types.NomsBinFormat, []types.Value) {
    75  	db := getBenchmarkDB(ctx)
    76  	ds, err := db.GetDataset(ctx, dsID)
    77  	require.NoError(t, err)
    78  
    79  	ref, ok, err := ds.MaybeHeadRef()
    80  	require.NoError(t, err)
    81  	require.True(t, ok)
    82  
    83  	val, err := ref.TargetValue(ctx, db)
    84  	require.NoError(t, err)
    85  
    86  	st := val.(types.Struct)
    87  	val, ok, err = st.MaybeGet("value")
    88  	require.NoError(t, err)
    89  	require.True(t, ok)
    90  	tup := val.(types.Tuple)
    91  	valSlice, err := tup.AsSlice()
    92  	require.NoError(t, err)
    93  	return db.Format(), valSlice
    94  }
    95  
// testDataCols lists the columns handed to the KV-to-SQL row converter in
// BenchmarkMapItr. The numeric argument of each column matches the tag that
// generateTestData writes in front of the corresponding value: 0 in the key
// tuple and 1-4, 6-9 in the value tuple. Tag 5 has no column here; it is only
// used as the leading tag of the simulated index keys.
// NOTE(review): the final bool is true only for "id" — presumably the
// primary-key flag; confirm against schema.NewColumn.
var testDataCols = []schema.Column{
	schema.NewColumn("id", 0, types.IntKind, true),
	schema.NewColumn("fColh", 1, types.FloatKind, false),
	schema.NewColumn("bCol", 2, types.BoolKind, false),
	schema.NewColumn("uuidStrCol", 3, types.StringKind, false),
	schema.NewColumn("timeCol", 4, types.TimestampKind, false),
	schema.NewColumn("colInt1", 6, types.IntKind, false),
	schema.NewColumn("colInt2", 7, types.IntKind, false),
	schema.NewColumn("colInt3", 8, types.IntKind, false),
	schema.NewColumn("colInt4", 9, types.IntKind, false),
}
   107  
   108  func generateTestData(ctx context.Context) {
   109  	genOnce.Do(func() {
   110  		db := getBenchmarkDB(ctx)
   111  		nbf := db.Format()
   112  
   113  		m, err := types.NewMap(ctx, db)
   114  		poe(err)
   115  
   116  		idx, err := types.NewMap(ctx, db)
   117  		poe(err)
   118  
   119  		me := m.Edit()
   120  		idxMe := idx.Edit()
   121  		rng := rand.New(rand.NewSource(0))
   122  		for i := 0; i <= numRows; i++ {
   123  			k, err := types.NewTuple(nbf, types.Uint(0), types.Int(int64(i)))
   124  			poe(err)
   125  			randf := rng.Float64()
   126  			v, err := types.NewTuple(nbf, types.Uint(1), types.Float(randf), types.Uint(2), types.Bool(i%2 == 0), types.Uint(3), types.String(uuid.New().String()), types.Uint(4), types.Timestamp(time.Now()), types.Uint(6), types.Int(-100), types.Uint(7), types.Int(-1000), types.Uint(8), types.Int(-10000), types.Uint(9), types.Int(-1000000))
   127  			poe(err)
   128  			idxKey, err := types.NewTuple(nbf, types.Uint(5), types.Float(randf), types.Uint(0), types.Int(int64(i)))
   129  			poe(err)
   130  
   131  			me = me.Set(k, v)
   132  			idxMe = idxMe.Set(idxKey, types.NullValue)
   133  		}
   134  
   135  		m, err = me.Map(ctx)
   136  		poe(err)
   137  
   138  		idx, err = idxMe.Map(ctx)
   139  		poe(err)
   140  
   141  		writeTupleToDB(ctx, db, simIdxBenchDataset, m, idx)
   142  	})
   143  }
   144  
   145  func BenchmarkSimulatedIndex(b *testing.B) {
   146  	ctx := context.Background()
   147  	generateTestData(ctx)
   148  
   149  	rng := rand.New(rand.NewSource(0))
   150  	nbf, vals := readTupleFromDB(ctx, b, simIdxBenchDataset)
   151  
   152  	m := vals[0].(types.Map)
   153  	idx := vals[1].(types.Map)
   154  
   155  	b.ResetTimer()
   156  
   157  	var idxItr types.MapIterator
   158  	for i := 0; i < b.N; i++ {
   159  		randf := rng.Float64()
   160  		rangeStartKey, err := types.NewTuple(nbf, types.Uint(5), types.Float(randf))
   161  		require.NoError(b, err)
   162  		idxItr, err = idx.IteratorFrom(ctx, rangeStartKey)
   163  		require.NoError(b, err)
   164  
   165  		for j := 0; j < rangeSize; j++ {
   166  			idxKey, _, err := idxItr.Next(ctx)
   167  			require.NoError(b, err)
   168  
   169  			if idxKey == nil {
   170  				break
   171  			}
   172  
   173  			vals, err := idxKey.(types.Tuple).AsSlice()
   174  			require.NoError(b, err)
   175  			keyTup, err := types.NewTuple(nbf, vals[2:]...)
   176  
   177  			k, _, err := m.MaybeGet(ctx, keyTup)
   178  			require.NoError(b, err)
   179  			require.NotNil(b, k)
   180  		}
   181  	}
   182  }
   183  
   184  func BenchmarkSimulatedCoveringIndex(b *testing.B) {
   185  	ctx := context.Background()
   186  	generateTestData(ctx)
   187  
   188  	rng := rand.New(rand.NewSource(0))
   189  	nbf, vals := readTupleFromDB(ctx, b, simIdxBenchDataset)
   190  
   191  	idx := vals[1].(types.Map)
   192  
   193  	b.ResetTimer()
   194  
   195  	var idxItr types.MapIterator
   196  	for i := 0; i < b.N; i++ {
   197  		randf := rng.Float64()
   198  		rangeStartKey, err := types.NewTuple(nbf, types.Uint(5), types.Float(randf))
   199  		require.NoError(b, err)
   200  		idxItr, err = idx.IteratorFrom(ctx, rangeStartKey)
   201  		require.NoError(b, err)
   202  
   203  		for j := 0; j < rangeSize; j++ {
   204  			idxKey, _, err := idxItr.Next(ctx)
   205  			require.NoError(b, err)
   206  
   207  			if idxKey == nil {
   208  				break
   209  			}
   210  		}
   211  	}
   212  }
   213  
   214  func BenchmarkMapItr(b *testing.B) {
   215  	ctx := context.Background()
   216  	generateTestData(ctx)
   217  
   218  	require.True(b, b.N < numRows, "b.N:%d >= numRows:%d", b.N, numRows)
   219  
   220  	_, vals := readTupleFromDB(ctx, b, simIdxBenchDataset)
   221  	m := vals[0].(types.Map)
   222  
   223  	itr, err := m.RangeIterator(ctx, 0, uint64(b.N))
   224  	require.NoError(b, err)
   225  
   226  	var closeFunc func() error
   227  	if cl, ok := itr.(io.Closer); ok {
   228  		closeFunc = cl.Close
   229  	}
   230  
   231  	dmItr := sqle.NewDoltMapIter(ctx, itr.NextTuple, closeFunc, sqle.NewKVToSqlRowConverterForCols(m.Format(), testDataCols))
   232  
   233  	b.ResetTimer()
   234  	for {
   235  		var r sql.Row
   236  		r, err = dmItr.Next()
   237  
   238  		if r == nil || err != nil {
   239  			break
   240  		}
   241  	}
   242  	b.StopTimer()
   243  
   244  	if err != io.EOF {
   245  		require.NoError(b, err)
   246  	}
   247  }
   248  
   249  /*func BenchmarkFullScan(b *testing.B) {
   250  	const dir = "dolt directory containing db with table to scan"
   251  	const branch = "master"
   252  	const tableName = "bigram_counts"
   253  
   254  	ctx := context.Background()
   255  	ddb, err := doltdb.LoadDoltDB(ctx, types.Format_Default, dir)
   256  	require.NoError(b, err)
   257  
   258  	cs, err := doltdb.NewCommitSpec("HEAD")
   259  	require.NoError(b, err)
   260  
   261  	cm, err := ddb.Resolve(ctx, cs, ref.NewBranchRef(branch))
   262  	require.NoError(b, err)
   263  
   264  	root, err := cm.GetRootValue()
   265  	require.NoError(b, err)
   266  
   267  	tbl, ok, err := root.GetTable(ctx, tableName)
   268  	require.NoError(b, err)
   269  	require.True(b, ok)
   270  
   271  	m, err := tbl.GetRowData(ctx)
   272  	require.NoError(b, err)
   273  	require.True(b, uint64(b.N) < m.Len(), "b.N:%d >= numRows:%d", b.N, m.Len())
   274  
   275  	itr, err := m.RangeIterator(ctx, 0, uint64(b.N))
   276  	require.NoError(b, err)
   277  
   278  	dmItr := sqle.NewDoltMapIter(ctx, itr.NextTuple, closeFunc, sqle.NewKVToSqlRowConverterForCols(m.Format(), testDataCols))
   279  
   280  	b.ResetTimer()
   281  	for {
   282  		var r sql.Row
   283  		r, err = dmItr.Next()
   284  
   285  		if r == nil || err != nil {
   286  			break
   287  		}
   288  	}
   289  	b.StopTimer()
   290  
   291  	if err != io.EOF {
   292  		require.NoError(b, err)
   293  	}
   294  }*/