github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/benchmark/benchmark_batch_writes_test.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package benchmark
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"math/rand"
    21  	"os"
    22  	"path/filepath"
    23  	"sort"
    24  	"testing"
    25  
    26  	"github.com/dolthub/dolt/go/store/nbs"
    27  	"github.com/dolthub/dolt/go/store/prolly"
    28  	"github.com/dolthub/dolt/go/store/prolly/tree"
    29  	"github.com/dolthub/dolt/go/store/skip"
    30  	"github.com/dolthub/dolt/go/store/types"
    31  	"github.com/dolthub/dolt/go/store/val"
    32  
    33  	"github.com/stretchr/testify/require"
    34  	"go.etcd.io/bbolt"
    35  )
    36  
// Sizing parameters shared by the benchmarks in this file.
const (
	batch = 1 << 16 // number of key/value pairs handed out by one dataProvider, i.e. Puts per Flush
	sz    = 8       // byte length of every slice returned by dataProvider.next
)

var (
	// bucket is the name of the bbolt bucket that the BBolt benchmark creates
	// and that bboltWriter.Flush writes into.
	bucket = []byte("bolt")
)
    45  
    46  func BenchmarkImportBBolt(b *testing.B) {
    47  	makeWriter := func() writer {
    48  		path, err := os.MkdirTemp("", "*")
    49  		require.NoError(b, err)
    50  		path = filepath.Join(path, "bolt.db")
    51  
    52  		db, err := bbolt.Open(path, 0666, &bbolt.Options{
    53  			// turn off fsync
    54  			NoGrowSync:     true,
    55  			NoFreelistSync: true,
    56  			NoSync:         true,
    57  		})
    58  		require.NoError(b, err)
    59  
    60  		err = db.Update(func(tx *bbolt.Tx) error {
    61  			_, err = tx.CreateBucket(bucket)
    62  			return err
    63  		})
    64  		require.NoError(b, err)
    65  		return &bboltWriter{
    66  			edits: skip.NewSkipList(bytes.Compare),
    67  			db:    db,
    68  		}
    69  	}
    70  
    71  	b.Run("BBolt", func(b *testing.B) {
    72  		benchmarkBatchWrite(b, makeWriter())
    73  	})
    74  }
    75  
    76  func BenchmarkImportDolt(b *testing.B) {
    77  	makeWriter := func() writer {
    78  		ctx := context.Background()
    79  		nbf := types.Format_DOLT
    80  		memtable := uint64(256 * 1024 * 1024)
    81  		quota := nbs.NewUnlimitedMemQuotaProvider()
    82  
    83  		path, err := os.MkdirTemp("", "*")
    84  		require.NoError(b, err)
    85  
    86  		cs, err := nbs.NewLocalStore(ctx, nbf.VersionString(), path, memtable, quota)
    87  		require.NoError(b, err)
    88  
    89  		desc := val.NewTupleDescriptor(val.Type{Enc: val.Uint64Enc})
    90  		m, err := prolly.NewMapFromTuples(ctx, tree.NewNodeStore(cs), desc, desc)
    91  		require.NoError(b, err)
    92  		return &doltWriter{mut: m.Mutate(), cs: cs}
    93  	}
    94  
    95  	b.Run("Dolt", func(b *testing.B) {
    96  		benchmarkBatchWrite(b, makeWriter())
    97  	})
    98  }
    99  
// bboltWriter is the writer implementation used by the BBolt benchmark. Put
// stages entries in edits and Flush copies them into db.
type bboltWriter struct {
	edits *skip.List // entries staged by Put, awaiting Flush
	db    *bbolt.DB  // destination database
}
   104  
// Put stages key and value in the in-memory skip list; nothing is written to
// the database until Flush is called. It always returns nil.
func (wr *bboltWriter) Put(key, value []byte) error {
	wr.edits.Put(key, value)
	return nil
}
   109  
   110  func (wr *bboltWriter) Flush() error {
   111  	return wr.db.Update(func(tx *bbolt.Tx) (err error) {
   112  		b := tx.Bucket(bucket)
   113  		iter := wr.edits.IterAtStart()
   114  		for {
   115  			k, v := iter.Current()
   116  			if k == nil {
   117  				break
   118  			}
   119  			if err = b.Put(k, v); err != nil {
   120  				return
   121  			}
   122  			iter.Advance()
   123  		}
   124  		return
   125  	})
   126  }
   127  
// doltWriter is the writer implementation used by the Dolt benchmark. Put
// applies edits to mut and Flush turns them into a map and commits cs.
type doltWriter struct {
	mut *prolly.MutableMap  // receives edits from Put; replaced on every Flush
	cs  *nbs.NomsBlockStore // block store whose root is committed on Flush
}
   132  
// Put forwards key and value to the mutable map using a background context
// and returns whatever error the map reports.
func (wr *doltWriter) Put(key, value []byte) error {
	return wr.mut.Put(context.Background(), key, value)
}
   136  
   137  func (wr *doltWriter) Flush() error {
   138  	m, err := wr.mut.Map(context.Background())
   139  	if err != nil {
   140  		return err
   141  	}
   142  	wr.mut = m.Mutate()
   143  
   144  	h, err := wr.cs.Root(context.Background())
   145  	if err != nil {
   146  		return err
   147  	}
   148  	_, err = wr.cs.Commit(context.Background(), h, h)
   149  	return err
   150  }
   151  
   152  func benchmarkBatchWrite(b *testing.B, wr writer) {
   153  	dp := newDataProvider(batch)
   154  	for i := 0; i < b.N; i++ {
   155  		k, v := dp.next()
   156  		require.NoError(b, wr.Put(k, v))
   157  		if dp.empty() {
   158  			require.NoError(b, wr.Flush())
   159  			dp = newDataProvider(batch)
   160  		}
   161  	}
   162  }
   163  
// writer is the minimal storage interface exercised by benchmarkBatchWrite.
type writer interface {
	// Put records a single key/value pair.
	Put(key, value []byte) error
	// Flush is called by benchmarkBatchWrite once a full batch has been Put.
	Flush() error
}
   168  
// dataProvider hands out fixed-size byte slices carved from a single buffer.
// It also implements sort.Interface, treating buf as a sequence of sz-byte
// records.
type dataProvider struct {
	buf []byte // bytes not yet consumed by next
}

// Compile-time check that *dataProvider satisfies sort.Interface.
var _ sort.Interface = &dataProvider{}
   174  
   175  func newDataProvider(count int) (dp *dataProvider) {
   176  	dp = &dataProvider{buf: make([]byte, count*sz)}
   177  	rand.Read(dp.buf)
   178  	return
   179  }
   180  
   181  func (dp *dataProvider) next() (k, v []byte) {
   182  	k, v = dp.buf[:sz], dp.buf[:sz]
   183  	dp.buf = dp.buf[sz:]
   184  	return
   185  }
   186  
// empty reports whether the buffer has no bytes left.
func (dp *dataProvider) empty() bool {
	return len(dp.buf) == 0
}
   190  
// Len returns the number of whole sz-byte records remaining in the buffer.
// It is part of sort.Interface.
func (dp *dataProvider) Len() int {
	return len(dp.buf) / sz
}
   194  
   195  func (dp *dataProvider) Less(i, j int) bool {
   196  	l := dp.buf[i*sz : (i*sz)+sz]
   197  	r := dp.buf[j*sz : (j*sz)+sz]
   198  	return bytes.Compare(l, r) < 0
   199  }
   200  
   201  var swap [sz]byte
   202  
   203  func (dp *dataProvider) Swap(i, j int) {
   204  	l := dp.buf[i*sz : (i*sz)+sz]
   205  	r := dp.buf[j*sz : (j*sz)+sz]
   206  	copy(swap[:], l)
   207  	copy(l, r)
   208  	copy(r, swap[:])
   209  }