github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/perf/suite/suite.go

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  // Package suite implements a performance test suite for Noms, intended for
    23  // measuring and reporting long running tests.
    24  //
    25  // Usage is similar to testify's suite:
    26  //  1. Define a test suite struct which inherits from suite.PerfSuite.
    27  //  2. Define methods on that struct that start with the word "Test", optionally
     28  //     followed by digits, then followed by a non-empty capitalized string.
    29  //  3. Call suite.Run with an instance of that struct.
    30  //  4. Run go test with the -perf <path to noms db> flag.
    31  //
    32  // Flags:
    33  //
    34  //	-perf.mem      Backs the database by a memory store, instead of nbs.
    35  //	-perf.prefix   Gives the dataset IDs for test results a prefix.
    36  //	-perf.repeat   Sets how many times tests are repeated ("reps").
    37  //	-perf.run      Only run tests that match a regex (case insensitive).
    38  //	-perf.testdata Sets a custom path to the Noms testdata directory.
    39  //
    40  // PerfSuite also supports testify/suite style Setup/TearDown methods:
    41  //
    42  //	Setup/TearDownSuite is called exactly once.
    43  //	Setup/TearDownRep   is called for each repetition of the test runs, i.e. -perf.repeat times.
    44  //	Setup/TearDownTest  is called for every test.
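         //
         // A minimal sketch of how these hooks are declared (the receiver name and hook
         // bodies are illustrative, not taken from a real suite):
         //
         //	func (s *perfSuite) SetupSuite()   { /* once, before all reps */ }
         //	func (s *perfSuite) SetupRep()     { /* before each of the -perf.repeat reps */ }
         //	func (s *perfSuite) SetupTest()    { /* before every Test method */ }
         //	func (s *perfSuite) TearDownTest() { /* after every Test method */ }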
    45  //
    46  // Test results are written to Noms, along with a dump of the environment they were recorded in.
    47  //
    48  // Test names are derived from that "non-empty capitalized string": "Test" is omitted because it's
    49  // redundant, and leading digits are omitted to allow for manual test ordering. For example:
    50  //
    51  //	> cat ./samples/go/csv/csv-import/perf_test.go
     52  //	type perfSuite struct {
    53  //	  suite.PerfSuite
    54  //	}
    55  //
    56  //	func (s *perfSuite) TestFoo() { ... }
    57  //	func (s *perfSuite) TestZoo() { ... }
    58  //	func (s *perfSuite) Test01Qux() { ... }
    59  //	func (s *perfSuite) Test02Bar() { ... }
    60  //
    61  //	func TestPerf(t *testing.T) {
    62  //	  suite.Run("csv-import", t, &perfSuite{})
    63  //	}
    64  //
    65  //	> noms serve &
    66  //	> go test -v ./samples/go/csv/... -perf http://localhost:8000 -perf.repeat 3
    67  //	(perf) RUN(1/3) Test01Qux (recorded as "Qux")
    68  //	(perf) PASS:    Test01Qux (5s, paused 15s, total 20s)
    69  //	(perf) RUN(1/3) Test02Bar (recorded as "Bar")
    70  //	(perf) PASS:    Test02Bar (15s, paused 2s, total 17s)
    71  //	(perf) RUN(1/3) TestFoo (recorded as "Foo")
    72  //	(perf) PASS:    TestFoo (10s, paused 1s, total 11s)
    73  //	(perf) RUN(1/3) TestZoo (recorded as "Zoo")
    74  //	(perf) PASS:    TestZoo (1s, paused 42s, total 43s)
    75  //	...
    76  //
    77  //	> noms show http://localhost:8000::csv-import
    78  //	{
    79  //	  environment: ...
    80  //	  tests: [{
    81  //	    "Bar": {elapsed: 15s, paused: 2s,  total: 17s},
    82  //	    "Foo": {elapsed: 10s, paused: 1s,  total: 11s},
    83  //	    "Qux": {elapsed: 5s,  paused: 15s, total: 20s},
    84  //	    "Zoo": {elapsed: 1s,  paused: 42s, total: 43s},
    85  //	  }, ...]
    86  //	  ...
    87  //	}
    88  package suite
    89  
    90  import (
    91  	"bytes"
    92  	"context"
    93  	"flag"
    94  	"fmt"
    95  	"io"
    96  	"os"
    97  	"os/exec"
    98  	"path"
    99  	"path/filepath"
   100  	"reflect"
   101  	"regexp"
   102  	"strings"
   103  	"testing"
   104  	"time"
   105  
   106  	"github.com/google/uuid"
   107  	"github.com/shirou/gopsutil/v3/cpu"
   108  	"github.com/shirou/gopsutil/v3/disk"
   109  	"github.com/shirou/gopsutil/v3/host"
   110  	"github.com/shirou/gopsutil/v3/mem"
   111  	"github.com/stretchr/testify/assert"
   112  	"github.com/stretchr/testify/require"
   113  	testifySuite "github.com/stretchr/testify/suite"
   114  
   115  	"github.com/dolthub/dolt/go/libraries/utils/file"
   116  	"github.com/dolthub/dolt/go/libraries/utils/osutil"
   117  	"github.com/dolthub/dolt/go/store/chunks"
   118  	"github.com/dolthub/dolt/go/store/datas"
   119  	"github.com/dolthub/dolt/go/store/marshal"
   120  	"github.com/dolthub/dolt/go/store/prolly/tree"
   121  	"github.com/dolthub/dolt/go/store/spec"
   122  	"github.com/dolthub/dolt/go/store/types"
   123  )
   124  
   125  var (
   126  	perfFlag         = flag.String("perf", "", "The database to write perf tests to. If this isn't specified, perf tests are skipped. If you want a dry run, use \"mem\" as a database")
   127  	perfMemFlag      = flag.Bool("perf.mem", false, "Back the test database by a memory store, not nbs. This will affect test timing, but it's provided in case you're low on disk space")
   128  	perfPrefixFlag   = flag.String("perf.prefix", "", `Prefix for the dataset IDs where results are written. For example, a prefix of "foo/" will write test datasets like "foo/csv-import" instead of just "csv-import"`)
   129  	perfRepeatFlag   = flag.Int("perf.repeat", 1, "The number of times to repeat each perf test")
   130  	perfRunFlag      = flag.String("perf.run", "", "Only run perf tests that match a regular expression")
   131  	perfTestdataFlag = flag.String("perf.testdata", "", "Path to the noms testdata directory. By default this is ../testdata relative to the noms directory")
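         	// testNamePattern matches suite methods named "Test", optional digits, then a
         	// capitalized name; the capture group (e.g. "Foo" from "TestFoo", "Qux" from
         	// "Test01Qux") is used as the recorded test name.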
   132  	testNamePattern  = regexp.MustCompile("^Test[0-9]*([A-Z].*$)")
   133  )
   134  
   135  // PerfSuite is the core of the perf testing suite. See package documentation for details.
   136  type PerfSuite struct {
   137  	// T is the testing.T instance set when the suite is passed into Run.
   138  	T *testing.T
   139  
    140  	// W is the io.Writer that test output is written to. It discards output unless the verbose flag (go test -v) is set.
   141  	W io.Writer
   142  
    143  	// AtticLabs is the path to a temporary attic-labs directory created for this run of the suite (under the system temp directory).
   144  	AtticLabs string
   145  
    146  	// Testdata is the path to the testdata directory - by default "testdata" under AtticLabs, but it can be overridden with the -perf.testdata flag.
   147  	Testdata string
   148  
    149  	// Database is a Noms database that tests can use for reading and writing. It is recreated for each repetition of the suite, so state persists across the tests within a single rep.
   150  	Database datas.Database
   151  
   152  	VS *types.ValueStore
   153  
    154  	// DatabaseSpec is the Noms spec of Database (currently always the in-memory spec "mem://").
   155  	DatabaseSpec string
   156  
   157  	tempFiles []*os.File
   158  	tempDirs  []string
   159  	paused    time.Duration
   160  	datasetID string
   161  }
   162  
   163  // SetupRepSuite has a SetupRep method, which runs every repetition of the test, i.e. -perf.repeat times in total.
   164  type SetupRepSuite interface {
   165  	SetupRep()
   166  }
   167  
   168  // TearDownRepSuite has a TearDownRep method, which runs every repetition of the test, i.e. -perf.repeat times in total.
   169  type TearDownRepSuite interface {
   170  	TearDownRep()
   171  }
   172  
   173  type perfSuiteT interface {
   174  	Suite() *PerfSuite
   175  }
   176  
   177  type environment struct {
   178  	DiskUsages map[string]disk.UsageStat
   179  	Cpus       map[int]cpu.InfoStat
   180  	Mem        mem.VirtualMemoryStat
   181  	Host       host.InfoStat
   182  	Partitions map[string]disk.PartitionStat
   183  }
   184  
   185  type timeInfo struct {
   186  	elapsed, paused, total time.Duration
   187  }
   188  
   189  type testRep map[string]timeInfo
   190  
   191  type nopWriter struct{}
   192  
   193  func (r nopWriter) Write(p []byte) (int, error) {
   194  	return len(p), nil
   195  }
   196  
   197  // Run runs suiteT and writes results to dataset datasetID in the database given by the -perf command line flag.
   198  func Run(datasetID string, t *testing.T, suiteT perfSuiteT) {
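         	// Note: the suite is currently skipped unconditionally before anything runs.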
   199  	t.Skip()
   200  	assert := assert.New(t)
   201  
   202  	if !assert.NotEqual("", datasetID) {
   203  		return
   204  	}
   205  
   206  	// Piggy-back off the go test -v flag.
   207  	verboseFlag := flag.Lookup("test.v")
   208  	assert.NotNil(verboseFlag)
   209  	verbose := verboseFlag.Value.(flag.Getter).Get().(bool)
   210  
   211  	if *perfFlag == "" {
   212  		if verbose {
   213  			fmt.Printf("(perf) Skipping %s, -perf flag not set\n", datasetID)
   214  		}
   215  		return
   216  	}
   217  
   218  	suite := suiteT.Suite()
   219  	suite.T = t
   220  	if verbose {
   221  		suite.W = os.Stdout
   222  	} else {
   223  		suite.W = nopWriter{}
   224  	}
   225  
   226  	id, _ := uuid.NewUUID()
   227  	suite.AtticLabs = filepath.Join(os.TempDir(), "attic-labs", "noms", "suite", id.String())
   228  	suite.Testdata = *perfTestdataFlag
   229  	if suite.Testdata == "" {
   230  		suite.Testdata = filepath.Join(suite.AtticLabs, "testdata")
   231  	}
   232  
   233  	// Clean up temporary directories/files last.
   234  	defer func() {
   235  		for _, f := range suite.tempFiles {
   236  			f.Close()
   237  			file.Remove(f.Name())
   238  		}
   239  		for _, d := range suite.tempDirs {
   240  			file.RemoveAll(d)
   241  		}
   242  	}()
   243  
   244  	suite.datasetID = datasetID
   245  
   246  	// This is the database the perf test results are written to.
   247  	sp, err := spec.ForDatabase(*perfFlag)
   248  	if !assert.NoError(err) {
   249  		return
   250  	}
   251  	defer sp.Close()
   252  
   253  	// List of test runs, each a map of test name => timing info.
   254  	testReps := make([]testRep, *perfRepeatFlag)
   255  
   256  	// Note: the default value of perfRunFlag is "", which is actually a valid
   257  	// regular expression that matches everything.
   258  	perfRunRe, err := regexp.Compile("(?i)" + *perfRunFlag)
   259  	if !assert.NoError(err, `Invalid regular expression "%s"`, *perfRunFlag) {
   260  		return
   261  	}
   262  
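         	// Commit the collected results, plus the environment and git revisions, to the
         	// results database when Run returns, however far the test loop got.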
   263  	defer func() {
   264  		db := sp.GetDatabase(context.Background())
   265  		vrw := sp.GetVRW(context.Background())
   266  
   267  		reps := make([]types.Value, *perfRepeatFlag)
   268  		for i, rep := range testReps {
   269  			timesSlice := types.ValueSlice{}
   270  			for name, info := range rep {
   271  				st, err := types.NewStruct(vrw.Format(), "", types.StructData{
   272  					"elapsed": types.Float(info.elapsed.Nanoseconds()),
   273  					"paused":  types.Float(info.paused.Nanoseconds()),
   274  					"total":   types.Float(info.total.Nanoseconds()),
   275  				})
   276  
   277  				require.NoError(t, err)
   278  				timesSlice = append(timesSlice, types.String(name), st)
   279  			}
    280  			reps[i], err = types.NewMap(context.Background(), vrw, timesSlice...)
         			require.NoError(t, err)
   281  		}
   282  
   283  		l, err := types.NewList(context.Background(), vrw, reps...)
   284  		require.NoError(t, err)
   285  		record, err := types.NewStruct(vrw.Format(), "", map[string]types.Value{
   286  			"environment":      suite.getEnvironment(vrw),
   287  			"nomsRevision":     types.String(suite.getGitHead(path.Join(suite.AtticLabs, "noms"))),
   288  			"testdataRevision": types.String(suite.getGitHead(suite.Testdata)),
   289  			"reps":             l,
   290  		})
   291  		require.NoError(t, err)
   292  
   293  		ds, err := db.GetDataset(context.Background(), *perfPrefixFlag+datasetID)
   294  		require.NoError(t, err)
   295  		_, err = datas.CommitValue(context.Background(), db, ds, record)
   296  		require.NoError(t, err)
   297  	}()
   298  
   299  	if t, ok := suiteT.(testifySuite.SetupAllSuite); ok {
   300  		t.SetupSuite()
   301  	}
   302  
   303  	for repIdx := 0; repIdx < *perfRepeatFlag; repIdx++ {
   304  		testReps[repIdx] = testRep{}
   305  
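         		// Each repetition gets a fresh in-memory chunk store, value store and database,
         		// so no state leaks between reps.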
   306  		storage := &chunks.MemoryStorage{}
   307  		memCS := storage.NewView()
   308  		suite.DatabaseSpec = "mem://"
   309  		suite.VS = types.NewValueStore(memCS)
   310  		ns := tree.NewNodeStore(memCS)
   311  		suite.Database = datas.NewTypesDatabase(suite.VS, ns)
   312  		defer suite.Database.Close()
   313  
   314  		if t, ok := suiteT.(SetupRepSuite); ok {
   315  			t.SetupRep()
   316  		}
   317  
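         		// Discover test methods via reflection: every method whose name matches
         		// testNamePattern is run (subject to the -perf.run filter) and its timing is
         		// recorded under the captured name.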
   318  		for t, mIdx := reflect.TypeOf(suiteT), 0; mIdx < t.NumMethod(); mIdx++ {
   319  			m := t.Method(mIdx)
   320  
   321  			parts := testNamePattern.FindStringSubmatch(m.Name)
   322  			if parts == nil {
   323  				continue
   324  			}
   325  
   326  			recordName := parts[1]
   327  			if !perfRunRe.MatchString(recordName) && !perfRunRe.MatchString(m.Name) {
   328  				continue
   329  			}
   330  
   331  			if _, ok := testReps[repIdx][recordName]; ok {
    332  				assert.Fail(fmt.Sprintf("Multiple tests are named %q", recordName))
   333  				continue
   334  			}
   335  
   336  			if verbose {
   337  				fmt.Printf("(perf) RUN(%d/%d) %s (as \"%s\")\n", repIdx+1, *perfRepeatFlag, m.Name, recordName)
   338  			}
   339  
   340  			if t, ok := suiteT.(testifySuite.SetupTestSuite); ok {
   341  				t.SetupTest()
   342  			}
   343  
   344  			start := time.Now()
   345  			suite.paused = 0
   346  
   347  			err := callSafe(m.Name, m.Func, suiteT)
   348  
   349  			total := time.Since(start)
   350  			elapsed := total - suite.paused
   351  
   352  			if verbose && err == nil {
   353  				fmt.Printf("(perf) PASS:    %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total)
   354  			} else if err != nil {
   355  				fmt.Printf("(perf) FAIL:    %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total)
   356  				fmt.Println(err)
   357  			}
   358  
   359  			if osutil.IsWindows && elapsed == 0 {
   360  				elapsed = 1
   361  				total = 1
   362  			}
   363  			testReps[repIdx][recordName] = timeInfo{elapsed, suite.paused, total}
   364  
   365  			if t, ok := suiteT.(testifySuite.TearDownTestSuite); ok {
   366  				t.TearDownTest()
   367  			}
   368  		}
   369  
   370  		if t, ok := suiteT.(TearDownRepSuite); ok {
   371  			t.TearDownRep()
   372  		}
   373  	}
   374  
   375  	if t, ok := suiteT.(testifySuite.TearDownAllSuite); ok {
   376  		t.TearDownSuite()
   377  	}
   378  }
   379  
   380  func (suite *PerfSuite) Suite() *PerfSuite {
   381  	return suite
   382  }
   383  
   384  // NewAssert returns the assert.Assertions instance for this test.
   385  func (suite *PerfSuite) NewAssert() *assert.Assertions {
   386  	return assert.New(suite.T)
   387  }
   388  
   389  // TempFile creates a temporary file, which will be automatically cleaned up by
    390  // the perf test suite. Files will be prefixed with the test's dataset ID.
   391  func (suite *PerfSuite) TempFile() *os.File {
   392  	f, err := os.CreateTemp("", suite.tempPrefix())
   393  	require.NoError(suite.T, err)
   394  	suite.tempFiles = append(suite.tempFiles, f)
   395  	return f
   396  }
   397  
   398  // TempDir creates a temporary directory, which will be automatically cleaned
   399  // up by the perf test suite. Directories will be prefixed with the test's
   400  // dataset ID.
   401  func (suite *PerfSuite) TempDir() string {
   402  	d, err := os.MkdirTemp("", suite.tempPrefix())
   403  	require.NoError(suite.T, err)
   404  	suite.tempDirs = append(suite.tempDirs, d)
   405  	return d
   406  }
   407  
   408  func (suite *PerfSuite) tempPrefix() string {
   409  	sep := fmt.Sprintf("%c", os.PathSeparator)
   410  	return strings.Replace(fmt.Sprintf("perf.%s.", suite.datasetID), sep, ".", -1)
   411  }
   412  
   413  // Pause pauses the test timer while fn is executing. Useful for omitting long setup code (e.g. copying files) from the test elapsed time.
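         //
         // A minimal sketch of typical use inside a test method (the copyTestdata helper
         // is hypothetical, not part of this package):
         //
         //	func (s *perfSuite) TestImport() {
         //		var r io.Reader
         //		s.Pause(func() {
         //			r = s.copyTestdata() // setup excluded from the recorded elapsed time
         //		})
         //		// ... the measured work reads from r ...
         //	}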
   414  func (suite *PerfSuite) Pause(fn func()) {
   415  	start := time.Now()
   416  	fn()
   417  	suite.paused += time.Since(start)
   418  }
   419  
    420  // OpenGlob opens all of the files that match pattern and returns them as a
    421  // []io.Reader, so the result can be passed directly to io.MultiReader.
   422  //
   423  // Large CSV files in testdata are broken up into foo.a, foo.b, etc to get
   424  // around GitHub file size restrictions.
   425  func (suite *PerfSuite) OpenGlob(pattern ...string) []io.Reader {
   426  	glob, err := filepath.Glob(path.Join(pattern...))
   427  	require.NoError(suite.T, err)
   428  
   429  	files := make([]io.Reader, len(glob))
   430  	for i, m := range glob {
   431  		f, err := os.Open(m)
   432  		require.NoError(suite.T, err)
   433  		files[i] = f
   434  	}
   435  
   436  	return files
   437  }
   438  
    439  // CloseGlob closes all of the given files; it is designed to be used with the result of OpenGlob.
   440  func (suite *PerfSuite) CloseGlob(files []io.Reader) {
   441  	for _, f := range files {
   442  		require.NoError(suite.T, f.(*os.File).Close())
   443  	}
   444  }
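
         // A small sketch of the intended OpenGlob/CloseGlob pattern, combined with
         // io.MultiReader (the "foo.*" glob is illustrative):
         //
         //	func (s *perfSuite) TestReadFoo() {
         //		files := s.OpenGlob(s.Testdata, "foo.*")
         //		defer s.CloseGlob(files)
         //		r := io.MultiReader(files...)
         //		// ... read from r ...
         //	}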
   445  
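         // callSafe invokes fun with the given args, converting a panic inside the test
         // method into a returned error rather than crashing the whole suite run.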
   446  func callSafe(name string, fun reflect.Value, args ...interface{}) (err error) {
   447  	defer func() {
   448  		if r := recover(); r != nil {
    449  			if e, ok := r.(error); ok {
         				err = e
         			} else {
         				err = fmt.Errorf("panic in %s: %v", name, r)
         			}
   450  		}
   451  	}()
   452  
   453  	funArgs := make([]reflect.Value, len(args))
   454  	for i, arg := range args {
   455  		funArgs[i] = reflect.ValueOf(arg)
   456  	}
   457  
   458  	fun.Call(funArgs)
   459  	return
   460  }
   461  
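         // getEnvironment collects a snapshot of the host environment (disk usage and
         // partitions, CPUs, memory, host info) and marshals it into a Noms value so it
         // can be recorded alongside the test results.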
   462  func (suite *PerfSuite) getEnvironment(vrw types.ValueReadWriter) types.Value {
   463  	env := environment{
   464  		DiskUsages: map[string]disk.UsageStat{},
   465  		Cpus:       map[int]cpu.InfoStat{},
   466  		Partitions: map[string]disk.PartitionStat{},
   467  	}
   468  
   469  	partitions, err := disk.Partitions(false)
   470  	require.NoError(suite.T, err)
   471  	for _, p := range partitions {
   472  		usage, err := disk.Usage(p.Mountpoint)
   473  		require.NoError(suite.T, err)
   474  		env.DiskUsages[p.Mountpoint] = *usage
   475  		env.Partitions[p.Device] = p
   476  	}
   477  
   478  	cpus, err := cpu.Info()
   479  	require.NoError(suite.T, err)
   480  	for i, c := range cpus {
   481  		env.Cpus[i] = c
   482  	}
   483  
   484  	mem, err := mem.VirtualMemory()
   485  	require.NoError(suite.T, err)
   486  	env.Mem = *mem
   487  
   488  	hostInfo, err := host.Info()
   489  	require.NoError(suite.T, err)
   490  	env.Host = *hostInfo
   491  
   492  	envStruct, err := marshal.Marshal(context.Background(), vrw, env)
   493  	require.NoError(suite.T, err)
   494  	return envStruct
   495  }
   496  
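         // getGitHead returns the HEAD commit hash of the git repository at dir, or the
         // empty string if the hash cannot be determined (e.g. dir is not a git repo).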
   497  func (suite *PerfSuite) getGitHead(dir string) string {
   498  	stdout := &bytes.Buffer{}
   499  	cmd := exec.Command("git", "rev-parse", "HEAD")
   500  	cmd.Stdout = stdout
   501  	cmd.Dir = dir
   502  	if err := cmd.Run(); err != nil {
   503  		return ""
   504  	}
   505  	return strings.TrimSpace(stdout.String())
   506  }