github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/perf/suite/suite.go

github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/store/perf/suite/suite.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  //
    15  // This file incorporates work covered by the following copyright and
    16  // permission notice:
    17  //
    18  // Copyright 2016 Attic Labs, Inc. All rights reserved.
    19  // Licensed under the Apache License, version 2.0:
    20  // http://www.apache.org/licenses/LICENSE-2.0
    21  
    22  // Package suite implements a performance test suite for Noms, intended for
    23  // measuring and reporting long running tests.
    24  //
    25  // Usage is similar to testify's suite:
    26  //  1. Define a test suite struct which inherits from suite.PerfSuite.
    27  //  2. Define methods on that struct that start with the word "Test", optionally
    28  //     followed by digits, then followed by a non-empty capitalized string.
    29  //  3. Call suite.Run with an instance of that struct.
    30  //  4. Run go test with the -perf <path to noms db> flag.
    31  //
    32  // Flags:
    33  //  -perf.mem      Backs the database by a memory store, instead of nbs.
    34  //  -perf.prefix   Gives the dataset IDs for test results a prefix.
    35  //  -perf.repeat   Sets how many times tests are repeated ("reps").
    36  //  -perf.run      Only run tests that match a regex (case insensitive).
    37  //  -perf.testdata Sets a custom path to the Noms testdata directory.
    38  //
    39  // PerfSuite also supports testify/suite style Setup/TearDown methods:
    40  //  Setup/TearDownSuite is called exactly once.
    41  //  Setup/TearDownRep   is called for each repetition of the test runs, i.e. -perf.repeat times.
    42  //  Setup/TearDownTest  is called for every test.
    43  //
    44  // Test results are written to Noms, along with a dump of the environment they were recorded in.
    45  //
    46  // Test names are derived from that "non-empty capitalized string": "Test" is omitted because it's
    47  // redundant, and leading digits are omitted to allow for manual test ordering. For example:
    48  //
    49  //  > cat ./samples/go/csv/csv-import/perf_test.go
    50  //  type perfSuite struct {
    51  //    suite.PerfSuite
    52  //  }
    53  //
    54  //  func (s *perfSuite) TestFoo() { ... }
    55  //  func (s *perfSuite) TestZoo() { ... }
    56  //  func (s *perfSuite) Test01Qux() { ... }
    57  //  func (s *perfSuite) Test02Bar() { ... }
    58  //
    59  //  func TestPerf(t *testing.T) {
    60  //    suite.Run("csv-import", t, &perfSuite{})
    61  //  }
    62  //
    63  //  > noms serve &
    64  //  > go test -v ./samples/go/csv/... -perf http://localhost:8000 -perf.repeat 3
    65  //  (perf) RUN(1/3) Test01Qux (as "Qux")
    66  //  (perf) PASS:    Test01Qux (5s, paused for 15s, total 20s)
    67  //  (perf) RUN(1/3) Test02Bar (as "Bar")
    68  //  (perf) PASS:    Test02Bar (15s, paused for 2s, total 17s)
    69  //  (perf) RUN(1/3) TestFoo (as "Foo")
    70  //  (perf) PASS:    TestFoo (10s, paused for 1s, total 11s)
    71  //  (perf) RUN(1/3) TestZoo (as "Zoo")
    72  //  (perf) PASS:    TestZoo (1s, paused for 42s, total 43s)
    73  //  ...
    74  //
    75  //  > noms show http://localhost:8000::csv-import
    76  //  {
    77  //    environment: ...
    78  //    reps: [{
    79  //      "Bar": {elapsed: 15s, paused: 2s,  total: 17s},
    80  //      "Foo": {elapsed: 10s, paused: 1s,  total: 11s},
    81  //      "Qux": {elapsed: 5s,  paused: 15s, total: 20s},
    82  //      "Zoo": {elapsed: 1s,  paused: 42s, total: 43s},
    83  //    }, ...]
    84  //    ...
    85  //  }
    86  package suite
    87  
    88  import (
    89  	"bytes"
    90  	"context"
    91  	"flag"
    92  	"fmt"
    93  	"io"
    94  	"io/ioutil"
    95  	"os"
    96  	"os/exec"
    97  	"path"
    98  	"path/filepath"
    99  	"reflect"
   100  	"regexp"
   101  	"strings"
   102  	"testing"
   103  	"time"
   104  
   105  	"github.com/google/uuid"
   106  	"github.com/shirou/gopsutil/cpu"
   107  	"github.com/shirou/gopsutil/disk"
   108  	"github.com/shirou/gopsutil/host"
   109  	"github.com/shirou/gopsutil/mem"
   110  	"github.com/stretchr/testify/assert"
   111  	"github.com/stretchr/testify/require"
   112  	testifySuite "github.com/stretchr/testify/suite"
   113  
   114  	"github.com/dolthub/dolt/go/libraries/utils/osutil"
   115  	"github.com/dolthub/dolt/go/store/chunks"
   116  	"github.com/dolthub/dolt/go/store/datas"
   117  	"github.com/dolthub/dolt/go/store/marshal"
   118  	"github.com/dolthub/dolt/go/store/spec"
   119  	"github.com/dolthub/dolt/go/store/types"
   120  )
   121  
   122  var (
   123  	perfFlag         = flag.String("perf", "", "The database to write perf tests to. If this isn't specified, perf tests are skipped. If you want a dry run, use \"mem\" as a database")
   124  	perfMemFlag      = flag.Bool("perf.mem", false, "Back the test database by a memory store, not nbs. This will affect test timing, but it's provided in case you're low on disk space")
   125  	perfPrefixFlag   = flag.String("perf.prefix", "", `Prefix for the dataset IDs where results are written. For example, a prefix of "foo/" will write test datasets like "foo/csv-import" instead of just "csv-import"`)
   126  	perfRepeatFlag   = flag.Int("perf.repeat", 1, "The number of times to repeat each perf test")
   127  	perfRunFlag      = flag.String("perf.run", "", "Only run perf tests that match a regular expression")
   128  	perfTestdataFlag = flag.String("perf.testdata", "", "Path to the noms testdata directory. By default this is ../testdata relative to the noms directory")
   129  	testNamePattern  = regexp.MustCompile("^Test[0-9]*([A-Z].*$)")
   130  )
   131  
// PerfSuite is the core of the perf testing suite. See package documentation for details.
//
// Run populates T, W, AtticLabs and Testdata before SetupSuite is called, and
// Database and DatabaseSpec at the start of every repetition (before SetupRep).
type PerfSuite struct {
	// T is the testing.T instance set when the suite is passed into Run.
	T *testing.T

	// W is the io.Writer to write test output, which only outputs if the verbose flag is set.
	W io.Writer

	// AtticLabs is the path of the suite's attic-labs directory. Run sets it to a
	// UUID-named path under os.TempDir() ("<tmp>/attic-labs/noms/suite/<uuid>");
	// Run itself does not create that directory.
	AtticLabs string

	// Testdata is the path to the testdata directory: the value of the
	// -perf.testdata flag if given, otherwise "testdata" under AtticLabs.
	Testdata string

	// Database is a Noms database that tests can use for reading and writing. Run
	// creates a new memory-backed database for each repetition, so state is shared
	// by the tests of one repetition but not carried over to the next.
	Database datas.Database

	// DatabaseSpec is the Noms spec of Database; Run sets it to "mem://".
	DatabaseSpec string

	// tempFiles and tempDirs record everything handed out by TempFile and
	// TempDir so that Run can remove them when it returns.
	tempFiles []*os.File
	tempDirs  []string
	// paused accumulates time spent inside Pause; Run resets it before each test.
	paused    time.Duration
	// datasetID is the ID passed to Run; it is used to build temp file prefixes.
	datasetID string
}
   157  
// SetupRepSuite has a SetupRep method, which runs every repetition of the test, i.e. -perf.repeat times in total.
// Run calls SetupRep after the repetition's Database has been created and
// before any of that repetition's tests.
type SetupRepSuite interface {
	SetupRep()
}
   162  
// TearDownRepSuite has a TearDownRep method, which runs every repetition of the test, i.e. -perf.repeat times in total.
// Run calls TearDownRep after the last test of the repetition has finished.
type TearDownRepSuite interface {
	TearDownRep()
}
   167  
// perfSuiteT is the only requirement Run places on a suite: it must expose its
// underlying *PerfSuite. *PerfSuite provides Suite itself, so a pointer to a
// struct that embeds PerfSuite satisfies the interface.
type perfSuiteT interface {
	Suite() *PerfSuite
}
   171  
// environment is the snapshot of the host machine that getEnvironment gathers
// and marshals into the record committed by Run.
type environment struct {
	// DiskUsages is keyed by partition mountpoint.
	DiskUsages map[string]disk.UsageStat
	// Cpus is keyed by the entry's index in the slice returned by cpu.Info.
	Cpus       map[int]cpu.InfoStat
	Mem        mem.VirtualMemoryStat
	Host       host.InfoStat
	// Partitions is keyed by partition device.
	Partitions map[string]disk.PartitionStat
}
   179  
   180  type timeInfo struct {
   181  	elapsed, paused, total time.Duration
   182  }
   183  
   184  type testRep map[string]timeInfo
   185  
   186  type nopWriter struct{}
   187  
   188  func (r nopWriter) Write(p []byte) (int, error) {
   189  	return len(p), nil
   190  }
   191  
   192  // Run runs suiteT and writes results to dataset datasetID in the database given by the -perf command line flag.
   193  func Run(datasetID string, t *testing.T, suiteT perfSuiteT) {
   194  	t.Skip()
   195  	assert := assert.New(t)
   196  
   197  	if !assert.NotEqual("", datasetID) {
   198  		return
   199  	}
   200  
   201  	// Piggy-back off the go test -v flag.
   202  	verboseFlag := flag.Lookup("test.v")
   203  	assert.NotNil(verboseFlag)
   204  	verbose := verboseFlag.Value.(flag.Getter).Get().(bool)
   205  
   206  	if *perfFlag == "" {
   207  		if verbose {
   208  			fmt.Printf("(perf) Skipping %s, -perf flag not set\n", datasetID)
   209  		}
   210  		return
   211  	}
   212  
   213  	suite := suiteT.Suite()
   214  	suite.T = t
   215  	if verbose {
   216  		suite.W = os.Stdout
   217  	} else {
   218  		suite.W = nopWriter{}
   219  	}
   220  
   221  	id, _ := uuid.NewUUID()
   222  	suite.AtticLabs = filepath.Join(os.TempDir(), "attic-labs", "noms", "suite", id.String())
   223  	suite.Testdata = *perfTestdataFlag
   224  	if suite.Testdata == "" {
   225  		suite.Testdata = filepath.Join(suite.AtticLabs, "testdata")
   226  	}
   227  
   228  	// Clean up temporary directories/files last.
   229  	defer func() {
   230  		for _, f := range suite.tempFiles {
   231  			f.Close()
   232  			os.Remove(f.Name())
   233  		}
   234  		for _, d := range suite.tempDirs {
   235  			os.RemoveAll(d)
   236  		}
   237  	}()
   238  
   239  	suite.datasetID = datasetID
   240  
   241  	// This is the database the perf test results are written to.
   242  	sp, err := spec.ForDatabase(*perfFlag)
   243  	if !assert.NoError(err) {
   244  		return
   245  	}
   246  	defer sp.Close()
   247  
   248  	// List of test runs, each a map of test name => timing info.
   249  	testReps := make([]testRep, *perfRepeatFlag)
   250  
   251  	// Note: the default value of perfRunFlag is "", which is actually a valid
   252  	// regular expression that matches everything.
   253  	perfRunRe, err := regexp.Compile("(?i)" + *perfRunFlag)
   254  	if !assert.NoError(err, `Invalid regular expression "%s"`, *perfRunFlag) {
   255  		return
   256  	}
   257  
   258  	defer func() {
   259  		db := sp.GetDatabase(context.Background())
   260  
   261  		reps := make([]types.Value, *perfRepeatFlag)
   262  		for i, rep := range testReps {
   263  			timesSlice := types.ValueSlice{}
   264  			for name, info := range rep {
   265  				st, err := types.NewStruct(db.Format(), "", types.StructData{
   266  					"elapsed": types.Float(info.elapsed.Nanoseconds()),
   267  					"paused":  types.Float(info.paused.Nanoseconds()),
   268  					"total":   types.Float(info.total.Nanoseconds()),
   269  				})
   270  
   271  				require.NoError(t, err)
   272  				timesSlice = append(timesSlice, types.String(name), st)
   273  			}
   274  			reps[i], err = types.NewMap(context.Background(), db, timesSlice...)
   275  		}
   276  
   277  		l, err := types.NewList(context.Background(), db, reps...)
   278  		require.NoError(t, err)
   279  		record, err := types.NewStruct(db.Format(), "", map[string]types.Value{
   280  			"environment":      suite.getEnvironment(db),
   281  			"nomsRevision":     types.String(suite.getGitHead(path.Join(suite.AtticLabs, "noms"))),
   282  			"testdataRevision": types.String(suite.getGitHead(suite.Testdata)),
   283  			"reps":             l,
   284  		})
   285  		require.NoError(t, err)
   286  
   287  		ds, err := db.GetDataset(context.Background(), *perfPrefixFlag+datasetID)
   288  		require.NoError(t, err)
   289  		_, err = db.CommitValue(context.Background(), ds, record)
   290  		require.NoError(t, err)
   291  	}()
   292  
   293  	if t, ok := suiteT.(testifySuite.SetupAllSuite); ok {
   294  		t.SetupSuite()
   295  	}
   296  
   297  	for repIdx := 0; repIdx < *perfRepeatFlag; repIdx++ {
   298  		testReps[repIdx] = testRep{}
   299  
   300  		storage := &chunks.MemoryStorage{}
   301  		memCS := storage.NewView()
   302  		suite.DatabaseSpec = "mem://"
   303  		suite.Database = datas.NewDatabase(memCS)
   304  		defer suite.Database.Close()
   305  
   306  		if t, ok := suiteT.(SetupRepSuite); ok {
   307  			t.SetupRep()
   308  		}
   309  
   310  		for t, mIdx := reflect.TypeOf(suiteT), 0; mIdx < t.NumMethod(); mIdx++ {
   311  			m := t.Method(mIdx)
   312  
   313  			parts := testNamePattern.FindStringSubmatch(m.Name)
   314  			if parts == nil {
   315  				continue
   316  			}
   317  
   318  			recordName := parts[1]
   319  			if !perfRunRe.MatchString(recordName) && !perfRunRe.MatchString(m.Name) {
   320  				continue
   321  			}
   322  
   323  			if _, ok := testReps[repIdx][recordName]; ok {
   324  				assert.Fail(`Multiple tests are named "%s"`, recordName)
   325  				continue
   326  			}
   327  
   328  			if verbose {
   329  				fmt.Printf("(perf) RUN(%d/%d) %s (as \"%s\")\n", repIdx+1, *perfRepeatFlag, m.Name, recordName)
   330  			}
   331  
   332  			if t, ok := suiteT.(testifySuite.SetupTestSuite); ok {
   333  				t.SetupTest()
   334  			}
   335  
   336  			start := time.Now()
   337  			suite.paused = 0
   338  
   339  			err := callSafe(m.Name, m.Func, suiteT)
   340  
   341  			total := time.Since(start)
   342  			elapsed := total - suite.paused
   343  
   344  			if verbose && err == nil {
   345  				fmt.Printf("(perf) PASS:    %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total)
   346  			} else if err != nil {
   347  				fmt.Printf("(perf) FAIL:    %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total)
   348  				fmt.Println(err)
   349  			}
   350  
   351  			if osutil.IsWindows && elapsed == 0 {
   352  				elapsed = 1
   353  				total = 1
   354  			}
   355  			testReps[repIdx][recordName] = timeInfo{elapsed, suite.paused, total}
   356  
   357  			if t, ok := suiteT.(testifySuite.TearDownTestSuite); ok {
   358  				t.TearDownTest()
   359  			}
   360  		}
   361  
   362  		if t, ok := suiteT.(TearDownRepSuite); ok {
   363  			t.TearDownRep()
   364  		}
   365  	}
   366  
   367  	if t, ok := suiteT.(testifySuite.TearDownAllSuite); ok {
   368  		t.TearDownSuite()
   369  	}
   370  }
   371  
// Suite returns the receiver. It implements perfSuiteT, which is how Run gets
// at the PerfSuite inside whatever suite it is handed.
func (suite *PerfSuite) Suite() *PerfSuite {
	return suite
}
   375  
// NewAssert returns the assert.Assertions instance for this test.
// A new instance bound to suite.T is created on every call.
func (suite *PerfSuite) NewAssert() *assert.Assertions {
	return assert.New(suite.T)
}
   380  
   381  // TempFile creates a temporary file, which will be automatically cleaned up by
   382  // the perf test suite. Files will be prefixed with the test's dataset ID
   383  func (suite *PerfSuite) TempFile() *os.File {
   384  	f, err := ioutil.TempFile("", suite.tempPrefix())
   385  	require.NoError(suite.T, err)
   386  	suite.tempFiles = append(suite.tempFiles, f)
   387  	return f
   388  }
   389  
   390  // TempDir creates a temporary directory, which will be automatically cleaned
   391  // up by the perf test suite. Directories will be prefixed with the test's
   392  // dataset ID.
   393  func (suite *PerfSuite) TempDir() string {
   394  	d, err := ioutil.TempDir("", suite.tempPrefix())
   395  	require.NoError(suite.T, err)
   396  	suite.tempDirs = append(suite.tempDirs, d)
   397  	return d
   398  }
   399  
   400  func (suite *PerfSuite) tempPrefix() string {
   401  	sep := fmt.Sprintf("%c", os.PathSeparator)
   402  	return strings.Replace(fmt.Sprintf("perf.%s.", suite.datasetID), sep, ".", -1)
   403  }
   404  
   405  // Pause pauses the test timer while fn is executing. Useful for omitting long setup code (e.g. copying files) from the test elapsed time.
   406  func (suite *PerfSuite) Pause(fn func()) {
   407  	start := time.Now()
   408  	fn()
   409  	suite.paused += time.Since(start)
   410  }
   411  
   412  // OpenGlob opens the concatenation of all files that match pattern, returned
   413  // as []io.Reader so it can be used immediately with io.MultiReader.
   414  //
   415  // Large CSV files in testdata are broken up into foo.a, foo.b, etc to get
   416  // around GitHub file size restrictions.
   417  func (suite *PerfSuite) OpenGlob(pattern ...string) []io.Reader {
   418  	glob, err := filepath.Glob(path.Join(pattern...))
   419  	require.NoError(suite.T, err)
   420  
   421  	files := make([]io.Reader, len(glob))
   422  	for i, m := range glob {
   423  		f, err := os.Open(m)
   424  		require.NoError(suite.T, err)
   425  		files[i] = f
   426  	}
   427  
   428  	return files
   429  }
   430  
   431  // CloseGlob closes all of the files, designed to be used with OpenGlob.
   432  func (suite *PerfSuite) CloseGlob(files []io.Reader) {
   433  	for _, f := range files {
   434  		require.NoError(suite.T, f.(*os.File).Close())
   435  	}
   436  }
   437  
   438  func callSafe(name string, fun reflect.Value, args ...interface{}) (err error) {
   439  	defer func() {
   440  		if r := recover(); r != nil {
   441  			err = r.(error)
   442  		}
   443  	}()
   444  
   445  	funArgs := make([]reflect.Value, len(args))
   446  	for i, arg := range args {
   447  		funArgs[i] = reflect.ValueOf(arg)
   448  	}
   449  
   450  	fun.Call(funArgs)
   451  	return
   452  }
   453  
   454  func (suite *PerfSuite) getEnvironment(vrw types.ValueReadWriter) types.Value {
   455  	env := environment{
   456  		DiskUsages: map[string]disk.UsageStat{},
   457  		Cpus:       map[int]cpu.InfoStat{},
   458  		Partitions: map[string]disk.PartitionStat{},
   459  	}
   460  
   461  	partitions, err := disk.Partitions(false)
   462  	require.NoError(suite.T, err)
   463  	for _, p := range partitions {
   464  		usage, err := disk.Usage(p.Mountpoint)
   465  		require.NoError(suite.T, err)
   466  		env.DiskUsages[p.Mountpoint] = *usage
   467  		env.Partitions[p.Device] = p
   468  	}
   469  
   470  	cpus, err := cpu.Info()
   471  	require.NoError(suite.T, err)
   472  	for i, c := range cpus {
   473  		env.Cpus[i] = c
   474  	}
   475  
   476  	mem, err := mem.VirtualMemory()
   477  	require.NoError(suite.T, err)
   478  	env.Mem = *mem
   479  
   480  	hostInfo, err := host.Info()
   481  	require.NoError(suite.T, err)
   482  	env.Host = *hostInfo
   483  
   484  	envStruct, err := marshal.Marshal(context.Background(), vrw, env)
   485  	require.NoError(suite.T, err)
   486  	return envStruct
   487  }
   488  
   489  func (suite *PerfSuite) getGitHead(dir string) string {
   490  	stdout := &bytes.Buffer{}
   491  	cmd := exec.Command("git", "rev-parse", "HEAD")
   492  	cmd.Stdout = stdout
   493  	cmd.Dir = dir
   494  	if err := cmd.Run(); err != nil {
   495  		return ""
   496  	}
   497  	return strings.TrimSpace(stdout.String())
   498  }