github.com/attic-labs/noms@v0.0.0-20210827224422-e5fa29d95e8b/go/perf/suite/suite.go (about)

     1  // Copyright 2016 Attic Labs, Inc. All rights reserved.
     2  // Licensed under the Apache License, version 2.0:
     3  // http://www.apache.org/licenses/LICENSE-2.0
     4  
     5  // Package suite implements a performance test suite for Noms, intended for
     6  // measuring and reporting long running tests.
     7  //
     8  // Usage is similar to testify's suite:
     9  //  1. Define a test suite struct which inherits from suite.PerfSuite.
    10  //  2. Define methods on that struct that start with the word "Test", optionally
     11  //     followed by digits, then followed by a non-empty capitalized string.
    12  //  3. Call suite.Run with an instance of that struct.
    13  //  4. Run go test with the -perf <path to noms db> flag.
    14  //
    15  // Flags:
    16  //  -perf.mem      Backs the database by a memory store, instead of nbs.
    17  //  -perf.prefix   Gives the dataset IDs for test results a prefix.
    18  //  -perf.repeat   Sets how many times tests are repeated ("reps").
    19  //  -perf.run      Only run tests that match a regex (case insensitive).
    20  //  -perf.testdata Sets a custom path to the Noms testdata directory.
    21  //
    22  // PerfSuite also supports testify/suite style Setup/TearDown methods:
    23  //  Setup/TearDownSuite is called exactly once.
    24  //  Setup/TearDownRep   is called for each repetition of the test runs, i.e. -perf.repeat times.
    25  //  Setup/TearDownTest  is called for every test.
    26  //
    27  // Test results are written to Noms, along with a dump of the environment they were recorded in.
    28  //
    29  // Test names are derived from that "non-empty capitalized string": "Test" is omitted because it's
    30  // redundant, and leading digits are omitted to allow for manual test ordering. For example:
    31  //
    32  //  > cat ./samples/go/csv/csv-import/perf_test.go
     33  //  type perfSuite struct {
    34  //    suite.PerfSuite
    35  //  }
    36  //
    37  //  func (s *perfSuite) TestFoo() { ... }
    38  //  func (s *perfSuite) TestZoo() { ... }
    39  //  func (s *perfSuite) Test01Qux() { ... }
    40  //  func (s *perfSuite) Test02Bar() { ... }
    41  //
    42  //  func TestPerf(t *testing.T) {
    43  //    suite.Run("csv-import", t, &perfSuite{})
    44  //  }
    45  //
    46  //  > noms serve &
    47  //  > go test -v ./samples/go/csv/... -perf http://localhost:8000 -perf.repeat 3
     48  //  (perf) RUN(1/3) Test01Qux (as "Qux")
     49  //  (perf) PASS:    Test01Qux (5s, paused for 15s, total 20s)
     50  //  (perf) RUN(1/3) Test02Bar (as "Bar")
     51  //  (perf) PASS:    Test02Bar (15s, paused for 2s, total 17s)
     52  //  (perf) RUN(1/3) TestFoo (as "Foo")
     53  //  (perf) PASS:    TestFoo (10s, paused for 1s, total 11s)
     54  //  (perf) RUN(1/3) TestZoo (as "Zoo")
     55  //  (perf) PASS:    TestZoo (1s, paused for 42s, total 43s)
    56  //  ...
    57  //
    58  //  > noms show http://localhost:8000::csv-import
    59  //  {
    60  //    environment: ...
     61  //    reps: [{
    62  //      "Bar": {elapsed: 15s, paused: 2s,  total: 17s},
    63  //      "Foo": {elapsed: 10s, paused: 1s,  total: 11s},
    64  //      "Qux": {elapsed: 5s,  paused: 15s, total: 20s},
    65  //      "Zoo": {elapsed: 1s,  paused: 42s, total: 43s},
    66  //    }, ...]
    67  //    ...
    68  //  }
    69  package suite
    70  
    71  import (
    72  	"bytes"
    73  	"flag"
    74  	"fmt"
    75  	"io"
    76  	"io/ioutil"
    77  	"os"
    78  	"os/exec"
    79  	"path"
    80  	"path/filepath"
    81  	"reflect"
    82  	"regexp"
    83  	"strings"
    84  	"testing"
    85  	"time"
    86  
    87  	"github.com/attic-labs/noms/go/chunks"
    88  	"github.com/attic-labs/noms/go/d"
    89  	"github.com/attic-labs/noms/go/datas"
    90  	"github.com/attic-labs/noms/go/marshal"
    91  	"github.com/attic-labs/noms/go/nbs"
    92  	"github.com/attic-labs/noms/go/spec"
    93  	"github.com/attic-labs/noms/go/types"
    94  	"github.com/shirou/gopsutil/cpu"
    95  	"github.com/shirou/gopsutil/disk"
    96  	"github.com/shirou/gopsutil/host"
    97  	"github.com/shirou/gopsutil/mem"
    98  	"github.com/stretchr/testify/assert"
    99  	testifySuite "github.com/stretchr/testify/suite"
   100  )
   101  
   102  var (
   103  	perfFlag         = flag.String("perf", "", "The database to write perf tests to. If this isn't specified, perf tests are skipped. If you want a dry run, use \"mem\" as a database")
   104  	perfMemFlag      = flag.Bool("perf.mem", false, "Back the test database by a memory store, not nbs. This will affect test timing, but it's provided in case you're low on disk space")
   105  	perfPrefixFlag   = flag.String("perf.prefix", "", `Prefix for the dataset IDs where results are written. For example, a prefix of "foo/" will write test datasets like "foo/csv-import" instead of just "csv-import"`)
   106  	perfRepeatFlag   = flag.Int("perf.repeat", 1, "The number of times to repeat each perf test")
   107  	perfRunFlag      = flag.String("perf.run", "", "Only run perf tests that match a regular expression")
   108  	perfTestdataFlag = flag.String("perf.testdata", "", "Path to the noms testdata directory. By default this is ../testdata relative to the noms directory")
   109  	testNamePattern  = regexp.MustCompile("^Test[0-9]*([A-Z].*$)")
   110  )
   111  
// PerfSuite is the core of the perf testing suite. See package documentation for details.
//
// The exported fields are populated by Run before any test or setup method is
// called.
type PerfSuite struct {
	// T is the testing.T instance set when the suite is passed into Run.
	T *testing.T

	// W is the io.Writer to write test output, which only outputs if the verbose flag is set.
	W io.Writer

	// AtticLabs is the path to the attic-labs directory (e.g. /path/to/go/src/github.com/attic-labs).
	AtticLabs string

	// Testdata is the path to the testdata directory - typically /path/to/go/src/github.com/attic-labs/testdata, but it can be overridden with the -perf.testdata flag.
	Testdata string

	// Database is a Noms database that tests can use for reading and writing. Run creates a new one for every repetition.
	Database datas.Database

	// DatabaseSpec is the Noms spec of Database (typically a localhost URL).
	DatabaseSpec string

	// tempFiles and tempDirs are the files/directories handed out by TempFile
	// and TempDir; Run removes them when it returns.
	tempFiles []*os.File
	tempDirs  []string
	// paused accumulates the time spent inside Pause during the current test.
	paused time.Duration
	// datasetID is the dataset ID passed to Run.
	datasetID string
}
   137  
// SetupRepSuite is implemented by suites that need per-repetition setup. Run
// calls SetupRep once at the start of every repetition, i.e. -perf.repeat
// times in total, before any of that repetition's tests.
type SetupRepSuite interface {
	SetupRep()
}
   142  
// TearDownRepSuite is implemented by suites that need per-repetition cleanup.
// Run calls TearDownRep once at the end of every repetition, i.e. -perf.repeat
// times in total, after all of that repetition's tests.
type TearDownRepSuite interface {
	TearDownRep()
}
   147  
// perfSuiteT is the kind of suite accepted by Run. Suite returns the
// PerfSuite whose fields Run populates and uses for bookkeeping.
type perfSuiteT interface {
	Suite() *PerfSuite
}
   151  
// environment describes the machine the tests ran on. getEnvironment fills
// it in and marshals it into the record committed by Run.
type environment struct {
	// DiskUsages holds disk usage keyed by mountpoint.
	DiskUsages map[string]disk.UsageStat
	// Cpus holds CPU info keyed by its index in the list reported by cpu.Info.
	Cpus map[int]cpu.InfoStat
	// Mem holds virtual memory statistics.
	Mem mem.VirtualMemoryStat
	// Host holds host information.
	Host host.InfoStat
	// Partitions holds disk partitions keyed by device.
	Partitions map[string]disk.PartitionStat
}
   159  
   160  type timeInfo struct {
   161  	elapsed, paused, total time.Duration
   162  }
   163  
   164  type testRep map[string]timeInfo
   165  
   166  type nopWriter struct{}
   167  
   168  func (r nopWriter) Write(p []byte) (int, error) {
   169  	return len(p), nil
   170  }
   171  
   172  // Run runs suiteT and writes results to dataset datasetID in the database given by the -perf command line flag.
   173  func Run(datasetID string, t *testing.T, suiteT perfSuiteT) {
   174  	assert := assert.New(t)
   175  
   176  	if !assert.NotEqual("", datasetID) {
   177  		return
   178  	}
   179  
   180  	// Piggy-back off the go test -v flag.
   181  	verboseFlag := flag.Lookup("test.v")
   182  	assert.NotNil(verboseFlag)
   183  	verbose := verboseFlag.Value.(flag.Getter).Get().(bool)
   184  
   185  	if *perfFlag == "" {
   186  		if verbose {
   187  			fmt.Printf("(perf) Skipping %s, -perf flag not set\n", datasetID)
   188  		}
   189  		return
   190  	}
   191  
   192  	suite := suiteT.Suite()
   193  	suite.T = t
   194  	if verbose {
   195  		suite.W = os.Stdout
   196  	} else {
   197  		suite.W = nopWriter{}
   198  	}
   199  
   200  	gopath := os.Getenv("GOPATH")
   201  	if !assert.NotEmpty(gopath) {
   202  		return
   203  	}
   204  	suite.AtticLabs = path.Join(gopath, "src", "github.com", "attic-labs")
   205  	suite.Testdata = *perfTestdataFlag
   206  	if suite.Testdata == "" {
   207  		suite.Testdata = path.Join(suite.AtticLabs, "testdata")
   208  	}
   209  
   210  	// Clean up temporary directories/files last.
   211  	defer func() {
   212  		for _, f := range suite.tempFiles {
   213  			os.Remove(f.Name())
   214  		}
   215  		for _, d := range suite.tempDirs {
   216  			os.RemoveAll(d)
   217  		}
   218  	}()
   219  
   220  	suite.datasetID = datasetID
   221  
   222  	// This is the database the perf test results are written to.
   223  	sp, err := spec.ForDatabase(*perfFlag)
   224  	if !assert.NoError(err) {
   225  		return
   226  	}
   227  	defer sp.Close()
   228  
   229  	// List of test runs, each a map of test name => timing info.
   230  	testReps := make([]testRep, *perfRepeatFlag)
   231  
   232  	// Note: the default value of perfRunFlag is "", which is actually a valid
   233  	// regular expression that matches everything.
   234  	perfRunRe, err := regexp.Compile("(?i)" + *perfRunFlag)
   235  	if !assert.NoError(err, `Invalid regular expression "%s"`, *perfRunFlag) {
   236  		return
   237  	}
   238  
   239  	defer func() {
   240  		db := sp.GetDatabase()
   241  
   242  		reps := make([]types.Value, *perfRepeatFlag)
   243  		for i, rep := range testReps {
   244  			timesSlice := types.ValueSlice{}
   245  			for name, info := range rep {
   246  				timesSlice = append(timesSlice, types.String(name), types.NewStruct("", types.StructData{
   247  					"elapsed": types.Number(info.elapsed.Nanoseconds()),
   248  					"paused":  types.Number(info.paused.Nanoseconds()),
   249  					"total":   types.Number(info.total.Nanoseconds()),
   250  				}))
   251  			}
   252  			reps[i] = types.NewMap(db, timesSlice...)
   253  		}
   254  
   255  		record := types.NewStruct("", map[string]types.Value{
   256  			"environment":      suite.getEnvironment(db),
   257  			"nomsRevision":     types.String(suite.getGitHead(path.Join(suite.AtticLabs, "noms"))),
   258  			"testdataRevision": types.String(suite.getGitHead(suite.Testdata)),
   259  			"reps":             types.NewList(db, reps...),
   260  		})
   261  
   262  		ds := db.GetDataset(*perfPrefixFlag + datasetID)
   263  		_, err := db.CommitValue(ds, record)
   264  		assert.NoError(err)
   265  	}()
   266  
   267  	if t, ok := suiteT.(testifySuite.SetupAllSuite); ok {
   268  		t.SetupSuite()
   269  	}
   270  
   271  	for repIdx := 0; repIdx < *perfRepeatFlag; repIdx++ {
   272  		testReps[repIdx] = testRep{}
   273  
   274  		serverHost, stopServerFn := suite.StartRemoteDatabase()
   275  		suite.DatabaseSpec = serverHost
   276  		suite.Database = datas.NewDatabase(datas.NewHTTPChunkStore(serverHost, ""))
   277  		defer suite.Database.Close()
   278  
   279  		if t, ok := suiteT.(SetupRepSuite); ok {
   280  			t.SetupRep()
   281  		}
   282  
   283  		for t, mIdx := reflect.TypeOf(suiteT), 0; mIdx < t.NumMethod(); mIdx++ {
   284  			m := t.Method(mIdx)
   285  
   286  			parts := testNamePattern.FindStringSubmatch(m.Name)
   287  			if parts == nil {
   288  				continue
   289  			}
   290  
   291  			recordName := parts[1]
   292  			if !perfRunRe.MatchString(recordName) && !perfRunRe.MatchString(m.Name) {
   293  				continue
   294  			}
   295  
   296  			if _, ok := testReps[repIdx][recordName]; ok {
   297  				assert.Fail(`Multiple tests are named "%s"`, recordName)
   298  				continue
   299  			}
   300  
   301  			if verbose {
   302  				fmt.Printf("(perf) RUN(%d/%d) %s (as \"%s\")\n", repIdx+1, *perfRepeatFlag, m.Name, recordName)
   303  			}
   304  
   305  			if t, ok := suiteT.(testifySuite.SetupTestSuite); ok {
   306  				t.SetupTest()
   307  			}
   308  
   309  			start := time.Now()
   310  			suite.paused = 0
   311  
   312  			err := callSafe(m.Name, m.Func, suiteT)
   313  
   314  			total := time.Since(start)
   315  			elapsed := total - suite.paused
   316  
   317  			if verbose && err == nil {
   318  				fmt.Printf("(perf) PASS:    %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total)
   319  			} else if err != nil {
   320  				fmt.Printf("(perf) FAIL:    %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total)
   321  				fmt.Println(err)
   322  			}
   323  
   324  			testReps[repIdx][recordName] = timeInfo{elapsed, suite.paused, total}
   325  
   326  			if t, ok := suiteT.(testifySuite.TearDownTestSuite); ok {
   327  				t.TearDownTest()
   328  			}
   329  		}
   330  
   331  		if t, ok := suiteT.(TearDownRepSuite); ok {
   332  			t.TearDownRep()
   333  		}
   334  
   335  		stopServerFn()
   336  	}
   337  
   338  	if t, ok := suiteT.(testifySuite.TearDownAllSuite); ok {
   339  		t.TearDownSuite()
   340  	}
   341  }
   342  
// Suite returns the receiver. It is the method required by perfSuiteT, which
// is how Run obtains the PerfSuite of the suite it is given.
func (suite *PerfSuite) Suite() *PerfSuite {
	return suite
}
   346  
// NewAssert returns a new assert.Assertions instance that reports failures to
// suite.T.
func (suite *PerfSuite) NewAssert() *assert.Assertions {
	return assert.New(suite.T)
}
   351  
   352  // TempFile creates a temporary file, which will be automatically cleaned up by
   353  // the perf test suite. Files will be prefixed with the test's dataset ID
   354  func (suite *PerfSuite) TempFile() *os.File {
   355  	f, err := ioutil.TempFile("", suite.tempPrefix())
   356  	assert.NoError(suite.T, err)
   357  	suite.tempFiles = append(suite.tempFiles, f)
   358  	return f
   359  }
   360  
   361  // TempDir creates a temporary directory, which will be automatically cleaned
   362  // up by the perf test suite. Directories will be prefixed with the test's
   363  // dataset ID.
   364  func (suite *PerfSuite) TempDir() string {
   365  	d, err := ioutil.TempDir("", suite.tempPrefix())
   366  	assert.NoError(suite.T, err)
   367  	suite.tempDirs = append(suite.tempDirs, d)
   368  	return d
   369  }
   370  
   371  func (suite *PerfSuite) tempPrefix() string {
   372  	sep := fmt.Sprintf("%c", os.PathSeparator)
   373  	return strings.Replace(fmt.Sprintf("perf.%s.", suite.datasetID), sep, ".", -1)
   374  }
   375  
   376  // Pause pauses the test timer while fn is executing. Useful for omitting long setup code (e.g. copying files) from the test elapsed time.
   377  func (suite *PerfSuite) Pause(fn func()) {
   378  	start := time.Now()
   379  	fn()
   380  	suite.paused += time.Since(start)
   381  }
   382  
   383  // OpenGlob opens the concatenation of all files that match pattern, returned
   384  // as []io.Reader so it can be used immediately with io.MultiReader.
   385  //
   386  // Large CSV files in testdata are broken up into foo.a, foo.b, etc to get
   387  // around GitHub file size restrictions.
   388  func (suite *PerfSuite) OpenGlob(pattern ...string) []io.Reader {
   389  	assert := suite.NewAssert()
   390  
   391  	glob, err := filepath.Glob(path.Join(pattern...))
   392  	assert.NoError(err)
   393  
   394  	files := make([]io.Reader, len(glob))
   395  	for i, m := range glob {
   396  		f, err := os.Open(m)
   397  		assert.NoError(err)
   398  		files[i] = f
   399  	}
   400  
   401  	return files
   402  }
   403  
   404  // CloseGlob closes all of the files, designed to be used with OpenGlob.
   405  func (suite *PerfSuite) CloseGlob(files []io.Reader) {
   406  	assert := suite.NewAssert()
   407  	for _, f := range files {
   408  		assert.NoError(f.(*os.File).Close())
   409  	}
   410  }
   411  
   412  func callSafe(name string, fun reflect.Value, args ...interface{}) error {
   413  	funArgs := make([]reflect.Value, len(args))
   414  	for i, arg := range args {
   415  		funArgs[i] = reflect.ValueOf(arg)
   416  	}
   417  	return d.Try(func() {
   418  		fun.Call(funArgs)
   419  	})
   420  }
   421  
   422  func (suite *PerfSuite) getEnvironment(vrw types.ValueReadWriter) types.Value {
   423  	assert := suite.NewAssert()
   424  
   425  	env := environment{
   426  		DiskUsages: map[string]disk.UsageStat{},
   427  		Cpus:       map[int]cpu.InfoStat{},
   428  		Partitions: map[string]disk.PartitionStat{},
   429  	}
   430  
   431  	partitions, err := disk.Partitions(false)
   432  	assert.NoError(err)
   433  	for _, p := range partitions {
   434  		usage, err := disk.Usage(p.Mountpoint)
   435  		assert.NoError(err)
   436  		env.DiskUsages[p.Mountpoint] = *usage
   437  		env.Partitions[p.Device] = p
   438  	}
   439  
   440  	cpus, err := cpu.Info()
   441  	assert.NoError(err)
   442  	for i, c := range cpus {
   443  		env.Cpus[i] = c
   444  	}
   445  
   446  	mem, err := mem.VirtualMemory()
   447  	assert.NoError(err)
   448  	env.Mem = *mem
   449  
   450  	hostInfo, err := host.Info()
   451  	assert.NoError(err)
   452  	env.Host = *hostInfo
   453  
   454  	envStruct, err := marshal.Marshal(vrw, env)
   455  	assert.NoError(err)
   456  	return envStruct
   457  }
   458  
   459  func (suite *PerfSuite) getGitHead(dir string) string {
   460  	stdout := &bytes.Buffer{}
   461  	cmd := exec.Command("git", "rev-parse", "HEAD")
   462  	cmd.Stdout = stdout
   463  	cmd.Dir = dir
   464  	if err := cmd.Run(); err != nil {
   465  		return ""
   466  	}
   467  	return strings.TrimSpace(stdout.String())
   468  }
   469  
   470  // StartRemoteDatabase creates a new remote database on an arbitrary free port,
   471  // running on a separate goroutine. Returns the hostname that that database was
   472  // started on, and a callback to run to shut down the server.
   473  //
   474  // If the -perf.mem flag is specified, the remote database is hosted in memory,
   475  // not on disk (in a temporary nbs directory).
   476  //
   477  // - Why not use a local database + memory store?
   478  // Firstly, because the spec would be "mem", and the spec library doesn't
   479  // know how to reuse stores.
   480  // Secondly, because it's an unrealistic performance measurement.
   481  //
   482  // - Why use a remote (HTTP) database?
   483  // It's more realistic to exercise the HTTP stack, even if it's just talking
   484  // over localhost.
   485  //
   486  // - Why provide an option for nbs vs memory underlying store?
   487  // Again, nbs is more realistic than memory, and in common cases disk
   488  // space > memory space.
   489  // However, on this developer's laptop, there is
   490  // actually very little disk space, and a lot of memory; plus making the
   491  // test run a little bit faster locally is nice.
   492  func (suite *PerfSuite) StartRemoteDatabase() (host string, stopFn func()) {
   493  	var chunkStore chunks.ChunkStore
   494  	if *perfMemFlag {
   495  		st := &chunks.MemoryStorage{}
   496  		chunkStore = st.NewView()
   497  	} else {
   498  		dbDir := suite.TempDir()
   499  		chunkStore = nbs.NewLocalStore(dbDir, 128*(1<<20))
   500  	}
   501  
   502  	server := datas.NewRemoteDatabaseServer(chunkStore, "0.0.0.0", 0)
   503  	portChan := make(chan int)
   504  	server.Ready = func() { portChan <- server.Port() }
   505  	go server.Run()
   506  
   507  	port := <-portChan
   508  	host = fmt.Sprintf("http://localhost:%d", port)
   509  	stopFn = func() { server.Stop() }
   510  	return
   511  }