// Copyright 2019 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//
// This file incorporates work covered by the following copyright and
// permission notice:
//
// Copyright 2016 Attic Labs, Inc. All rights reserved.
// Licensed under the Apache License, version 2.0:
// http://www.apache.org/licenses/LICENSE-2.0

// Package suite implements a performance test suite for Noms, intended for
// measuring and reporting long running tests.
//
// Usage is similar to testify's suite:
//  1. Define a test suite struct which inherits from suite.PerfSuite.
//  2. Define methods on that struct that start with the word "Test", optionally
//     followed by digits, then followed by a non-empty capitalized string.
//  3. Call suite.Run with an instance of that struct.
//  4. Run go test with the -perf <path to noms db> flag.
//
// Flags:
//
//	-perf.mem      Backs the database by a memory store, instead of nbs.
//	-perf.prefix   Gives the dataset IDs for test results a prefix.
//	-perf.repeat   Sets how many times tests are repeated ("reps").
//	-perf.run      Only run tests that match a regex (case insensitive).
//	-perf.testdata Sets a custom path to the Noms testdata directory.
//
// PerfSuite also supports testify/suite style Setup/TearDown methods:
//
//	Setup/TearDownSuite is called exactly once.
//	Setup/TearDownRep   is called for each repetition of the test runs, i.e. -perf.repeat times.
//	Setup/TearDownTest  is called for every test.
//
// Test results are written to Noms, along with a dump of the environment they were recorded in.
//
// Test names are derived from that "non-empty capitalized string": "Test" is omitted because it's
// redundant, and leading digits are omitted to allow for manual test ordering. For example:
//
//	> cat ./samples/go/csv/csv-import/perf_test.go
//	type perfSuite struct {
//	  suite.PerfSuite
//	}
//
//	func (s *perfSuite) TestFoo() { ... }
//	func (s *perfSuite) TestZoo() { ... }
//	func (s *perfSuite) Test01Qux() { ... }
//	func (s *perfSuite) Test02Bar() { ... }
//
//	func TestPerf(t *testing.T) {
//	  suite.Run("csv-import", t, &perfSuite{})
//	}
//
//	> noms serve &
//	> go test -v ./samples/go/csv/... -perf http://localhost:8000 -perf.repeat 3
//	(perf) RUN(1/3) Test01Qux (as "Qux")
//	(perf) PASS:    Test01Qux (5s, paused for 15s, total 20s)
//	(perf) RUN(1/3) Test02Bar (as "Bar")
//	(perf) PASS:    Test02Bar (15s, paused for 2s, total 17s)
//	(perf) RUN(1/3) TestFoo (as "Foo")
//	(perf) PASS:    TestFoo (10s, paused for 1s, total 11s)
//	(perf) RUN(1/3) TestZoo (as "Zoo")
//	(perf) PASS:    TestZoo (1s, paused for 42s, total 43s)
//	...
//
//	> noms show http://localhost:8000::csv-import
//	{
//	  environment: ...
//	  reps: [{
//	    "Bar": {elapsed: 15s, paused: 2s, total: 17s},
//	    "Foo": {elapsed: 10s, paused: 1s, total: 11s},
//	    "Qux": {elapsed: 5s, paused: 15s, total: 20s},
//	    "Zoo": {elapsed: 1s, paused: 42s, total: 43s},
//	  }, ...]
//	  ...
//	}
package suite

import (
	"bytes"
	"context"
	"flag"
	"fmt"
	"io"
	"os"
	"os/exec"
	"path"
	"path/filepath"
	"reflect"
	"regexp"
	"strings"
	"testing"
	"time"

	"github.com/google/uuid"
	"github.com/shirou/gopsutil/v3/cpu"
	"github.com/shirou/gopsutil/v3/disk"
	"github.com/shirou/gopsutil/v3/host"
	"github.com/shirou/gopsutil/v3/mem"
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	testifySuite "github.com/stretchr/testify/suite"

	"github.com/dolthub/dolt/go/libraries/utils/file"
	"github.com/dolthub/dolt/go/libraries/utils/osutil"
	"github.com/dolthub/dolt/go/store/chunks"
	"github.com/dolthub/dolt/go/store/datas"
	"github.com/dolthub/dolt/go/store/marshal"
	"github.com/dolthub/dolt/go/store/prolly/tree"
	"github.com/dolthub/dolt/go/store/spec"
	"github.com/dolthub/dolt/go/store/types"
)

// Command line flags understood by the perf suite, followed by the pattern
// used to recognise test methods.
//
// testNamePattern matches method names of the form "Test", optional digits,
// then a capitalized remainder; capture group 1 is that remainder, which is the
// name results are recorded under.
//
// NOTE(review): in the code visible in this file, perfMemFlag is declared but
// never read; Run always backs the per-repetition test database with
// chunks.MemoryStorage. Confirm whether the flag is still meant to have an
// effect.
var (
	perfFlag         = flag.String("perf", "", "The database to write perf tests to. If this isn't specified, perf tests are skipped. If you want a dry run, use \"mem\" as a database")
	perfMemFlag      = flag.Bool("perf.mem", false, "Back the test database by a memory store, not nbs. This will affect test timing, but it's provided in case you're low on disk space")
	perfPrefixFlag   = flag.String("perf.prefix", "", `Prefix for the dataset IDs where results are written. For example, a prefix of "foo/" will write test datasets like "foo/csv-import" instead of just "csv-import"`)
	perfRepeatFlag   = flag.Int("perf.repeat", 1, "The number of times to repeat each perf test")
	perfRunFlag      = flag.String("perf.run", "", "Only run perf tests that match a regular expression")
	perfTestdataFlag = flag.String("perf.testdata", "", "Path to the noms testdata directory. By default this is ../testdata relative to the noms directory")
	testNamePattern  = regexp.MustCompile("^Test[0-9]*([A-Z].*$)")
)

// PerfSuite is the core of the perf testing suite. See package documentation for details.
//
// Test suites embed PerfSuite; Run fills in the exported fields before any
// Setup or Test method is called.
type PerfSuite struct {
	// T is the testing.T instance set when the suite is passed into Run.
	T *testing.T

	// W is the io.Writer to write test output, which only outputs if the verbose flag is set.
	// Run sets it to os.Stdout when "go test -v" is in effect and to a writer that
	// discards everything otherwise.
	W io.Writer

	// AtticLabs is a per-run directory path. Run sets it to
	// <os.TempDir()>/attic-labs/noms/suite/<uuid>; Run itself does not create
	// the directory.
	AtticLabs string

	// Testdata is the path to the testdata directory. Run sets it to the value of
	// the -perf.testdata flag, or to <AtticLabs>/testdata when the flag is empty.
	Testdata string

	// Database is a Noms database that tests can use for reading and writing.
	// Run creates a fresh in-memory database for every repetition, so state is
	// shared by the tests of one repetition only.
	Database datas.Database

	// VS is the value store that Database is built on; Run creates both over
	// the same in-memory chunk store.
	VS *types.ValueStore

	// DatabaseSpec is the Noms spec of Database. Run sets it to "mem://".
	DatabaseSpec string

	// tempFiles and tempDirs record what TempFile and TempDir created so Run can
	// remove them when it returns. paused accumulates the time spent inside Pause
	// during the current test (Run resets it before each test). datasetID is the
	// dataset ID passed to Run and is used to prefix temporary file names.
	tempFiles []*os.File
	tempDirs  []string
	paused    time.Duration
	datasetID string
}

// SetupRepSuite has a SetupRep method, which runs every repetition of the test, i.e. -perf.repeat times in total.
// Run calls SetupRep after the repetition's database has been created and
// before any test method of that repetition runs.
type SetupRepSuite interface {
	SetupRep()
}

// TearDownRepSuite has a TearDownRep method, which runs every repetition of the test, i.e. -perf.repeat times in total.
169 type TearDownRepSuite interface { 170 TearDownRep() 171 } 172 173 type perfSuiteT interface { 174 Suite() *PerfSuite 175 } 176 177 type environment struct { 178 DiskUsages map[string]disk.UsageStat 179 Cpus map[int]cpu.InfoStat 180 Mem mem.VirtualMemoryStat 181 Host host.InfoStat 182 Partitions map[string]disk.PartitionStat 183 } 184 185 type timeInfo struct { 186 elapsed, paused, total time.Duration 187 } 188 189 type testRep map[string]timeInfo 190 191 type nopWriter struct{} 192 193 func (r nopWriter) Write(p []byte) (int, error) { 194 return len(p), nil 195 } 196 197 // Run runs suiteT and writes results to dataset datasetID in the database given by the -perf command line flag. 198 func Run(datasetID string, t *testing.T, suiteT perfSuiteT) { 199 t.Skip() 200 assert := assert.New(t) 201 202 if !assert.NotEqual("", datasetID) { 203 return 204 } 205 206 // Piggy-back off the go test -v flag. 207 verboseFlag := flag.Lookup("test.v") 208 assert.NotNil(verboseFlag) 209 verbose := verboseFlag.Value.(flag.Getter).Get().(bool) 210 211 if *perfFlag == "" { 212 if verbose { 213 fmt.Printf("(perf) Skipping %s, -perf flag not set\n", datasetID) 214 } 215 return 216 } 217 218 suite := suiteT.Suite() 219 suite.T = t 220 if verbose { 221 suite.W = os.Stdout 222 } else { 223 suite.W = nopWriter{} 224 } 225 226 id, _ := uuid.NewUUID() 227 suite.AtticLabs = filepath.Join(os.TempDir(), "attic-labs", "noms", "suite", id.String()) 228 suite.Testdata = *perfTestdataFlag 229 if suite.Testdata == "" { 230 suite.Testdata = filepath.Join(suite.AtticLabs, "testdata") 231 } 232 233 // Clean up temporary directories/files last. 234 defer func() { 235 for _, f := range suite.tempFiles { 236 f.Close() 237 file.Remove(f.Name()) 238 } 239 for _, d := range suite.tempDirs { 240 file.RemoveAll(d) 241 } 242 }() 243 244 suite.datasetID = datasetID 245 246 // This is the database the perf test results are written to. 
247 sp, err := spec.ForDatabase(*perfFlag) 248 if !assert.NoError(err) { 249 return 250 } 251 defer sp.Close() 252 253 // List of test runs, each a map of test name => timing info. 254 testReps := make([]testRep, *perfRepeatFlag) 255 256 // Note: the default value of perfRunFlag is "", which is actually a valid 257 // regular expression that matches everything. 258 perfRunRe, err := regexp.Compile("(?i)" + *perfRunFlag) 259 if !assert.NoError(err, `Invalid regular expression "%s"`, *perfRunFlag) { 260 return 261 } 262 263 defer func() { 264 db := sp.GetDatabase(context.Background()) 265 vrw := sp.GetVRW(context.Background()) 266 267 reps := make([]types.Value, *perfRepeatFlag) 268 for i, rep := range testReps { 269 timesSlice := types.ValueSlice{} 270 for name, info := range rep { 271 st, err := types.NewStruct(vrw.Format(), "", types.StructData{ 272 "elapsed": types.Float(info.elapsed.Nanoseconds()), 273 "paused": types.Float(info.paused.Nanoseconds()), 274 "total": types.Float(info.total.Nanoseconds()), 275 }) 276 277 require.NoError(t, err) 278 timesSlice = append(timesSlice, types.String(name), st) 279 } 280 reps[i], err = types.NewMap(context.Background(), vrw, timesSlice...) 281 } 282 283 l, err := types.NewList(context.Background(), vrw, reps...) 
284 require.NoError(t, err) 285 record, err := types.NewStruct(vrw.Format(), "", map[string]types.Value{ 286 "environment": suite.getEnvironment(vrw), 287 "nomsRevision": types.String(suite.getGitHead(path.Join(suite.AtticLabs, "noms"))), 288 "testdataRevision": types.String(suite.getGitHead(suite.Testdata)), 289 "reps": l, 290 }) 291 require.NoError(t, err) 292 293 ds, err := db.GetDataset(context.Background(), *perfPrefixFlag+datasetID) 294 require.NoError(t, err) 295 _, err = datas.CommitValue(context.Background(), db, ds, record) 296 require.NoError(t, err) 297 }() 298 299 if t, ok := suiteT.(testifySuite.SetupAllSuite); ok { 300 t.SetupSuite() 301 } 302 303 for repIdx := 0; repIdx < *perfRepeatFlag; repIdx++ { 304 testReps[repIdx] = testRep{} 305 306 storage := &chunks.MemoryStorage{} 307 memCS := storage.NewView() 308 suite.DatabaseSpec = "mem://" 309 suite.VS = types.NewValueStore(memCS) 310 ns := tree.NewNodeStore(memCS) 311 suite.Database = datas.NewTypesDatabase(suite.VS, ns) 312 defer suite.Database.Close() 313 314 if t, ok := suiteT.(SetupRepSuite); ok { 315 t.SetupRep() 316 } 317 318 for t, mIdx := reflect.TypeOf(suiteT), 0; mIdx < t.NumMethod(); mIdx++ { 319 m := t.Method(mIdx) 320 321 parts := testNamePattern.FindStringSubmatch(m.Name) 322 if parts == nil { 323 continue 324 } 325 326 recordName := parts[1] 327 if !perfRunRe.MatchString(recordName) && !perfRunRe.MatchString(m.Name) { 328 continue 329 } 330 331 if _, ok := testReps[repIdx][recordName]; ok { 332 assert.Fail(`Multiple tests are named "%s"`, recordName) 333 continue 334 } 335 336 if verbose { 337 fmt.Printf("(perf) RUN(%d/%d) %s (as \"%s\")\n", repIdx+1, *perfRepeatFlag, m.Name, recordName) 338 } 339 340 if t, ok := suiteT.(testifySuite.SetupTestSuite); ok { 341 t.SetupTest() 342 } 343 344 start := time.Now() 345 suite.paused = 0 346 347 err := callSafe(m.Name, m.Func, suiteT) 348 349 total := time.Since(start) 350 elapsed := total - suite.paused 351 352 if verbose && err == nil { 353 
fmt.Printf("(perf) PASS: %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total) 354 } else if err != nil { 355 fmt.Printf("(perf) FAIL: %s (%s, paused for %s, total %s)\n", m.Name, elapsed, suite.paused, total) 356 fmt.Println(err) 357 } 358 359 if osutil.IsWindows && elapsed == 0 { 360 elapsed = 1 361 total = 1 362 } 363 testReps[repIdx][recordName] = timeInfo{elapsed, suite.paused, total} 364 365 if t, ok := suiteT.(testifySuite.TearDownTestSuite); ok { 366 t.TearDownTest() 367 } 368 } 369 370 if t, ok := suiteT.(TearDownRepSuite); ok { 371 t.TearDownRep() 372 } 373 } 374 375 if t, ok := suiteT.(testifySuite.TearDownAllSuite); ok { 376 t.TearDownSuite() 377 } 378 } 379 380 func (suite *PerfSuite) Suite() *PerfSuite { 381 return suite 382 } 383 384 // NewAssert returns the assert.Assertions instance for this test. 385 func (suite *PerfSuite) NewAssert() *assert.Assertions { 386 return assert.New(suite.T) 387 } 388 389 // TempFile creates a temporary file, which will be automatically cleaned up by 390 // the perf test suite. Files will be prefixed with the test's dataset ID 391 func (suite *PerfSuite) TempFile() *os.File { 392 f, err := os.CreateTemp("", suite.tempPrefix()) 393 require.NoError(suite.T, err) 394 suite.tempFiles = append(suite.tempFiles, f) 395 return f 396 } 397 398 // TempDir creates a temporary directory, which will be automatically cleaned 399 // up by the perf test suite. Directories will be prefixed with the test's 400 // dataset ID. 401 func (suite *PerfSuite) TempDir() string { 402 d, err := os.MkdirTemp("", suite.tempPrefix()) 403 require.NoError(suite.T, err) 404 suite.tempDirs = append(suite.tempDirs, d) 405 return d 406 } 407 408 func (suite *PerfSuite) tempPrefix() string { 409 sep := fmt.Sprintf("%c", os.PathSeparator) 410 return strings.Replace(fmt.Sprintf("perf.%s.", suite.datasetID), sep, ".", -1) 411 } 412 413 // Pause pauses the test timer while fn is executing. Useful for omitting long setup code (e.g. 
copying files) from the test elapsed time. 414 func (suite *PerfSuite) Pause(fn func()) { 415 start := time.Now() 416 fn() 417 suite.paused += time.Since(start) 418 } 419 420 // OpenGlob opens the concatenation of all files that match pattern, returned 421 // as []io.Reader so it can be used immediately with io.MultiReader. 422 // 423 // Large CSV files in testdata are broken up into foo.a, foo.b, etc to get 424 // around GitHub file size restrictions. 425 func (suite *PerfSuite) OpenGlob(pattern ...string) []io.Reader { 426 glob, err := filepath.Glob(path.Join(pattern...)) 427 require.NoError(suite.T, err) 428 429 files := make([]io.Reader, len(glob)) 430 for i, m := range glob { 431 f, err := os.Open(m) 432 require.NoError(suite.T, err) 433 files[i] = f 434 } 435 436 return files 437 } 438 439 // CloseGlob closes all of the files, designed to be used with OpenGlob. 440 func (suite *PerfSuite) CloseGlob(files []io.Reader) { 441 for _, f := range files { 442 require.NoError(suite.T, f.(*os.File).Close()) 443 } 444 } 445 446 func callSafe(name string, fun reflect.Value, args ...interface{}) (err error) { 447 defer func() { 448 if r := recover(); r != nil { 449 err = r.(error) 450 } 451 }() 452 453 funArgs := make([]reflect.Value, len(args)) 454 for i, arg := range args { 455 funArgs[i] = reflect.ValueOf(arg) 456 } 457 458 fun.Call(funArgs) 459 return 460 } 461 462 func (suite *PerfSuite) getEnvironment(vrw types.ValueReadWriter) types.Value { 463 env := environment{ 464 DiskUsages: map[string]disk.UsageStat{}, 465 Cpus: map[int]cpu.InfoStat{}, 466 Partitions: map[string]disk.PartitionStat{}, 467 } 468 469 partitions, err := disk.Partitions(false) 470 require.NoError(suite.T, err) 471 for _, p := range partitions { 472 usage, err := disk.Usage(p.Mountpoint) 473 require.NoError(suite.T, err) 474 env.DiskUsages[p.Mountpoint] = *usage 475 env.Partitions[p.Device] = p 476 } 477 478 cpus, err := cpu.Info() 479 require.NoError(suite.T, err) 480 for i, c := range cpus { 481 
env.Cpus[i] = c 482 } 483 484 mem, err := mem.VirtualMemory() 485 require.NoError(suite.T, err) 486 env.Mem = *mem 487 488 hostInfo, err := host.Info() 489 require.NoError(suite.T, err) 490 env.Host = *hostInfo 491 492 envStruct, err := marshal.Marshal(context.Background(), vrw, env) 493 require.NoError(suite.T, err) 494 return envStruct 495 } 496 497 func (suite *PerfSuite) getGitHead(dir string) string { 498 stdout := &bytes.Buffer{} 499 cmd := exec.Command("git", "rev-parse", "HEAD") 500 cmd.Stdout = stdout 501 cmd.Dir = dir 502 if err := cmd.Run(); err != nil { 503 return "" 504 } 505 return strings.TrimSpace(stdout.String()) 506 }