github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/dbnode/integration/setup.go

// Copyright (c) 2016 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package integration

import (
	"errors"
	"flag"
	"fmt"
	"io/ioutil"
	"net"
	"os"
	"os/exec"
	"strings"
	"sync"
	"testing"
	"time"

	// nolint: gci
	"github.com/stretchr/testify/assert"
	"github.com/stretchr/testify/require"
	"github.com/uber-go/tally"
	"github.com/uber/tchannel-go"
	"go.uber.org/zap"
	"go.uber.org/zap/zapcore"

	clusterclient "github.com/m3db/m3/src/cluster/client"
	"github.com/m3db/m3/src/cluster/services"
	"github.com/m3db/m3/src/cluster/shard"
	queryconfig "github.com/m3db/m3/src/cmd/services/m3query/config"
	"github.com/m3db/m3/src/dbnode/client"
	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
	"github.com/m3db/m3/src/dbnode/integration/fake"
	"github.com/m3db/m3/src/dbnode/integration/generate"
	"github.com/m3db/m3/src/dbnode/namespace"
	"github.com/m3db/m3/src/dbnode/persist/fs"
	"github.com/m3db/m3/src/dbnode/persist/fs/commitlog"
	"github.com/m3db/m3/src/dbnode/retention"
	"github.com/m3db/m3/src/dbnode/runtime"
	"github.com/m3db/m3/src/dbnode/server"
	"github.com/m3db/m3/src/dbnode/sharding"
	"github.com/m3db/m3/src/dbnode/storage"
	"github.com/m3db/m3/src/dbnode/storage/block"
	"github.com/m3db/m3/src/dbnode/storage/bootstrap"
	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper"
	bcl "github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/commitlog"
	bfs "github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/fs"
	"github.com/m3db/m3/src/dbnode/storage/bootstrap/bootstrapper/uninitialized"
	"github.com/m3db/m3/src/dbnode/storage/cluster"
	"github.com/m3db/m3/src/dbnode/storage/index"
	"github.com/m3db/m3/src/dbnode/storage/series"
	"github.com/m3db/m3/src/dbnode/testdata/prototest"
	"github.com/m3db/m3/src/dbnode/topology"
	"github.com/m3db/m3/src/dbnode/ts"
	queryserver "github.com/m3db/m3/src/query/server"
	"github.com/m3db/m3/src/x/clock"
	xconfig "github.com/m3db/m3/src/x/config"
	"github.com/m3db/m3/src/x/ident"
	"github.com/m3db/m3/src/x/instrument"
	xsync "github.com/m3db/m3/src/x/sync"
	xtime "github.com/m3db/m3/src/x/time"
)

var (
	id                  = flag.String("id", "", "Node host ID")
	httpClusterAddr     = flag.String("clusterhttpaddr", "127.0.0.1:9000", "Cluster HTTP server address")
	tchannelClusterAddr = flag.String("clustertchanneladdr", "127.0.0.1:9001", "Cluster TChannel server address")
	httpNodeAddr        = flag.String("nodehttpaddr", "127.0.0.1:9002", "Node HTTP server address")
	tchannelNodeAddr    = flag.String("nodetchanneladdr", "127.0.0.1:9003", "Node TChannel server address")
	httpDebugAddr       = flag.String("debughttpaddr", "127.0.0.1:9004", "HTTP debug server address")

	errServerStartTimedOut = errors.New("server took too long to start")
	errServerStopTimedOut  = errors.New("server took too long to stop")
	testNamespaces         = []ident.ID{ident.StringID("testNs1"), ident.StringID("testNs2")}

	testSchemaHistory = prototest.NewSchemaHistory()
	testSchema        = prototest.NewMessageDescriptor(testSchemaHistory)
	testProtoMessages = prototest.NewProtoTestMessages(testSchema)
	testProtoIter     = prototest.NewProtoMessageIterator(testProtoMessages)
)
server address") 84 tchannelClusterAddr = flag.String("clustertchanneladdr", "127.0.0.1:9001", "Cluster TChannel server address") 85 httpNodeAddr = flag.String("nodehttpaddr", "127.0.0.1:9002", "Node HTTP server address") 86 tchannelNodeAddr = flag.String("nodetchanneladdr", "127.0.0.1:9003", "Node TChannel server address") 87 httpDebugAddr = flag.String("debughttpaddr", "127.0.0.1:9004", "HTTP debug server address") 88 89 errServerStartTimedOut = errors.New("server took too long to start") 90 errServerStopTimedOut = errors.New("server took too long to stop") 91 testNamespaces = []ident.ID{ident.StringID("testNs1"), ident.StringID("testNs2")} 92 93 testSchemaHistory = prototest.NewSchemaHistory() 94 testSchema = prototest.NewMessageDescriptor(testSchemaHistory) 95 testProtoMessages = prototest.NewProtoTestMessages(testSchema) 96 testProtoIter = prototest.NewProtoMessageIterator(testProtoMessages) 97 ) 98 99 // nowSetterFn is the function that sets the current time 100 type nowSetterFn func(t xtime.UnixNano) 101 102 type assertTestDataEqual func(t *testing.T, expected, actual []generate.TestValue) bool 103 104 var _ topology.MapProvider = &testSetup{} 105 106 type testSetup struct { 107 t *testing.T 108 opts TestOptions 109 schemaReg namespace.SchemaRegistry 110 111 logger *zap.Logger 112 scope tally.TestScope 113 114 db cluster.Database 115 storageOpts storage.Options 116 instrumentOpts instrument.Options 117 serverStorageOpts server.StorageOptions 118 fsOpts fs.Options 119 blockLeaseManager block.LeaseManager 120 hostID string 121 origin topology.Host 122 topoInit topology.Initializer 123 shardSet sharding.ShardSet 124 getNowFn xNowFn 125 clockNowFn clock.NowFn 126 setNowFn nowSetterFn 127 tchannelClient *TestTChannelClient 128 m3dbClient client.Client 129 // We need two distinct clients where one has the origin set to the same ID as the 130 // node itself (I.E) the client will behave exactly as if it is the node itself 131 // making requests, and another client with the origin set to an ID different than 132 // the node itself so that we can make requests from the perspective of a "different" 133 // M3DB node for verification purposes in some of the tests. 134 m3dbAdminClient client.AdminClient 135 m3dbVerificationAdminClient client.AdminClient 136 workerPool xsync.WorkerPool 137 queryAddress string 138 139 // compare expected with actual data function 140 assertEqual assertTestDataEqual 141 142 // things that need to be cleaned up 143 channel *tchannel.Channel 144 filePathPrefix string 145 namespaces []namespace.Metadata 146 147 // signals 148 doneCh chan struct{} 149 closedCh chan struct{} 150 queryInterruptCh chan error 151 queryDoneCh chan struct{} 152 } 153 154 type xNowFn func() xtime.UnixNano 155 156 // TestSetup is a test setup. 
// TestSetup is the interface for a single-node integration test setup.
type TestSetup interface {
	topology.MapProvider

	Opts() TestOptions
	SetOpts(TestOptions)
	FilesystemOpts() fs.Options
	AssertEqual(*testing.T, []generate.TestValue, []generate.TestValue) bool
	DB() cluster.Database
	Scope() tally.TestScope
	M3DBClient() client.Client
	M3DBVerificationAdminClient() client.AdminClient
	TChannelClient() *TestTChannelClient
	Namespaces() []namespace.Metadata
	TopologyInitializer() topology.Initializer
	SetTopologyInitializer(topology.Initializer)
	Fetch(req *rpc.FetchRequest) ([]generate.TestValue, error)
	FilePathPrefix() string
	StorageOpts() storage.Options
	SetStorageOpts(storage.Options)
	SetServerStorageOpts(server.StorageOptions)
	Origin() topology.Host
	ServerIsBootstrapped() bool
	StopServer() error
	StopServerAndVerifyOpenFilesAreClosed() error
	StartServer() error
	StartServerDontWaitBootstrap() error
	StopQuery() error
	StartQuery(configYAML string) error
	QueryAddress() string
	NowFn() xNowFn
	ClockNowFn() clock.NowFn
	SetNowFn(xtime.UnixNano)
	Close()
	WriteBatch(ident.ID, generate.SeriesBlock) error
	ShouldBeEqual() bool
	// *NOTE*: This method is deprecated and should not be used in new tests.
	// Existing tests should be migrated away from it when they are touched.
	SleepFor10xTickMinimumInterval()
	BlockLeaseManager() block.LeaseManager
	ShardSet() sharding.ShardSet
	SetShardSet(sharding.ShardSet)
	GeneratorOptions(retention.Options) generate.Options
	MaybeResetClients() error
	SchemaRegistry() namespace.SchemaRegistry
	NamespaceMetadataOrFail(ident.ID) namespace.Metadata
	MustSetTickMinimumInterval(time.Duration)
	WaitUntilServerIsBootstrapped() error
	WaitUntilServerIsUp() error
	WaitUntilServerIsDown() error
	Truncate(*rpc.TruncateRequest) (int64, error)
	InitializeBootstrappers(opts InitializeBootstrappersOptions) error
}

// StorageOption is a function that transforms storage options.
type StorageOption func(storage.Options) storage.Options
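// Illustrative sketch (an addition, not part of the original file): a typical
// integration test drives this setup as below, using only names defined by
// this file and its package; the exact write/fetch payloads are elided.
func exampleSetupLifecycle(t *testing.T) {
	testSetup, err := NewTestSetup(t, nil, nil)
	require.NoError(t, err)
	defer testSetup.Close()

	// StartServer blocks until the node reports bootstrapped (or fails with
	// errServerStartTimedOut via WaitUntilServerIsBootstrapped).
	require.NoError(t, testSetup.StartServer())
	defer func() { require.NoError(t, testSetup.StopServer()) }()

	// Reads and writes then go through testSetup.WriteBatch / testSetup.Fetch.
}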
// NewTestSetup returns a new test setup for non-dockerized integration tests.
func NewTestSetup(
	t *testing.T,
	opts TestOptions,
	fsOpts fs.Options,
	storageOptFns ...StorageOption,
) (TestSetup, error) {
	if opts == nil {
		opts = NewTestOptions(t)
	}

	nsInit := opts.NamespaceInitializer()
	if nsInit == nil {
		nsInit = namespace.NewStaticInitializer(opts.Namespaces())
	}

	zapConfig := zap.NewDevelopmentConfig()
	zapConfig.DisableCaller = true
	zapConfig.Level = zap.NewAtomicLevelAt(zapcore.InfoLevel)
	if level := os.Getenv("LOG_LEVEL"); level != "" {
		var parsedLevel zap.AtomicLevel
		if err := parsedLevel.UnmarshalText([]byte(level)); err != nil {
			return nil, fmt.Errorf("unable to parse log level: %v", err)
		}
		zapConfig.Level = parsedLevel
	}
	logger, err := zapConfig.Build()
	if err != nil {
		return nil, err
	}

	// Schema registry is shared between database and admin client.
	schemaReg := namespace.NewSchemaRegistry(opts.ProtoEncoding(), nil)

	blockLeaseManager := block.NewLeaseManager(nil)
	storageOpts := storage.NewOptions().
		SetNamespaceInitializer(nsInit).
		SetSchemaRegistry(schemaReg).
		SetBlockLeaseManager(blockLeaseManager)

	if opts.ProtoEncoding() {
		blockOpts := storageOpts.DatabaseBlockOptions().
			SetEncoderPool(prototest.ProtoPools.EncoderPool).
			SetReaderIteratorPool(prototest.ProtoPools.ReaderIterPool).
			SetMultiReaderIteratorPool(prototest.ProtoPools.MultiReaderIterPool)
		storageOpts = storageOpts.
			SetDatabaseBlockOptions(blockOpts).
			SetEncoderPool(prototest.ProtoPools.EncoderPool).
			SetReaderIteratorPool(prototest.ProtoPools.ReaderIterPool).
			SetMultiReaderIteratorPool(prototest.ProtoPools.MultiReaderIterPool)
	}

	if strings.ToLower(os.Getenv("TEST_DEBUG_LOG")) == "true" {
		zapConfig.Level = zap.NewAtomicLevelAt(zapcore.DebugLevel)
		logger, err = zapConfig.Build()
		if err != nil {
			return nil, err
		}
		storageOpts = storageOpts.SetInstrumentOptions(
			storageOpts.InstrumentOptions().SetLogger(logger))
	}

	scope := tally.NewTestScope("", nil)
	storageOpts = storageOpts.SetInstrumentOptions(
		storageOpts.InstrumentOptions().SetMetricsScope(scope))

	// Use specified series cache policy from environment if set.
	seriesCachePolicy := strings.ToLower(os.Getenv("TEST_SERIES_CACHE_POLICY"))
	if seriesCachePolicy != "" {
		value, err := series.ParseCachePolicy(seriesCachePolicy)
		if err != nil {
			return nil, err
		}
		storageOpts = storageOpts.SetSeriesCachePolicy(value)
	}

	fields := []zapcore.Field{
		zap.Stringer("cache-policy", storageOpts.SeriesCachePolicy()),
	}
	logger = logger.With(fields...)
	instrumentOpts := storageOpts.InstrumentOptions().SetLogger(logger)
	storageOpts = storageOpts.SetInstrumentOptions(instrumentOpts)

	indexMode := index.InsertSync
	if opts.WriteNewSeriesAsync() {
		indexMode = index.InsertAsync
	}

	plCache, err := index.NewPostingsListCache(10, index.PostingsListCacheOptions{
		InstrumentOptions: instrumentOpts,
	})
	if err != nil {
		return nil, fmt.Errorf("unable to create postings list cache: %v", err)
	}
	// Ok to run immediately since it just closes the background reporting loop. Only ok because
	// this is a test setup; in production we would want the metrics.
	plCache.Start()()

	indexOpts := storageOpts.IndexOptions().
		SetInsertMode(indexMode).
		SetPostingsListCache(plCache)
	storageOpts = storageOpts.SetIndexOptions(indexOpts)

	runtimeOptsMgr := storageOpts.RuntimeOptionsManager()
	runtimeOpts := runtimeOptsMgr.Get().
		SetTickMinimumInterval(opts.TickMinimumInterval()).
		SetTickCancellationCheckInterval(opts.TickCancellationCheckInterval()).
		SetMaxWiredBlocks(opts.MaxWiredBlocks()).
		SetWriteNewSeriesAsync(opts.WriteNewSeriesAsync())
	if err := runtimeOptsMgr.Update(runtimeOpts); err != nil {
		return nil, err
	}

	// Set up shard set.
	shardSet, err := newTestShardSet(opts.NumShards(), opts.ShardSetOptions())
	if err != nil {
		return nil, err
	}

	id := *id
	if id == "" {
		id = opts.ID()
	}

	tchannelNodeAddr := *tchannelNodeAddr
	if addr := opts.TChannelNodeAddr(); addr != "" {
		tchannelNodeAddr = addr
	}

	topoInit := opts.ClusterDatabaseTopologyInitializer()
	if topoInit == nil {
		topoInit, err = newTopologyInitializerForShardSet(id, tchannelNodeAddr, shardSet)
		if err != nil {
			return nil, err
		}
	}

	adminClient, verificationAdminClient, err := newClients(topoInit, opts,
		schemaReg, id, tchannelNodeAddr, instrumentOpts)
	if err != nil {
		return nil, err
	}

	// Set up tchannel client.
	tchanClient, err := NewTChannelClient("integration-test", tchannelNodeAddr)
	if err != nil {
		return nil, err
	}

	// Set up worker pool.
	workerPool := xsync.NewWorkerPool(opts.WorkerPoolSize())
	workerPool.Init()

	// BlockSizes are specified per namespace, make best effort at finding
	// a value to align `now` for all of them.
	truncateSize, guess := guessBestTruncateBlockSize(opts.Namespaces())
	if guess {
		logger.Warn("unable to find a single blockSize from known retention periods",
			zap.String("guessing", truncateSize.String()))
	}

	// Set up getter and setter for now.
	var lock sync.RWMutex
	now := xtime.Now().Truncate(truncateSize)
	getNowFn := func() xtime.UnixNano {
		lock.RLock()
		t := now
		lock.RUnlock()
		return t
	}
	clockNowFn := func() time.Time {
		return getNowFn().ToTime()
	}
	setNowFn := func(t xtime.UnixNano) {
		lock.Lock()
		now = t
		lock.Unlock()
	}
	if overrideTimeNow := opts.NowFn(); overrideTimeNow != nil {
		// Allow overriding the frozen time.
		storageOpts = storageOpts.SetClockOptions(
			storageOpts.ClockOptions().SetNowFn(overrideTimeNow))
	} else {
		storageOpts = storageOpts.SetClockOptions(
			storageOpts.ClockOptions().SetNowFn(clockNowFn))
	}
	// Set up file path prefix.
	filePathPrefix := opts.FilePathPrefix()
	if filePathPrefix == "" {
		var err error
		filePathPrefix, err = ioutil.TempDir("", "integration-test")
		if err != nil {
			return nil, err
		}
	}

	if fsOpts == nil {
		fsOpts = fs.NewOptions().
			SetFilePathPrefix(filePathPrefix).
			SetClockOptions(storageOpts.ClockOptions())
	}

	storageOpts = storageOpts.SetCommitLogOptions(
		storageOpts.CommitLogOptions().
			SetFilesystemOptions(fsOpts))

	// Set up persistence manager.
	pm, err := fs.NewPersistManager(fsOpts)
	if err != nil {
		return nil, err
	}
	storageOpts = storageOpts.SetPersistManager(pm)

	// Set up index claims manager.
	icm, err := fs.NewIndexClaimsManager(fsOpts)
	if err != nil {
		return nil, err
	}
	storageOpts = storageOpts.SetIndexClaimsManager(icm)

	// Set up repair options.
	storageOpts = storageOpts.
		SetRepairOptions(storageOpts.RepairOptions().
			SetAdminClients([]client.AdminClient{adminClient}))

	// Set up block retriever manager.
	if mgr := opts.DatabaseBlockRetrieverManager(); mgr != nil {
		storageOpts = storageOpts.SetDatabaseBlockRetrieverManager(mgr)
	} else {
		switch storageOpts.SeriesCachePolicy() {
		case series.CacheAll:
			// Do not need a block retriever for CacheAll policy.
		default:
			blockRetrieverMgr := block.NewDatabaseBlockRetrieverManager(
				func(md namespace.Metadata, shardSet sharding.ShardSet) (block.DatabaseBlockRetriever, error) {
					retrieverOpts := fs.NewBlockRetrieverOptions().
						SetBlockLeaseManager(blockLeaseManager)
					retriever, err := fs.NewBlockRetriever(retrieverOpts, fsOpts)
					if err != nil {
						return nil, err
					}

					if err := retriever.Open(md, shardSet); err != nil {
						return nil, err
					}
					return retriever, nil
				})
			storageOpts = storageOpts.
				SetDatabaseBlockRetrieverManager(blockRetrieverMgr)
		}
	}

	// Set up wired list if required.
	if storageOpts.SeriesCachePolicy() == series.CacheLRU {
		wiredList := block.NewWiredList(block.WiredListOptions{
			RuntimeOptionsManager: runtimeOptsMgr,
			InstrumentOptions:     storageOpts.InstrumentOptions(),
			ClockOptions:          storageOpts.ClockOptions(),
			// Use a small event channel size to stress-test the implementation.
			EventsChannelSize: 1,
		})
		blockOpts := storageOpts.DatabaseBlockOptions().SetWiredList(wiredList)
		blockPool := block.NewDatabaseBlockPool(nil)
		// Have to manually set the block pool because the default one uses a
		// constructor function that doesn't have the updated blockOpts.
		blockPool.Init(func() block.DatabaseBlock {
			return block.NewDatabaseBlock(0, 0, ts.Segment{}, blockOpts, namespace.Context{})
		})
		blockOpts = blockOpts.SetDatabaseBlockPool(blockPool)
		storageOpts = storageOpts.SetDatabaseBlockOptions(blockOpts)
	}

	storageOpts = storageOpts.SetInstrumentOptions(
		storageOpts.InstrumentOptions().SetReportInterval(opts.ReportInterval()))

	// Set debugging options if environment vars set.
	if debugFilePrefix := os.Getenv("TEST_DEBUG_FILE_PREFIX"); debugFilePrefix != "" {
		opts = opts.SetVerifySeriesDebugFilePathPrefix(debugFilePrefix)
	}

	for _, fn := range storageOptFns {
		if fn != nil {
			storageOpts = fn(storageOpts)
		}
	}
	if storageOpts != nil && storageOpts.AdminClient() == nil {
		storageOpts = storageOpts.SetAdminClient(adminClient)
	}

	return &testSetup{
		t:                           t,
		opts:                        opts,
		schemaReg:                   schemaReg,
		logger:                      logger,
		scope:                       scope,
		storageOpts:                 storageOpts,
		blockLeaseManager:           blockLeaseManager,
		instrumentOpts:              instrumentOpts,
		fsOpts:                      fsOpts,
		hostID:                      id,
		origin:                      newOrigin(id, tchannelNodeAddr),
		topoInit:                    topoInit,
		shardSet:                    shardSet,
		getNowFn:                    getNowFn,
		clockNowFn:                  clockNowFn,
		setNowFn:                    setNowFn,
		tchannelClient:              tchanClient,
		m3dbClient:                  adminClient.(client.Client),
		m3dbAdminClient:             adminClient,
		m3dbVerificationAdminClient: verificationAdminClient,
		workerPool:                  workerPool,
		filePathPrefix:              filePathPrefix,
		namespaces:                  opts.Namespaces(),
		doneCh:                      make(chan struct{}),
		closedCh:                    make(chan struct{}),
		assertEqual:                 opts.AssertTestDataEqual(),
	}, nil
}
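// Illustrative helper (an addition, not in the original file): because
// NewTestSetup freezes "now" behind the RWMutex-guarded getter/setter above,
// tests advance time explicitly instead of sleeping, e.g. one block at a time.
func exampleAdvanceSetupTime(setup TestSetup, step time.Duration) xtime.UnixNano {
	next := setup.NowFn()() + xtime.UnixNano(step)
	setup.SetNowFn(next) // all storage clock reads now observe the new time
	return next
}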
// guessBestTruncateBlockSize guesses the best block size for truncating the
// testSetup's nowFn. The returned bool is true when it had to guess, i.e.
// when the namespaces do not share a single block size.
func guessBestTruncateBlockSize(mds []namespace.Metadata) (time.Duration, bool) {
	// gcd of a pair of numbers
	gcd := func(a, b int64) int64 {
		for b > 0 {
			a, b = b, a%b
		}
		return a
	}
	lcm := func(a, b int64) int64 {
		return a * b / gcd(a, b)
	}

	// default guess
	if len(mds) == 0 {
		return time.Hour, true
	}

	// get all known block sizes, in milliseconds
	blockSizes := make(map[int64]struct{})
	for _, md := range mds {
		bs := md.Options().RetentionOptions().BlockSize().Nanoseconds() / int64(time.Millisecond)
		blockSizes[bs] = struct{}{}
	}

	first := true
	var l int64
	for i := range blockSizes {
		if first {
			l = i
			first = false
		} else {
			l = lcm(l, i)
		}
	}

	guess := time.Duration(l) * time.Millisecond
	// if there's only a single value, we are not guessing
	if len(blockSizes) == 1 {
		return guess, false
	}

	// otherwise, we are guessing
	return guess, true
}
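// Worked example (an addition, not in the original file) of the lcm-based
// guess above: namespaces with 2h and 3h block sizes yield lcm(2h, 3h) = 6h,
// so "now" gets truncated to a 6h boundary and the guess flag is true because
// no single shared block size exists. Note the arithmetic runs in
// milliseconds, as in guessBestTruncateBlockSize, since multiplying two
// nanosecond durations would overflow int64.
func exampleTruncateBlockSizeGuess() (time.Duration, bool) {
	gcd := func(a, b int64) int64 {
		for b > 0 {
			a, b = b, a%b
		}
		return a
	}
	two, three := int64(2*time.Hour/time.Millisecond), int64(3*time.Hour/time.Millisecond)
	lcm := two * three / gcd(two, three)
	return time.Duration(lcm) * time.Millisecond, true // 6h, guessed
}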
func (ts *testSetup) ShouldBeEqual() bool {
	return ts.assertEqual == nil
}

func (ts *testSetup) AssertEqual(t *testing.T, a, b []generate.TestValue) bool {
	return ts.assertEqual(t, a, b)
}

func (ts *testSetup) DB() cluster.Database {
	return ts.db
}

func (ts *testSetup) Scope() tally.TestScope {
	return ts.scope
}

func (ts *testSetup) M3DBClient() client.Client {
	return ts.m3dbClient
}

func (ts *testSetup) M3DBVerificationAdminClient() client.AdminClient {
	return ts.m3dbVerificationAdminClient
}

func (ts *testSetup) Namespaces() []namespace.Metadata {
	return ts.namespaces
}

func (ts *testSetup) NowFn() xNowFn {
	return ts.getNowFn
}

func (ts *testSetup) ClockNowFn() clock.NowFn {
	return ts.clockNowFn
}

func (ts *testSetup) SetNowFn(t xtime.UnixNano) {
	ts.setNowFn(t)
}

func (ts *testSetup) FilesystemOpts() fs.Options {
	return ts.fsOpts
}

func (ts *testSetup) Opts() TestOptions {
	return ts.opts
}

func (ts *testSetup) SetOpts(opts TestOptions) {
	ts.opts = opts
}

func (ts *testSetup) Origin() topology.Host {
	return ts.origin
}

func (ts *testSetup) FilePathPrefix() string {
	return ts.filePathPrefix
}

func (ts *testSetup) StorageOpts() storage.Options {
	return ts.storageOpts
}

func (ts *testSetup) SetStorageOpts(opts storage.Options) {
	ts.storageOpts = opts
}

func (ts *testSetup) SetServerStorageOpts(opts server.StorageOptions) {
	ts.serverStorageOpts = opts
}

func (ts *testSetup) TopologyInitializer() topology.Initializer {
	return ts.topoInit
}

func (ts *testSetup) SetTopologyInitializer(init topology.Initializer) {
	ts.topoInit = init
}

func (ts *testSetup) BlockLeaseManager() block.LeaseManager {
	return ts.blockLeaseManager
}

func (ts *testSetup) ShardSet() sharding.ShardSet {
	return ts.shardSet
}

func (ts *testSetup) SetShardSet(shardSet sharding.ShardSet) {
	ts.shardSet = shardSet
}

func (ts *testSetup) NamespaceMetadataOrFail(id ident.ID) namespace.Metadata {
	for _, md := range ts.namespaces {
		if md.ID().Equal(id) {
			return md
		}
	}
	require.FailNow(ts.t, "unable to find namespace", id.String())
	return nil
}

func (ts *testSetup) GeneratorOptions(ropts retention.Options) generate.Options {
	var (
		storageOpts = ts.storageOpts
		fsOpts      = storageOpts.CommitLogOptions().FilesystemOptions()
		opts        = generate.NewOptions()
		co          = opts.ClockOptions().SetNowFn(ts.clockNowFn)
	)

	return opts.
		SetClockOptions(co).
		SetRetentionPeriod(ropts.RetentionPeriod()).
		SetBlockSize(ropts.BlockSize()).
		SetFilePathPrefix(fsOpts.FilePathPrefix()).
		SetNewFileMode(fsOpts.NewFileMode()).
		SetNewDirectoryMode(fsOpts.NewDirectoryMode()).
		SetWriterBufferSize(fsOpts.WriterBufferSize()).
		SetEncoderPool(storageOpts.EncoderPool())
}
func (ts *testSetup) ServerIsBootstrapped() bool {
	resp, err := ts.health()
	return err == nil && resp.Bootstrapped
}

func (ts *testSetup) ServerIsUp() bool {
	_, err := ts.health()
	return err == nil
}

func (ts *testSetup) ServerIsDown() bool {
	return !ts.ServerIsUp()
}

func (ts *testSetup) WaitUntilServerIsBootstrapped() error {
	if waitUntil(ts.ServerIsBootstrapped, ts.opts.ServerStateChangeTimeout()) {
		return nil
	}
	return errServerStartTimedOut
}

func (ts *testSetup) WaitUntilServerIsUp() error {
	if waitUntil(ts.ServerIsUp, ts.opts.ServerStateChangeTimeout()) {
		return nil
	}
	return errServerStartTimedOut
}

func (ts *testSetup) WaitUntilServerIsDown() error {
	if waitUntil(ts.ServerIsDown, ts.opts.ServerStateChangeTimeout()) {
		return nil
	}
	return errServerStopTimedOut
}

func (ts *testSetup) StartServerDontWaitBootstrap() error {
	return ts.startServerBase(false)
}

func (ts *testSetup) StartServer() error {
	return ts.startServerBase(true)
}

func (ts *testSetup) startServerBase(waitForBootstrap bool) error {
	ts.logger.Info("starting server")

	var (
		resultCh = make(chan error, 1)
		err      error
	)

	topo, err := ts.topoInit.Init()
	if err != nil {
		return fmt.Errorf("error initializing topology: %v", err)
	}

	topoWatch, err := topo.Watch()
	if err != nil {
		return fmt.Errorf("error watching topology: %v", err)
	}

	ts.db, err = cluster.NewDatabase(ts.hostID, topo, topoWatch, ts.storageOpts)
	if err != nil {
		return err
	}

	leaseVerifier := storage.NewLeaseVerifier(ts.db)
	if err := ts.blockLeaseManager.SetLeaseVerifier(leaseVerifier); err != nil {
		return err
	}

	// Check if clients were closed by StopServer and need to be re-created.
	ts.MaybeResetClients()

	go func() {
		if err := openAndServe(
			ts.httpClusterAddr(), ts.tchannelClusterAddr(),
			ts.httpNodeAddr(), ts.tchannelNodeAddr(), ts.httpDebugAddr(),
			ts.db, ts.m3dbClient, ts.storageOpts, ts.serverStorageOpts, ts.doneCh,
		); err != nil {
			select {
			case resultCh <- err:
			default:
			}
		}

		ts.closedCh <- struct{}{}
	}()

	waitFn := ts.WaitUntilServerIsUp
	if waitForBootstrap {
		waitFn = ts.WaitUntilServerIsBootstrapped
	}
	go func() {
		select {
		case resultCh <- waitFn():
		default:
		}
	}()

	err = <-resultCh
	if err == nil {
		ts.logger.Info("started server")
	} else {
		ts.logger.Error("start server error", zap.Error(err))
	}
	return err
}
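// Minimal sketch (an addition, not in the original file) of the
// first-result-wins pattern startServerBase relies on: competing goroutines
// race to deliver into a one-slot buffered channel, and the non-blocking send
// silently drops whichever result arrives second.
func exampleFirstResult(fns ...func() error) error {
	resultCh := make(chan error, 1)
	for _, fn := range fns {
		fn := fn // capture loop variable
		go func() {
			select {
			case resultCh <- fn():
			default: // another goroutine already reported; drop this result
			}
		}()
	}
	return <-resultCh
}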
func (ts *testSetup) StopServer() error {
	ts.doneCh <- struct{}{}

	// NB(bodu): Need to reset the global counter of index claims managers after
	// we've stopped the test server. This covers the restart server case.
	fs.ResetIndexClaimsManagersUnsafe()

	if ts.m3dbClient.DefaultSessionActive() {
		session, err := ts.m3dbClient.DefaultSession()
		if err != nil {
			return err
		}
		ts.m3dbClient = nil
		ts.m3dbAdminClient = nil
		ts.m3dbVerificationAdminClient = nil
		defer session.Close()
	}

	if err := ts.WaitUntilServerIsDown(); err != nil {
		return err
	}

	// Wait for graceful server close.
	<-ts.closedCh
	return nil
}

func (ts *testSetup) StopServerAndVerifyOpenFilesAreClosed() error {
	if err := ts.DB().Close(); err != nil {
		return err
	}

	openDataFiles := openFiles(ts.filePathPrefix + "/data/")
	require.Empty(ts.t, openDataFiles)

	return ts.StopServer()
}

// openFiles returns the list of open/locked files inside the parent dir.
func openFiles(parentDir string) []string {
	cmd := exec.Command("lsof", "+D", parentDir) // nolint:gosec

	out, _ := cmd.Output()
	if len(out) == 0 {
		return nil
	}

	return strings.Split(string(out), "\n")
}

func (ts *testSetup) StartQuery(configYAML string) error {
	m3dbClient := ts.m3dbClient
	if m3dbClient == nil {
		return fmt.Errorf("dbnode admin client not set")
	}

	configFile, cleanup := newTestFile(ts.t, "config.yaml", configYAML)
	defer cleanup()

	var cfg queryconfig.Configuration
	err := xconfig.LoadFile(&cfg, configFile.Name(), xconfig.Options{})
	if err != nil {
		return err
	}

	dbClientCh := make(chan client.Client, 1)
	dbClientCh <- m3dbClient
	clusterClientCh := make(chan clusterclient.Client, 1)
	listenerCh := make(chan net.Listener, 1)
	localSessionReadyCh := make(chan struct{}, 1)

	ts.queryInterruptCh = make(chan error, 1)
	ts.queryDoneCh = make(chan struct{}, 1)

	go func() {
		queryserver.Run(queryserver.RunOptions{
			Config:              cfg,
			InterruptCh:         ts.queryInterruptCh,
			ListenerCh:          listenerCh,
			LocalSessionReadyCh: localSessionReadyCh,
			DBClient:            dbClientCh,
			ClusterClient:       clusterClientCh,
		})
		ts.queryDoneCh <- struct{}{}
	}()

	// Wait for local session to connect.
	<-localSessionReadyCh

	// Wait for listener.
	listener := <-listenerCh
	ts.queryAddress = listener.Addr().String()

	return nil
}
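// Illustrative check (an addition, not in the original file): once StartQuery
// has returned, QueryAddress carries the listener's host:port, which can be
// probed directly with the net package already imported here.
func exampleQueryReachable(setup TestSetup) bool {
	conn, err := net.DialTimeout("tcp", setup.QueryAddress(), time.Second)
	if err != nil {
		return false
	}
	_ = conn.Close()
	return true
}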
func (ts *testSetup) StopQuery() error {
	// Send interrupt.
	ts.queryInterruptCh <- fmt.Errorf("interrupt")

	// Wait for done.
	<-ts.queryDoneCh

	return nil
}

func (ts *testSetup) QueryAddress() string {
	return ts.queryAddress
}

func (ts *testSetup) TChannelClient() *TestTChannelClient {
	return ts.tchannelClient
}

func (ts *testSetup) WriteBatch(namespace ident.ID, seriesList generate.SeriesBlock) error {
	if ts.opts.UseTChannelClientForWriting() {
		return ts.tchannelClient.TChannelClientWriteBatch(
			ts.opts.WriteRequestTimeout(), namespace, seriesList)
	}
	return m3dbClientWriteBatch(ts.m3dbClient, ts.workerPool, namespace, seriesList)
}

func (ts *testSetup) Fetch(req *rpc.FetchRequest) ([]generate.TestValue, error) {
	if ts.opts.UseTChannelClientForReading() {
		fetched, err := ts.tchannelClient.TChannelClientFetch(ts.opts.ReadRequestTimeout(), req)
		if err != nil {
			return nil, err
		}
		dp := toDatapoints(fetched)
		return dp, nil
	}
	return m3dbClientFetch(ts.m3dbClient, req)
}

func (ts *testSetup) Truncate(req *rpc.TruncateRequest) (int64, error) {
	if ts.opts.UseTChannelClientForTruncation() {
		return ts.tchannelClient.TChannelClientTruncate(ts.opts.TruncateRequestTimeout(), req)
	}
	return m3dbClientTruncate(ts.m3dbClient, req)
}

func (ts *testSetup) health() (*rpc.NodeHealthResult_, error) {
	return ts.tchannelClient.TChannelClientHealth(5 * time.Second)
}

func (ts *testSetup) Close() {
	if ts.channel != nil {
		ts.channel.Close()
	}
	if ts.filePathPrefix != "" {
		os.RemoveAll(ts.filePathPrefix)
	}

	// This could get called more than once in the multi-node integration test
	// case, but this is fine since the reset always sets the counter to 0.
	fs.ResetIndexClaimsManagersUnsafe()
}

func (ts *testSetup) MustSetTickMinimumInterval(tickMinInterval time.Duration) {
	runtimeMgr := ts.storageOpts.RuntimeOptionsManager()
	existingOptions := runtimeMgr.Get()
	newOptions := existingOptions.SetTickMinimumInterval(tickMinInterval)
	err := runtimeMgr.Update(newOptions)
	if err != nil {
		panic(fmt.Sprintf("err setting tick minimum interval: %v", err))
	}
}
// SleepFor10xTickMinimumInterval is a convenience wrapper used to ensure a
// tick occurs.
func (ts *testSetup) SleepFor10xTickMinimumInterval() {
	// Check the runtime options manager instead of relying on ts.opts
	// because the tick interval can change at runtime.
	runtimeMgr := ts.storageOpts.RuntimeOptionsManager()
	opts := runtimeMgr.Get()
	time.Sleep(opts.TickMinimumInterval() * 10)
}

func (ts *testSetup) httpClusterAddr() string {
	if addr := ts.opts.HTTPClusterAddr(); addr != "" {
		return addr
	}
	return *httpClusterAddr
}

func (ts *testSetup) httpNodeAddr() string {
	if addr := ts.opts.HTTPNodeAddr(); addr != "" {
		return addr
	}
	return *httpNodeAddr
}

func (ts *testSetup) tchannelClusterAddr() string {
	if addr := ts.opts.TChannelClusterAddr(); addr != "" {
		return addr
	}
	return *tchannelClusterAddr
}

func (ts *testSetup) tchannelNodeAddr() string {
	if addr := ts.opts.TChannelNodeAddr(); addr != "" {
		return addr
	}
	return *tchannelNodeAddr
}

func (ts *testSetup) httpDebugAddr() string {
	if addr := ts.opts.HTTPDebugAddr(); addr != "" {
		return addr
	}
	return *httpDebugAddr
}

func (ts *testSetup) MaybeResetClients() error {
	if ts.m3dbClient == nil {
		// Recreate the clients as their session was destroyed by StopServer().
		adminClient, verificationAdminClient, err := newClients(ts.topoInit,
			ts.opts, ts.schemaReg, ts.hostID, ts.tchannelNodeAddr(),
			ts.instrumentOpts)
		if err != nil {
			return err
		}
		ts.m3dbClient = adminClient.(client.Client)
		ts.m3dbAdminClient = adminClient
		ts.m3dbVerificationAdminClient = verificationAdminClient
	}

	return nil
}

func (ts *testSetup) SchemaRegistry() namespace.SchemaRegistry {
	return ts.schemaReg
}

// InitializeBootstrappersOptions supplies options for bootstrapper initialization.
type InitializeBootstrappersOptions struct {
	CommitLogOptions commitlog.Options
	WithCommitLog    bool
	WithFileSystem   bool
}

func (o InitializeBootstrappersOptions) validate() error {
	if o.WithCommitLog && o.CommitLogOptions == nil {
		return errors.New("commit log options required when initializing a commit log bootstrapper")
	}
	return nil
}
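// Illustrative usage (an addition, not in the original file): a test that
// needs real data to survive restarts enables the filesystem bootstrapper,
// optionally chaining the commit log bootstrapper by also supplying its
// options (required by validate above whenever WithCommitLog is set).
func exampleInitBootstrappers(setup TestSetup, commitLogOpts commitlog.Options) error {
	return setup.InitializeBootstrappers(InitializeBootstrappersOptions{
		CommitLogOptions: commitLogOpts,
		WithCommitLog:    commitLogOpts != nil,
		WithFileSystem:   true,
	})
}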
func (ts *testSetup) InitializeBootstrappers(opts InitializeBootstrappersOptions) error {
	var err error
	if err := opts.validate(); err != nil {
		return err
	}

	bs := bootstrapper.NewNoOpAllBootstrapperProvider()
	storageOpts := ts.StorageOpts()
	bsOpts := newDefaulTestResultOptions(storageOpts)
	fsOpts := storageOpts.CommitLogOptions().FilesystemOptions()
	if opts.WithCommitLog {
		bclOpts := bcl.NewOptions().
			SetResultOptions(bsOpts).
			SetCommitLogOptions(opts.CommitLogOptions).
			SetRuntimeOptionsManager(runtime.NewOptionsManager())
		bs, err = bcl.NewCommitLogBootstrapperProvider(
			bclOpts, mustInspectFilesystem(fsOpts), bs)
		if err != nil {
			return err
		}
	}

	if opts.WithFileSystem {
		persistMgr, err := fs.NewPersistManager(fsOpts)
		if err != nil {
			return err
		}
		storageIdxOpts := storageOpts.IndexOptions()
		compactor, err := newCompactorWithErr(storageIdxOpts)
		if err != nil {
			return err
		}
		bfsOpts := bfs.NewOptions().
			SetResultOptions(bsOpts).
			SetFilesystemOptions(fsOpts).
			SetIndexOptions(storageIdxOpts).
			SetPersistManager(persistMgr).
			SetIndexClaimsManager(storageOpts.IndexClaimsManager()).
			SetCompactor(compactor).
			SetInstrumentOptions(storageOpts.InstrumentOptions())
		bs, err = bfs.NewFileSystemBootstrapperProvider(bfsOpts, bs)
		if err != nil {
			return err
		}
	}

	processOpts := bootstrap.NewProcessOptions().
		SetTopologyMapProvider(ts).
		SetOrigin(ts.Origin())
	process, err := bootstrap.NewProcessProvider(bs, processOpts, bsOpts, fsOpts)
	if err != nil {
		return err
	}
	ts.SetStorageOpts(storageOpts.SetBootstrapProcessProvider(process))

	return nil
}

// TopologyMap implements topology.MapProvider and makes sure that the topology
// map provided always comes from the most recent database in the testSetup,
// since databases get recreated every time StartServer/StopServer is called
// and are not available (nil value) after creation but before the first call
// to StartServer.
func (ts *testSetup) TopologyMap() (topology.Map, error) {
	return ts.db.TopologyMap()
}

func newOrigin(id string, tchannelNodeAddr string) topology.Host {
	return topology.NewHost(id, tchannelNodeAddr)
}

func newClients(
	topoInit topology.Initializer,
	opts TestOptions,
	schemaReg namespace.SchemaRegistry,
	id, tchannelNodeAddr string,
	instrumentOpts instrument.Options,
) (client.AdminClient, client.AdminClient, error) {
	var (
		clientOpts = defaultClientOptions(topoInit).SetClusterConnectTimeout(
			opts.ClusterConnectionTimeout()).
			SetFetchRequestTimeout(opts.FetchRequestTimeout()).
			SetWriteConsistencyLevel(opts.WriteConsistencyLevel()).
			SetTopologyInitializer(topoInit).
			SetUseV2BatchAPIs(true).
			SetInstrumentOptions(instrumentOpts).
			SetShardsLeavingAndInitializingCountTowardsConsistency(opts.ShardsLeavingAndInitializingCountTowardsConsistency())

		origin             = newOrigin(id, tchannelNodeAddr)
		verificationOrigin = newOrigin(id+"-verification", tchannelNodeAddr)

		adminOpts = clientOpts.(client.AdminOptions).SetOrigin(origin).SetSchemaRegistry(schemaReg)

		verificationAdminOpts = adminOpts.SetOrigin(verificationOrigin).SetSchemaRegistry(schemaReg)
	)

	if opts.ProtoEncoding() {
		adminOpts = adminOpts.SetEncodingProto(prototest.ProtoPools.EncodingOpt).(client.AdminOptions)
		verificationAdminOpts = verificationAdminOpts.SetEncodingProto(prototest.ProtoPools.EncodingOpt).(client.AdminOptions)
	}

	for _, opt := range opts.CustomClientAdminOptions() {
		adminOpts = opt(adminOpts)
		verificationAdminOpts = opt(verificationAdminOpts)
	}

	// Set up m3db client.
	adminClient, err := m3dbAdminClient(adminOpts)
	if err != nil {
		return nil, nil, err
	}

	// Set up m3db verification client.
	verificationAdminClient, err := m3dbAdminClient(verificationAdminOpts)
	if err != nil {
		return nil, nil, err
	}

	return adminClient, verificationAdminClient, nil
}
type testSetups []TestSetup

func (ts testSetups) parallel(fn func(s TestSetup)) {
	var wg sync.WaitGroup
	for _, setup := range ts {
		s := setup
		wg.Add(1)
		go func() {
			fn(s)
			wg.Done()
		}()
	}
	wg.Wait()
}

// node generates service instances with reasonable defaults
func node(t *testing.T, n int, shards shard.Shards) services.ServiceInstance {
	require.True(t, n < 250) // keep ports sensible
	return services.NewServiceInstance().
		SetInstanceID(fmt.Sprintf("testhost%v", n)).
		SetEndpoint(fmt.Sprintf("127.0.0.1:%v", multiAddrPortStart+multiAddrPortEach*n)).
		SetShards(shards)
}

// newNodes creates a set of testSetups with reasonable defaults
func newNodes(
	t *testing.T,
	numShards int,
	instances []services.ServiceInstance,
	nspaces []namespace.Metadata,
	asyncInserts bool,
) (testSetups, topology.Initializer, closeFn) {
	var (
		log  = zap.L()
		opts = NewTestOptions(t).
			SetNamespaces(nspaces).
			SetTickMinimumInterval(3 * time.Second).
			SetWriteNewSeriesAsync(asyncInserts).
			SetNumShards(numShards)
		// NB(bl): We set replication to 3 to mimic production. This can be made
		// into a variable if needed.
		svc = fake.NewM3ClusterService().
			SetInstances(instances).
			SetReplication(services.NewServiceReplication().SetReplicas(3)).
			SetSharding(services.NewServiceSharding().SetNumShards(numShards))

		svcs = fake.NewM3ClusterServices()
	)
	svcs.RegisterService("m3db", svc)

	topoOpts := topology.NewDynamicOptions().
		SetConfigServiceClient(fake.NewM3ClusterClient(svcs, nil))
	topoInit := topology.NewDynamicInitializer(topoOpts)

	nodeOpt := BootstrappableTestSetupOptions{
		DisablePeersBootstrapper: true,
		FinalBootstrapper:        uninitialized.UninitializedTopologyBootstrapperName,
		TopologyInitializer:      topoInit,
	}

	nodeOpts := make([]BootstrappableTestSetupOptions, len(instances))
	for i := range instances {
		nodeOpts[i] = nodeOpt
	}

	nodes, closeFn := NewDefaultBootstrappableTestSetups(t, opts, nodeOpts)

	nodeClose := func() { // Clean up running servers at end of test
		log.Debug("servers closing")
		nodes.parallel(func(s TestSetup) {
			if s.ServerIsBootstrapped() {
				require.NoError(t, s.StopServer())
			}
		})
		closeFn()
		log.Debug("servers are now down")
	}

	return nodes, topoInit, nodeClose
}

func mustInspectFilesystem(fsOpts fs.Options) fs.Inspection {
	inspection, err := fs.InspectFilesystem(fsOpts)
	if err != nil {
		panic(err)
	}

	return inspection
}

func newTestFile(t *testing.T, fileName, contents string) (*os.File, closeFn) {
	tmpFile, err := ioutil.TempFile("", fileName)
	require.NoError(t, err)

	_, err = tmpFile.WriteString(contents)
	require.NoError(t, err)

	return tmpFile, func() {
		assert.NoError(t, tmpFile.Close())
		assert.NoError(t, os.Remove(tmpFile.Name()))
	}
}

// DebugTest allows testing to see if a standard debug test env var is set.
func DebugTest() bool {
	return os.Getenv("DEBUG_TEST") == "true"
}