// Source: github.com/m3db/m3@v1.5.0/src/integration/resources/inprocess/dbnode.go

// Copyright (c) 2021 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
20 21 package inprocess 22 23 import ( 24 "bytes" 25 "errors" 26 "fmt" 27 "io/ioutil" 28 "net" 29 "os" 30 "os/exec" 31 "strconv" 32 "time" 33 34 "github.com/google/uuid" 35 "go.uber.org/zap" 36 "gopkg.in/yaml.v2" 37 38 "github.com/m3db/m3/src/cmd/services/m3dbnode/config" 39 "github.com/m3db/m3/src/cmd/services/m3dbnode/server" 40 "github.com/m3db/m3/src/dbnode/generated/thrift/rpc" 41 "github.com/m3db/m3/src/dbnode/integration" 42 "github.com/m3db/m3/src/integration/resources" 43 nettest "github.com/m3db/m3/src/integration/resources/net" 44 "github.com/m3db/m3/src/query/generated/proto/admin" 45 xconfig "github.com/m3db/m3/src/x/config" 46 "github.com/m3db/m3/src/x/config/hostid" 47 xos "github.com/m3db/m3/src/x/os" 48 ) 49 50 // TODO(nate): make configurable 51 const defaultRPCTimeout = time.Minute 52 53 // DBNode is an in-process implementation of resources.Node. 54 type DBNode struct { 55 cfg config.Configuration 56 logger *zap.Logger 57 tmpDirs []string 58 started bool 59 startFn DBNodeStartFn 60 61 interruptCh chan<- error 62 shutdownCh <-chan struct{} 63 // tchanClient is an RPC client used for hitting the DB nodes RPC API. 64 tchanClient *integration.TestTChannelClient 65 } 66 67 //nolint:maligned 68 // DBNodeOptions are options for starting a DB node server. 69 type DBNodeOptions struct { 70 // GeneratePorts will automatically update the config to use open ports 71 // if set to true. If false, configuration is used as-is re: ports. 72 GeneratePorts bool 73 // GenerateHostID will automatically update the host ID specified in 74 // the config if set to true. If false, configuration is used as-is re: host ID. 75 GenerateHostID bool 76 // StartFn is a custom function that can be used to start the DBNode. 77 StartFn DBNodeStartFn 78 // Start indicates whether to start the dbnode instance. 79 Start bool 80 // Logger is the logger to use for the dbnode. If not provided, 81 // a default one will be created. 
82 Logger *zap.Logger 83 } 84 85 // NewDBNodeFromConfigFile creates a new in-process DB node based on the config file 86 // and options provided. 87 func NewDBNodeFromConfigFile(pathToCfg string, opts DBNodeOptions) (resources.Node, error) { 88 var cfg config.Configuration 89 if err := xconfig.LoadFile(&cfg, pathToCfg, xconfig.Options{}); err != nil { 90 return nil, err 91 } 92 93 return NewDBNode(cfg, opts) 94 } 95 96 // NewDBNodeFromYAML creates a new in-process DB node based on the YAML configuration string 97 // and options provided. 98 func NewDBNodeFromYAML(yamlCfg string, opts DBNodeOptions) (resources.Node, error) { 99 var cfg config.Configuration 100 if err := yaml.Unmarshal([]byte(yamlCfg), &cfg); err != nil { 101 return nil, err 102 } 103 104 return NewDBNode(cfg, opts) 105 } 106 107 // NewDBNode creates a new in-process DB node based on the configuration 108 // and options provided. Use NewDBNode or any of the convenience constructors 109 // (e.g. NewDBNodeFromYAML, NewDBNodeFromConfigFile) to get a running 110 // dbnode. 111 // 112 // The most typical usage of this method will be in an integration test to validate 113 // some behavior. For example, assuming we have a valid placement available already we 114 // could do the following to read and write to a namespace (note: ignoring error checking): 115 // 116 // dbnode, _ := NewDBNodeFromYAML(defaultDBNodeConfig, DBNodeOptions{}) 117 // dbnode.WaitForBootstrap() 118 // dbnode.WriteTaggedPoint(&rpc.WriteTaggedRequest{...})) 119 // res, _ = dbnode.FetchTagged(&rpc.FetchTaggedRequest{...}) 120 // 121 // The dbnode will start up as you specify in your config. However, there is some 122 // helper logic to avoid port and filesystem collisions when spinning up multiple components 123 // within the process. If you specify a GeneratePorts: true in the DBNodeOptions, address ports 124 // will be replaced with an open port. 
125 // 126 // Similarly, filepath fields will be updated with a temp directory that will be cleaned up 127 // when the dbnode is destroyed. This should ensure that many of the same component can be 128 // spun up in-process without any issues with collisions. 129 func NewDBNode(cfg config.Configuration, opts DBNodeOptions) (resources.Node, error) { 130 // Massage config so it runs properly in tests. 131 cfg, tmpDirs, err := updateDBNodeConfig(cfg, opts) 132 if err != nil { 133 return nil, err 134 } 135 136 hostID, err := cfg.DB.HostIDOrDefault().Resolve() 137 if err != nil { 138 return nil, err 139 } 140 logging := cfg.DB.LoggingOrDefault() 141 if len(logging.Fields) == 0 { 142 logging.Fields = make(map[string]interface{}) 143 } 144 logging.Fields["component"] = fmt.Sprintf("dbnode:%s", hostID) 145 cfg.DB.Logging = &logging 146 147 // Configure TChannel client for hitting the DB node. 148 tchanClient, err := integration.NewTChannelClient("client", cfg.DB.ListenAddressOrDefault()) 149 if err != nil { 150 return nil, err 151 } 152 153 // Configure logger 154 if opts.Logger == nil { 155 opts.Logger, err = resources.NewLogger() 156 if err != nil { 157 return nil, err 158 } 159 } 160 161 // Start the DB node 162 node := &DBNode{ 163 cfg: cfg, 164 logger: opts.Logger, 165 tchanClient: tchanClient, 166 tmpDirs: tmpDirs, 167 startFn: opts.StartFn, 168 } 169 if opts.Start { 170 node.Start() 171 } 172 173 return node, nil 174 } 175 176 // Start starts the DBNode instance 177 func (d *DBNode) Start() { 178 if d.started { 179 d.logger.Debug("dbnode already started") 180 return 181 } 182 d.started = true 183 184 if d.startFn != nil { 185 d.interruptCh, d.shutdownCh = d.startFn(&d.cfg) 186 return 187 } 188 189 interruptCh := make(chan error, d.cfg.Components()) 190 shutdownCh := make(chan struct{}, d.cfg.Components()) 191 go func() { 192 server.RunComponents(server.Options{ 193 Configuration: d.cfg, 194 InterruptCh: interruptCh, 195 ShutdownCh: shutdownCh, 196 }) 197 }() 198 199 
d.interruptCh = interruptCh 200 d.shutdownCh = shutdownCh 201 } 202 203 // HostDetails returns this node's host details on the given port. 204 func (d *DBNode) HostDetails(_ int) (*admin.Host, error) { 205 _, p, err := net.SplitHostPort(d.cfg.DB.ListenAddressOrDefault()) 206 if err != nil { 207 return nil, err 208 } 209 210 port, err := strconv.Atoi(p) 211 if err != nil { 212 return nil, err 213 } 214 215 hostID, err := d.cfg.DB.HostIDOrDefault().Resolve() 216 if err != nil { 217 return nil, err 218 } 219 220 discoverCfg := d.cfg.DB.DiscoveryOrDefault() 221 envConfig, err := discoverCfg.EnvironmentConfig(hostID) 222 if err != nil { 223 return nil, err 224 } 225 226 return &admin.Host{ 227 Id: hostID, 228 // TODO(nate): add support for multiple etcd services. Practically, this 229 // is very rare so using the zero-indexed value here will almost always be 230 // correct. 231 Zone: envConfig.Services[0].Service.Zone, 232 // TODO(nate): weight should most likely not live here as it's part of 233 // cluster configuration 234 Weight: 1024, 235 Address: "0.0.0.0", 236 Port: uint32(port), 237 }, nil 238 } 239 240 // Health gives this node's health. 241 func (d *DBNode) Health() (*rpc.NodeHealthResult_, error) { 242 return d.tchanClient.TChannelClientHealth(defaultRPCTimeout) 243 } 244 245 // WaitForBootstrap blocks until the node has bootstrapped. 246 func (d *DBNode) WaitForBootstrap() error { 247 return resources.Retry(func() error { 248 health, err := d.Health() 249 if err != nil { 250 return err 251 } 252 253 if !health.GetBootstrapped() { 254 err = fmt.Errorf("not bootstrapped") 255 d.logger.Error("node not bootstrapped", zap.Error(err)) 256 return err 257 } 258 259 return nil 260 }) 261 } 262 263 // WritePoint writes a datapoint to the node directly. 264 func (d *DBNode) WritePoint(req *rpc.WriteRequest) error { 265 return d.tchanClient.TChannelClientWrite(defaultRPCTimeout, req) 266 } 267 268 // WriteTaggedPoint writes a datapoint with tags to the node directly. 
269 func (d *DBNode) WriteTaggedPoint(req *rpc.WriteTaggedRequest) error { 270 return d.tchanClient.TChannelClientWriteTagged(defaultRPCTimeout, req) 271 } 272 273 // WriteTaggedBatchRaw writes a batch of writes to the node directly. 274 func (d *DBNode) WriteTaggedBatchRaw(req *rpc.WriteTaggedBatchRawRequest) error { 275 return d.tchanClient.TChannelClientWriteTaggedBatchRaw(defaultRPCTimeout, req) 276 } 277 278 // AggregateTiles starts tiles aggregation, waits until it will complete 279 // and returns the amount of aggregated tiles. 280 func (d *DBNode) AggregateTiles(req *rpc.AggregateTilesRequest) (int64, error) { 281 res, err := d.tchanClient.TChannelClientAggregateTiles(defaultRPCTimeout, req) 282 if err != nil { 283 return 0, err 284 } 285 286 return res.ProcessedTileCount, nil 287 } 288 289 // Fetch fetches datapoints. 290 func (d *DBNode) Fetch(req *rpc.FetchRequest) (*rpc.FetchResult_, error) { 291 return d.tchanClient.TChannelClientFetch(defaultRPCTimeout, req) 292 } 293 294 // FetchTagged fetches datapoints by tag. 295 func (d *DBNode) FetchTagged(req *rpc.FetchTaggedRequest) (*rpc.FetchTaggedResult_, error) { 296 return d.tchanClient.TChannelClientFetchTagged(defaultRPCTimeout, req) 297 } 298 299 // Exec executes the given commands on the node container, returning 300 // stdout and stderr from the container. 301 func (d *DBNode) Exec(commands ...string) (string, error) { 302 //nolint:gosec 303 cmd := exec.Command(commands[0], commands[1:]...) 304 305 var out bytes.Buffer 306 cmd.Stdout = &out 307 if err := cmd.Run(); err != nil { 308 return "", err 309 } 310 311 return out.String(), nil 312 } 313 314 // GoalStateExec executes the given commands on the node container, retrying 315 // until applying the verifier returns no error or the default timeout. 
316 func (d *DBNode) GoalStateExec(verifier resources.GoalStateVerifier, commands ...string) error { 317 return resources.Retry(func() error { 318 if err := verifier(d.Exec(commands...)); err != nil { 319 d.logger.Info("goal state verification failed. retrying") 320 return err 321 } 322 return nil 323 }) 324 } 325 326 // Restart restarts this container. 327 func (d *DBNode) Restart() error { 328 if err := d.Close(); err != nil { 329 return err 330 } 331 332 d.Start() 333 334 return nil 335 } 336 337 // Close closes the wrapper and releases any held resources, including 338 // deleting docker containers. 339 func (d *DBNode) Close() error { 340 defer func() { 341 for _, dir := range d.tmpDirs { 342 if err := os.RemoveAll(dir); err != nil { 343 d.logger.Error("error removing temp directory", zap.String("dir", dir), zap.Error(err)) 344 } 345 } 346 }() 347 348 for i := 0; i < d.cfg.Components(); i++ { 349 select { 350 case d.interruptCh <- xos.NewInterruptError("in-process node being shut down"): 351 case <-time.After(interruptTimeout): 352 return errors.New("timeout sending interrupt. closing without graceful shutdown") 353 } 354 } 355 356 for i := 0; i < d.cfg.Components(); i++ { 357 select { 358 case <-d.shutdownCh: 359 case <-time.After(shutdownTimeout): 360 return errors.New("timeout waiting for shutdown notification. server closing may" + 361 " not be completely graceful") 362 } 363 } 364 d.started = false 365 366 return nil 367 } 368 369 // Configuration returns a copy of the configuration used to 370 // start this dbnode. 
371 func (d *DBNode) Configuration() config.Configuration { 372 return d.cfg 373 } 374 375 func updateDBNodeConfig( 376 cfg config.Configuration, 377 opts DBNodeOptions, 378 ) (config.Configuration, []string, error) { 379 var ( 380 tmpDirs []string 381 err error 382 ) 383 // Replace any ports with open ports 384 if opts.GeneratePorts { 385 cfg, err = updateDBNodePorts(cfg) 386 if err != nil { 387 return config.Configuration{}, nil, err 388 } 389 } 390 391 // Replace host ID configuration with config-based version. 392 if opts.GenerateHostID { 393 cfg = updateDBNodeHostID(cfg) 394 } 395 396 // Replace any filepath with a temporary directory 397 cfg, tmpDirs, err = updateDBNodeFilepaths(cfg) 398 if err != nil { 399 return config.Configuration{}, nil, err 400 } 401 402 return cfg, tmpDirs, nil 403 } 404 405 func updateDBNodePorts(cfg config.Configuration) (config.Configuration, error) { 406 addr1, _, err := nettest.GeneratePort(cfg.DB.ListenAddressOrDefault()) 407 if err != nil { 408 return cfg, err 409 } 410 cfg.DB.ListenAddress = &addr1 411 412 addr2, _, err := nettest.GeneratePort(cfg.DB.ClusterListenAddressOrDefault()) 413 if err != nil { 414 return cfg, err 415 } 416 cfg.DB.ClusterListenAddress = &addr2 417 418 addr3, _, err := nettest.GeneratePort(cfg.DB.HTTPNodeListenAddressOrDefault()) 419 if err != nil { 420 return cfg, err 421 } 422 cfg.DB.HTTPNodeListenAddress = &addr3 423 424 addr4, _, err := nettest.GeneratePort(cfg.DB.HTTPClusterListenAddressOrDefault()) 425 if err != nil { 426 return cfg, err 427 } 428 cfg.DB.HTTPClusterListenAddress = &addr4 429 430 addr5, _, err := nettest.GeneratePort(cfg.DB.DebugListenAddressOrDefault()) 431 if err != nil { 432 return cfg, err 433 } 434 cfg.DB.DebugListenAddress = &addr5 435 436 if cfg.Coordinator != nil { 437 coordCfg, err := updateCoordinatorPorts(*cfg.Coordinator) 438 if err != nil { 439 return cfg, err 440 } 441 442 cfg.Coordinator = &coordCfg 443 } 444 445 return cfg, nil 446 } 447 448 func 
updateDBNodeHostID(cfg config.Configuration) config.Configuration { 449 hostID := uuid.New().String() 450 cfg.DB.HostID = &hostid.Configuration{ 451 Resolver: hostid.ConfigResolver, 452 Value: &hostID, 453 } 454 455 return cfg 456 } 457 458 func updateDBNodeFilepaths(cfg config.Configuration) (config.Configuration, []string, error) { 459 tmpDirs := make([]string, 0, 1) 460 461 dir, err := ioutil.TempDir("", "m3db-*") 462 if err != nil { 463 return cfg, nil, err 464 } 465 tmpDirs = append(tmpDirs, dir) 466 cfg.DB.Filesystem.FilePathPrefix = &dir 467 468 ec := cfg.DB.Client.EnvironmentConfig 469 if ec != nil { 470 for _, svc := range ec.Services { 471 if svc != nil && svc.Service != nil { 472 dir, err := ioutil.TempDir("", "m3kv-*") 473 if err != nil { 474 return cfg, tmpDirs, err 475 } 476 477 tmpDirs = append(tmpDirs, dir) 478 svc.Service.CacheDir = dir 479 } 480 } 481 } 482 483 if cfg.Coordinator != nil { 484 coordCfg, coordDirs, err := updateCoordinatorFilepaths(*cfg.Coordinator) 485 if err != nil { 486 return cfg, nil, err 487 } 488 tmpDirs = append(tmpDirs, coordDirs...) 489 490 cfg.Coordinator = &coordCfg 491 } 492 493 return cfg, tmpDirs, nil 494 }