github.com/m3db/m3@v1.5.0/src/integration/resources/inprocess/dbnode.go (about)

     1  // Copyright (c) 2021  Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package inprocess
    22  
    23  import (
    24  	"bytes"
    25  	"errors"
    26  	"fmt"
    27  	"io/ioutil"
    28  	"net"
    29  	"os"
    30  	"os/exec"
    31  	"strconv"
    32  	"time"
    33  
    34  	"github.com/google/uuid"
    35  	"go.uber.org/zap"
    36  	"gopkg.in/yaml.v2"
    37  
    38  	"github.com/m3db/m3/src/cmd/services/m3dbnode/config"
    39  	"github.com/m3db/m3/src/cmd/services/m3dbnode/server"
    40  	"github.com/m3db/m3/src/dbnode/generated/thrift/rpc"
    41  	"github.com/m3db/m3/src/dbnode/integration"
    42  	"github.com/m3db/m3/src/integration/resources"
    43  	nettest "github.com/m3db/m3/src/integration/resources/net"
    44  	"github.com/m3db/m3/src/query/generated/proto/admin"
    45  	xconfig "github.com/m3db/m3/src/x/config"
    46  	"github.com/m3db/m3/src/x/config/hostid"
    47  	xos "github.com/m3db/m3/src/x/os"
    48  )
    49  
// defaultRPCTimeout is the value passed as the timeout argument to every
// TChannel client call made by DBNode in this file.
// TODO(nate): make configurable
const defaultRPCTimeout = time.Minute
    52  
// DBNode is an in-process implementation of resources.Node.
type DBNode struct {
	// cfg is the configuration handed to the server on Start, after
	// NewDBNode has adjusted it via updateDBNodeConfig.
	cfg     config.Configuration
	// logger is used for this wrapper's own log messages.
	logger  *zap.Logger
	// tmpDirs lists the temporary directories created for this node; they
	// are removed in Close.
	tmpDirs []string
	// started is set by Start and cleared by a successful Close.
	started bool
	// startFn, when non-nil, is called by Start in place of the default
	// server.RunComponents startup.
	startFn DBNodeStartFn

	// interruptCh is the send side used by Close to deliver one interrupt
	// error per component.
	interruptCh chan<- error
	// shutdownCh is the receive side Close waits on, once per component,
	// after the interrupts have been sent.
	shutdownCh  <-chan struct{}
	// tchanClient is an RPC client used for hitting the DB nodes RPC API.
	tchanClient *integration.TestTChannelClient
}
    66  
//nolint:maligned
// DBNodeOptions are options for starting a DB node server. The zero value
// leaves the configuration's ports and host ID untouched and does NOT start
// the node (see Start).
type DBNodeOptions struct {
	// GeneratePorts will automatically update the config to use open ports
	// if set to true. If false, configuration is used as-is re: ports.
	GeneratePorts bool
	// GenerateHostID will automatically update the host ID specified in
	// the config if set to true. If false, configuration is used as-is re: host ID.
	GenerateHostID bool
	// StartFn is a custom function that can be used to start the DBNode.
	// When nil, DBNode.Start runs the components via server.RunComponents.
	StartFn DBNodeStartFn
	// Start indicates whether to start the dbnode instance. When false,
	// NewDBNode returns a node that has not been started; call
	// DBNode.Start to start it.
	Start bool
	// Logger is the logger to use for the dbnode. If not provided,
	// a default one will be created.
	Logger *zap.Logger
}
    84  
    85  // NewDBNodeFromConfigFile creates a new in-process DB node based on the config file
    86  // and options provided.
    87  func NewDBNodeFromConfigFile(pathToCfg string, opts DBNodeOptions) (resources.Node, error) {
    88  	var cfg config.Configuration
    89  	if err := xconfig.LoadFile(&cfg, pathToCfg, xconfig.Options{}); err != nil {
    90  		return nil, err
    91  	}
    92  
    93  	return NewDBNode(cfg, opts)
    94  }
    95  
    96  // NewDBNodeFromYAML creates a new in-process DB node based on the YAML configuration string
    97  // and options provided.
    98  func NewDBNodeFromYAML(yamlCfg string, opts DBNodeOptions) (resources.Node, error) {
    99  	var cfg config.Configuration
   100  	if err := yaml.Unmarshal([]byte(yamlCfg), &cfg); err != nil {
   101  		return nil, err
   102  	}
   103  
   104  	return NewDBNode(cfg, opts)
   105  }
   106  
   107  // NewDBNode creates a new in-process DB node based on the configuration
   108  // and options provided. Use NewDBNode or any of the convenience constructors
   109  // (e.g. NewDBNodeFromYAML, NewDBNodeFromConfigFile) to get a running
   110  // dbnode.
   111  //
   112  // The most typical usage of this method will be in an integration test to validate
   113  // some behavior. For example, assuming we have a valid placement available already we
   114  // could do the following to read and write to a namespace (note: ignoring error checking):
   115  //
   116  //    dbnode, _ := NewDBNodeFromYAML(defaultDBNodeConfig, DBNodeOptions{})
   117  //    dbnode.WaitForBootstrap()
   118  //    dbnode.WriteTaggedPoint(&rpc.WriteTaggedRequest{...}))
   119  //    res, _ = dbnode.FetchTagged(&rpc.FetchTaggedRequest{...})
   120  //
   121  // The dbnode will start up as you specify in your config. However, there is some
   122  // helper logic to avoid port and filesystem collisions when spinning up multiple components
   123  // within the process. If you specify a GeneratePorts: true in the DBNodeOptions, address ports
   124  // will be replaced with an open port.
   125  //
   126  // Similarly, filepath fields will  be updated with a temp directory that will be cleaned up
   127  // when the dbnode is destroyed. This should ensure that many of the same component can be
   128  // spun up in-process without any issues with collisions.
   129  func NewDBNode(cfg config.Configuration, opts DBNodeOptions) (resources.Node, error) {
   130  	// Massage config so it runs properly in tests.
   131  	cfg, tmpDirs, err := updateDBNodeConfig(cfg, opts)
   132  	if err != nil {
   133  		return nil, err
   134  	}
   135  
   136  	hostID, err := cfg.DB.HostIDOrDefault().Resolve()
   137  	if err != nil {
   138  		return nil, err
   139  	}
   140  	logging := cfg.DB.LoggingOrDefault()
   141  	if len(logging.Fields) == 0 {
   142  		logging.Fields = make(map[string]interface{})
   143  	}
   144  	logging.Fields["component"] = fmt.Sprintf("dbnode:%s", hostID)
   145  	cfg.DB.Logging = &logging
   146  
   147  	// Configure TChannel client for hitting the DB node.
   148  	tchanClient, err := integration.NewTChannelClient("client", cfg.DB.ListenAddressOrDefault())
   149  	if err != nil {
   150  		return nil, err
   151  	}
   152  
   153  	// Configure logger
   154  	if opts.Logger == nil {
   155  		opts.Logger, err = resources.NewLogger()
   156  		if err != nil {
   157  			return nil, err
   158  		}
   159  	}
   160  
   161  	// Start the DB node
   162  	node := &DBNode{
   163  		cfg:         cfg,
   164  		logger:      opts.Logger,
   165  		tchanClient: tchanClient,
   166  		tmpDirs:     tmpDirs,
   167  		startFn:     opts.StartFn,
   168  	}
   169  	if opts.Start {
   170  		node.Start()
   171  	}
   172  
   173  	return node, nil
   174  }
   175  
   176  // Start starts the DBNode instance
   177  func (d *DBNode) Start() {
   178  	if d.started {
   179  		d.logger.Debug("dbnode already started")
   180  		return
   181  	}
   182  	d.started = true
   183  
   184  	if d.startFn != nil {
   185  		d.interruptCh, d.shutdownCh = d.startFn(&d.cfg)
   186  		return
   187  	}
   188  
   189  	interruptCh := make(chan error, d.cfg.Components())
   190  	shutdownCh := make(chan struct{}, d.cfg.Components())
   191  	go func() {
   192  		server.RunComponents(server.Options{
   193  			Configuration: d.cfg,
   194  			InterruptCh:   interruptCh,
   195  			ShutdownCh:    shutdownCh,
   196  		})
   197  	}()
   198  
   199  	d.interruptCh = interruptCh
   200  	d.shutdownCh = shutdownCh
   201  }
   202  
   203  // HostDetails returns this node's host details on the given port.
   204  func (d *DBNode) HostDetails(_ int) (*admin.Host, error) {
   205  	_, p, err := net.SplitHostPort(d.cfg.DB.ListenAddressOrDefault())
   206  	if err != nil {
   207  		return nil, err
   208  	}
   209  
   210  	port, err := strconv.Atoi(p)
   211  	if err != nil {
   212  		return nil, err
   213  	}
   214  
   215  	hostID, err := d.cfg.DB.HostIDOrDefault().Resolve()
   216  	if err != nil {
   217  		return nil, err
   218  	}
   219  
   220  	discoverCfg := d.cfg.DB.DiscoveryOrDefault()
   221  	envConfig, err := discoverCfg.EnvironmentConfig(hostID)
   222  	if err != nil {
   223  		return nil, err
   224  	}
   225  
   226  	return &admin.Host{
   227  		Id: hostID,
   228  		// TODO(nate): add support for multiple etcd services. Practically, this
   229  		// is very rare so using the zero-indexed value here will almost always be
   230  		// correct.
   231  		Zone: envConfig.Services[0].Service.Zone,
   232  		// TODO(nate): weight should most likely not live here as it's part of
   233  		// cluster configuration
   234  		Weight:  1024,
   235  		Address: "0.0.0.0",
   236  		Port:    uint32(port),
   237  	}, nil
   238  }
   239  
   240  // Health gives this node's health.
   241  func (d *DBNode) Health() (*rpc.NodeHealthResult_, error) {
   242  	return d.tchanClient.TChannelClientHealth(defaultRPCTimeout)
   243  }
   244  
   245  // WaitForBootstrap blocks until the node has bootstrapped.
   246  func (d *DBNode) WaitForBootstrap() error {
   247  	return resources.Retry(func() error {
   248  		health, err := d.Health()
   249  		if err != nil {
   250  			return err
   251  		}
   252  
   253  		if !health.GetBootstrapped() {
   254  			err = fmt.Errorf("not bootstrapped")
   255  			d.logger.Error("node not bootstrapped", zap.Error(err))
   256  			return err
   257  		}
   258  
   259  		return nil
   260  	})
   261  }
   262  
   263  // WritePoint writes a datapoint to the node directly.
   264  func (d *DBNode) WritePoint(req *rpc.WriteRequest) error {
   265  	return d.tchanClient.TChannelClientWrite(defaultRPCTimeout, req)
   266  }
   267  
   268  // WriteTaggedPoint writes a datapoint with tags to the node directly.
   269  func (d *DBNode) WriteTaggedPoint(req *rpc.WriteTaggedRequest) error {
   270  	return d.tchanClient.TChannelClientWriteTagged(defaultRPCTimeout, req)
   271  }
   272  
   273  // WriteTaggedBatchRaw writes a batch of writes to the node directly.
   274  func (d *DBNode) WriteTaggedBatchRaw(req *rpc.WriteTaggedBatchRawRequest) error {
   275  	return d.tchanClient.TChannelClientWriteTaggedBatchRaw(defaultRPCTimeout, req)
   276  }
   277  
   278  // AggregateTiles starts tiles aggregation, waits until it will complete
   279  // and returns the amount of aggregated tiles.
   280  func (d *DBNode) AggregateTiles(req *rpc.AggregateTilesRequest) (int64, error) {
   281  	res, err := d.tchanClient.TChannelClientAggregateTiles(defaultRPCTimeout, req)
   282  	if err != nil {
   283  		return 0, err
   284  	}
   285  
   286  	return res.ProcessedTileCount, nil
   287  }
   288  
   289  // Fetch fetches datapoints.
   290  func (d *DBNode) Fetch(req *rpc.FetchRequest) (*rpc.FetchResult_, error) {
   291  	return d.tchanClient.TChannelClientFetch(defaultRPCTimeout, req)
   292  }
   293  
   294  // FetchTagged fetches datapoints by tag.
   295  func (d *DBNode) FetchTagged(req *rpc.FetchTaggedRequest) (*rpc.FetchTaggedResult_, error) {
   296  	return d.tchanClient.TChannelClientFetchTagged(defaultRPCTimeout, req)
   297  }
   298  
   299  // Exec executes the given commands on the node container, returning
   300  // stdout and stderr from the container.
   301  func (d *DBNode) Exec(commands ...string) (string, error) {
   302  	//nolint:gosec
   303  	cmd := exec.Command(commands[0], commands[1:]...)
   304  
   305  	var out bytes.Buffer
   306  	cmd.Stdout = &out
   307  	if err := cmd.Run(); err != nil {
   308  		return "", err
   309  	}
   310  
   311  	return out.String(), nil
   312  }
   313  
   314  // GoalStateExec executes the given commands on the node container, retrying
   315  // until applying the verifier returns no error or the default timeout.
   316  func (d *DBNode) GoalStateExec(verifier resources.GoalStateVerifier, commands ...string) error {
   317  	return resources.Retry(func() error {
   318  		if err := verifier(d.Exec(commands...)); err != nil {
   319  			d.logger.Info("goal state verification failed. retrying")
   320  			return err
   321  		}
   322  		return nil
   323  	})
   324  }
   325  
   326  // Restart restarts this container.
   327  func (d *DBNode) Restart() error {
   328  	if err := d.Close(); err != nil {
   329  		return err
   330  	}
   331  
   332  	d.Start()
   333  
   334  	return nil
   335  }
   336  
   337  // Close closes the wrapper and releases any held resources, including
   338  // deleting docker containers.
   339  func (d *DBNode) Close() error {
   340  	defer func() {
   341  		for _, dir := range d.tmpDirs {
   342  			if err := os.RemoveAll(dir); err != nil {
   343  				d.logger.Error("error removing temp directory", zap.String("dir", dir), zap.Error(err))
   344  			}
   345  		}
   346  	}()
   347  
   348  	for i := 0; i < d.cfg.Components(); i++ {
   349  		select {
   350  		case d.interruptCh <- xos.NewInterruptError("in-process node being shut down"):
   351  		case <-time.After(interruptTimeout):
   352  			return errors.New("timeout sending interrupt. closing without graceful shutdown")
   353  		}
   354  	}
   355  
   356  	for i := 0; i < d.cfg.Components(); i++ {
   357  		select {
   358  		case <-d.shutdownCh:
   359  		case <-time.After(shutdownTimeout):
   360  			return errors.New("timeout waiting for shutdown notification. server closing may" +
   361  				" not be completely graceful")
   362  		}
   363  	}
   364  	d.started = false
   365  
   366  	return nil
   367  }
   368  
   369  // Configuration returns a copy of the configuration used to
   370  // start this dbnode.
   371  func (d *DBNode) Configuration() config.Configuration {
   372  	return d.cfg
   373  }
   374  
   375  func updateDBNodeConfig(
   376  	cfg config.Configuration,
   377  	opts DBNodeOptions,
   378  ) (config.Configuration, []string, error) {
   379  	var (
   380  		tmpDirs []string
   381  		err     error
   382  	)
   383  	// Replace any ports with open ports
   384  	if opts.GeneratePorts {
   385  		cfg, err = updateDBNodePorts(cfg)
   386  		if err != nil {
   387  			return config.Configuration{}, nil, err
   388  		}
   389  	}
   390  
   391  	// Replace host ID configuration with config-based version.
   392  	if opts.GenerateHostID {
   393  		cfg = updateDBNodeHostID(cfg)
   394  	}
   395  
   396  	// Replace any filepath with a temporary directory
   397  	cfg, tmpDirs, err = updateDBNodeFilepaths(cfg)
   398  	if err != nil {
   399  		return config.Configuration{}, nil, err
   400  	}
   401  
   402  	return cfg, tmpDirs, nil
   403  }
   404  
   405  func updateDBNodePorts(cfg config.Configuration) (config.Configuration, error) {
   406  	addr1, _, err := nettest.GeneratePort(cfg.DB.ListenAddressOrDefault())
   407  	if err != nil {
   408  		return cfg, err
   409  	}
   410  	cfg.DB.ListenAddress = &addr1
   411  
   412  	addr2, _, err := nettest.GeneratePort(cfg.DB.ClusterListenAddressOrDefault())
   413  	if err != nil {
   414  		return cfg, err
   415  	}
   416  	cfg.DB.ClusterListenAddress = &addr2
   417  
   418  	addr3, _, err := nettest.GeneratePort(cfg.DB.HTTPNodeListenAddressOrDefault())
   419  	if err != nil {
   420  		return cfg, err
   421  	}
   422  	cfg.DB.HTTPNodeListenAddress = &addr3
   423  
   424  	addr4, _, err := nettest.GeneratePort(cfg.DB.HTTPClusterListenAddressOrDefault())
   425  	if err != nil {
   426  		return cfg, err
   427  	}
   428  	cfg.DB.HTTPClusterListenAddress = &addr4
   429  
   430  	addr5, _, err := nettest.GeneratePort(cfg.DB.DebugListenAddressOrDefault())
   431  	if err != nil {
   432  		return cfg, err
   433  	}
   434  	cfg.DB.DebugListenAddress = &addr5
   435  
   436  	if cfg.Coordinator != nil {
   437  		coordCfg, err := updateCoordinatorPorts(*cfg.Coordinator)
   438  		if err != nil {
   439  			return cfg, err
   440  		}
   441  
   442  		cfg.Coordinator = &coordCfg
   443  	}
   444  
   445  	return cfg, nil
   446  }
   447  
   448  func updateDBNodeHostID(cfg config.Configuration) config.Configuration {
   449  	hostID := uuid.New().String()
   450  	cfg.DB.HostID = &hostid.Configuration{
   451  		Resolver: hostid.ConfigResolver,
   452  		Value:    &hostID,
   453  	}
   454  
   455  	return cfg
   456  }
   457  
   458  func updateDBNodeFilepaths(cfg config.Configuration) (config.Configuration, []string, error) {
   459  	tmpDirs := make([]string, 0, 1)
   460  
   461  	dir, err := ioutil.TempDir("", "m3db-*")
   462  	if err != nil {
   463  		return cfg, nil, err
   464  	}
   465  	tmpDirs = append(tmpDirs, dir)
   466  	cfg.DB.Filesystem.FilePathPrefix = &dir
   467  
   468  	ec := cfg.DB.Client.EnvironmentConfig
   469  	if ec != nil {
   470  		for _, svc := range ec.Services {
   471  			if svc != nil && svc.Service != nil {
   472  				dir, err := ioutil.TempDir("", "m3kv-*")
   473  				if err != nil {
   474  					return cfg, tmpDirs, err
   475  				}
   476  
   477  				tmpDirs = append(tmpDirs, dir)
   478  				svc.Service.CacheDir = dir
   479  			}
   480  		}
   481  	}
   482  
   483  	if cfg.Coordinator != nil {
   484  		coordCfg, coordDirs, err := updateCoordinatorFilepaths(*cfg.Coordinator)
   485  		if err != nil {
   486  			return cfg, nil, err
   487  		}
   488  		tmpDirs = append(tmpDirs, coordDirs...)
   489  
   490  		cfg.Coordinator = &coordCfg
   491  	}
   492  
   493  	return cfg, tmpDirs, nil
   494  }