github.com/m3db/m3@v1.5.1-0.20231129193456-75a402aa583b/src/integration/resources/inprocess/cluster.go (about)

     1  // Copyright (c) 2021  Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package inprocess
    22  
    23  import (
    24  	"context"
    25  	"errors"
    26  	"fmt"
    27  	"time"
    28  
    29  	etcdclient "github.com/m3db/m3/src/cluster/client/etcd"
    30  	aggcfg "github.com/m3db/m3/src/cmd/services/m3aggregator/config"
    31  	dbcfg "github.com/m3db/m3/src/cmd/services/m3dbnode/config"
    32  	coordinatorcfg "github.com/m3db/m3/src/cmd/services/m3query/config"
    33  	"github.com/m3db/m3/src/dbnode/client"
    34  	"github.com/m3db/m3/src/dbnode/discovery"
    35  	"github.com/m3db/m3/src/dbnode/environment"
    36  	"github.com/m3db/m3/src/dbnode/persist/fs"
    37  	"github.com/m3db/m3/src/integration/resources"
    38  	"github.com/m3db/m3/src/integration/resources/docker/dockerexternal"
    39  	"github.com/m3db/m3/src/query/storage/m3"
    40  	xconfig "github.com/m3db/m3/src/x/config"
    41  	"github.com/m3db/m3/src/x/config/hostid"
    42  	xerrors "github.com/m3db/m3/src/x/errors"
    43  	"github.com/m3db/m3/src/x/instrument"
    44  
    45  	"github.com/google/uuid"
    46  	"github.com/ory/dockertest/v3"
    47  	"go.uber.org/zap"
    48  	"gopkg.in/yaml.v2"
    49  )
    50  
// ClusterConfigs contains the input configs to use for components within
// the cluster. There is one default configuration for each type of component.
// Given a set of ClusterConfigs, the function NewCluster can spin up an m3 cluster.
// Alternatively, GenerateClusterSpecification can be used to get the per-instance
// configuration and options based on the given ClusterConfigs.
type ClusterConfigs struct {
	// DBNode is the configuration for db nodes. It is used as the template
	// that is deep-copied for every db node instance.
	DBNode dbcfg.Configuration
	// Coordinator is the configuration for the coordinator.
	Coordinator coordinatorcfg.Configuration
	// Aggregator is the configuration for aggregators.
	// If Aggregator is nil, the cluster contains only m3coordinator and dbnodes.
	// Per-instance aggregator configs are only generated when the cluster
	// options passed alongside these configs also set aggregator options.
	Aggregator *aggcfg.Configuration
}
    65  
// ClusterSpecification contains the per-instance configuration and options to use
// for starting each component within the cluster.
// The function NewClusterFromSpecification will spin up an m3 cluster
// with the given ClusterSpecification.
type ClusterSpecification struct {
	// Configs contains the per-instance configuration for all components in the cluster.
	Configs PerInstanceConfigs
	// Options contains the per-instance options for setting up the cluster.
	Options PerInstanceOptions
}
    76  
// PerInstanceConfigs contains the per-instance configuration for all components.
type PerInstanceConfigs struct {
	// DBNodes contains the per-instance configuration for db nodes,
	// one entry per db node instance.
	DBNodes []dbcfg.Configuration
	// Coordinator is the configuration for the coordinator.
	Coordinator coordinatorcfg.Configuration
	// Aggregators contains the per-instance configuration for aggregators,
	// one entry per aggregator instance.
	// If Aggregators is nil, the cluster contains only m3coordinator and dbnodes.
	Aggregators []aggcfg.Configuration
}
    87  
// PerInstanceOptions contains the per-instance options for setting up the cluster.
type PerInstanceOptions struct {
	// DBNode contains the per-instance options for db nodes in the cluster.
	// Entry i holds the options used to start the db node configured by
	// PerInstanceConfigs.DBNodes[i].
	DBNode []DBNodeOptions
}
    93  
    94  // NewClusterConfigsFromConfigFile creates a new ClusterConfigs object from the
    95  // provided filepaths for dbnode and coordinator configuration.
    96  func NewClusterConfigsFromConfigFile(
    97  	pathToDBNodeCfg string,
    98  	pathToCoordCfg string,
    99  	pathToAggCfg string,
   100  ) (ClusterConfigs, error) {
   101  	var dCfg dbcfg.Configuration
   102  	if err := xconfig.LoadFile(&dCfg, pathToDBNodeCfg, xconfig.Options{}); err != nil {
   103  		return ClusterConfigs{}, err
   104  	}
   105  
   106  	var cCfg coordinatorcfg.Configuration
   107  	if err := xconfig.LoadFile(&cCfg, pathToCoordCfg, xconfig.Options{}); err != nil {
   108  		return ClusterConfigs{}, err
   109  	}
   110  
   111  	var aCfg aggcfg.Configuration
   112  	if pathToAggCfg != "" {
   113  		if err := xconfig.LoadFile(&aCfg, pathToAggCfg, xconfig.Options{}); err != nil {
   114  			return ClusterConfigs{}, err
   115  		}
   116  	}
   117  
   118  	return ClusterConfigs{
   119  		DBNode:      dCfg,
   120  		Coordinator: cCfg,
   121  		Aggregator:  &aCfg,
   122  	}, nil
   123  }
   124  
   125  // NewClusterConfigsFromYAML creates a new ClusterConfigs object from YAML strings
   126  // representing component configs.
   127  func NewClusterConfigsFromYAML(dbnodeYaml string, coordYaml string, aggYaml string) (ClusterConfigs, error) {
   128  	// "db":
   129  	//  discovery:
   130  	//    "config":
   131  	//      "service":
   132  	//        "etcdClusters":
   133  	//          - "endpoints": ["http://127.0.0.1:2379"]
   134  	//            "zone": "embedded"
   135  	//        "service": "m3db"
   136  	//        "zone": "embedded"
   137  	//        "env": "default_env"
   138  	etcdClientCfg := &etcdclient.Configuration{
   139  		Zone:    "embedded",
   140  		Env:     "default_env",
   141  		Service: "m3db",
   142  		ETCDClusters: []etcdclient.ClusterConfig{{
   143  			Zone:      "embedded",
   144  			Endpoints: []string{"http://127.0.0.1:2379"},
   145  		}},
   146  	}
   147  	var dbCfg = dbcfg.Configuration{
   148  		DB: &dbcfg.DBConfiguration{
   149  			Discovery: &discovery.Configuration{
   150  				Config: &environment.Configuration{
   151  					Services: environment.DynamicConfiguration{{
   152  						Service: etcdClientCfg,
   153  					}},
   154  				},
   155  			},
   156  		},
   157  	}
   158  	if err := yaml.Unmarshal([]byte(dbnodeYaml), &dbCfg); err != nil {
   159  		return ClusterConfigs{}, err
   160  	}
   161  
   162  	var coordCfg = coordinatorcfg.Configuration{
   163  		ClusterManagement: coordinatorcfg.ClusterManagementConfiguration{
   164  			Etcd: etcdClientCfg,
   165  		},
   166  	}
   167  	if err := yaml.Unmarshal([]byte(coordYaml), &coordCfg); err != nil {
   168  		return ClusterConfigs{}, err
   169  	}
   170  
   171  	var aggCfg = aggcfg.Configuration{}
   172  
   173  	if aggYaml != "" {
   174  		if err := yaml.Unmarshal([]byte(aggYaml), &aggCfg); err != nil {
   175  			return ClusterConfigs{}, err
   176  		}
   177  	}
   178  
   179  	return ClusterConfigs{
   180  		Coordinator: coordCfg,
   181  		DBNode:      dbCfg,
   182  		Aggregator:  &aggCfg,
   183  	}, nil
   184  }
   185  
   186  // NewCluster creates a new M3 cluster based on the ClusterOptions provided.
   187  // Expects at least a coordinator, a dbnode and an aggregator config.
   188  func NewCluster(
   189  	configs ClusterConfigs,
   190  	opts resources.ClusterOptions,
   191  ) (resources.M3Resources, error) {
   192  	fullConfigs, err := GenerateClusterSpecification(configs, opts)
   193  	if err != nil {
   194  		return nil, err
   195  	}
   196  
   197  	return NewClusterFromSpecification(fullConfigs, opts)
   198  }
   199  
   200  // NewClusterFromSpecification creates a new M3 cluster with the given ClusterSpecification.
   201  func NewClusterFromSpecification(
   202  	specs ClusterSpecification,
   203  	opts resources.ClusterOptions,
   204  ) (_ resources.M3Resources, finalErr error) {
   205  	if err := opts.Validate(); err != nil {
   206  		return nil, err
   207  	}
   208  
   209  	logger, err := resources.NewLogger()
   210  	if err != nil {
   211  		return nil, err
   212  	}
   213  
   214  	var (
   215  		etcd  *dockerexternal.EtcdNode
   216  		coord resources.Coordinator
   217  		nodes = make(resources.Nodes, 0, len(specs.Configs.DBNodes))
   218  		aggs  = make(resources.Aggregators, 0, len(specs.Configs.Aggregators))
   219  	)
   220  
   221  	fs.DisableIndexClaimsManagersCheckUnsafe()
   222  
   223  	// Ensure that once we start creating resources, they all get cleaned up even if the function
   224  	// fails half way.
   225  	defer func() {
   226  		if finalErr != nil {
   227  			cleanup(logger, etcd, nodes, coord, aggs)
   228  		}
   229  	}()
   230  
   231  	etcdEndpoints := opts.EtcdEndpoints
   232  	if len(opts.EtcdEndpoints) == 0 {
   233  		// TODO: amainsd: maybe not the cleanest place to do this.
   234  		pool, err := dockertest.NewPool("")
   235  		if err != nil {
   236  			return nil, err
   237  		}
   238  		etcd, err = dockerexternal.NewEtcd(pool, instrument.NewOptions())
   239  		if err != nil {
   240  			return nil, err
   241  		}
   242  
   243  		// TODO(amains): etcd *needs* to be setup before the coordinator, because ConfigurePlacementsForAggregation spins
   244  		// up a dedicated coordinator for some reason. Either clean this up or just accept it.
   245  		if err := etcd.Setup(context.TODO()); err != nil {
   246  			return nil, err
   247  		}
   248  		etcdEndpoints = []string{fmt.Sprintf(etcd.Address())}
   249  	}
   250  
   251  	updateEtcdEndpoints := func(etcdCfg *etcdclient.Configuration) {
   252  		etcdCfg.ETCDClusters[0].Endpoints = etcdEndpoints
   253  		etcdCfg.ETCDClusters[0].AutoSyncInterval = -1
   254  	}
   255  	for i := 0; i < len(specs.Configs.DBNodes); i++ {
   256  		var node resources.Node
   257  		updateEtcdEndpoints(specs.Configs.DBNodes[i].DB.Discovery.Config.Services[0].Service)
   258  		node, err = NewDBNode(specs.Configs.DBNodes[i], specs.Options.DBNode[i])
   259  		if err != nil {
   260  			return nil, err
   261  		}
   262  		nodes = append(nodes, node)
   263  	}
   264  
   265  	for _, aggCfg := range specs.Configs.Aggregators {
   266  		var agg resources.Aggregator
   267  		agg, err = NewAggregator(aggCfg, AggregatorOptions{
   268  			GeneratePorts:  true,
   269  			GenerateHostID: false,
   270  			EtcdEndpoints:  etcdEndpoints,
   271  		})
   272  		if err != nil {
   273  			return nil, err
   274  		}
   275  		aggs = append(aggs, agg)
   276  	}
   277  
   278  	updateEtcdEndpoints(specs.Configs.Coordinator.ClusterManagement.Etcd)
   279  	coord, err = NewCoordinator(
   280  		specs.Configs.Coordinator,
   281  		CoordinatorOptions{GeneratePorts: opts.Coordinator.GeneratePorts},
   282  	)
   283  	if err != nil {
   284  		return nil, err
   285  	}
   286  
   287  	if err = ConfigurePlacementsForAggregation(nodes, coord, aggs, specs, opts); err != nil {
   288  		return nil, fmt.Errorf("failed to setup placements for aggregation: %w", err)
   289  	}
   290  
   291  	// Start all the configured resources.
   292  	m3 := NewM3Resources(ResourceOptions{
   293  		Coordinator: coord,
   294  		DBNodes:     nodes,
   295  		Aggregators: aggs,
   296  		Etcd:        etcd,
   297  	})
   298  	m3.Start()
   299  
   300  	if err = resources.SetupCluster(m3, opts); err != nil {
   301  		return nil, err
   302  	}
   303  
   304  	return m3, nil
   305  }
   306  
   307  // ConfigurePlacementsForAggregation sets up the correct placement information for
   308  // coordinators and aggregators when aggregation is enabled.
   309  func ConfigurePlacementsForAggregation(
   310  	nodes resources.Nodes,
   311  	coord resources.Coordinator,
   312  	aggs resources.Aggregators,
   313  	specs ClusterSpecification,
   314  	opts resources.ClusterOptions,
   315  ) error {
   316  	if len(aggs) == 0 {
   317  		return nil
   318  	}
   319  
   320  	coordAPI := coord
   321  	hostDetails, err := coord.HostDetails()
   322  	if err != nil {
   323  		return err
   324  	}
   325  
   326  	// With remote aggregation enabled, aggregation is not handled within the coordinator.
   327  	// When this is true, the coordinator will fail to start until placement is updated with
   328  	// aggregation related information. As such, use the coordinator embedded within the dbnode
   329  	// to configure the placement and topics.
   330  	if specs.Configs.Coordinator.Downsample.RemoteAggregator != nil {
   331  		if len(specs.Configs.DBNodes) == 0 ||
   332  			specs.Configs.DBNodes[0].Coordinator == nil {
   333  			return errors.New("remote aggregation requires at least one DB node" +
   334  				" running an embedded coordinator for placement and topic configuration")
   335  		}
   336  
   337  		embedded, err := NewEmbeddedCoordinator(nodes[0].(*DBNode))
   338  		if err != nil {
   339  			return nil
   340  		}
   341  
   342  		coordAPI = embedded
   343  	} else {
   344  		// TODO(nate): Remove this in a follow up. If we're not doing remote aggregation
   345  		// we should not be starting aggs which is what requires the coordinator to get started.
   346  		// Once we've refactored existing tests that have aggs w/o remote aggregation enabled,
   347  		// this should be killable.
   348  		coord.Start()
   349  	}
   350  
   351  	if err = coordAPI.WaitForNamespace(""); err != nil {
   352  		return err
   353  	}
   354  
   355  	if err = resources.SetupPlacement(coordAPI, *hostDetails, aggs, *opts.Aggregator); err != nil {
   356  		return err
   357  	}
   358  
   359  	aggInstanceInfo, err := aggs[0].HostDetails()
   360  	if err != nil {
   361  		return err
   362  	}
   363  
   364  	return resources.SetupM3MsgTopics(coordAPI, *aggInstanceInfo, opts)
   365  }
   366  
   367  // GenerateClusterSpecification generates the per-instance configuration and options
   368  // for the cluster set up based on the given input configuation and options.
   369  func GenerateClusterSpecification(
   370  	configs ClusterConfigs,
   371  	opts resources.ClusterOptions,
   372  ) (ClusterSpecification, error) {
   373  	if err := opts.Validate(); err != nil {
   374  		return ClusterSpecification{}, err
   375  	}
   376  
   377  	nodeCfgs, nodeOpts, envConfig, err := GenerateDBNodeConfigsForCluster(configs, opts.DBNode)
   378  	if err != nil {
   379  		return ClusterSpecification{}, err
   380  	}
   381  
   382  	coordConfig := configs.Coordinator
   383  	// TODO(nate): refactor to support having envconfig if no DB.
   384  	if len(coordConfig.Clusters) > 0 {
   385  		coordConfig.Clusters[0].Client.EnvironmentConfig = &envConfig
   386  	} else {
   387  		coordConfig.Clusters = m3.ClustersStaticConfiguration{
   388  			{
   389  				Client: client.Configuration{
   390  					EnvironmentConfig: &envConfig,
   391  				},
   392  			},
   393  		}
   394  	}
   395  
   396  	var aggCfgs []aggcfg.Configuration
   397  	if opts.Aggregator != nil {
   398  		aggCfgs, err = GenerateAggregatorConfigsForCluster(configs, opts.Aggregator)
   399  		if err != nil {
   400  			return ClusterSpecification{}, err
   401  		}
   402  	}
   403  
   404  	return ClusterSpecification{
   405  		Configs: PerInstanceConfigs{
   406  			DBNodes:     nodeCfgs,
   407  			Coordinator: coordConfig,
   408  			Aggregators: aggCfgs,
   409  		},
   410  		Options: PerInstanceOptions{
   411  			DBNode: nodeOpts,
   412  		},
   413  	}, nil
   414  }
   415  
   416  // GenerateDBNodeConfigsForCluster generates the unique configs and options
   417  // for each DB node that will be instantiated. Additionally, provides
   418  // default environment config that can be used to connect to embedded KV
   419  // within the DB nodes.
   420  func GenerateDBNodeConfigsForCluster(
   421  	configs ClusterConfigs,
   422  	opts *resources.DBNodeClusterOptions,
   423  ) ([]dbcfg.Configuration, []DBNodeOptions, environment.Configuration, error) {
   424  	if opts == nil {
   425  		return nil, nil, environment.Configuration{}, errors.New("dbnode cluster options is nil")
   426  	}
   427  
   428  	var (
   429  		numNodes            = opts.RF * opts.NumInstances
   430  		generatePortsAndIDs = numNodes > 1
   431  	)
   432  
   433  	// TODO(nate): eventually support clients specifying their own discovery stanza.
   434  	// Practically, this should cover 99% of cases.
   435  	//
   436  	// Generate a discovery config with the dbnode using the generated hostID marked as
   437  	// the etcd server (i.e. seed node).
   438  	hostID := uuid.NewString()
   439  	defaultDBNodesCfg := configs.DBNode
   440  
   441  	if configs.DBNode.DB.Discovery == nil {
   442  		return nil, nil, environment.Configuration{}, errors.New(
   443  			"configuration must specify at least `discovery`" +
   444  				" in order to construct an etcd client")
   445  	}
   446  	discoveryCfg, envConfig := configs.DBNode.DB.Discovery, configs.DBNode.DB.Discovery.Config
   447  
   448  	var (
   449  		defaultDBNodeOpts = DBNodeOptions{
   450  			GenerateHostID: generatePortsAndIDs,
   451  			GeneratePorts:  generatePortsAndIDs,
   452  			Start:          true,
   453  		}
   454  		cfgs     = make([]dbcfg.Configuration, 0, numNodes)
   455  		nodeOpts = make([]DBNodeOptions, 0, numNodes)
   456  	)
   457  	for i := 0; i < int(numNodes); i++ {
   458  		cfg, err := defaultDBNodesCfg.DeepCopy()
   459  		if err != nil {
   460  			return nil, nil, environment.Configuration{}, err
   461  		}
   462  		dbnodeOpts := defaultDBNodeOpts
   463  
   464  		if i == 0 {
   465  			// Mark the initial node as the etcd seed node.
   466  			dbnodeOpts.GenerateHostID = false
   467  			cfg.DB.HostID = &hostid.Configuration{
   468  				Resolver: hostid.ConfigResolver,
   469  				Value:    &hostID,
   470  			}
   471  		}
   472  		cfg.DB.Discovery = discoveryCfg
   473  
   474  		cfgs = append(cfgs, cfg)
   475  		nodeOpts = append(nodeOpts, dbnodeOpts)
   476  	}
   477  
   478  	return cfgs, nodeOpts, *envConfig, nil
   479  }
   480  
   481  func cleanup(
   482  	logger *zap.Logger,
   483  	etcd *dockerexternal.EtcdNode,
   484  	nodes resources.Nodes,
   485  	coord resources.Coordinator,
   486  	aggs resources.Aggregators,
   487  ) {
   488  	var multiErr xerrors.MultiError
   489  
   490  	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
   491  	defer cancel()
   492  
   493  	if etcd != nil {
   494  		multiErr = multiErr.Add(etcd.Close(ctx))
   495  	}
   496  
   497  	for _, n := range nodes {
   498  		multiErr = multiErr.Add(n.Close())
   499  	}
   500  
   501  	if coord != nil {
   502  		multiErr = multiErr.Add(coord.Close())
   503  	}
   504  
   505  	for _, a := range aggs {
   506  		multiErr = multiErr.Add(a.Close())
   507  	}
   508  
   509  	if !multiErr.Empty() {
   510  		logger.Warn("failed closing resources", zap.Error(multiErr.FinalError()))
   511  	}
   512  }
   513  
   514  // GenerateAggregatorConfigsForCluster generates the unique configs for each aggregator instance.
   515  func GenerateAggregatorConfigsForCluster(
   516  	configs ClusterConfigs,
   517  	opts *resources.AggregatorClusterOptions,
   518  ) ([]aggcfg.Configuration, error) {
   519  	if configs.Aggregator == nil {
   520  		return nil, nil
   521  	}
   522  
   523  	cfgs := make([]aggcfg.Configuration, 0, int(opts.NumInstances))
   524  	for i := 0; i < int(opts.NumInstances); i++ {
   525  		cfg, err := configs.Aggregator.DeepCopy()
   526  		if err != nil {
   527  			return nil, err
   528  		}
   529  
   530  		hostID := fmt.Sprintf("m3aggregator%02d", i)
   531  		aggCfg := cfg.AggregatorOrDefault()
   532  		aggCfg.HostID = &hostid.Configuration{
   533  			Resolver: hostid.ConfigResolver,
   534  			Value:    &hostID,
   535  		}
   536  		cfg.Aggregator = &aggCfg
   537  
   538  		cfgs = append(cfgs, cfg)
   539  	}
   540  
   541  	return cfgs, nil
   542  }