github.com/matrixorigin/matrixone@v0.7.0/pkg/logservice/config.go (about)

     1  // Copyright 2021 - 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package logservice
    16  
    17  import (
    18  	"strconv"
    19  	"strings"
    20  	"time"
    21  
    22  	"github.com/google/uuid"
    23  	"github.com/lni/dragonboat/v4"
    24  	"github.com/lni/vfs"
    25  
    26  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    27  	"github.com/matrixorigin/matrixone/pkg/hakeeper"
    28  	"github.com/matrixorigin/matrixone/pkg/util/toml"
    29  )
    30  
    31  const (
    32  	defaultDataDir           = "mo-data/logservice"
    33  	defaultSnapshotExportDir = "exported-snapshot"
    34  	defaultServiceAddress    = "0.0.0.0:32000"
    35  	defaultRaftAddress       = "0.0.0.0:32001"
    36  	defaultGossipAddress     = "0.0.0.0:32002"
    37  	defaultGossipSeedAddress = "127.0.0.1:32002"
    38  
    39  	defaultGossipProbeInterval = 5 * time.Second
    40  	defaultHeartbeatInterval   = time.Second
    41  	defaultLogDBBufferSize     = 768 * 1024
    42  	defaultTruncateInterval    = 10 * time.Second
    43  	defaultMaxExportedSnapshot = 20
    44  	defaultMaxMessageSize      = 1024 * 1024 * 100
    45  )
    46  
// Config defines the configurations supported by the Log Service. Zero-valued
// fields can be populated with defaults by calling Fill, and the result can be
// checked with Validate.
type Config struct {
	// FS is the underlying virtual FS used by the log service. Leave it as empty
	// in production; Fill replaces a nil FS with vfs.Default.
	FS vfs.FS
	// DeploymentID is basically the Cluster ID, nodes with different DeploymentID
	// will not be able to communicate via raft. Validate rejects a zero value.
	DeploymentID uint64 `toml:"deployment-id"`
	// UUID is the UUID of the log service node. UUID value must be set.
	UUID string `toml:"uuid"`
	// RTTMillisecond is the average round trip time between log service nodes in
	// milliseconds. Fill defaults it to 200.
	RTTMillisecond uint64 `toml:"rttmillisecond"`
	// DataDir is the name of the directory for storing all log service data. It
	// should be a locally mounted partition with good write and fsync
	// performance.
	DataDir string `toml:"data-dir"`
	// SnapshotExportDir is the directory where the dragonboat snapshots are
	// exported.
	SnapshotExportDir string `toml:"snapshot-export-dir"`
	// MaxExportedSnapshot is the max count of exported snapshots. If there are
	// already MaxExportedSnapshot exported snapshots, no exported snapshot will
	// be generated.
	MaxExportedSnapshot int `toml:"max-exported-snapshot"`
	// ServiceAddress is log service's service address that can be reached by
	// other nodes such as DN nodes.
	ServiceAddress string `toml:"logservice-address"`
	// ServiceListenAddress is the local listen address of the ServiceAddress.
	// Fill copies ServiceAddress into it when it is left empty.
	ServiceListenAddress string `toml:"logservice-listen-address"`
	// RaftAddress is the address that can be reached by other log service nodes
	// via their raft layer.
	RaftAddress string `toml:"raft-address"`
	// RaftListenAddress is the local listen address of the RaftAddress. Fill
	// copies RaftAddress into it when it is left empty.
	RaftListenAddress string `toml:"raft-listen-address"`
	// UseTeeLogDB enables the log service to use tee based LogDB which is backed
	// by both a pebble and a tan based LogDB. This field should only be set to
	// true during testing.
	UseTeeLogDB bool `toml:"use-tee-logdb"`
	// LogDBBufferSize is the size of the logdb buffer in bytes.
	LogDBBufferSize uint64 `toml:"logdb-buffer-size"`
	// GossipAddress is the address used for accepting gossip communication.
	GossipAddress string `toml:"gossip-address"`
	// GossipListenAddress is the local listen address of the GossipAddress. Fill
	// copies GossipAddress into it when it is left empty.
	GossipListenAddress string `toml:"gossip-listen-address"`
	// GossipSeedAddresses is the list of seed addresses that are used for
	// introducing the local node into the gossip network. Validate rejects an
	// empty list.
	GossipSeedAddresses []string `toml:"gossip-seed-addresses"`
	// GossipProbeInterval is how often gossip nodes probe each other.
	GossipProbeInterval toml.Duration `toml:"gossip-probe-interval"`
	// GossipAllowSelfAsSeed allows the node to use itself as a gossip seed.
	GossipAllowSelfAsSeed bool `toml:"gossip-allow-self-as-seed"`
	// HeartbeatInterval is the interval of how often log service node should be
	// sending heartbeat message to the HAKeeper.
	HeartbeatInterval toml.Duration `toml:"logservice-heartbeat-interval"`
	// HAKeeperTickInterval is the interval of how often log service node should
	// tick the HAKeeper. When left at zero, Fill derives it as one second
	// divided by HAKeeperConfig.TickPerSecond.
	HAKeeperTickInterval toml.Duration `toml:"hakeeper-tick-interval"`
	// HAKeeperCheckInterval is the interval of how often HAKeeper should run
	// cluster health checks.
	HAKeeperCheckInterval toml.Duration `toml:"hakeeper-check-interval"`
	// TruncateInterval is the interval of how often log service should
	// process truncate.
	TruncateInterval toml.Duration `toml:"truncate-interval"`

	// RPC groups the RPC related settings.
	RPC struct {
		// MaxMessageSize is the max size for RPC message. The default value
		// applied by Fill is 100MiB.
		MaxMessageSize toml.ByteSize `toml:"max-message-size"`
		// EnableCompress enables compression of RPC messages.
		EnableCompress bool `toml:"enable-compress"`
	}

	// BootstrapConfig is the configuration specified for the bootstrapping
	// procedure. It only needs to be specified for Log Stores selected to host
	// initial HAKeeper replicas during bootstrapping.
	BootstrapConfig struct {
		// BootstrapCluster indicates whether the cluster should be bootstrapped.
		// Note the bootstrapping procedure will only be executed if BootstrapCluster
		// is true and Config.UUID is found in Config.BootstrapConfig.InitHAKeeperMembers.
		BootstrapCluster bool `toml:"bootstrap-cluster"`
		// NumOfLogShards defines the number of Log shards in the initial deployment.
		NumOfLogShards uint64 `toml:"num-of-log-shards"`
		// NumOfDNShards defines the number of DN shards in the initial deployment.
		// The count must be the same as NumOfLogShards in the current implementation.
		NumOfDNShards uint64 `toml:"num-of-dn-shards"`
		// NumOfLogShardReplicas is the number of replicas for each shard managed by
		// Log Stores, including Log Service shards and the HAKeeper.
		NumOfLogShardReplicas uint64 `toml:"num-of-log-shard-replicas"`
		// InitHAKeeperMembers defines the initial members of the HAKeeper as a list
		// of HAKeeper replicaID and UUID pairs. For example,
		// when the initial HAKeeper members are
		// replica with replica ID 101 running on Log Store uuid1
		// replica with replica ID 102 running on Log Store uuid2
		// replica with replica ID 103 running on Log Store uuid3
		// the InitHAKeeperMembers string value should be
		// []string{"101:uuid1", "102:uuid2", "103:uuid3"}
		// Note that these initial HAKeeper replica IDs must be assigned by k8s
		// from the range [K8SIDRangeStart, K8SIDRangeEnd) as defined in pkg/hakeeper.
		// All uuid values are assigned by k8s, they are used to uniquely identify
		// CN/DN/Log stores.
		// Config.UUID and Config.BootstrapConfig values are considered together to
		// figure out what is the replica ID of the initial HAKeeper replica. That
		// is when Config.UUID is found in InitHAKeeperMembers, then the corresponding
		// replica ID value will be used to launch a HAKeeper replica on the Log
		// Service instance.
		InitHAKeeperMembers []string `toml:"init-hakeeper-members"`
	}

	// HAKeeperConfig holds the HAKeeper settings that GetHAKeeperConfig converts
	// into a hakeeper.Config.
	HAKeeperConfig struct {
		// TickPerSecond indicates how many ticks every second.
		// In HAKeeper, we do not use actual time to measure time elapse.
		// Instead, we use ticks.
		TickPerSecond int `toml:"tick-per-second"`
		// LogStoreTimeout is the actual time limit between a log store's heartbeat.
		// If HAKeeper does not receive two heartbeats within LogStoreTimeout,
		// it regards the log store as down.
		LogStoreTimeout toml.Duration `toml:"log-store-timeout"`
		// DNStoreTimeout is the actual time limit between a dn store's heartbeat.
		// If HAKeeper does not receive two heartbeats within DNStoreTimeout,
		// it regards the dn store as down.
		DNStoreTimeout toml.Duration `toml:"dn-store-timeout"`
		// CNStoreTimeout is the actual time limit between a cn store's heartbeat.
		// If HAKeeper does not receive two heartbeats within CNStoreTimeout,
		// it regards the cn store as down.
		CNStoreTimeout toml.Duration `toml:"cn-store-timeout"`
	}

	// HAKeeperClientConfig is the config for HAKeeperClient
	HAKeeperClientConfig HAKeeperClientConfig

	// DisableWorkers disables the HAKeeper ticker and HAKeeper client in tests.
	// Never set this field to true in production
	DisableWorkers bool
}
   179  
   180  func (c *Config) GetHAKeeperConfig() hakeeper.Config {
   181  	return hakeeper.Config{
   182  		TickPerSecond:   c.HAKeeperConfig.TickPerSecond,
   183  		LogStoreTimeout: c.HAKeeperConfig.LogStoreTimeout.Duration,
   184  		DNStoreTimeout:  c.HAKeeperConfig.DNStoreTimeout.Duration,
   185  		CNStoreTimeout:  c.HAKeeperConfig.CNStoreTimeout.Duration,
   186  	}
   187  }
   188  
   189  func (c *Config) GetHAKeeperClientConfig() HAKeeperClientConfig {
   190  	saddr := make([]string, 0)
   191  	saddr = append(saddr, c.HAKeeperClientConfig.ServiceAddresses...)
   192  	return HAKeeperClientConfig{
   193  		DiscoveryAddress: c.HAKeeperClientConfig.DiscoveryAddress,
   194  		ServiceAddresses: saddr,
   195  	}
   196  }
   197  
   198  // returns replica ID of the HAKeeper replica and a boolean indicating whether
   199  // we should run the bootstrap procedure.
   200  func (c *Config) Bootstrapping() (uint64, bool) {
   201  	if !c.BootstrapConfig.BootstrapCluster {
   202  		return 0, false
   203  	}
   204  	members, err := c.GetInitHAKeeperMembers()
   205  	if err != nil {
   206  		return 0, false
   207  	}
   208  	for replicaID, uuid := range members {
   209  		if uuid == c.UUID {
   210  			return replicaID, true
   211  		}
   212  	}
   213  	return 0, false
   214  }
   215  
   216  func (c *Config) GetInitHAKeeperMembers() (map[uint64]dragonboat.Target, error) {
   217  	result := make(map[uint64]dragonboat.Target)
   218  	for _, pair := range c.BootstrapConfig.InitHAKeeperMembers {
   219  		pair = strings.TrimSpace(pair)
   220  		parts := strings.Split(pair, ":")
   221  		if len(parts) == 2 {
   222  			id := strings.TrimSpace(parts[0])
   223  			target := strings.TrimSpace(parts[1])
   224  			if _, err := uuid.Parse(target); err != nil {
   225  				return nil, moerr.NewBadConfigNoCtx("uuid %s", target)
   226  			}
   227  			idn, err := strconv.ParseUint(id, 10, 64)
   228  			if err != nil {
   229  				return nil, moerr.NewBadConfigNoCtx("replicateID '%v'", id)
   230  			}
   231  			if idn >= hakeeper.K8SIDRangeEnd || idn < hakeeper.K8SIDRangeStart {
   232  				return nil, moerr.NewBadConfigNoCtx("replicateID '%v'", id)
   233  			}
   234  			result[idn] = target
   235  		} else {
   236  			return nil, moerr.NewBadConfigNoCtx("replicaID:target %s", pair)
   237  		}
   238  	}
   239  	return result, nil
   240  }
   241  
   242  // Validate validates the configuration.
   243  func (c *Config) Validate() error {
   244  	if len(c.UUID) == 0 {
   245  		return moerr.NewBadConfigNoCtx("uuid not set")
   246  	}
   247  	if c.DeploymentID == 0 {
   248  		return moerr.NewBadConfigNoCtx("deploymentID not set")
   249  	}
   250  	// when *ListenAddress is not empty and *Address is empty, consider it as an
   251  	// error
   252  	if len(c.ServiceAddress) == 0 && len(c.ServiceListenAddress) != 0 {
   253  		return moerr.NewBadConfigNoCtx("ServiceAddress not set")
   254  	}
   255  	if len(c.RaftAddress) == 0 && len(c.RaftListenAddress) != 0 {
   256  		return moerr.NewBadConfigNoCtx("RaftAddress not set")
   257  	}
   258  	if c.LogDBBufferSize == 0 {
   259  		return moerr.NewBadConfigNoCtx("LogDBBufferSize not set")
   260  	}
   261  	if len(c.GossipAddress) == 0 && len(c.GossipListenAddress) != 0 {
   262  		return moerr.NewBadConfigNoCtx("GossipAddress not set")
   263  	}
   264  	if len(c.GossipSeedAddresses) == 0 {
   265  		return moerr.NewBadConfigNoCtx("GossipSeedAddress not set")
   266  	}
   267  	if c.HAKeeperConfig.TickPerSecond == 0 {
   268  		return moerr.NewBadConfigNoCtx("TickPerSecond not set")
   269  	}
   270  	if c.HAKeeperConfig.LogStoreTimeout.Duration == 0 {
   271  		return moerr.NewBadConfigNoCtx("LogStoreTimeout not set")
   272  	}
   273  	if c.HAKeeperConfig.DNStoreTimeout.Duration == 0 {
   274  		return moerr.NewBadConfigNoCtx("DNStoreTimeout not set")
   275  	}
   276  	if c.GossipProbeInterval.Duration == 0 {
   277  		return moerr.NewBadConfigNoCtx("GossipProbeInterval not set")
   278  	}
   279  	if c.TruncateInterval.Duration == 0 {
   280  		return moerr.NewBadConfigNoCtx("TruncateInterval not set")
   281  	}
   282  	if c.RPC.MaxMessageSize == 0 {
   283  		return moerr.NewBadConfigNoCtx("MaxMessageSize not set")
   284  	}
   285  	// validate BootstrapConfig
   286  	if c.BootstrapConfig.BootstrapCluster {
   287  		if c.BootstrapConfig.NumOfLogShards == 0 {
   288  			return moerr.NewBadConfigNoCtx("NumOfLogShards not set")
   289  		}
   290  		if c.BootstrapConfig.NumOfDNShards == 0 {
   291  			return moerr.NewBadConfigNoCtx("NumOfDNShards not set")
   292  		}
   293  		if c.BootstrapConfig.NumOfLogShardReplicas == 0 {
   294  			return moerr.NewBadConfigNoCtx("NumOfLogShardReplica not set")
   295  		}
   296  		if c.BootstrapConfig.NumOfDNShards != c.BootstrapConfig.NumOfLogShards {
   297  			return moerr.NewBadConfigNoCtx("NumOfDNShards does not match NumOfLogShards")
   298  		}
   299  		members, err := c.GetInitHAKeeperMembers()
   300  		if err != nil {
   301  			return err
   302  		}
   303  		if len(members) == 0 {
   304  			return moerr.NewBadConfigNoCtx("InitHAKeeperMembers not set")
   305  		}
   306  		if uint64(len(members)) != c.BootstrapConfig.NumOfLogShardReplicas {
   307  			return moerr.NewBadConfigNoCtx("InitHAKeeperMembers does not match NumOfLogShardReplicas")
   308  		}
   309  	}
   310  
   311  	return nil
   312  }
   313  
   314  func (c *Config) Fill() {
   315  	if c.FS == nil {
   316  		c.FS = vfs.Default
   317  	}
   318  	if c.RTTMillisecond == 0 {
   319  		c.RTTMillisecond = 200
   320  	}
   321  	if len(c.DataDir) == 0 {
   322  		c.DataDir = defaultDataDir
   323  	}
   324  	if len(c.SnapshotExportDir) == 0 {
   325  		c.SnapshotExportDir = defaultSnapshotExportDir
   326  	}
   327  	if c.MaxExportedSnapshot == 0 {
   328  		c.MaxExportedSnapshot = defaultMaxExportedSnapshot
   329  	}
   330  	if len(c.ServiceAddress) == 0 {
   331  		c.ServiceAddress = defaultServiceAddress
   332  		c.ServiceListenAddress = defaultServiceAddress
   333  	} else if len(c.ServiceAddress) != 0 && len(c.ServiceListenAddress) == 0 {
   334  		c.ServiceListenAddress = c.ServiceAddress
   335  	}
   336  	if len(c.RaftAddress) == 0 {
   337  		c.RaftAddress = defaultRaftAddress
   338  		c.RaftListenAddress = defaultRaftAddress
   339  	} else if len(c.RaftAddress) != 0 && len(c.RaftListenAddress) == 0 {
   340  		c.RaftListenAddress = c.RaftAddress
   341  	}
   342  	if c.LogDBBufferSize == 0 {
   343  		c.LogDBBufferSize = defaultLogDBBufferSize
   344  	}
   345  	if len(c.GossipAddress) == 0 {
   346  		c.GossipAddress = defaultGossipAddress
   347  		c.GossipListenAddress = defaultGossipAddress
   348  	} else if len(c.GossipAddress) != 0 && len(c.GossipListenAddress) == 0 {
   349  		c.GossipListenAddress = c.GossipAddress
   350  	}
   351  	if c.HAKeeperConfig.TickPerSecond == 0 {
   352  		c.HAKeeperConfig.TickPerSecond = hakeeper.DefaultTickPerSecond
   353  	}
   354  	if c.HAKeeperConfig.LogStoreTimeout.Duration == 0 {
   355  		c.HAKeeperConfig.LogStoreTimeout.Duration = hakeeper.DefaultLogStoreTimeout
   356  	}
   357  	if c.HAKeeperConfig.DNStoreTimeout.Duration == 0 {
   358  		c.HAKeeperConfig.DNStoreTimeout.Duration = hakeeper.DefaultDNStoreTimeout
   359  	}
   360  	if c.HAKeeperConfig.CNStoreTimeout.Duration == 0 {
   361  		c.HAKeeperConfig.CNStoreTimeout.Duration = hakeeper.DefaultCNStoreTimeout
   362  	}
   363  	if c.HeartbeatInterval.Duration == 0 {
   364  		c.HeartbeatInterval.Duration = defaultHeartbeatInterval
   365  	}
   366  	if c.HAKeeperTickInterval.Duration == 0 {
   367  		c.HAKeeperTickInterval.Duration = time.Second / time.Duration(c.HAKeeperConfig.TickPerSecond)
   368  	}
   369  	if c.HAKeeperCheckInterval.Duration == 0 {
   370  		c.HAKeeperCheckInterval.Duration = hakeeper.CheckDuration
   371  	}
   372  	if c.GossipProbeInterval.Duration == 0 {
   373  		c.GossipProbeInterval.Duration = defaultGossipProbeInterval
   374  	}
   375  	if c.TruncateInterval.Duration == 0 {
   376  		c.TruncateInterval.Duration = defaultTruncateInterval
   377  	}
   378  	if c.RPC.MaxMessageSize == 0 {
   379  		c.RPC.MaxMessageSize = toml.ByteSize(defaultMaxMessageSize)
   380  	}
   381  }
   382  
// HAKeeperClientConfig is the config for HAKeeper clients. At least one of
// DiscoveryAddress and ServiceAddresses must be set for Validate to accept it.
type HAKeeperClientConfig struct {
	// DiscoveryAddress is the Log Service discovery address provided by k8s.
	DiscoveryAddress string `toml:"discovery-address"`
	// ServiceAddresses is a list of well known Log Services' service addresses.
	ServiceAddresses []string `toml:"service-addresses"`
	// AllocateIDBatch is how many IDs are assigned from hakeeper each time.
	// Validate sets it to 100 when it is left at zero.
	AllocateIDBatch uint64 `toml:"allocate-id-batch"`
	// EnableCompress enables compression.
	EnableCompress bool `toml:"enable-compress"`
}
   395  
   396  // Validate validates the HAKeeperClientConfig.
   397  func (c *HAKeeperClientConfig) Validate() error {
   398  	if len(c.DiscoveryAddress) == 0 && len(c.ServiceAddresses) == 0 {
   399  		return moerr.NewBadConfigNoCtx("HAKeeperClientConfig not set")
   400  	}
   401  	if c.AllocateIDBatch == 0 {
   402  		c.AllocateIDBatch = 100
   403  	}
   404  	return nil
   405  }
   406  
// ClientConfig is the configuration for log service clients. Validate
// requires non-zero LogShardID and DNReplicaID, and at least one of
// DiscoveryAddress and ServiceAddresses.
type ClientConfig struct {
	// Tag is the client tag.
	Tag string
	// ReadOnly indicates whether this is a read-only client.
	ReadOnly bool
	// LogShardID is the shard ID of the log service shard to be used.
	LogShardID uint64
	// DNReplicaID is the replica ID of the DN that owns the created client.
	DNReplicaID uint64
	// DiscoveryAddress is the Log Service discovery address provided by k8s.
	DiscoveryAddress string
	// LogService nodes service addresses. This field is provided for testing
	// purposes only.
	ServiceAddresses []string
	// MaxMessageSize is the max message size for RPC.
	MaxMessageSize int
	// EnableCompress enables compression.
	EnableCompress bool
}
   427  
   428  // Validate validates the ClientConfig.
   429  func (c *ClientConfig) Validate() error {
   430  	if c.LogShardID == 0 {
   431  		return moerr.NewBadConfigNoCtx("LogShardID value cannot be 0")
   432  	}
   433  	if c.DNReplicaID == 0 {
   434  		return moerr.NewBadConfigNoCtx("DNReplicaID value cannot be 0")
   435  	}
   436  	if len(c.DiscoveryAddress) == 0 && len(c.ServiceAddresses) == 0 {
   437  		return moerr.NewBadConfigNoCtx("ServiceAddresses not set")
   438  	}
   439  	return nil
   440  }
   441  
   442  func splitAddresses(v string) []string {
   443  	results := make([]string, 0)
   444  	parts := strings.Split(v, ";")
   445  	for _, v := range parts {
   446  		t := strings.TrimSpace(v)
   447  		if len(t) > 0 {
   448  			results = append(results, t)
   449  		}
   450  	}
   451  	return results
   452  }