github.com/clly/consul@v1.4.5/agent/consul/config.go (about) 1 package consul 2 3 import ( 4 "fmt" 5 "io" 6 "net" 7 "os" 8 "time" 9 10 "github.com/hashicorp/consul/agent/consul/autopilot" 11 "github.com/hashicorp/consul/agent/structs" 12 "github.com/hashicorp/consul/lib" 13 "github.com/hashicorp/consul/tlsutil" 14 "github.com/hashicorp/consul/types" 15 "github.com/hashicorp/consul/version" 16 "github.com/hashicorp/memberlist" 17 "github.com/hashicorp/raft" 18 "github.com/hashicorp/serf/serf" 19 "golang.org/x/time/rate" 20 ) 21 22 const ( 23 DefaultDC = "dc1" 24 DefaultRPCPort = 8300 25 DefaultLANSerfPort = 8301 26 DefaultWANSerfPort = 8302 27 28 // DefaultRaftMultiplier is used as a baseline Raft configuration that 29 // will be reliable on a very basic server. See docs/guides/performance.html 30 // for information on how this value was obtained. 31 DefaultRaftMultiplier uint = 5 32 33 // MaxRaftMultiplier is a fairly arbitrary upper bound that limits the 34 // amount of performance detuning that's possible. 35 MaxRaftMultiplier uint = 10 36 ) 37 38 var ( 39 DefaultRPCAddr = &net.TCPAddr{IP: net.ParseIP("0.0.0.0"), Port: DefaultRPCPort} 40 41 // ProtocolVersionMap is the mapping of Consul protocol versions 42 // to Serf protocol versions. We mask the Serf protocols using 43 // our own protocol version. 44 protocolVersionMap map[uint8]uint8 45 ) 46 47 func init() { 48 protocolVersionMap = map[uint8]uint8{ 49 1: 4, 50 2: 4, 51 3: 4, 52 } 53 } 54 55 // (Enterprise-only) NetworkSegment is the address and port configuration 56 // for a network segment. 57 type NetworkSegment struct { 58 Name string 59 Bind string 60 Port int 61 Advertise string 62 RPCAddr *net.TCPAddr 63 SerfConfig *serf.Config 64 } 65 66 // Config is used to configure the server 67 type Config struct { 68 // Bootstrap mode is used to bring up the first Consul server. 69 // It is required so that it can elect a leader without any 70 // other nodes being present 71 Bootstrap bool 72 73 // BootstrapExpect mode is used to automatically bring up a collection of 74 // Consul servers. This can be used to automatically bring up a collection 75 // of nodes. 76 BootstrapExpect int 77 78 // Datacenter is the datacenter this Consul server represents. 79 Datacenter string 80 81 // PrimaryDatacenter is the authoritative datacenter for features like ACLs 82 // and Connect. 83 PrimaryDatacenter string 84 85 // DataDir is the directory to store our state in. 86 DataDir string 87 88 // DevMode is used to enable a development server mode. 89 DevMode bool 90 91 // NodeID is a unique identifier for this node across space and time. 92 NodeID types.NodeID 93 94 // Node name is the name we use to advertise. Defaults to hostname. 95 NodeName string 96 97 // Domain is the DNS domain for the records. Defaults to "consul." 98 Domain string 99 100 // RaftConfig is the configuration used for Raft in the local DC 101 RaftConfig *raft.Config 102 103 // (Enterprise-only) NonVoter is used to prevent this server from being added 104 // as a voting member of the Raft cluster. 105 NonVoter bool 106 107 // NotifyListen is called after the RPC listener has been configured. 108 // RPCAdvertise will be set to the listener address if it hasn't been 109 // configured at this point. 110 NotifyListen func() 111 112 // RPCAddr is the RPC address used by Consul. This should be reachable 113 // by the WAN and LAN 114 RPCAddr *net.TCPAddr 115 116 // RPCAdvertise is the address that is advertised to other nodes for 117 // the RPC endpoint. This can differ from the RPC address, if for example 118 // the RPCAddr is unspecified "0.0.0.0:8300", but this address must be 119 // reachable. If RPCAdvertise is nil then it will be set to the Listener 120 // address after the listening socket is configured. 121 RPCAdvertise *net.TCPAddr 122 123 // RPCSrcAddr is the source address for outgoing RPC connections. 124 RPCSrcAddr *net.TCPAddr 125 126 // (Enterprise-only) The network segment this agent is part of. 127 Segment string 128 129 // (Enterprise-only) Segments is a list of network segments for a server to 130 // bind on. 131 Segments []NetworkSegment 132 133 // SerfLANConfig is the configuration for the intra-dc serf 134 SerfLANConfig *serf.Config 135 136 // SerfWANConfig is the configuration for the cross-dc serf 137 SerfWANConfig *serf.Config 138 139 // SerfFloodInterval controls how often we attempt to flood local Serf 140 // Consul servers into the global areas (WAN and user-defined areas in 141 // Consul Enterprise). 142 SerfFloodInterval time.Duration 143 144 // ReconcileInterval controls how often we reconcile the strongly 145 // consistent store with the Serf info. This is used to handle nodes 146 // that are force removed, as well as intermittent unavailability during 147 // leader election. 148 ReconcileInterval time.Duration 149 150 // LogOutput is the location to write logs to. If this is not set, 151 // logs will go to stderr. 152 LogOutput io.Writer 153 154 // ProtocolVersion is the protocol version to speak. This must be between 155 // ProtocolVersionMin and ProtocolVersionMax. 156 ProtocolVersion uint8 157 158 // VerifyIncoming is used to verify the authenticity of incoming connections. 159 // This means that TCP requests are forbidden, only allowing for TLS. TLS connections 160 // must match a provided certificate authority. This can be used to force client auth. 161 VerifyIncoming bool 162 163 // VerifyOutgoing is used to force verification of the authenticity of outgoing connections. 164 // This means that TLS requests are used, and TCP requests are not made. TLS connections 165 // must match a provided certificate authority. 166 VerifyOutgoing bool 167 168 // UseTLS is used to enable TLS for outgoing connections to other TLS-capable Consul 169 // servers. This doesn't imply any verification, it only enables TLS if possible. 170 UseTLS bool 171 172 // VerifyServerHostname is used to enable hostname verification of servers. This 173 // ensures that the certificate presented is valid for server.<datacenter>.<domain>. 174 // This prevents a compromised client from being restarted as a server, and then 175 // intercepting request traffic as well as being added as a raft peer. This should be 176 // enabled by default with VerifyOutgoing, but for legacy reasons we cannot break 177 // existing clients. 178 VerifyServerHostname bool 179 180 // CAFile is a path to a certificate authority file. This is used with VerifyIncoming 181 // or VerifyOutgoing to verify the TLS connection. 182 CAFile string 183 184 // CAPath is a path to a directory of certificate authority files. This is used with 185 // VerifyIncoming or VerifyOutgoing to verify the TLS connection. 186 CAPath string 187 188 // CertFile is used to provide a TLS certificate that is used for serving TLS connections. 189 // Must be provided to serve TLS connections. 190 CertFile string 191 192 // KeyFile is used to provide a TLS key that is used for serving TLS connections. 193 // Must be provided to serve TLS connections. 194 KeyFile string 195 196 // ServerName is used with the TLS certificate to ensure the name we 197 // provide matches the certificate 198 ServerName string 199 200 // TLSMinVersion is used to set the minimum TLS version used for TLS connections. 201 TLSMinVersion string 202 203 // TLSCipherSuites is used to specify the list of supported ciphersuites. 204 TLSCipherSuites []uint16 205 206 // TLSPreferServerCipherSuites specifies whether to prefer the server's ciphersuite 207 // over the client ciphersuites. 208 TLSPreferServerCipherSuites bool 209 210 // RejoinAfterLeave controls our interaction with Serf. 211 // When set to false (default), a leave causes a Consul to not rejoin 212 // the cluster until an explicit join is received. If this is set to 213 // true, we ignore the leave, and rejoin the cluster on start. 214 RejoinAfterLeave bool 215 216 // Build is a string that is gossiped around, and can be used to help 217 // operators track which versions are actively deployed 218 Build string 219 220 // ACLEnabled is used to enable ACLs 221 ACLsEnabled bool 222 223 // ACLEnforceVersion8 is used to gate a set of ACL policy features that 224 // are opt-in prior to Consul 0.8 and opt-out in Consul 0.8 and later. 225 ACLEnforceVersion8 bool 226 227 // ACLMasterToken is used to bootstrap the ACL system. It should be specified 228 // on the servers in the ACLDatacenter. When the leader comes online, it ensures 229 // that the Master token is available. This provides the initial token. 230 ACLMasterToken string 231 232 // ACLDatacenter provides the authoritative datacenter for ACL 233 // tokens. If not provided, ACL verification is disabled. 234 ACLDatacenter string 235 236 // ACLTokenTTL controls the time-to-live of cached ACL tokens. 237 // It can be set to zero to disable caching, but this adds 238 // a substantial cost. 239 ACLTokenTTL time.Duration 240 241 // ACLPolicyTTL controls the time-to-live of cached ACL policies. 242 // It can be set to zero to disable caching, but this adds 243 // a substantial cost. 244 ACLPolicyTTL time.Duration 245 246 // ACLDisabledTTL is the time between checking if ACLs should be 247 // enabled. This 248 ACLDisabledTTL time.Duration 249 250 // ACLTokenReplication is used to enabled token replication. 251 // 252 // By default policy-only replication is enabled. When token 253 // replication is off and the primary datacenter is not 254 // yet upgraded to the new ACLs no replication will be performed 255 ACLTokenReplication bool 256 257 // ACLDefaultPolicy is used to control the ACL interaction when 258 // there is no defined policy. This can be "allow" which means 259 // ACLs are used to black-list, or "deny" which means ACLs are 260 // white-lists. 261 ACLDefaultPolicy string 262 263 // ACLDownPolicy controls the behavior of ACLs if the ACLDatacenter 264 // cannot be contacted. It can be either "deny" to deny all requests, 265 // "extend-cache" or "async-cache" which ignores the ACLCacheInterval and 266 // uses cached policies. 267 // If a policy is not in the cache, it acts like deny. 268 // "allow" can be used to allow all requests. This is not recommended. 269 ACLDownPolicy string 270 271 // ACLReplicationRate is the max number of replication rounds that can 272 // be run per second. Note that either 1 or 2 RPCs are used during each replication 273 // round 274 ACLReplicationRate int 275 276 // ACLReplicationBurst is how many replication RPCs can be bursted after a 277 // period of idleness 278 ACLReplicationBurst int 279 280 // ACLReplicationApplyLimit is the max number of replication-related 281 // apply operations that we allow during a one second period. This is 282 // used to limit the amount of Raft bandwidth used for replication. 283 ACLReplicationApplyLimit int 284 285 // ACLEnableKeyListPolicy is used to gate enforcement of the new "list" policy that 286 // protects listing keys by prefix. This behavior is opt-in 287 // by default in Consul 1.0 and later. 288 ACLEnableKeyListPolicy bool 289 290 // TombstoneTTL is used to control how long KV tombstones are retained. 291 // This provides a window of time where the X-Consul-Index is monotonic. 292 // Outside this window, the index may not be monotonic. This is a result 293 // of a few trade offs: 294 // 1) The index is defined by the data view and not globally. This is a 295 // performance optimization that prevents any write from incrementing the 296 // index for all data views. 297 // 2) Tombstones are not kept indefinitely, since otherwise storage required 298 // is also monotonic. This prevents deletes from reducing the disk space 299 // used. 300 // In theory, neither of these are intrinsic limitations, however for the 301 // purposes of building a practical system, they are reasonable trade offs. 302 // 303 // It is also possible to set this to an incredibly long time, thereby 304 // simulating infinite retention. This is not recommended however. 305 // 306 TombstoneTTL time.Duration 307 308 // TombstoneTTLGranularity is used to control how granular the timers are 309 // for the Tombstone GC. This is used to batch the GC of many keys together 310 // to reduce overhead. It is unlikely a user would ever need to tune this. 311 TombstoneTTLGranularity time.Duration 312 313 // Minimum Session TTL 314 SessionTTLMin time.Duration 315 316 // ServerUp callback can be used to trigger a notification that 317 // a Consul server is now up and known about. 318 ServerUp func() 319 320 // UserEventHandler callback can be used to handle incoming 321 // user events. This function should not block. 322 UserEventHandler func(serf.UserEvent) 323 324 // CoordinateUpdatePeriod controls how long a server batches coordinate 325 // updates before applying them in a Raft transaction. A larger period 326 // leads to fewer Raft transactions, but also the stored coordinates 327 // being more stale. 328 CoordinateUpdatePeriod time.Duration 329 330 // CoordinateUpdateBatchSize controls the maximum number of updates a 331 // server batches before applying them in a Raft transaction. 332 CoordinateUpdateBatchSize int 333 334 // CoordinateUpdateMaxBatches controls the maximum number of batches we 335 // are willing to apply in one period. After this limit we will issue a 336 // warning and discard the remaining updates. 337 CoordinateUpdateMaxBatches int 338 339 // RPCHoldTimeout is how long an RPC can be "held" before it is errored. 340 // This is used to paper over a loss of leadership by instead holding RPCs, 341 // so that the caller experiences a slow response rather than an error. 342 // This period is meant to be long enough for a leader election to take 343 // place, and a small jitter is applied to avoid a thundering herd. 344 RPCHoldTimeout time.Duration 345 346 // RPCRate and RPCMaxBurst control how frequently RPC calls are allowed 347 // to happen. In any large enough time interval, rate limiter limits the 348 // rate to RPCRate tokens per second, with a maximum burst size of 349 // RPCMaxBurst events. As a special case, if RPCRate == Inf (the infinite 350 // rate), RPCMaxBurst is ignored. 351 // 352 // See https://en.wikipedia.org/wiki/Token_bucket for more about token 353 // buckets. 354 RPCRate rate.Limit 355 RPCMaxBurst int 356 357 // LeaveDrainTime is used to wait after a server has left the LAN Serf 358 // pool for RPCs to drain and new requests to be sent to other servers. 359 LeaveDrainTime time.Duration 360 361 // AutopilotConfig is used to apply the initial autopilot config when 362 // bootstrapping. 363 AutopilotConfig *autopilot.Config 364 365 // ServerHealthInterval is the frequency with which the health of the 366 // servers in the cluster will be updated. 367 ServerHealthInterval time.Duration 368 369 // AutopilotInterval is the frequency with which the leader will perform 370 // autopilot tasks, such as promoting eligible non-voters and removing 371 // dead servers. 372 AutopilotInterval time.Duration 373 374 // ConnectEnabled is whether to enable Connect features such as the CA. 375 ConnectEnabled bool 376 377 // CAConfig is used to apply the initial Connect CA configuration when 378 // bootstrapping. 379 CAConfig *structs.CAConfiguration 380 } 381 382 func (c *Config) ToTLSUtilConfig() tlsutil.Config { 383 return tlsutil.Config{ 384 VerifyIncoming: c.VerifyIncoming, 385 VerifyOutgoing: c.VerifyOutgoing, 386 CAFile: c.CAFile, 387 CAPath: c.CAPath, 388 CertFile: c.CertFile, 389 KeyFile: c.KeyFile, 390 NodeName: c.NodeName, 391 ServerName: c.ServerName, 392 TLSMinVersion: c.TLSMinVersion, 393 CipherSuites: c.TLSCipherSuites, 394 PreferServerCipherSuites: c.TLSPreferServerCipherSuites, 395 } 396 } 397 398 // CheckProtocolVersion validates the protocol version. 399 func (c *Config) CheckProtocolVersion() error { 400 if c.ProtocolVersion < ProtocolVersionMin { 401 return fmt.Errorf("Protocol version '%d' too low. Must be in range: [%d, %d]", c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax) 402 } 403 if c.ProtocolVersion > ProtocolVersionMax { 404 return fmt.Errorf("Protocol version '%d' too high. Must be in range: [%d, %d]", c.ProtocolVersion, ProtocolVersionMin, ProtocolVersionMax) 405 } 406 return nil 407 } 408 409 // CheckACL validates the ACL configuration. 410 func (c *Config) CheckACL() error { 411 switch c.ACLDefaultPolicy { 412 case "allow": 413 case "deny": 414 default: 415 return fmt.Errorf("Unsupported default ACL policy: %s", c.ACLDefaultPolicy) 416 } 417 switch c.ACLDownPolicy { 418 case "allow": 419 case "deny": 420 case "async-cache", "extend-cache": 421 default: 422 return fmt.Errorf("Unsupported down ACL policy: %s", c.ACLDownPolicy) 423 } 424 return nil 425 } 426 427 // DefaultConfig returns a sane default configuration. 428 func DefaultConfig() *Config { 429 hostname, err := os.Hostname() 430 if err != nil { 431 panic(err) 432 } 433 434 conf := &Config{ 435 Build: version.Version, 436 Datacenter: DefaultDC, 437 NodeName: hostname, 438 RPCAddr: DefaultRPCAddr, 439 RaftConfig: raft.DefaultConfig(), 440 SerfLANConfig: lib.SerfDefaultConfig(), 441 SerfWANConfig: lib.SerfDefaultConfig(), 442 SerfFloodInterval: 60 * time.Second, 443 ReconcileInterval: 60 * time.Second, 444 ProtocolVersion: ProtocolVersion2Compatible, 445 ACLPolicyTTL: 30 * time.Second, 446 ACLTokenTTL: 30 * time.Second, 447 ACLDefaultPolicy: "allow", 448 ACLDownPolicy: "extend-cache", 449 ACLReplicationRate: 1, 450 ACLReplicationBurst: 5, 451 ACLReplicationApplyLimit: 100, // ops / sec 452 TombstoneTTL: 15 * time.Minute, 453 TombstoneTTLGranularity: 30 * time.Second, 454 SessionTTLMin: 10 * time.Second, 455 456 // These are tuned to provide a total throughput of 128 updates 457 // per second. If you update these, you should update the client- 458 // side SyncCoordinateRateTarget parameter accordingly. 459 CoordinateUpdatePeriod: 5 * time.Second, 460 CoordinateUpdateBatchSize: 128, 461 CoordinateUpdateMaxBatches: 5, 462 463 RPCRate: rate.Inf, 464 RPCMaxBurst: 1000, 465 466 TLSMinVersion: "tls10", 467 468 // TODO (slackpad) - Until #3744 is done, we need to keep these 469 // in sync with agent/config/default.go. 470 AutopilotConfig: &autopilot.Config{ 471 CleanupDeadServers: true, 472 LastContactThreshold: 200 * time.Millisecond, 473 MaxTrailingLogs: 250, 474 ServerStabilizationTime: 10 * time.Second, 475 }, 476 477 CAConfig: &structs.CAConfiguration{ 478 Provider: "consul", 479 Config: map[string]interface{}{ 480 "RotationPeriod": "2160h", 481 "LeafCertTTL": "72h", 482 }, 483 }, 484 485 ServerHealthInterval: 2 * time.Second, 486 AutopilotInterval: 10 * time.Second, 487 } 488 489 // Increase our reap interval to 3 days instead of 24h. 490 conf.SerfLANConfig.ReconnectTimeout = 3 * 24 * time.Hour 491 conf.SerfWANConfig.ReconnectTimeout = 3 * 24 * time.Hour 492 493 // WAN Serf should use the WAN timing, since we are using it 494 // to communicate between DC's 495 conf.SerfWANConfig.MemberlistConfig = memberlist.DefaultWANConfig() 496 497 // Ensure we don't have port conflicts 498 conf.SerfLANConfig.MemberlistConfig.BindPort = DefaultLANSerfPort 499 conf.SerfWANConfig.MemberlistConfig.BindPort = DefaultWANSerfPort 500 501 // Raft protocol version 3 only works with other Consul servers running 502 // 0.8.0 or later. 503 conf.RaftConfig.ProtocolVersion = 3 504 505 // Disable shutdown on removal 506 conf.RaftConfig.ShutdownOnRemove = false 507 508 // Check every 5 seconds to see if there are enough new entries for a snapshot, can be overridden 509 conf.RaftConfig.SnapshotInterval = 30 * time.Second 510 511 // Snapshots are created every 16384 entries by default, can be overridden 512 conf.RaftConfig.SnapshotThreshold = 16384 513 514 return conf 515 }