go.temporal.io/server@v1.23.0/common/membership/ringpop/monitor.go

// The MIT License
//
// Copyright (c) 2020 Temporal Technologies Inc. All rights reserved.
//
// Copyright (c) 2020 Uber Technologies, Inc.
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

package ringpop

import (
	"context"
	"fmt"
	"math"
	"math/rand"
	"net"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/pborman/uuid"
	"github.com/temporalio/ringpop-go"
	"github.com/temporalio/ringpop-go/discovery/statichosts"
	"github.com/temporalio/ringpop-go/swim"

	"go.temporal.io/server/common"
	"go.temporal.io/server/common/backoff"
	"go.temporal.io/server/common/config"
	"go.temporal.io/server/common/convert"
	"go.temporal.io/server/common/future"
	"go.temporal.io/server/common/headers"
	"go.temporal.io/server/common/log"
	"go.temporal.io/server/common/log/tag"
	"go.temporal.io/server/common/membership"
	"go.temporal.io/server/common/persistence"
	"go.temporal.io/server/common/primitives"
)

const (
	upsertMembershipRecordExpiryDefault = time.Hour * 48

	// 10 second base reporting frequency + 5 second jitter + 5 second acceptable time skew
	healthyHostLastHeartbeatCutoff = time.Second * 20

	// Number of times we retry refreshing the bootstrap list and try to join the Ringpop cluster before giving up
	maxBootstrapRetries = 5
)

type monitor struct {
	stateLock sync.Mutex
	status    int32

	lifecycleCtx    context.Context
	lifecycleCancel context.CancelFunc

	serviceName               primitives.ServiceName
	services                  config.ServicePortMap
	rp                        *ringpop.Ringpop
	maxJoinDuration           time.Duration
	rings                     map[primitives.ServiceName]*serviceResolver
	logger                    log.Logger
	metadataManager           persistence.ClusterMetadataManager
	broadcastHostPortResolver func() (string, error)
	hostID                    uuid.UUID
	initialized               *future.FutureImpl[struct{}]
}

var _ membership.Monitor = (*monitor)(nil)

// newMonitor returns a ringpop-based membership monitor
func newMonitor(
	serviceName primitives.ServiceName,
	services config.ServicePortMap,
	rp *ringpop.Ringpop,
	logger log.Logger,
	metadataManager persistence.ClusterMetadataManager,
	broadcastHostPortResolver func() (string, error),
	maxJoinDuration time.Duration,
) *monitor {
	lifecycleCtx, lifecycleCancel := context.WithCancel(context.Background())
	lifecycleCtx = headers.SetCallerInfo(
		lifecycleCtx,
		headers.SystemBackgroundCallerInfo,
	)

	rpo := &monitor{
		status: common.DaemonStatusInitialized,

		lifecycleCtx:    lifecycleCtx,
		lifecycleCancel: lifecycleCancel,

		serviceName:               serviceName,
		services:                  services,
		rp:                        rp,
		rings:                     make(map[primitives.ServiceName]*serviceResolver),
		logger:                    logger,
		metadataManager:           metadataManager,
		broadcastHostPortResolver: broadcastHostPortResolver,
		hostID:                    uuid.NewUUID(),
		initialized:               future.NewFuture[struct{}](),
		maxJoinDuration:           maxJoinDuration,
	}
	for service, port := range services {
		rpo.rings[service] = newServiceResolver(service, port, rp, logger)
	}
	return rpo
}
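
// Illustrative usage sketch (added commentary, not part of the original file; the
// values below are hypothetical). A caller in this package would construct and run
// the monitor roughly as follows, given an already-configured *ringpop.Ringpop
// "rp", a log.Logger "logger", and a persistence.ClusterMetadataManager "mgr":
//
//	m := newMonitor(
//		primitives.HistoryService,
//		config.ServicePortMap{primitives.HistoryService: 7234},
//		rp,
//		logger,
//		mgr,
//		func() (string, error) { return "10.0.0.1:6934", nil }, // broadcast host:port
//		10*time.Second, // maxJoinDuration
//	)
//	m.Start()
//	defer m.Stop()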

// Start the membership monitor. Stop() can be called concurrently so we relinquish the state lock when
// it's safe for Stop() to run, which is at any point when we are neither updating the status field nor
// starting rings.
func (rpo *monitor) Start() {
	rpo.stateLock.Lock()
	if rpo.status != common.DaemonStatusInitialized {
		rpo.stateLock.Unlock()
		return
	}
	rpo.status = common.DaemonStatusStarted
	rpo.stateLock.Unlock()

	broadcastAddress, err := rpo.broadcastHostPortResolver()
	if err != nil {
		rpo.logger.Fatal("unable to resolve broadcast address", tag.Error(err))
	}

	// TODO - Note this presents a small race condition as we write our identity before we bootstrap ringpop.
	// This is a limitation of the current structure of the ringpop library, as
	// we must know our seed nodes before bootstrapping.

	if err = rpo.startHeartbeat(broadcastAddress); err != nil {
		rpo.logger.Fatal("unable to initialize membership heartbeats", tag.Error(err))
	}

	if err = rpo.bootstrapRingPop(); err != nil {
		// Stop() was called during Start()'s execution. This is ok.
		if strings.Contains(err.Error(), "destroyed while attempting to join") {
			return
		}
		rpo.logger.Fatal("failed to start ringpop", tag.Error(err))
	}

	labels, err := rpo.rp.Labels()
	if err != nil {
		rpo.logger.Fatal("unable to get ring pop labels", tag.Error(err))
	}

	if err = labels.Set(rolePort, strconv.Itoa(rpo.services[rpo.serviceName])); err != nil {
		rpo.logger.Fatal("unable to set ring pop ServicePort label", tag.Error(err))
	}

	if err = labels.Set(roleKey, string(rpo.serviceName)); err != nil {
		rpo.logger.Fatal("unable to set ring pop ServiceRole label", tag.Error(err))
	}

	// Our individual rings may not support concurrent start/stop calls so we reacquire the state lock while acting upon them.
	rpo.stateLock.Lock()
	for _, ring := range rpo.rings {
		ring.Start()
	}
	rpo.stateLock.Unlock()

	rpo.initialized.Set(struct{}{}, nil)
}

// bootstrapRingPop bootstraps the ringpop service by discovering the bootstrap hosts and joining the ringpop cluster.
func (rpo *monitor) bootstrapRingPop() error {
	policy := backoff.NewExponentialRetryPolicy(healthyHostLastHeartbeatCutoff / 2).
		WithBackoffCoefficient(1).
		WithMaximumAttempts(maxBootstrapRetries)
	op := func() error {
		hostPorts, err := rpo.fetchCurrentBootstrapHostports()
		if err != nil {
			return err
		}

		bootParams := &swim.BootstrapOptions{
			ParallelismFactor: 10,
			JoinSize:          1,
			MaxJoinDuration:   rpo.maxJoinDuration,
			DiscoverProvider:  statichosts.New(hostPorts...),
		}

		_, err = rpo.rp.Bootstrap(bootParams)
		if err != nil {
			rpo.logger.Warn("unable to bootstrap ringpop. retrying", tag.Error(err))
		}
		return err
	}

	if err := backoff.ThrottleRetry(op, policy, nil); err != nil {
		return fmt.Errorf("exhausted all retries: %w", err)
	}
	return nil
}
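
// Added commentary (not part of the original file): with healthyHostLastHeartbeatCutoff
// set to 20 seconds, the retry policy above waits a constant 10 seconds between
// attempts (backoff coefficient 1) for at most maxBootstrapRetries = 5 attempts,
// i.e. roughly 40 seconds of backoff on top of the time each join attempt itself
// spends (bounded by maxJoinDuration). Written with literal values, the policy is
// equivalent to:
//
//	policy := backoff.NewExponentialRetryPolicy(10 * time.Second).
//		WithBackoffCoefficient(1).
//		WithMaximumAttempts(5)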

func (rpo *monitor) WaitUntilInitialized(ctx context.Context) error {
	_, err := rpo.initialized.Get(ctx)
	return err
}

func serviceNameToServiceTypeEnum(name primitives.ServiceName) (persistence.ServiceType, error) {
	switch name {
	case primitives.AllServices:
		return persistence.All, nil
	case primitives.FrontendService:
		return persistence.Frontend, nil
	case primitives.InternalFrontendService:
		return persistence.InternalFrontend, nil
	case primitives.HistoryService:
		return persistence.History, nil
	case primitives.MatchingService:
		return persistence.Matching, nil
	case primitives.WorkerService:
		return persistence.Worker, nil
	default:
		return persistence.All, fmt.Errorf("unable to parse servicename '%s'", name)
	}
}

func (rpo *monitor) upsertMyMembership(
	ctx context.Context,
	request *persistence.UpsertClusterMembershipRequest,
) error {
	err := rpo.metadataManager.UpsertClusterMembership(ctx, request)

	if err == nil {
		rpo.logger.Debug("Membership heartbeat upserted successfully",
			tag.Address(request.RPCAddress.String()),
			tag.Port(int(request.RPCPort)),
			tag.HostID(request.HostID.String()))
	}

	return err
}

// splitHostPortTyped expands upon net.SplitHostPort by providing type parsing.
func splitHostPortTyped(hostPort string) (net.IP, uint16, error) {
	ipstr, portstr, err := net.SplitHostPort(hostPort)
	if err != nil {
		return nil, 0, err
	}

	broadcastAddress := net.ParseIP(ipstr)
	broadcastPort, err := strconv.ParseUint(portstr, 10, 16)
	if err != nil {
		return nil, 0, err
	}

	return broadcastAddress, uint16(broadcastPort), nil
}

func (rpo *monitor) startHeartbeat(broadcastHostport string) error {
	// Start by cleaning up expired records to avoid growth.
	err := rpo.metadataManager.PruneClusterMembership(rpo.lifecycleCtx, &persistence.PruneClusterMembershipRequest{MaxRecordsPruned: 10})
	if err != nil {
		return err
	}

	sessionStarted := time.Now().UTC()

	// Parse and validate the broadcast hostport.
	broadcastAddress, broadcastPort, err := splitHostPortTyped(broadcastHostport)
	if err != nil {
		return err
	}

	// Parse and validate the service name.
	role, err := serviceNameToServiceTypeEnum(rpo.serviceName)
	if err != nil {
		return err
	}

	req := &persistence.UpsertClusterMembershipRequest{
		Role:         role,
		RPCAddress:   broadcastAddress,
		RPCPort:      broadcastPort,
		SessionStart: sessionStarted,
		RecordExpiry: upsertMembershipRecordExpiryDefault,
		HostID:       rpo.hostID,
	}

	// Upsert before fetching bootstrap hosts.
	// This makes us discoverable by other Temporal cluster members.
	// Expire in 48 hours to allow for inspection of the table by humans for debug scenarios.
	// For bootstrapping, we filter to a much shorter duration on the
	// read side by filtering on the last time a heartbeat was seen.
	err = rpo.upsertMyMembership(rpo.lifecycleCtx, req)
	if err == nil {
		rpo.logger.Info("Membership heartbeat upserted successfully",
			tag.Address(broadcastAddress.String()),
			tag.Port(int(broadcastPort)),
			tag.HostID(rpo.hostID.String()))

		rpo.startHeartbeatUpsertLoop(req)
	}

	return err
}
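
// Added commentary (not part of the original file): the record upserted above is
// kept fresh by startHeartbeatUpsertLoop below, which re-upserts it roughly every
// 10 to 15 seconds (a 10-second base plus up to 5 seconds of jitter). On the read
// side, fetchCurrentBootstrapHostports only considers records heartbeated within
// healthyHostLastHeartbeatCutoff (20 seconds); the 48-hour RecordExpiry mainly
// keeps stale rows around for human inspection during debugging.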

func (rpo *monitor) fetchCurrentBootstrapHostports() ([]string, error) {
	pageSize := 1000
	set := make(map[string]struct{})

	var nextPageToken []byte

	for {
		resp, err := rpo.metadataManager.GetClusterMembers(
			rpo.lifecycleCtx,
			&persistence.GetClusterMembersRequest{
				LastHeartbeatWithin: healthyHostLastHeartbeatCutoff,
				PageSize:            pageSize,
				NextPageToken:       nextPageToken,
			})

		if err != nil {
			return nil, err
		}

		// Advance pagination so subsequent iterations fetch the next page.
		nextPageToken = resp.NextPageToken

		// Dedupe on hostport.
		for _, host := range resp.ActiveMembers {
			set[net.JoinHostPort(host.RPCAddress.String(), convert.Uint16ToString(host.RPCPort))] = struct{}{}
		}

		// Stop iterating once we have either 500 unique ip:port combos or there are no more results.
		if nextPageToken == nil || len(set) >= 500 {
			bootstrapHostPorts := make([]string, 0, len(set))
			for k := range set {
				bootstrapHostPorts = append(bootstrapHostPorts, k)
			}

			rpo.logger.Info("bootstrap hosts fetched", tag.BootstrapHostPorts(strings.Join(bootstrapHostPorts, ",")))
			return bootstrapHostPorts, nil
		}
	}
}

func (rpo *monitor) startHeartbeatUpsertLoop(request *persistence.UpsertClusterMembershipRequest) {
	loopUpsertMembership := func() {
		for {
			select {
			case <-rpo.lifecycleCtx.Done():
				return
			default:
			}
			err := rpo.upsertMyMembership(rpo.lifecycleCtx, request)

			if err != nil {
				rpo.logger.Error("Membership upsert failed.", tag.Error(err))
			}

			jitter := math.Round(rand.Float64() * 5)
			time.Sleep(time.Second * time.Duration(10+jitter))
		}
	}

	go loopUpsertMembership()
}

// Stop the membership monitor and all associated rings. This holds the state lock
// for the entire call as the individual ring Start/Stop functions may not be safe to
// call concurrently.
func (rpo *monitor) Stop() {
	rpo.stateLock.Lock()
	defer rpo.stateLock.Unlock()
	if rpo.status != common.DaemonStatusStarted {
		return
	}
	rpo.status = common.DaemonStatusStopped

	rpo.lifecycleCancel()

	for _, ring := range rpo.rings {
		ring.Stop()
	}

	rpo.rp.Destroy()
}

func (rpo *monitor) EvictSelf() error {
	return rpo.rp.SelfEvict()
}

func (rpo *monitor) GetResolver(service primitives.ServiceName) (membership.ServiceResolver, error) {
	ring, found := rpo.rings[service]
	if !found {
		return nil, membership.ErrUnknownService
	}
	return ring, nil
}

func (rpo *monitor) GetReachableMembers() ([]string, error) {
	return rpo.rp.GetReachableMembers()
}

func replaceServicePort(address string, servicePort int) (string, error) {
	host, _, err := net.SplitHostPort(address)
	if err != nil {
		return "", membership.ErrIncorrectAddressFormat
	}
	return net.JoinHostPort(host, convert.IntToString(servicePort)), nil
}
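
// Illustrative example (added commentary, not part of the original file):
// replaceServicePort swaps the port of a ringpop membership address for a
// service's configured gRPC port. Using hypothetical values (a membership
// address on port 6934 and a service port of 7234):
//
//	addr, err := replaceServicePort("10.0.0.1:6934", 7234)
//	// addr == "10.0.0.1:7234", err == nil
//
// An address without a port fails net.SplitHostPort and is reported as
// membership.ErrIncorrectAddressFormat.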