github.com/cilium/cilium@v1.16.2/pkg/clustermesh/common/remote_cluster.go

// SPDX-License-Identifier: Apache-2.0
// Copyright Authors of Cilium

package common

import (
	"context"
	"errors"
	"fmt"
	"sync"
	"time"

	"github.com/go-openapi/strfmt"
	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
	"google.golang.org/grpc"

	"github.com/cilium/cilium/api/v1/models"
	"github.com/cilium/cilium/pkg/clustermesh/types"
	cmutils "github.com/cilium/cilium/pkg/clustermesh/utils"
	"github.com/cilium/cilium/pkg/controller"
	"github.com/cilium/cilium/pkg/dial"
	"github.com/cilium/cilium/pkg/kvstore"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/metrics"
	"github.com/cilium/cilium/pkg/option"
)

var (
	remoteConnectionControllerGroup = controller.NewGroup("clustermesh-remote-cluster")
	clusterConfigControllerGroup    = controller.NewGroup("clustermesh-cluster-config")
)

type RemoteCluster interface {
	// Run implements the actual business logic once the connection to the remote cluster has been established.
	// The ready channel shall be closed once the initialization tasks have completed, possibly returning an error.
	Run(ctx context.Context, backend kvstore.BackendOperations, config types.CiliumClusterConfig, ready chan<- error)

	Stop()
	Remove(ctx context.Context)
}

// remoteCluster represents a cluster other than the one the agent is
// running in.
type remoteCluster struct {
	RemoteCluster

	// name is the name of the cluster
	name string

	// configPath is the path to the etcd configuration to be used to
	// connect to the etcd cluster of the remote cluster
	configPath string

	// clusterSizeDependantInterval allows calculating intervals based on cluster size.
	clusterSizeDependantInterval kvstore.ClusterSizeDependantIntervalFunc

	// resolvers are the set of resolvers used to create the custom dialer.
	resolvers []dial.Resolver

	// changed receives an event when the remote cluster configuration has
	// changed, and is closed when the configuration file is removed.
	changed chan bool

	controllers *controller.Manager

	// wg is used to wait for the termination of the goroutines spawned by the
	// controller upon reconnection for long-running background tasks.
	wg sync.WaitGroup

	// remoteConnectionControllerName is the name of the backing controller
	// that maintains the remote connection
	remoteConnectionControllerName string

	// mutex protects the following variables:
	// - backend
	// - config
	// - etcdClusterID
	// - failures
	// - lastFailure
	mutex lock.RWMutex

	// backend is the kvstore backend being used
	backend kvstore.BackendOperations

	// config contains the information about the cluster config for status reporting
	config *models.RemoteClusterConfig

	// etcdClusterID contains the information about the etcd cluster ID for status
	// reporting. It is used to distinguish which instance of the clustermesh-apiserver
	// we are connected to when running in HA mode.
	etcdClusterID string

	// failures is the number of observed failures
	failures int

	// lastFailure is the timestamp of the last failure
	lastFailure time.Time

	logger logrus.FieldLogger

	metricLastFailureTimestamp prometheus.Gauge
	metricReadinessStatus      prometheus.Gauge
	metricTotalFailures        prometheus.Gauge
}

var (
	// skipKvstoreConnection skips the etcd connection, used for testing
	skipKvstoreConnection bool
)

// releaseOldConnection releases the etcd connection to a remote cluster
func (rc *remoteCluster) releaseOldConnection() {
	rc.metricReadinessStatus.Set(metrics.BoolToFloat64(false))

	// Make sure that all child goroutines have terminated before performing cleanup.
	rc.wg.Wait()

	rc.mutex.Lock()
	backend := rc.backend
	rc.backend = nil
	rc.config = nil
	rc.etcdClusterID = ""
	rc.mutex.Unlock()

	if backend != nil {
		backend.Close()
	}
}

func (rc *remoteCluster) restartRemoteConnection() {
	rc.controllers.UpdateController(
		rc.remoteConnectionControllerName,
		controller.ControllerParams{
			Group: remoteConnectionControllerGroup,
			DoFunc: func(ctx context.Context) error {
				rc.releaseOldConnection()

				clusterLock := newClusterLock()

				extraOpts := rc.makeExtraOpts(clusterLock)

				backend, errChan := kvstore.NewClient(ctx, kvstore.EtcdBackendName,
					rc.makeEtcdOpts(), &extraOpts)

				// Block until either an error is returned or
				// the channel is closed due to the success of
				// the connection.
				rc.logger.Debugf("Waiting for connection to be established")

				var err error
				select {
				case err = <-errChan:
				case err = <-clusterLock.errors:
				}

				if err != nil {
					if backend != nil {
						backend.Close()
					}
					rc.logger.WithError(err).Warning("Unable to establish etcd connection to remote cluster")
					return err
				}

				etcdClusterID := fmt.Sprintf("%x", clusterLock.etcdClusterID.Load())

				rc.mutex.Lock()
				rc.backend = backend
				rc.etcdClusterID = etcdClusterID
				rc.mutex.Unlock()

				ctx, cancel := context.WithCancel(ctx)
				rc.wg.Add(1)
				go func() {
					rc.watchdog(ctx, backend, clusterLock)
					cancel()
					rc.wg.Done()
				}()

				rc.logger.WithField(logfields.EtcdClusterID, etcdClusterID).Info("Connection to remote cluster established")

				config, err := rc.getClusterConfig(ctx, backend)
				if err != nil {
					lgr := rc.logger
					if errors.Is(err, cmutils.ErrClusterConfigNotFound) {
						lgr = lgr.WithField(logfields.Hint,
							"If KVStoreMesh is enabled, check whether it is connected to the target cluster."+
								" Additionally, ensure that the cluster name is correct.")
					}

					lgr.WithError(err).Warning("Unable to get remote cluster configuration")
					cancel()
					return err
				}
				rc.logger.Info("Found remote cluster configuration")

				ready := make(chan error)

				// Execute the long-running logic in the background. This allows
				// returning early from the controller body, so that the statistics
				// are updated correctly; blocking until rc.Run terminates would
				// instead prevent a previous failure from being cleared out.
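				// rc.Run is expected to close the ready channel (possibly after
				// sending an error) once its initialization has completed, and to
				// keep running until its context is cancelled. It shares the
				// cancellable context with the watchdog goroutine above, so the
				// termination of either one also stops the other.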
				rc.wg.Add(1)
				go func() {
					rc.Run(ctx, backend, config, ready)
					cancel()
					rc.wg.Done()
				}()

				if err := <-ready; err != nil {
					rc.logger.WithError(err).Warning("Connection to remote cluster failed")
					return err
				}

				rc.metricReadinessStatus.Set(metrics.BoolToFloat64(true))
				return nil
			},
			StopFunc: func(ctx context.Context) error {
				rc.releaseOldConnection()
				rc.logger.Info("Connection to remote cluster stopped")
				return nil
			},
			CancelDoFuncOnUpdate: true,
		},
	)
}

func (rc *remoteCluster) watchdog(ctx context.Context, backend kvstore.BackendOperations, clusterLock *clusterLock) {
	handleErr := func(err error) {
		rc.logger.WithError(err).Warning("Error observed on etcd connection, reconnecting etcd")
		rc.mutex.Lock()
		rc.failures++
		rc.lastFailure = time.Now()
		rc.metricLastFailureTimestamp.SetToCurrentTime()
		rc.metricTotalFailures.Set(float64(rc.failures))
		rc.metricReadinessStatus.Set(metrics.BoolToFloat64(rc.isReadyLocked()))
		rc.mutex.Unlock()

		rc.restartRemoteConnection()
	}

	select {
	case err, ok := <-backend.StatusCheckErrors():
		if ok && err != nil {
			handleErr(err)
		}
	case err, ok := <-clusterLock.errors:
		if ok && err != nil {
			handleErr(err)
		}
	case <-ctx.Done():
		return
	}
}

func (rc *remoteCluster) getClusterConfig(ctx context.Context, backend kvstore.BackendOperations) (types.CiliumClusterConfig, error) {
	var (
		clusterConfigRetrievalTimeout = 3 * time.Minute
		lastError                     = context.Canceled
		lastErrorLock                 lock.Mutex
	)

	ctx, cancel := context.WithTimeout(ctx, clusterConfigRetrievalTimeout)
	defer cancel()

	rc.mutex.Lock()
	rc.config = &models.RemoteClusterConfig{Required: true}
	rc.mutex.Unlock()

	cfgch := make(chan types.CiliumClusterConfig, 1)
	defer close(cfgch)

	// We retry here rather than simply returning an error and relying on the external
	// controller backoff period, to avoid recreating a new connection to the remote
	// kvstore every time, which would introduce unnecessary overhead. Still, we do return
	// in case of consecutive failures, to ensure that we do not retry forever if something
	// strange happens.
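	// The retrieval is delegated to a dedicated controller so that it is
	// automatically retried with backoff; the controller is removed (and
	// waited for) once this function returns.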
	ctrlname := rc.remoteConnectionControllerName + "-cluster-config"
	defer rc.controllers.RemoveControllerAndWait(ctrlname)
	rc.controllers.UpdateController(ctrlname, controller.ControllerParams{
		Group: clusterConfigControllerGroup,
		DoFunc: func(ctx context.Context) error {
			rc.logger.Debug("Retrieving cluster configuration from remote kvstore")
			config, err := cmutils.GetClusterConfig(ctx, rc.name, backend)
			if err != nil {
				lastErrorLock.Lock()
				lastError = err
				lastErrorLock.Unlock()
				return err
			}

			cfgch <- config
			return nil
		},
		Context:          ctx,
		MaxRetryInterval: 30 * time.Second,
	})

	// Wait until either the configuration is retrieved, or the context expires
	select {
	case config := <-cfgch:
		rc.mutex.Lock()
		rc.config.Retrieved = true
		rc.config.ClusterID = int64(config.ID)
		rc.config.Kvstoremesh = config.Capabilities.Cached
		rc.config.SyncCanaries = config.Capabilities.SyncedCanaries
		rc.mutex.Unlock()

		return config, nil
	case <-ctx.Done():
		lastErrorLock.Lock()
		defer lastErrorLock.Unlock()
		return types.CiliumClusterConfig{}, fmt.Errorf("failed to retrieve cluster configuration: %w", lastError)
	}
}

func (rc *remoteCluster) makeEtcdOpts() map[string]string {
	opts := map[string]string{
		kvstore.EtcdOptionConfig: rc.configPath,
	}

	for key, value := range option.Config.KVStoreOpt {
		switch key {
		case kvstore.EtcdRateLimitOption, kvstore.EtcdMaxInflightOption, kvstore.EtcdListLimitOption,
			kvstore.EtcdOptionKeepAliveHeartbeat, kvstore.EtcdOptionKeepAliveTimeout:
			opts[key] = value
		}
	}

	return opts
}

func (rc *remoteCluster) makeExtraOpts(clusterLock *clusterLock) kvstore.ExtraOptions {
	var dialOpts []grpc.DialOption

	dialOpts = append(dialOpts, grpc.WithStreamInterceptor(newStreamInterceptor(clusterLock)), grpc.WithUnaryInterceptor(newUnaryInterceptor(clusterLock)))

	// Allow resolving service names without depending on DNS. This prevents the need
	// for setting the DNSPolicy to ClusterFirstWithHostNet when running in host network.
	dialOpts = append(dialOpts, grpc.WithContextDialer(dial.NewContextDialer(rc.logger, rc.resolvers...)))

	return kvstore.ExtraOptions{
		NoLockQuorumCheck:            true,
		ClusterName:                  rc.name,
		ClusterSizeDependantInterval: rc.clusterSizeDependantInterval,
		DialOption:                   dialOpts,
		NoEndpointStatusChecks:       true,
	}
}

func (rc *remoteCluster) onInsert() {
	rc.logger.Info("New remote cluster configuration")

	if skipKvstoreConnection {
		return
	}

	rc.remoteConnectionControllerName = fmt.Sprintf("remote-etcd-%s", rc.name)
	rc.restartRemoteConnection()

	go func() {
		for {
			val := <-rc.changed
			if val {
				rc.logger.Info("etcd configuration has changed, re-creating connection")
				rc.restartRemoteConnection()
			} else {
				rc.logger.Info("Closing connection to remote etcd")
				return
			}
		}
	}()
}

// onStop is executed when the clustermesh subsystem is being stopped.
// In this case, we don't want to drain the known entries, otherwise
// we would break existing connections when the agent gets restarted.
func (rc *remoteCluster) onStop() {
	_ = rc.controllers.RemoveControllerAndWait(rc.remoteConnectionControllerName)
	close(rc.changed)
	rc.Stop()
}

// onRemove is executed when a remote cluster is explicitly disconnected
// (i.e., its configuration is removed). In this case, we need to drain
// all known entries, to properly clean up the status without requiring a
// restart of the agent.
func (rc *remoteCluster) onRemove(ctx context.Context) {
	rc.onStop()
	rc.Remove(ctx)

	rc.logger.Info("Remote cluster disconnected")
}

func (rc *remoteCluster) isReady() bool {
	rc.mutex.RLock()
	defer rc.mutex.RUnlock()

	return rc.isReadyLocked()
}

func (rc *remoteCluster) isReadyLocked() bool {
	return rc.backend != nil && rc.config != nil && (!rc.config.Required || rc.config.Retrieved)
}

func (rc *remoteCluster) status() *models.RemoteCluster {
	rc.mutex.RLock()
	defer rc.mutex.RUnlock()

	// This can happen when the controller in restartRemoteConnection is waiting
	// for the first connection to succeed.
	var backendStatus = "Waiting for initial connection to be established"
	if rc.backend != nil {
		var backendError error
		backendStatus, backendError = rc.backend.Status()
		if backendError != nil {
			backendStatus = backendError.Error()
		}

		if rc.etcdClusterID != "" {
			backendStatus += ", ID: " + rc.etcdClusterID
		}
	}

	status := &models.RemoteCluster{
		Name:        rc.name,
		Ready:       rc.isReadyLocked(),
		Connected:   rc.backend != nil,
		Status:      backendStatus,
		Config:      rc.config,
		NumFailures: int64(rc.failures),
		LastFailure: strfmt.DateTime(rc.lastFailure),
	}

	return status
}
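// The sketch below is illustrative only and not part of the upstream file: it
// shows a minimal implementation of the RemoteCluster interface defined above,
// in particular the ready-channel contract consumed by restartRemoteConnection
// (which blocks on <-ready before marking the connection as ready). The type
// name exampleRemoteCluster is hypothetical.
type exampleRemoteCluster struct{}

func (exampleRemoteCluster) Run(ctx context.Context, backend kvstore.BackendOperations, config types.CiliumClusterConfig, ready chan<- error) {
	// Perform the initialization tasks here; on failure, report the error
	// before closing the channel:
	//     ready <- err
	// Closing without sending signals successful initialization.
	close(ready)

	// Long-running business logic: block until the shared context is
	// cancelled, which happens when the connection is restarted or the
	// controller is stopped.
	<-ctx.Done()
}

func (exampleRemoteCluster) Stop() {}

func (exampleRemoteCluster) Remove(ctx context.Context) {}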