github.com/fafucoder/cilium@v1.6.11/pkg/clustermesh/remote_cluster.go (about)

     1  // Copyright 2018 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package clustermesh
    16  
    17  import (
    18  	"context"
    19  	"fmt"
    20  	"path"
    21  	"time"
    22  
    23  	"github.com/cilium/cilium/pkg/allocator"
    24  	"github.com/cilium/cilium/pkg/controller"
    25  	"github.com/cilium/cilium/pkg/identity/cache"
    26  	"github.com/cilium/cilium/pkg/ipcache"
    27  	"github.com/cilium/cilium/pkg/kvstore"
    28  	"github.com/cilium/cilium/pkg/kvstore/store"
    29  	"github.com/cilium/cilium/pkg/lock"
    30  	nodeStore "github.com/cilium/cilium/pkg/node/store"
    31  	"github.com/cilium/cilium/pkg/service"
    32  
    33  	"github.com/sirupsen/logrus"
    34  )
    35  
// remoteCluster represents another cluster other than the cluster the agent is
// running in. It owns the kvstore connection to that cluster and the watchers
// and shared stores that are built on top of that connection.
type remoteCluster struct {
	// name is the name of the cluster
	name string

	// configPath is the path to the etcd configuration to be used to
	// connect to the etcd cluster of the remote cluster
	configPath string

	// changed receives an event when the remote cluster configuration has
	// changed and is closed when the configuration file was removed.
	// Receiving true triggers a reconnect; receiving false (which is also
	// what a closed channel yields) ends the watcher goroutine started by
	// onInsert.
	changed chan bool

	// mesh is the cluster mesh this remote cluster belongs to
	mesh *ClusterMesh

	// controllers is the controller manager on which the remote connection
	// controller is registered; onRemove removes all of its controllers
	controllers *controller.Manager

	// remoteConnectionControllerName is the name of the backing controller
	// that maintains the remote connection
	remoteConnectionControllerName string

	// mutex protects the following variables
	// - remoteNodes
	// - remoteServices
	// - ipCacheWatcher
	// - remoteIdentityCache
	// - backend
	//
	// NOTE(review): getLogger reads backend without holding mutex.
	mutex lock.RWMutex

	// remoteNodes is the shared store representing all nodes in the remote
	// cluster
	remoteNodes *store.SharedStore

	// remoteServices is the shared store representing services in remote
	// clusters
	remoteServices *store.SharedStore

	// ipCacheWatcher is the watcher that notifies about IP<->identity
	// changes in the remote cluster
	ipCacheWatcher *ipcache.IPIdentityWatcher

	// remoteIdentityCache is a locally cached copy of the identity
	// allocations in the remote cluster
	remoteIdentityCache *allocator.RemoteCache

	// backend is the kvstore backend being used. It is nil while no
	// connection to the remote cluster is established.
	backend kvstore.BackendOperations
}
    84  
    85  var (
    86  	// skipKvstoreConnection skips the etcd connection, used for testing
    87  	skipKvstoreConnection bool
    88  )
    89  
    90  func (rc *remoteCluster) getLogger() *logrus.Entry {
    91  	var (
    92  		status string
    93  		err    error
    94  	)
    95  
    96  	if rc.backend != nil {
    97  		status, err = rc.backend.Status()
    98  	}
    99  
   100  	return log.WithFields(logrus.Fields{
   101  		fieldClusterName:   rc.name,
   102  		fieldConfig:        rc.configPath,
   103  		fieldKVStoreStatus: status,
   104  		fieldKVStoreErr:    err,
   105  	})
   106  }
   107  
   108  func (rc *remoteCluster) releaseOldConnection() {
   109  	if rc.ipCacheWatcher != nil {
   110  		rc.ipCacheWatcher.Close()
   111  		rc.ipCacheWatcher = nil
   112  	}
   113  
   114  	if rc.remoteNodes != nil {
   115  		rc.remoteNodes.Close()
   116  		rc.remoteNodes = nil
   117  	}
   118  	if rc.remoteIdentityCache != nil {
   119  		rc.remoteIdentityCache.Close()
   120  		rc.remoteIdentityCache = nil
   121  	}
   122  	if rc.remoteServices != nil {
   123  		rc.remoteServices.Close()
   124  		rc.remoteServices = nil
   125  	}
   126  	if rc.backend != nil {
   127  		rc.backend.Close()
   128  		rc.backend = nil
   129  	}
   130  }
   131  
   132  func (rc *remoteCluster) restartRemoteConnection() {
   133  	rc.controllers.UpdateController(rc.remoteConnectionControllerName,
   134  		controller.ControllerParams{
   135  			DoFunc: func(ctx context.Context) error {
   136  				rc.mutex.Lock()
   137  				if rc.backend != nil {
   138  					rc.releaseOldConnection()
   139  				}
   140  				rc.mutex.Unlock()
   141  
   142  				backend, errChan := kvstore.NewClient(kvstore.EtcdBackendName,
   143  					map[string]string{
   144  						kvstore.EtcdOptionConfig: rc.configPath,
   145  					},
   146  					nil)
   147  
   148  				// Block until either an error is returned or
   149  				// the channel is closed due to success of the
   150  				// connection
   151  				rc.getLogger().Debugf("Waiting for connection to be established")
   152  				err, isErr := <-errChan
   153  				if isErr {
   154  					if backend != nil {
   155  						backend.Close()
   156  					}
   157  					rc.getLogger().WithError(err).Warning("Unable to establish etcd connection to remote cluster")
   158  					return err
   159  				}
   160  
   161  				rc.getLogger().Info("Connection to remote cluster established")
   162  
   163  				remoteNodes, err := store.JoinSharedStore(store.Configuration{
   164  					Prefix:                  path.Join(nodeStore.NodeStorePrefix, rc.name),
   165  					KeyCreator:              rc.mesh.conf.NodeKeyCreator,
   166  					SynchronizationInterval: time.Minute,
   167  					Backend:                 backend,
   168  					Observer:                rc.mesh.conf.NodeObserver(),
   169  				})
   170  				if err != nil {
   171  					backend.Close()
   172  					return err
   173  				}
   174  
   175  				remoteServices, err := store.JoinSharedStore(store.Configuration{
   176  					Prefix: path.Join(service.ServiceStorePrefix, rc.name),
   177  					KeyCreator: func() store.Key {
   178  						svc := service.ClusterService{}
   179  						return &svc
   180  					},
   181  					SynchronizationInterval: time.Minute,
   182  					Backend:                 backend,
   183  					Observer: &remoteServiceObserver{
   184  						remoteCluster: rc,
   185  					},
   186  				})
   187  				if err != nil {
   188  					remoteNodes.Close()
   189  					backend.Close()
   190  					return err
   191  				}
   192  
   193  				remoteIdentityCache, err := cache.WatchRemoteIdentities(backend)
   194  				if err != nil {
   195  					remoteNodes.Close()
   196  					backend.Close()
   197  					return err
   198  				}
   199  
   200  				ipCacheWatcher := ipcache.NewIPIdentityWatcher(backend)
   201  				go ipCacheWatcher.Watch()
   202  
   203  				rc.mutex.Lock()
   204  				rc.remoteNodes = remoteNodes
   205  				rc.remoteServices = remoteServices
   206  				rc.backend = backend
   207  				rc.ipCacheWatcher = ipCacheWatcher
   208  				rc.remoteIdentityCache = remoteIdentityCache
   209  				rc.mutex.Unlock()
   210  
   211  				rc.getLogger().Info("Established connection to remote etcd")
   212  
   213  				return nil
   214  			},
   215  			StopFunc: func(ctx context.Context) error {
   216  				rc.mutex.Lock()
   217  				rc.releaseOldConnection()
   218  				rc.mutex.Unlock()
   219  
   220  				rc.getLogger().Info("All resources of remote cluster cleaned up")
   221  
   222  				return nil
   223  			},
   224  		},
   225  	)
   226  }
   227  
   228  func (rc *remoteCluster) onInsert() {
   229  	rc.getLogger().Info("New remote cluster configuration")
   230  
   231  	if skipKvstoreConnection {
   232  		return
   233  	}
   234  
   235  	rc.remoteConnectionControllerName = fmt.Sprintf("remote-etcd-%s", rc.name)
   236  	rc.restartRemoteConnection()
   237  
   238  	go func() {
   239  		for {
   240  			val := <-rc.changed
   241  			if val {
   242  				rc.getLogger().Info("etcd configuration has changed, re-creating connection")
   243  				rc.restartRemoteConnection()
   244  			} else {
   245  				rc.getLogger().Info("Closing connection to remote etcd")
   246  				return
   247  			}
   248  		}
   249  	}()
   250  }
   251  
// onRemove tears down the remote cluster: it removes all controllers
// registered on rc.controllers, closes rc.changed and logs the disconnect.
func (rc *remoteCluster) onRemove() {
	// NOTE(review): presumably this blocks until the controllers, including
	// the StopFunc of the remote connection controller, have finished --
	// confirm against the controller package.
	rc.controllers.RemoveAllAndWait()
	// Receiving from the closed channel yields false, which makes the
	// goroutine started in onInsert return.
	close(rc.changed)

	rc.getLogger().Info("Remote cluster disconnected")
}
   258  
   259  func (rc *remoteCluster) isReady() bool {
   260  	rc.mutex.RLock()
   261  	defer rc.mutex.RUnlock()
   262  
   263  	return rc.backend != nil && rc.remoteNodes != nil && rc.ipCacheWatcher != nil
   264  }