github.com/fafucoder/cilium@v1.6.11/pkg/endpointmanager/manager.go

// Copyright 2016-2019 Authors of Cilium
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package endpointmanager

import (
	"context"
	"fmt"
	"net"
	"sync"
	"time"

	"github.com/cilium/cilium/pkg/completion"
	"github.com/cilium/cilium/pkg/endpoint"
	endpointid "github.com/cilium/cilium/pkg/endpoint/id"
	"github.com/cilium/cilium/pkg/endpoint/regeneration"
	"github.com/cilium/cilium/pkg/eventqueue"
	"github.com/cilium/cilium/pkg/lock"
	"github.com/cilium/cilium/pkg/logging"
	"github.com/cilium/cilium/pkg/logging/logfields"
	"github.com/cilium/cilium/pkg/metrics"
	monitorAPI "github.com/cilium/cilium/pkg/monitor/api"
	"github.com/cilium/cilium/pkg/option"
	"github.com/cilium/cilium/pkg/policy"

	"github.com/prometheus/client_golang/prometheus"
	"github.com/sirupsen/logrus"
)

var (
	log = logging.DefaultLogger.WithField(logfields.LogSubsys, "endpoint-manager")

	// mutex protects endpoints and endpointsAux
	mutex lock.RWMutex

	// endpoints is the global list of endpoints indexed by ID. mutex must
	// be held to read and write.
	endpoints    = map[uint16]*endpoint.Endpoint{}
	endpointsAux = map[string]*endpoint.Endpoint{}

	// EndpointSynchronizer updates external resources (e.g., Kubernetes) with
	// up-to-date information about endpoints managed by the endpoint manager.
	EndpointSynchronizer EndpointResourceSynchronizer
)

// EndpointResourceSynchronizer is an interface which synchronizes CiliumEndpoint
// resources with Kubernetes.
type EndpointResourceSynchronizer interface {
	RunK8sCiliumEndpointSync(ep *endpoint.Endpoint)
}

func init() {
	// EndpointCount is a function used to collect this metric. We cannot
	// increment/decrement a gauge since we invoke Remove gratuitously and that
	// would result in negative counts.
	// It must be thread-safe.
	metrics.EndpointCount = prometheus.NewGaugeFunc(prometheus.GaugeOpts{
		Namespace: metrics.Namespace,
		Name:      "endpoint_count",
		Help:      "Number of endpoints managed by this agent",
	},
		func() float64 { return float64(len(GetEndpoints())) },
	)
	metrics.MustRegister(metrics.EndpointCount)
}

// waitForProxyCompletions blocks until all proxy changes have been completed.
func waitForProxyCompletions(proxyWaitGroup *completion.WaitGroup) error {
	err := proxyWaitGroup.Context().Err()
	if err != nil {
		return fmt.Errorf("context cancelled before waiting for proxy updates: %s", err)
	}

	start := time.Now()
	log.Debug("Waiting for proxy updates to complete...")
	err = proxyWaitGroup.Wait()
	if err != nil {
		return fmt.Errorf("proxy updates failed: %s", err)
	}
	log.Debug("Wait time for proxy updates: ", time.Since(start))

	return nil
}

// UpdatePolicyMaps returns a WaitGroup which is done once all endpoints
// have had their PolicyMaps updated against the endpoint's desired policy state.
func UpdatePolicyMaps(ctx context.Context) *sync.WaitGroup {
	var epWG sync.WaitGroup
	var wg sync.WaitGroup

	proxyWaitGroup := completion.NewWaitGroup(ctx)

	eps := GetEndpoints()
	epWG.Add(len(eps))
	wg.Add(1)

	// This is in a goroutine to allow the caller to proceed with other tasks
	// before waiting for the ACKs to complete.
	go func() {
		// Wait for all the eps to have applied policy map
		// changes before waiting for the changes to be ACKed.
		epWG.Wait()
		if err := waitForProxyCompletions(proxyWaitGroup); err != nil {
			log.WithError(err).Warning("Failed to apply L7 proxy policy changes. These will be re-applied in future updates.")
		}
		wg.Done()
	}()

	// TODO: bound by number of CPUs?
	for _, ep := range eps {
		go func(ep *endpoint.Endpoint) {
			if err := ep.ApplyPolicyMapChanges(proxyWaitGroup); err != nil {
				ep.Logger("endpointmanager").WithError(err).Warning("Failed to apply policy map changes. These will be re-applied in future updates.")
			}
			epWG.Done()
		}(ep)
	}

	return &wg
}
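
// A minimal usage sketch (hypothetical caller; ctx is assumed to be the
// caller's context): the returned WaitGroup lets the caller choose whether
// to block until every endpoint has applied its PolicyMap changes and the
// L7 proxies have ACKed them, or to continue with other work first.
//
//	wg := UpdatePolicyMaps(ctx)
//	wg.Wait() // done once all endpoints and proxies are up to date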

// Insert inserts the endpoint into the global maps.
func Insert(ep *endpoint.Endpoint) error {
	if ep.ID != 0 {
		if err := endpointid.Reuse(ep.ID); err != nil {
			return fmt.Errorf("unable to reuse endpoint ID: %s", err)
		}
	} else {
		id := endpointid.Allocate()
		if id == 0 {
			return fmt.Errorf("no more endpoint IDs available")
		}
		ep.ID = id

		ep.UpdateLogger(map[string]interface{}{
			logfields.EndpointID: ep.ID,
		})
	}

	// No need to check liveness as an endpoint can only be deleted via the
	// API after it has been inserted into the manager.
	ep.UnconditionalRLock()
	mutex.Lock()

	ep.StartRegenerationFailureHandler()
	// Now that the endpoint has its ID, it can be created with a name based on
	// its ID, and its eventqueue can be safely started. Ensure that it is only
	// started once it is exposed to the endpointmanager so that it will be
	// stopped when the endpoint is removed from the endpointmanager.
	ep.EventQueue = eventqueue.NewEventQueueBuffered(fmt.Sprintf("endpoint-%d", ep.ID), option.Config.EndpointQueueSize)
	ep.EventQueue.Run()

	endpoints[ep.ID] = ep
	updateReferences(ep)

	mutex.Unlock()
	ep.RUnlock()

	if EndpointSynchronizer != nil {
		EndpointSynchronizer.RunK8sCiliumEndpointSync(ep)
	}

	ep.InsertEvent()

	return nil
}

// Lookup looks up an endpoint by its prefixed ID.
func Lookup(id string) (*endpoint.Endpoint, error) {
	mutex.RLock()
	defer mutex.RUnlock()

	prefix, eid, err := endpointid.Parse(id)
	if err != nil {
		return nil, err
	}

	switch prefix {
	case endpointid.CiliumLocalIdPrefix:
		n, err := endpointid.ParseCiliumID(id)
		if err != nil {
			return nil, err
		}
		return lookupCiliumID(uint16(n)), nil

	case endpointid.CiliumGlobalIdPrefix:
		return nil, ErrUnsupportedID

	case endpointid.ContainerIdPrefix:
		return lookupContainerID(eid), nil

	case endpointid.DockerEndpointPrefix:
		return lookupDockerEndpoint(eid), nil

	case endpointid.ContainerNamePrefix:
		return lookupDockerContainerName(eid), nil

	case endpointid.PodNamePrefix:
		return lookupPodNameLocked(eid), nil

	case endpointid.IPv4Prefix:
		return lookupIPv4(eid), nil

	case endpointid.IPv6Prefix:
		return lookupIPv6(eid), nil

	default:
		return nil, ErrInvalidPrefix{InvalidPrefix: prefix.String()}
	}
}
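
// A usage sketch with hypothetical IDs. It assumes the textual prefixes
// defined in pkg/endpoint/id (e.g. "cilium-local", "container-id") joined
// to the identifier with a colon, as produced by endpointid.NewID.
//
//	ep, err := Lookup("cilium-local:42")            // by local endpoint ID
//	ep, err = Lookup("container-id:" + containerID) // by container runtime ID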

// LookupCiliumID looks up endpoint by endpoint ID
func LookupCiliumID(id uint16) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupCiliumID(id)
	mutex.RUnlock()
	return ep
}

// LookupContainerID looks up endpoint by Docker ID
func LookupContainerID(id string) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupContainerID(id)
	mutex.RUnlock()
	return ep
}

// LookupIPv4 looks up endpoint by IPv4 address
func LookupIPv4(ipv4 string) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupIPv4(ipv4)
	mutex.RUnlock()
	return ep
}

// LookupIPv6 looks up endpoint by IPv6 address
func LookupIPv6(ipv6 string) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupIPv6(ipv6)
	mutex.RUnlock()
	return ep
}

// LookupIP looks up endpoint by IP address
func LookupIP(ip net.IP) (ep *endpoint.Endpoint) {
	addr := ip.String()
	mutex.RLock()
	if ip.To4() != nil {
		ep = lookupIPv4(addr)
	} else {
		ep = lookupIPv6(addr)
	}
	mutex.RUnlock()
	return ep
}
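
// A short sketch (hypothetical address): LookupIP dispatches to the IPv4 or
// IPv6 index based on the address family.
//
//	if ep := LookupIP(net.ParseIP("10.0.0.42")); ep != nil {
//		// the endpoint owning 10.0.0.42 is managed by this agent
//	}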

// LookupPodName looks up endpoint by namespace + pod name
func LookupPodName(name string) *endpoint.Endpoint {
	mutex.RLock()
	ep := lookupPodNameLocked(name)
	mutex.RUnlock()
	return ep
}

// UpdateReferences makes an endpoint available via all of the reference
// fields set on it (containerID, IPv4 address, ...).
// Must be called with ep.Mutex.RLock held.
func UpdateReferences(ep *endpoint.Endpoint) {
	mutex.Lock()
	defer mutex.Unlock()
	updateReferences(ep)
}

func releaseID(ep *endpoint.Endpoint) {
	if err := endpointid.Release(ep.ID); err != nil {
		// While restoring, endpoint IDs may not have been reused yet.
		// Failure to release means that the endpoint ID was not reused
		// yet.
		//
		// While the endpoint is disconnecting, its ID is already available
		// in the ID cache.
		//
		// Avoid irritating warning messages.
		state := ep.GetState()
		if state != endpoint.StateRestoring && state != endpoint.StateDisconnecting {
			log.WithError(err).WithField("state", state).Warning("Unable to release endpoint ID")
		}
	}
}
// WaitEndpointRemoved waits until all operations associated with Remove of
// the endpoint have been completed.
func WaitEndpointRemoved(ep *endpoint.Endpoint) {
	<-Remove(ep)
}

// Remove removes the endpoint from the global maps and releases the node-local
// ID allocated for the endpoint.
// Must be called with ep.Mutex.RLock held. Releasing of the ID of the endpoint
// is done asynchronously. Once the ID of the endpoint is released, the returned
// channel is closed.
func Remove(ep *endpoint.Endpoint) <-chan struct{} {
	epRemoved := make(chan struct{})

	mutex.Lock()
	defer mutex.Unlock()

	// This must be done before the ID is released for the endpoint!
	delete(endpoints, ep.ID)

	go func(ep *endpoint.Endpoint) {
		// The endpoint's EventQueue may not be stopped yet (depending on whether
		// the caller of the EventQueue has stopped it or not). Call it here
		// to be safe so that ep.WaitToBeDrained() does not hang forever.
		ep.EventQueue.Stop()

		// Wait for no more events (primarily regenerations) to be occurring for
		// this endpoint.
		ep.EventQueue.WaitToBeDrained()

		releaseID(ep)
		close(epRemoved)
	}(ep)

	if ep.ContainerID != "" {
		delete(endpointsAux, endpointid.NewID(endpointid.ContainerIdPrefix, ep.ContainerID))
	}

	if ep.DockerEndpointID != "" {
		delete(endpointsAux, endpointid.NewID(endpointid.DockerEndpointPrefix, ep.DockerEndpointID))
	}

	if ep.IPv4.IsSet() {
		delete(endpointsAux, endpointid.NewID(endpointid.IPv4Prefix, ep.IPv4.String()))
	}

	if ep.IPv6.IsSet() {
		delete(endpointsAux, endpointid.NewID(endpointid.IPv6Prefix, ep.IPv6.String()))
	}

	if ep.ContainerName != "" {
		delete(endpointsAux, endpointid.NewID(endpointid.ContainerNamePrefix, ep.ContainerName))
	}

	if podName := ep.GetK8sNamespaceAndPodNameLocked(); podName != "" {
		delete(endpointsAux, endpointid.NewID(endpointid.PodNamePrefix, podName))
	}
	return epRemoved
}
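
// A usage sketch (hypothetical caller): blocking on the returned channel is
// equivalent to calling WaitEndpointRemoved; once it is closed, the
// endpoint's event queue has been drained and its node-local ID released.
//
//	done := Remove(ep)
//	<-done // all Remove-related operations have completed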

// RemoveAll removes all endpoints from the global maps.
func RemoveAll() {
	mutex.Lock()
	defer mutex.Unlock()
	endpointid.ReallocatePool()
	endpoints = map[uint16]*endpoint.Endpoint{}
	endpointsAux = map[string]*endpoint.Endpoint{}
}

// lookupCiliumID looks up endpoint by endpoint ID
func lookupCiliumID(id uint16) *endpoint.Endpoint {
	if ep, ok := endpoints[id]; ok {
		return ep
	}
	return nil
}

func lookupDockerEndpoint(id string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.DockerEndpointPrefix, id)]; ok {
		return ep
	}
	return nil
}

func lookupPodNameLocked(name string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.PodNamePrefix, name)]; ok {
		return ep
	}
	return nil
}

func lookupDockerContainerName(name string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.ContainerNamePrefix, name)]; ok {
		return ep
	}
	return nil
}

func lookupIPv4(ipv4 string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.IPv4Prefix, ipv4)]; ok {
		return ep
	}
	return nil
}

func lookupIPv6(ipv6 string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.IPv6Prefix, ipv6)]; ok {
		return ep
	}
	return nil
}

func lookupContainerID(id string) *endpoint.Endpoint {
	if ep, ok := endpointsAux[endpointid.NewID(endpointid.ContainerIdPrefix, id)]; ok {
		return ep
	}
	return nil
}

// updateReferences updates the mappings of various values to their corresponding
// endpoints, such as ContainerID, Docker Container Name, Pod Name, etc.
func updateReferences(ep *endpoint.Endpoint) {
	if ep.ContainerID != "" {
		endpointsAux[endpointid.NewID(endpointid.ContainerIdPrefix, ep.ContainerID)] = ep
	}

	if ep.DockerEndpointID != "" {
		endpointsAux[endpointid.NewID(endpointid.DockerEndpointPrefix, ep.DockerEndpointID)] = ep
	}

	if ep.IPv4.IsSet() {
		endpointsAux[endpointid.NewID(endpointid.IPv4Prefix, ep.IPv4.String())] = ep
	}

	if ep.IPv6.IsSet() {
		endpointsAux[endpointid.NewID(endpointid.IPv6Prefix, ep.IPv6.String())] = ep
	}

	if ep.ContainerName != "" {
		endpointsAux[endpointid.NewID(endpointid.ContainerNamePrefix, ep.ContainerName)] = ep
	}

	if podName := ep.GetK8sNamespaceAndPodNameLocked(); podName != "" {
		endpointsAux[endpointid.NewID(endpointid.PodNamePrefix, podName)] = ep
	}
}

// RegenerateAllEndpoints calls SetStateLocked for each endpoint and
// regenerates it if the state transition is valid. During this process, the
// endpoint list is locked and cannot be modified.
// Returns a WaitGroup that can be used to know when all the endpoints are
// regenerated.
func RegenerateAllEndpoints(regenMetadata *regeneration.ExternalRegenerationMetadata) *sync.WaitGroup {
	var wg sync.WaitGroup

	eps := GetEndpoints()
	wg.Add(len(eps))

	// Dereference "reason" field outside of logging statement; see
	// https://github.com/sirupsen/logrus/issues/1003.
	reason := regenMetadata.Reason
	log.WithFields(logrus.Fields{"reason": reason}).Info("regenerating all endpoints")
	for _, ep := range eps {
		go func(ep *endpoint.Endpoint) {
			<-ep.RegenerateIfAlive(regenMetadata)
			wg.Done()
		}(ep)
	}

	return &wg
}
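
// A usage sketch (hypothetical reason string; other
// ExternalRegenerationMetadata fields are left at their zero values here):
//
//	wg := RegenerateAllEndpoints(&regeneration.ExternalRegenerationMetadata{
//		Reason: "policy rules updated",
//	})
//	wg.Wait() // block until every endpoint has finished regenerating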

// HasGlobalCT returns true if the endpoints have a global CT, false otherwise.
func HasGlobalCT() bool {
	eps := GetEndpoints()
	for _, e := range eps {
		if !e.Options.IsEnabled(option.ConntrackLocal) {
			return true
		}
	}
	return false
}

// GetEndpoints returns a slice of all endpoints present in endpoint manager.
func GetEndpoints() []*endpoint.Endpoint {
	mutex.RLock()
	eps := make([]*endpoint.Endpoint, 0, len(endpoints))
	for _, ep := range endpoints {
		eps = append(eps, ep)
	}
	mutex.RUnlock()
	return eps
}

// GetPolicyEndpoints returns a map of all endpoints present in endpoint
// manager as policy.Endpoint interface set for the map key.
func GetPolicyEndpoints() map[policy.Endpoint]struct{} {
	mutex.RLock()
	eps := make(map[policy.Endpoint]struct{}, len(endpoints))
	for _, ep := range endpoints {
		eps[ep] = struct{}{}
	}
	mutex.RUnlock()
	return eps
}

// AddEndpoint takes the prepared endpoint object and starts managing it.
func AddEndpoint(owner regeneration.Owner, ep *endpoint.Endpoint, reason string) (err error) {
	alwaysEnforce := policy.GetPolicyEnabled() == option.AlwaysEnforce
	ep.SetDesiredIngressPolicyEnabled(alwaysEnforce)
	ep.SetDesiredEgressPolicyEnabled(alwaysEnforce)

	if ep.ID != 0 {
		return fmt.Errorf("endpoint ID is already set to %d", ep.ID)
	}
	err = Insert(ep)
	if err != nil {
		return err
	}

	repr, err := monitorAPI.EndpointCreateRepr(ep)
	// Only send the endpoint creation notification if the representation
	// could be built successfully.
	if err == nil {
		owner.SendNotification(monitorAPI.AgentNotifyEndpointCreated, repr)
	}
	return nil
}
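
// A usage sketch (owner, ep, and the reason string are hypothetical): this is
// the normal entry point for newly created endpoints, which must not have an
// ID assigned yet.
//
//	if err := AddEndpoint(owner, ep, "new container"); err != nil {
//		log.WithError(err).Error("unable to insert endpoint into manager")
//	}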

// WaitForEndpointsAtPolicyRev waits for all endpoints which existed at the time
// this function is called to be at a given policy revision.
// New endpoints appearing while waiting are ignored.
func WaitForEndpointsAtPolicyRev(ctx context.Context, rev uint64) error {
	eps := GetEndpoints()
	for i := range eps {
		select {
		case <-ctx.Done():
			return ctx.Err()
		case <-eps[i].WaitForPolicyRevision(ctx, rev, nil):
			if ctx.Err() != nil {
				return ctx.Err()
			}
		}
	}
	return nil
}
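
// A usage sketch (hypothetical revision and timeout): bounding the wait with
// a context deadline prevents blocking forever on a stuck endpoint.
//
//	ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
//	defer cancel()
//	err := WaitForEndpointsAtPolicyRev(ctx, rev)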

// CallbackForEndpointsAtPolicyRev registers a callback on all endpoints that
// exist when invoked. It is similar to WaitForEndpointsAtPolicyRev, but
// each endpoint that reaches the desired revision calls 'done' independently.
// The provided callback should not block and should generally be lightweight.
func CallbackForEndpointsAtPolicyRev(ctx context.Context, rev uint64, done func(time.Time)) error {
	eps := GetEndpoints()
	for i := range eps {
		eps[i].WaitForPolicyRevision(ctx, rev, done)
	}
	return nil
}
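
// A usage sketch (observeDuration is a hypothetical metrics helper): the
// callback receives the time at which each endpoint reached the revision,
// which can be used to record per-endpoint convergence latency.
//
//	start := time.Now()
//	CallbackForEndpointsAtPolicyRev(ctx, rev, func(now time.Time) {
//		observeDuration(now.Sub(start)) // time for this endpoint to reach rev
//	})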