github.com/looshlee/cilium@v1.6.12/pkg/k8s/endpointsynchronizer/cep.go (about)

     1  // Copyright 2016-2018 Authors of Cilium
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package endpointsynchronizer
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"github.com/cilium/cilium/pkg/k8s"
    22  	"reflect"
    23  	"time"
    24  
    25  	"github.com/cilium/cilium/pkg/controller"
    26  	"github.com/cilium/cilium/pkg/endpoint"
    27  	cilium_v2 "github.com/cilium/cilium/pkg/k8s/apis/cilium.io/v2"
    28  	k8sversion "github.com/cilium/cilium/pkg/k8s/version"
    29  	pkgLabels "github.com/cilium/cilium/pkg/labels"
    30  	"github.com/cilium/cilium/pkg/option"
    31  
    32  	k8serrors "k8s.io/apimachinery/pkg/api/errors"
    33  	meta_v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    34  	"k8s.io/apimachinery/pkg/types"
    35  )
    36  
// EndpointSynchronizer is currently an empty type that wraps the syncing of
// CiliumEndpoint resources.
// TODO - see whether folding the global variables below into this type
// is cleaner.
type EndpointSynchronizer struct{}
    42  
    43  // RunK8sCiliumEndpointSync starts a controller that synchronizes the endpoint
    44  // to the corresponding k8s CiliumEndpoint CRD. It is expected that each CEP
    45  // has 1 controller that updates it, and a local copy is retained and only
    46  // updates are pushed up.
    47  // CiliumEndpoint objects have the same name as the pod they represent.
    48  func (epSync *EndpointSynchronizer) RunK8sCiliumEndpointSync(e *endpoint.Endpoint) {
    49  	var (
    50  		endpointID     = e.ID
    51  		controllerName = fmt.Sprintf("sync-to-k8s-ciliumendpoint (%v)", endpointID)
    52  		scopedLog      = e.Logger(subsysEndpointSync).WithField("controller", controllerName)
    53  	)
    54  
    55  	if option.Config.DisableCiliumEndpointCRD {
    56  		scopedLog.Warn("Not running controller. CEP CRD synchronization is disabled")
    57  		return
    58  	}
    59  
    60  	if !k8s.IsEnabled() {
    61  		scopedLog.Debug("Not starting controller because k8s is disabled")
    62  		return
    63  	}
    64  
    65  	ciliumClient := k8s.CiliumClient().CiliumV2()
    66  
    67  	// The health endpoint doesn't really exist in k8s and updates to it caused
    68  	// arbitrary errors. Disable the controller for these endpoints.
    69  	if isHealthEP := e.HasLabels(pkgLabels.LabelHealth); isHealthEP {
    70  		scopedLog.Debug("Not starting unnecessary CEP controller for cilium-health endpoint")
    71  		return
    72  	}
    73  
    74  	var (
    75  		lastMdl  *cilium_v2.EndpointStatus
    76  		localCEP *cilium_v2.CiliumEndpoint // the local copy of the CEP object. Reused.
    77  		needInit = true                    // needInit indicates that we may need to create the CEP
    78  	)
    79  
    80  	// NOTE: The controller functions do NOT hold the endpoint locks
    81  	e.UpdateController(controllerName,
    82  		controller.ControllerParams{
    83  			RunInterval: 10 * time.Second,
    84  			DoFunc: func(ctx context.Context) (err error) {
    85  				// Update logger as scopeLog might not have the podName when it
    86  				// was created.
    87  				scopedLog = e.Logger(subsysEndpointSync).WithField("controller", controllerName)
    88  
    89  				if k8sversion.Version() == nil {
    90  					return fmt.Errorf("Kubernetes apiserver is not available")
    91  				}
    92  
    93  				capabilities := k8sversion.Capabilities()
    94  
    95  				// K8sPodName and K8sNamespace are not always available when an
    96  				// endpoint is first created, so we collect them here.
    97  				podName := e.GetK8sPodName()
    98  				if podName == "" {
    99  					scopedLog.Debug("Skipping CiliumEndpoint update because it has no k8s pod name")
   100  					return nil
   101  				}
   102  
   103  				namespace := e.GetK8sNamespace()
   104  				if namespace == "" {
   105  					scopedLog.Debug("Skipping CiliumEndpoint update because it has no k8s namespace")
   106  					return nil
   107  				}
   108  
   109  				// Serialize the endpoint into a model. It is compared with the one
   110  				// from before, only updating on changes.
   111  				mdl := e.GetCiliumEndpointStatus()
   112  				if reflect.DeepEqual(mdl, lastMdl) {
   113  					scopedLog.Debug("Skipping CiliumEndpoint update because it has not changed")
   114  					return nil
   115  				}
   116  
   117  				// Initialize the CEP by deleting the upstream instance and recreating
   118  				// it. Deleting first allows for upgrade scenarios where the format has
   119  				// changed but our k8s CEP code cannot read in the upstream value.
   120  				if needInit {
   121  					state := e.GetState()
   122  					// Don't bother to create if the
   123  					// endpoint is already disconnecting
   124  					if state == endpoint.StateDisconnecting ||
   125  						state == endpoint.StateDisconnected {
   126  						return nil
   127  					}
   128  
   129  					scopedLog.Debug("Deleting CEP during an initialization")
   130  					err := ciliumClient.CiliumEndpoints(namespace).Delete(podName, &meta_v1.DeleteOptions{})
   131  					// It's only an error if it exists but something else happened
   132  					if err != nil && !k8serrors.IsNotFound(err) {
   133  						scopedLog.WithError(err).Warn("Error deleting CEP")
   134  						return err
   135  					}
   136  
   137  					// We can't create localCEP directly, it must come from the k8s
   138  					// server via an API call.
   139  					cep := &cilium_v2.CiliumEndpoint{
   140  						ObjectMeta: meta_v1.ObjectMeta{
   141  							Name: podName,
   142  						},
   143  						Status: *mdl,
   144  					}
   145  					localCEP, err = ciliumClient.CiliumEndpoints(namespace).Create(cep)
   146  					if err != nil {
   147  						scopedLog.WithError(err).Error("Cannot create CEP")
   148  						return err
   149  					}
   150  
   151  					// We have successfully created the CEP and can return. Subsequent
   152  					// runs will update using localCEP.
   153  					needInit = false
   154  					return nil
   155  				}
   156  
   157  				// We have no localCEP copy. We need to fetch it for updates, below.
   158  				// This is unexpected as there should be only 1 writer per CEP, this
   159  				// controller, and the localCEP created on startup will be used.
   160  				if localCEP == nil {
   161  					localCEP, err = ciliumClient.CiliumEndpoints(namespace).Get(podName, meta_v1.GetOptions{})
   162  					switch {
   163  					// The CEP doesn't exist in k8s. This is unexpetected but may occur
   164  					// if the endpoint was removed from k8s but not yet within the agent.
   165  					// Mark the CEP for creation on the next controller iteration. This
   166  					// may never occur if the controller is stopped on Endpoint delete.
   167  					case err != nil && k8serrors.IsNotFound(err):
   168  						needInit = true
   169  						return err
   170  
   171  					// We cannot read the upstream CEP. needInit will cause the next
   172  					// iteration to delete and create the CEP. This is an unexpected
   173  					// situation.
   174  					case err != nil && k8serrors.IsInvalid(err):
   175  						scopedLog.WithError(err).Warn("Invalid CEP during update")
   176  						needInit = true
   177  						return nil
   178  
   179  					// A real error
   180  					case err != nil:
   181  						scopedLog.WithError(err).Error("Cannot get CEP during update")
   182  						return err
   183  					}
   184  				}
   185  
   186  				switch {
   187  				case capabilities.Patch:
   188  					// For json patch we don't need to perform a GET for endpoints
   189  
   190  					// If it fails it means the test from the previous patch failed
   191  					// so we can safely replace this node in the CNP status.
   192  					replaceCEPStatus := []k8s.JSONPatch{
   193  						{
   194  							OP:    "replace",
   195  							Path:  "/status",
   196  							Value: mdl,
   197  						},
   198  					}
   199  					var createStatusPatch []byte
   200  					createStatusPatch, err = json.Marshal(replaceCEPStatus)
   201  					if err != nil {
   202  						return err
   203  					}
   204  					localCEP, err = ciliumClient.CiliumEndpoints(namespace).Patch(podName, types.JSONPatchType, createStatusPatch, "status")
   205  				default:
   206  					// We have an object to reuse. Update and push it up. In the case of an
   207  					// update error, we retry in the next iteration of the controller using
   208  					// the copy returned by Update.
   209  					scopedLog.Debug("Updating CEP from local copy")
   210  					mdl.DeepCopyInto(&localCEP.Status)
   211  					switch {
   212  					case capabilities.UpdateStatus:
   213  						localCEP, err = ciliumClient.CiliumEndpoints(namespace).UpdateStatus(localCEP)
   214  					default:
   215  						localCEP, err = ciliumClient.CiliumEndpoints(namespace).Update(localCEP)
   216  					}
   217  				}
   218  
   219  				// Handle Update errors or return successfully
   220  				switch {
   221  				// Return no error when we see a conflict. We want to retry without a
   222  				// backoff and the Update* calls returned the current localCEP
   223  				case err != nil && k8serrors.IsConflict(err):
   224  					scopedLog.WithError(err).Warn("Cannot update CEP due to a revision conflict. The next controller execution will try again")
   225  					needInit = true
   226  					return nil
   227  
   228  				// Ensure we re-init when we see a generic error. This will recrate the
   229  				// CEP.
   230  				case err != nil:
   231  					scopedLog.WithError(err).Error("Cannot update CEP")
   232  					needInit = true
   233  					return err
   234  
   235  				// A successful update means no more updates unless mdl changes
   236  				default:
   237  					lastMdl = mdl
   238  					return nil
   239  				}
   240  			},
   241  			StopFunc: func(ctx context.Context) error {
   242  				podName := e.GetK8sPodName()
   243  				if podName == "" {
   244  					scopedLog.Debug("Skipping CiliumEndpoint deletion because it has no k8s pod name")
   245  					return nil
   246  				}
   247  				namespace := e.GetK8sNamespace()
   248  				if namespace == "" {
   249  					scopedLog.Debug("Skipping CiliumEndpoint deletion because it has no k8s namespace")
   250  					return nil
   251  				}
   252  				if err := ciliumClient.CiliumEndpoints(namespace).Delete(podName, &meta_v1.DeleteOptions{}); err != nil {
   253  					if !k8serrors.IsNotFound(err) {
   254  						scopedLog.WithError(err).Warning("Unable to delete CEP")
   255  					}
   256  				}
   257  				return nil
   258  			},
   259  		})
   260  }