istio.io/istio@v0.0.0-20240520182934-d79c90f27776/cni/pkg/nodeagent/cni-watcher.go (about)

     1  // Copyright Istio Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package nodeagent
    16  
    17  import (
    18  	"context"
    19  	"encoding/json"
    20  	"fmt"
    21  	"io"
    22  	"net"
    23  	"net/http"
    24  	"net/netip"
    25  	"time"
    26  
    27  	corev1 "k8s.io/api/core/v1"
    28  
    29  	pconstants "istio.io/istio/cni/pkg/constants"
    30  	"istio.io/istio/cni/pkg/pluginlistener"
    31  )
    32  
// CNIPluginAddEvent is a composite of the CNI plugin add event struct plus some extracted "args".
// processAddEvent decodes one of these from the JSON body of each add-event request, and
// ReconcileCNIAddEvent consumes it.
//
//   - Netns is handed to the dataplane unchanged when the pod is added to the mesh.
//     NOTE(review): presumably the pod's network namespace path — confirm against the plugin side.
//   - PodName and PodNamespace identify the pod that is looked up through the K8s handlers.
//   - IPs carries the address configs the CNI plugin reported for the pod; ReconcileCNIAddEvent
//     uses only the IP part of each entry's Address.
type CNIPluginAddEvent struct {
	Netns        string
	PodName      string
	PodNamespace string
	IPs          []IPConfig
}
    40  
// IPConfig contains an interface/gateway/address combo defined for a newly-started pod by CNI.
// This is "from the horse's mouth" so to speak and will be populated before Kube is informed of the
// pod IP.
//
//   - Interface is not read in this file.
//     NOTE(review): presumably an index into the CNI result's interface list, nil when unset — confirm.
//   - Address is an IP plus mask; the code in this file reads only Address.IP and ignores the mask.
//   - Gateway is not read in this file.
type IPConfig struct {
	Interface *int
	Address   net.IPNet
	Gateway   net.IP
}
    49  
// CniPluginServer is an HTTP server that receives pod add events from the CNI plugin over a
// Unix domain socket and adds the referenced pods to the mesh dataplane.
//
//   - cniListenServer is the HTTP server; its mux routes pconstants.CNIAddEventPath to handleAddEvent.
//   - cniListenServerCancel cancels ctx; Stop calls it.
//   - handlers is used to look up the pod named in an event.
//   - dataplane is what pods are added to via AddPodToMesh.
//   - sockAddress is the socket address Start listens on; Start fails if it is empty.
//   - ctx is the server's lifetime context; once it is done, the AfterFunc registered by Start
//     closes cniListenServer.
type CniPluginServer struct {
	cniListenServer       *http.Server
	cniListenServerCancel context.CancelFunc
	handlers              K8sHandlers
	dataplane             MeshDataplane

	sockAddress string
	ctx         context.Context
}
    59  
    60  func startCniPluginServer(ctx context.Context, pluginSocket string,
    61  	handlers K8sHandlers,
    62  	dataplane MeshDataplane,
    63  ) *CniPluginServer {
    64  	ctx, cancel := context.WithCancel(ctx)
    65  	mux := http.NewServeMux()
    66  	s := &CniPluginServer{
    67  		handlers:  handlers,
    68  		dataplane: dataplane,
    69  		cniListenServer: &http.Server{
    70  			Handler: mux,
    71  		},
    72  		cniListenServerCancel: cancel,
    73  		sockAddress:           pluginSocket,
    74  		ctx:                   ctx,
    75  	}
    76  
    77  	mux.HandleFunc(pconstants.CNIAddEventPath, s.handleAddEvent)
    78  	return s
    79  }
    80  
    81  func (s *CniPluginServer) Stop() {
    82  	s.cniListenServerCancel()
    83  }
    84  
    85  // Start starts up a UDS server which receives events from the CNI chain plugin.
    86  func (s *CniPluginServer) Start() error {
    87  	if s.sockAddress == "" {
    88  		return fmt.Errorf("no socket address provided")
    89  	}
    90  	log.Info("Start a listen server for CNI plugin events")
    91  	unixListener, err := pluginlistener.NewListener(s.sockAddress)
    92  	if err != nil {
    93  		return fmt.Errorf("failed to create CNI listener: %v", err)
    94  	}
    95  	go func() {
    96  		err := s.cniListenServer.Serve(unixListener)
    97  
    98  		select {
    99  		case <-s.ctx.Done():
   100  			// ctx done, we should silently go away
   101  			return
   102  		default:
   103  			// If the cniListener exits, at least we should record an error log
   104  			log.Errorf("CNI listener server exiting unexpectedly: %v", err)
   105  		}
   106  	}()
   107  
   108  	context.AfterFunc(s.ctx, func() {
   109  		if err := s.cniListenServer.Close(); err != nil {
   110  			log.Errorf("CNI listen server terminated with error: %v", err)
   111  		} else {
   112  			log.Debug("CNI listen server terminated")
   113  		}
   114  	})
   115  	return nil
   116  }
   117  
   118  func (s *CniPluginServer) handleAddEvent(w http.ResponseWriter, req *http.Request) {
   119  	if req.Body == nil {
   120  		log.Error("empty request body")
   121  		http.Error(w, "empty request body", http.StatusBadRequest)
   122  		return
   123  	}
   124  	defer req.Body.Close()
   125  	data, err := io.ReadAll(req.Body)
   126  	if err != nil {
   127  		log.Errorf("Failed to read event report from cni plugin: %v", err)
   128  		http.Error(w, err.Error(), http.StatusInternalServerError)
   129  		return
   130  	}
   131  	msg, err := processAddEvent(data)
   132  	if err != nil {
   133  		log.Errorf("Failed to process CNI event payload: %v", err)
   134  		http.Error(w, err.Error(), http.StatusBadRequest)
   135  		return
   136  	}
   137  
   138  	if err := s.ReconcileCNIAddEvent(req.Context(), msg); err != nil {
   139  		log.Errorf("Failed to handle add event: %v", err)
   140  		http.Error(w, err.Error(), http.StatusInternalServerError)
   141  		return
   142  	}
   143  }
   144  
   145  func processAddEvent(body []byte) (CNIPluginAddEvent, error) {
   146  	var msg CNIPluginAddEvent
   147  	err := json.Unmarshal(body, &msg)
   148  	if err != nil {
   149  		log.Errorf("Failed to unmarshal CNI plugin event: %v", err)
   150  		return msg, err
   151  	}
   152  
   153  	log.Debugf("Deserialized CNI plugin event: %+v", msg)
   154  	return msg, nil
   155  }
   156  
   157  func (s *CniPluginServer) ReconcileCNIAddEvent(ctx context.Context, addCmd CNIPluginAddEvent) error {
   158  	log := log.WithLabels("cni-event", addCmd)
   159  
   160  	log.Debugf("netns: %s", addCmd.Netns)
   161  
   162  	// The CNI node plugin should have already checked the pod against the k8s API before forwarding us the event,
   163  	// but we have to invoke the K8S client anyway, so to be safe we check it again here to make sure we get the same result.
   164  	maxStaleRetries := 10
   165  	msInterval := 10
   166  	retries := 0
   167  	var ambientPod *corev1.Pod
   168  	var err error
   169  
   170  	log.Debugf("Checking pod: %s in ns: %s is enabled for ambient", addCmd.PodName, addCmd.PodNamespace)
   171  	// The plugin already consulted the k8s API - but on this end handler caches may be stale, so retry a few times if we get no pod.
   172  	for ambientPod, err = s.handlers.GetPodIfAmbient(addCmd.PodName, addCmd.PodNamespace); (ambientPod == nil) && (retries < maxStaleRetries); retries++ {
   173  		if err != nil {
   174  			return err
   175  		}
   176  		log.Warnf("got an event for pod %s in namespace %s not found in current pod cache, retry %d of %d",
   177  			addCmd.PodName, addCmd.PodNamespace, retries, maxStaleRetries)
   178  		time.Sleep(time.Duration(msInterval) * time.Millisecond)
   179  	}
   180  
   181  	if ambientPod == nil {
   182  		return fmt.Errorf("got event for pod %s in namespace %s but could not find in pod cache after retries", addCmd.PodName, addCmd.PodNamespace)
   183  	}
   184  	log.Debugf("Pod: %s in ns: %s is enabled for ambient, adding to mesh.", addCmd.PodName, addCmd.PodNamespace)
   185  
   186  	var podIps []netip.Addr
   187  	for _, configuredPodIPs := range addCmd.IPs {
   188  		// net.ip is implicitly convertible to netip as slice
   189  		ip, _ := netip.AddrFromSlice(configuredPodIPs.Address.IP)
   190  		// We ignore the mask of the IPNet - it's fine if the IPNet defines
   191  		// a block grant of addresses, we just need one for checking routes.
   192  		podIps = append(podIps, ip)
   193  	}
   194  	// Note that we use the IP info from the CNI plugin here - the Pod struct as reported by K8S doesn't have this info
   195  	// yet (because the K8S control plane doesn't), so it will be empty there.
   196  	err = s.dataplane.AddPodToMesh(ctx, ambientPod, podIps, addCmd.Netns)
   197  	if err != nil {
   198  		return err
   199  	}
   200  
   201  	return nil
   202  }