github.com/asynkron/protoactor-go@v0.0.0-20240308120642-ef91a6abee75/cluster/clusterproviders/k8s/k8s_provider.go

     1  package k8s
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"log/slog"
     8  	"os"
     9  	"path/filepath"
    10  	"strconv"
    11  	"strings"
    12  	"time"
    13  
    14  	"github.com/asynkron/protoactor-go/actor"
    15  	"github.com/asynkron/protoactor-go/cluster"
    16  	"github.com/google/uuid"
    17  	v1 "k8s.io/api/core/v1"
    18  	apierrors "k8s.io/apimachinery/pkg/api/errors"
    19  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    20  	"k8s.io/apimachinery/pkg/types"
    21  	"k8s.io/apimachinery/pkg/watch"
    22  	"k8s.io/client-go/kubernetes"
    23  	"k8s.io/client-go/rest"
    24  )
    25  
    26  var ProviderShuttingDownError = fmt.Errorf("kubernetes cluster provider is being shut down")
    27  
    28  // Labels is a convenience type for storing cluster pod labels
    29  type Labels map[string]string
    30  
    31  // Provider is a Kubernetes (k8s) cluster provider for Proto.Actor
    32  type Provider struct {
    33  	id             string
    34  	cluster        *cluster.Cluster
    35  	clusterName    string
    36  	podName        string
    37  	host           string
    38  	address        string
    39  	namespace      string
    40  	knownKinds     []string
    41  	clusterPods    map[types.UID]*v1.Pod
    42  	port           int
    43  	client         *kubernetes.Clientset
    44  	clusterMonitor *actor.PID
    45  	deregistered   bool
    46  	shutdown       bool
    47  	cancelWatch    context.CancelFunc
    48  }
    49  
    50  // make sure our Provider complies with the ClusterProvider interface
    51  var _ cluster.ClusterProvider = (*Provider)(nil)
    52  
    53  // New creates a new k8s Provider using the in-cluster configuration and returns a pointer to it, or an error
    54  func New(opts ...Option) (*Provider, error) {
    55  	// create new default k8s config
    56  	config, err := rest.InClusterConfig()
    57  	if err != nil {
    58  		return nil, err
    59  	}
    60  
    61  	return NewWithConfig(config, opts...)
    62  }
    63  
    64  // NewWithConfig creates a new k8s Provider from the given configuration
    65  // and options. It returns a pointer to the new Provider, or an error.
    66  func NewWithConfig(config *rest.Config, opts ...Option) (*Provider, error) {
    67  	clientset, err := kubernetes.NewForConfig(config)
    68  	if err != nil {
    69  		return nil, err
    70  	}
    71  
    72  	p := Provider{
    73  		client: clientset,
    74  	}
    75  
    76  	// process given options
    77  	for _, opt := range opts {
    78  		opt(&p)
    79  	}
    80  	return &p, nil
    81  }
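
        // Usage sketch (illustrative, not part of this file): wiring this provider into a
        // Proto.Actor cluster usually looks roughly like the block below. The remote and
        // identity-lookup calls are assumptions based on other protoactor-go examples and
        // may differ between versions; the provider itself only needs to be passed to
        // cluster.Configure.
        //
        //	provider, err := k8s.New()
        //	if err != nil {
        //		panic(err)
        //	}
        //	system := actor.NewActorSystem()
        //	remoteConfig := remote.Configure("0.0.0.0", 0)
        //	lookup := disthash.New()
        //	clusterConfig := cluster.Configure("my-cluster", provider, lookup, remoteConfig)
        //	c := cluster.New(system, clusterConfig)
        //	c.StartMember() // or c.StartClient() for a node that should not register as a member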
    82  
    83  // initializes the cluster provider
    84  func (p *Provider) init(c *cluster.Cluster) error {
    85  	host, port, err := c.ActorSystem.GetHostPort()
    86  	if err != nil {
    87  		return err
    88  	}
    89  
    90  	p.cluster = c
    91  	p.id = strings.Replace(uuid.New().String(), "-", "", -1)
    92  	p.knownKinds = c.GetClusterKinds()
    93  	p.clusterName = c.Config.Name
    94  	p.clusterPods = make(map[types.UID]*v1.Pod)
    95  	p.host = host
    96  	p.port = port
    97  	p.address = fmt.Sprintf("%s:%d", host, port)
    98  	return nil
    99  }
   100  
   101  // StartMember registers the member in the cluster and starts it
   102  func (p *Provider) StartMember(c *cluster.Cluster) error {
   103  	if err := p.init(c); err != nil {
   104  		return err
   105  	}
   106  
   107  	if err := p.startClusterMonitor(c); err != nil {
   108  		return err
   109  	}
   110  
   111  	p.registerMemberAsync(c)
   112  	p.startWatchingClusterAsync(c)
   113  
   114  	return nil
   115  }
   116  
   117  // StartClient starts the cluster monitor and watches the cluster topology without registering this node as a member
   118  func (p *Provider) StartClient(c *cluster.Cluster) error {
   119  	if err := p.init(c); err != nil {
   120  		return err
   121  	}
   122  
   123  	if err := p.startClusterMonitor(c); err != nil {
   124  		return err
   125  	}
   126  
   127  	p.startWatchingClusterAsync(c)
   128  	return nil
   129  }
   130  
   131  func (p *Provider) Shutdown(graceful bool) error {
   132  	if p.shutdown {
   133  		// we are already shut down or shutting down
   134  		return nil
   135  	}
   136  
   137  	p.shutdown = true
   138  
   139  	p.cluster.Logger().Info("Shutting down k8s cluster provider")
   140  	if p.clusterMonitor != nil {
   141  		if err := p.cluster.ActorSystem.Root.RequestFuture(p.clusterMonitor, &DeregisterMember{}, 5*time.Second).Wait(); err != nil {
   142  			p.cluster.Logger().Error("Failed to deregister member - cluster monitor did not respond, proceeding with shutdown", slog.Any("error", err))
   143  		}
   144  
   145  		if err := p.cluster.ActorSystem.Root.RequestFuture(p.clusterMonitor, &StopWatchingCluster{}, 5*time.Second).Wait(); err != nil {
   146  			p.cluster.Logger().Error("Failed to stop watching cluster - cluster monitor did not respond, proceeding with shutdown", slog.Any("error", err))
   147  		}
   148  
   149  		_ = p.cluster.ActorSystem.Root.StopFuture(p.clusterMonitor).Wait()
   150  		p.clusterMonitor = nil
   151  	}
   152  
   153  	return nil
   154  }
   155  
   156  // spawns the cluster monitor actor and records this pod's name
   157  func (p *Provider) startClusterMonitor(c *cluster.Cluster) error {
   158  	var err error
   159  	p.clusterMonitor, err = c.ActorSystem.Root.SpawnNamed(actor.PropsFromProducer(func() actor.Actor {
   160  		return newClusterMonitor(p)
   161  	}), "k8s-cluster-monitor")
   162  
   163  	if err != nil {
   164  		p.cluster.Logger().Error("Failed to start k8s-cluster-monitor actor", slog.Any("error", err))
   165  		return err
   166  	}
   167  
   168  	p.podName, _ = os.Hostname()
   169  	return nil
   170  }
   171  
   172  // registers itself as a member asynchronously using an actor
   173  func (p *Provider) registerMemberAsync(c *cluster.Cluster) {
   174  	msg := RegisterMember{}
   175  	c.ActorSystem.Root.Send(p.clusterMonitor, &msg)
   176  }
   177  
   178  // registers itself as a member in the k8s cluster by labelling its own pod
   179  func (p *Provider) registerMember(timeout time.Duration) error {
   180  	p.cluster.Logger().Info(fmt.Sprintf("Registering service %s on %s", p.podName, p.address))
   181  
   182  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
   183  	defer cancel()
   184  
   185  	pod, err := p.client.CoreV1().Pods(p.retrieveNamespace()).Get(ctx, p.podName, metav1.GetOptions{})
   186  	if err != nil {
   187  		return fmt.Errorf("unable to get own pod information for %s: %w", p.podName, err)
   188  	}
   189  
   190  	p.cluster.Logger().Info("Using Kubernetes namespace and port", slog.String("namespace", pod.Namespace), slog.Int("port", p.port))
   191  
   192  	labels := Labels{
   193  		LabelCluster:  p.clusterName,
   194  		LabelPort:     fmt.Sprintf("%d", p.port),
   195  		LabelMemberID: p.id,
   196  	}
   197  
   198  	// add known kinds to labels
   199  	for _, kind := range p.knownKinds {
   200  		labelkey := fmt.Sprintf("%s-%s", LabelKind, kind)
   201  		labels[labelkey] = "true"
   202  	}
   203  
   204  	// merge the pod's existing labels back in (existing keys take precedence)
   205  	for key, value := range pod.ObjectMeta.Labels {
   206  		labels[key] = value
   207  	}
   208  	pod.SetLabels(labels)
   209  
   210  	return p.replacePodLabels(ctx, pod)
   211  }
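
        // After a successful registration the pod carries, in addition to its pre-existing
        // labels, LabelCluster (the cluster name), LabelPort (the actor-system port),
        // LabelMemberID (this provider's id) and one "<LabelKind>-<kind>" = "true" label per
        // known kind. An illustrative shape of the label map (values are hypothetical):
        //
        //	Labels{
        //		LabelCluster:               "my-cluster",
        //		LabelPort:                  "8080",
        //		LabelMemberID:              "0f47b1...",
        //		LabelKind + "-" + "player": "true",
        //	}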
   212  
   213  func (p *Provider) startWatchingClusterAsync(c *cluster.Cluster) {
   214  	msg := StartWatchingCluster{p.clusterName}
   215  	c.ActorSystem.Root.Send(p.clusterMonitor, &msg)
   216  }
   217  
   218  func (p *Provider) startWatchingCluster() error {
   219  	selector := fmt.Sprintf("%s=%s", LabelCluster, p.clusterName)
   220  
   221  	p.cluster.Logger().Debug("Starting to watch pods", slog.String("selector", selector))
   222  
   223  	ctx, cancel := context.WithCancel(context.Background())
   224  	p.cancelWatch = cancel
   225  
   226  	// start a new goroutine to monitor the cluster events
   227  	go func() {
   228  		for {
   229  			select {
   230  			case <-ctx.Done():
   231  				p.cluster.Logger().Debug("Stopping watch on pods")
   232  				return
   233  			default:
   234  				if err := p.watchPods(ctx, selector); err != nil {
   235  					p.cluster.Logger().Error("Error watching pods, will retry", slog.Any("error", err))
   236  					time.Sleep(5 * time.Second)
   237  				}
   238  			}
   239  		}
   240  	}()
   241  
   242  	return nil
   243  }
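
        // Note: watchPods blocks until the watch channel closes or the context is cancelled;
        // when it returns with an error, the loop above waits five seconds and re-establishes
        // the watch, so transient API-server failures only delay topology updates.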
   244  
   245  func (p *Provider) watchPods(ctx context.Context, selector string) error {
   246  	watcher, err := p.client.CoreV1().Pods(p.retrieveNamespace()).Watch(ctx, metav1.ListOptions{LabelSelector: selector, Watch: true})
   247  	if err != nil {
   248  		err = fmt.Errorf("unable to watch pods: %w", err)
   249  		p.cluster.Logger().Error(err.Error(), slog.Any("error", err))
   250  		return err
   251  	}
   252  
   253  	p.cluster.Logger().Info("Pod watcher started")
   254  
   255  	for {
   256  		select {
   257  		case <-ctx.Done():
   258  			watcher.Stop()
   259  			return nil
   260  		case event, ok := <-watcher.ResultChan():
   261  			if !ok {
   262  				return fmt.Errorf("pod watcher channel closed abruptly")
   263  			}
   264  			pod, ok := event.Object.(*v1.Pod)
   265  			if !ok {
   266  				err := fmt.Errorf("could not cast %#v[%T] into v1.Pod", event.Object, event.Object)
   267  				p.cluster.Logger().Error(err.Error(), slog.Any("error", err))
   268  				continue
   269  			}
   270  
   271  			p.processPodEvent(event, pod)
   272  		}
   273  	}
   274  }
   275  
   276  func (p *Provider) processPodEvent(event watch.Event, pod *v1.Pod) {
   277  	p.cluster.Logger().Debug("Watcher reported event for pod", slog.Any("eventType", event.Type), slog.String("podName", pod.ObjectMeta.Name))
   278  
   279  	podClusterName, hasClusterName := pod.ObjectMeta.Labels[LabelCluster]
   280  	if !hasClusterName {
   281  		p.cluster.Logger().Info("The pod is not a cluster member", slog.Any("podName", pod.ObjectMeta.Name))
   282  		delete(p.clusterPods, pod.UID) // pod could have been in the cluster, but then it was deregistered
   283  	} else if podClusterName != p.clusterName {
   284  		p.cluster.Logger().Info("The pod is a member of another cluster", slog.Any("podName", pod.ObjectMeta.Name), slog.String("otherCluster", podClusterName))
   285  		return
   286  	} else {
   287  		switch event.Type {
   288  		case watch.Deleted:
   289  			delete(p.clusterPods, pod.UID)
   290  		case watch.Error:
   291  			err := apierrors.FromObject(event.Object)
   292  			p.cluster.Logger().Error(err.Error(), slog.Any("error", err))
   293  		default:
   294  			p.clusterPods[pod.UID] = pod
   295  		}
   296  	}
   297  
   298  	if p.cluster.Logger().Enabled(nil, slog.LevelDebug) {
   299  		logCurrentPods(p.clusterPods, p.cluster.Logger())
   300  	}
   301  
   302  	members := mapPodsToMembers(p.clusterPods, p.cluster.Logger())
   303  
   304  	p.cluster.Logger().Info("Topology received from Kubernetes", slog.Any("members", members))
   305  	p.cluster.MemberList.UpdateClusterTopology(members)
   306  }
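
        // For example, a watch.Deleted event for a pod labelled with this cluster's name
        // removes that pod from clusterPods; the remaining pods are then re-mapped to members
        // and pushed to the MemberList, so the topology is refreshed on every pod event rather
        // than on a periodic resync.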
   307  
   308  func logCurrentPods(clusterPods map[types.UID]*v1.Pod, logger *slog.Logger) {
   309  	podNames := make([]string, 0, len(clusterPods))
   310  	for _, pod := range clusterPods {
   311  		podNames = append(podNames, pod.ObjectMeta.Name)
   312  	}
   313  	logger.Debug("Detected cluster pods are now", slog.Int("numberOfPods", len(clusterPods)), slog.Any("podNames", podNames))
   314  }
   315  
   316  func mapPodsToMembers(clusterPods map[types.UID]*v1.Pod, logger *slog.Logger) []*cluster.Member {
   317  	members := make([]*cluster.Member, 0, len(clusterPods))
   318  	for _, clusterPod := range clusterPods {
   319  		if clusterPod.Status.Phase == v1.PodRunning && len(clusterPod.Status.PodIPs) > 0 {
   320  
   321  			var kinds []string
   322  			for key, value := range clusterPod.ObjectMeta.Labels {
   323  				if strings.HasPrefix(key, LabelKind) && value == "true" {
   324  					kinds = append(kinds, strings.Replace(key, fmt.Sprintf("%s-", LabelKind), "", 1))
   325  				}
   326  			}
   327  
   328  			host := clusterPod.Status.PodIP
   329  			port, err := strconv.Atoi(clusterPod.ObjectMeta.Labels[LabelPort])
   330  			if err != nil {
   331  				err = fmt.Errorf("cannot convert pod label %s into an integer: %w", LabelPort, err)
   332  				logger.Error(err.Error(), slog.Any("error", err))
   333  				continue
   334  			}
   335  
   336  			mid := clusterPod.ObjectMeta.Labels[LabelMemberID]
   337  			alive := true
   338  			for _, status := range clusterPod.Status.ContainerStatuses {
   339  				if !status.Ready {
   340  					logger.Debug("Pod container is not ready", slog.String("podName", clusterPod.ObjectMeta.Name), slog.String("containerName", status.Name))
   341  					alive = false
   342  					break
   343  				}
   344  			}
   345  
   346  			if !alive {
   347  				continue
   348  			}
   349  
   350  			logger.Debug("Pod is running and all containers are ready", slog.String("podName", clusterPod.ObjectMeta.Name), slog.Any("podIPs", clusterPod.Status.PodIPs), slog.String("podPhase", string(clusterPod.Status.Phase)))
   351  
   352  			members = append(members, &cluster.Member{
   353  				Id:    mid,
   354  				Host:  host,
   355  				Port:  int32(port),
   356  				Kinds: kinds,
   357  			})
   358  		} else {
   359  			logger.Debug("Pod is not in Running state", slog.String("podName", clusterPod.ObjectMeta.Name), slog.Any("podIPs", clusterPod.Status.PodIPs), slog.String("podPhase", string(clusterPod.Status.Phase)))
   360  		}
   361  	}
   362  
   363  	return members
   364  }
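
        // Illustrative mapping (hypothetical values): a Running pod whose containers are all
        // ready, with pod IP 10.1.2.3, LabelPort "8080", LabelMemberID "abc123" and kind
        // labels for "player" and "room", is reported as
        //
        //	&cluster.Member{Id: "abc123", Host: "10.1.2.3", Port: 8080, Kinds: []string{"player", "room"}}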
   365  
   366  // deregisters itself as a member of the k8s cluster by removing the provider-managed labels from its pod
   367  func (p *Provider) deregisterMember(timeout time.Duration) error {
   368  	p.cluster.Logger().Info(fmt.Sprintf("Deregistering service %s from %s", p.podName, p.address))
   369  
   370  	ctx, cancel := context.WithTimeout(context.Background(), timeout)
   371  	defer cancel()
   372  
   373  	pod, err := p.client.CoreV1().Pods(p.retrieveNamespace()).Get(ctx, p.podName, metav1.GetOptions{})
   374  	if err != nil {
   375  		return fmt.Errorf("unable to get own pod information for %s: %w", p.podName, err)
   376  	}
   377  
   378  	labels := pod.GetLabels()
   379  
   380  	for labelKey := range labels {
   381  		if strings.HasPrefix(labelKey, LabelPrefix) {
   382  			delete(labels, labelKey)
   383  		}
   384  	}
   385  
   386  	pod.SetLabels(labels)
   387  
   388  	return p.replacePodLabels(ctx, pod)
   389  }
   390  
   391  // replacePodLabels prepares a JSON patch payload and sends it to Kubernetes to replace the pod's labels
   392  func (p *Provider) replacePodLabels(ctx context.Context, pod *v1.Pod) error {
   393  	p.cluster.Logger().Debug("Setting pod labels", slog.Any("labels", pod.GetLabels()))
   394  
   395  	payload := []struct {
   396  		Op    string `json:"op"`
   397  		Path  string `json:"path"`
   398  		Value Labels `json:"value"`
   399  	}{
   400  		{
   401  			Op:    "replace",
   402  			Path:  "/metadata/labels",
   403  			Value: pod.GetLabels(),
   404  		},
   405  	}
   406  
   407  	payloadData, err := json.Marshal(payload)
   408  	if err != nil {
   409  		return fmt.Errorf("unable to marshal pod labels patch: %w", err)
   410  	}
   411  
   412  	_, patcherr := p.client.CoreV1().Pods(pod.GetNamespace()).Patch(ctx, pod.GetName(), types.JSONPatchType, payloadData, metav1.PatchOptions{})
   413  	return patcherr
   414  }
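
        // The marshalled patch sent to the API server has the following shape (label values
        // are illustrative):
        //
        //	[{"op":"replace","path":"/metadata/labels","value":{"some-label":"some-value"}}]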
   415  
   416  // retrieveNamespace returns the namespace of the current pod, reading it from the service account mount on first use
   417  func (p *Provider) retrieveNamespace() string {
   418  	if p.namespace == "" {
   419  		filename := filepath.Join(string(filepath.Separator), "var", "run", "secrets", "kubernetes.io", "serviceaccount", "namespace")
   420  		content, err := os.ReadFile(filename)
   421  		if err != nil {
   422  			p.cluster.Logger().Warn(fmt.Sprintf("Could not read %s contents, defaulting to empty namespace: %s", filename, err.Error()))
   423  			return p.namespace
   424  		}
   425  		p.namespace = string(content)
   426  	}
   427  
   428  	return p.namespace
   429  }