github.com/NVIDIA/aistore@v1.3.23-0.20240517131212-7df6609be51d/ext/etl/boot.go

// Package etl provides utilities to initialize and use transformation pods.
/*
 * Copyright (c) 2018-2024, NVIDIA CORPORATION. All rights reserved.
 */
package etl

import (
	"context"
	"fmt"
	"net"
	"time"

	"github.com/NVIDIA/aistore/api/apc"
	"github.com/NVIDIA/aistore/cmn"
	"github.com/NVIDIA/aistore/cmn/cos"
	"github.com/NVIDIA/aistore/cmn/debug"
	"github.com/NVIDIA/aistore/cmn/k8s"
	"github.com/NVIDIA/aistore/cmn/nlog"
	"github.com/NVIDIA/aistore/core"
	"github.com/NVIDIA/aistore/xact/xreg"
	corev1 "k8s.io/api/core/v1"
	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
	"k8s.io/apimachinery/pkg/util/wait"
)

const appLabel = "app"

type etlBootstrapper struct {
	// construction
	errCtx *cmn.ETLErrCtx
	config *cmn.Config
	msg    InitSpecMsg
	env    map[string]string

	// runtime
	xctn            core.Xact
	pod             *corev1.Pod
	svc             *corev1.Service
	uri             string
	originalPodName string
	originalCommand []string
}

func (b *etlBootstrapper) createPodSpec() (err error) {
	if b.pod, err = ParsePodSpec(b.errCtx, b.msg.Spec); err != nil {
		return
	}
	b.originalPodName = b.pod.GetName()
	b.errCtx.ETLName = b.originalPodName
	return b._prepSpec()
}

func (b *etlBootstrapper) _prepSpec() (err error) {
	// Override the pod name: append the target ID
	// (K8s doesn't allow `_` and uppercase).
	b.pod.SetName(k8s.CleanName(b.msg.IDX + "-" + core.T.SID()))
	b.errCtx.PodName = b.pod.GetName()
	b.pod.APIVersion = "v1"

	// The following combination of affinity and anti-affinity ensures that:
	// 1. The ETL container is always scheduled on the node of the target that invokes it.
	// 2. At any given point in time, no more than one ETL container for the same target is
	//    scheduled on that node.
	if err = b._setAffinity(); err != nil {
		return
	}
	if err = b._setAntiAffinity(); err != nil {
		return
	}

	b._updPodCommand()
	b._updPodLabels()
	b._updReady()

	b._setPodEnv()

	if cmn.Rom.FastV(4, cos.SmoduleETL) {
		nlog.Infof("prep pod spec: %s, %+v", b.msg.String(), b.errCtx)
	}
	return
}

func (b *etlBootstrapper) createServiceSpec() {
	b.svc = &corev1.Service{
		TypeMeta: metav1.TypeMeta{
			Kind:       "Service",
			APIVersion: "v1",
		},
		ObjectMeta: metav1.ObjectMeta{
			Name: b.pod.GetName(),
		},
		Spec: corev1.ServiceSpec{
			Ports: []corev1.ServicePort{
				{Port: b.pod.Spec.Containers[0].Ports[0].ContainerPort},
			},
			Selector: map[string]string{
				podNameLabel: b.pod.Labels[podNameLabel],
				appLabel:     b.pod.Labels[appLabel],
			},
			Type: corev1.ServiceTypeNodePort,
		},
	}
	b._setSvcLabels()
	b.errCtx.SvcName = b.svc.Name
}

func (b *etlBootstrapper) setupConnection() (err error) {
	// Retrieve the host IP of the pod.
	var hostIP string
	if hostIP, err = b._getHost(); err != nil {
		return
	}

	// Retrieve the port assigned by the service.
	var nodePort uint
	if nodePort, err = b._getPort(); err != nil {
		return
	}

	// Make sure the pod is reachable from this target via its TCP socket address.
	etlSocketAddr := fmt.Sprintf("%s:%d", hostIP, nodePort)
	if err = b._dial(etlSocketAddr); err != nil {
		if cmn.Rom.FastV(4, cos.SmoduleETL) {
			nlog.Warningf("failed to dial -> %s: %s, %+v, %s", etlSocketAddr, b.msg.String(), b.errCtx, b.uri)
		}
		err = cmn.NewErrETL(b.errCtx, err.Error())
		return
	}

	b.uri = "http://" + etlSocketAddr
	if cmn.Rom.FastV(4, cos.SmoduleETL) {
		nlog.Infof("setup connection -> %s, %+v, %s", b.uri, b.msg.String(), b.errCtx)
	}
	return nil
}

func (b *etlBootstrapper) _dial(socketAddr string) error {
	probeInterval := cmn.Rom.MaxKeepalive()
	err := cmn.NetworkCallWithRetry(&cmn.RetryArgs{
		Call: func() (int, error) {
			conn, err := net.DialTimeout("tcp", socketAddr, probeInterval)
			if err != nil {
				return 0, err
			}
			cos.Close(conn)
			return 0, nil
		},
		SoftErr: 10,
		HardErr: 2,
		Sleep:   3 * time.Second,
		Action:  "dial POD " + b.pod.Name + " at " + socketAddr,
	})
	if err != nil {
		return fmt.Errorf("failed to wait for ETL Service/Pod %q to respond, err: %v", b.pod.Name, err)
	}
	return nil
}

func (b *etlBootstrapper) createEntity(entity string) error {
	client, err := k8s.GetClient()
	if err != nil {
		return err
	}
	switch entity {
	case k8s.Pod:
		err = client.Create(b.pod)
	case k8s.Svc:
		err = client.Create(b.svc)
	default:
		cos.AssertMsg(false, "invalid K8s entity: "+entity)
	}

	if err != nil {
		err = cmn.NewErrETL(b.errCtx, "failed to create %s (err: %v)", entity, err)
	}
	return err
}

// waitPodReady waits until the ETL Pod becomes `Ready`. This happens only after
// the Pod's containers have started and the Pod's `readinessProbe` request (made
// by Kubernetes itself) returns OK. If the Pod doesn't have a `readinessProbe`
// configured, the last step is skipped.
//
// NOTE: currently, we do require a readinessProbe config in the ETL spec.
func (b *etlBootstrapper) waitPodReady() error {
	var (
		timeout     = b.msg.Timeout.D()
		interval    = cos.ProbingFrequency(timeout)
		client, err = k8s.GetClient()
	)
	if err != nil {
		return cmn.NewErrETL(b.errCtx, "%v", err)
	}
	if cmn.Rom.FastV(4, cos.SmoduleETL) {
		nlog.Infof("waiting pod %q ready (%+v, %s) timeout=%v ival=%v",
			b.pod.Name, b.msg.String(), b.errCtx, timeout, interval)
	}
	// wait
	err = wait.PollUntilContextTimeout(context.Background(), interval, timeout, false /*immediate*/,
		func(context.Context) (ready bool, err error) {
			return checkPodReady(client, b.pod.Name)
		},
	)

	if err == nil {
		return nil
	}
	pod, _ := client.Pod(b.pod.Name)
	if pod == nil {
		return cmn.NewErrETL(b.errCtx, "%v", err)
	}
	err = cmn.NewErrETL(b.errCtx,
		`%v (pod phase: %q, pod conditions: %s; expected condition: %s)`,
		err, pod.Status.Phase, podConditionsToString(pod.Status.Conditions),
		podConditionToString(&corev1.PodCondition{Type: corev1.PodReady, Status: corev1.ConditionTrue}),
	)
	return err
}

// setupXaction registers (renews) the ETL xaction and keeps a reference to it.
func (b *etlBootstrapper) setupXaction(xid string) {
	rns := xreg.RenewETL(b.msg, xid)
	debug.AssertNoErr(rns.Err)
	debug.Assert(!rns.IsRunning())
	b.xctn = rns.Entry.Get()
	debug.Assertf(b.xctn.ID() == xid, "%s vs %s", b.xctn.ID(), xid)
}

func (b *etlBootstrapper) _updPodCommand() {
	if b.msg.CommTypeX != HpushStdin {
		return
	}

	b.originalCommand = b.pod.Spec.Containers[0].Command
	b.pod.Spec.Containers[0].Command = []string{"sh", "-c", "/server"}
}

// Sets the pod's node affinity, so that the pod is scheduled on the same node as the target creating it.
func (b *etlBootstrapper) _setAffinity() error {
	if b.pod.Spec.Affinity == nil {
		b.pod.Spec.Affinity = &corev1.Affinity{}
	}
	if b.pod.Spec.Affinity.NodeAffinity == nil {
		b.pod.Spec.Affinity.NodeAffinity = &corev1.NodeAffinity{}
	}

	reqAffinity := b.pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution
	prefAffinity := b.pod.Spec.Affinity.NodeAffinity.PreferredDuringSchedulingIgnoredDuringExecution
	if (reqAffinity != nil && len(reqAffinity.NodeSelectorTerms) > 0) || len(prefAffinity) > 0 {
		return cmn.NewErrETL(b.errCtx, "error in YAML spec: pod should not have any NodeAffinities defined")
	}

	nodeSelector := &corev1.NodeSelector{
		NodeSelectorTerms: []corev1.NodeSelectorTerm{
			{
				MatchExpressions: []corev1.NodeSelectorRequirement{{
					Key:      nodeNameLabel,
					Operator: corev1.NodeSelectorOpIn,
					Values:   []string{k8s.NodeName},
				}},
			},
		},
	}
	// RequiredDuringSchedulingIgnoredDuringExecution means that the ETL container is placed on the same
	// machine as the target that creates it. This guarantee holds only during scheduling - the initial
	// pod start-up sequence. However, a target removes its ETL pod when it goes down, so this guarantee
	// is sufficient. Additionally, when targets notice that another target went down, they all stop
	// their running ETL pods.
	b.pod.Spec.Affinity.NodeAffinity.RequiredDuringSchedulingIgnoredDuringExecution = nodeSelector
	return nil
}

// Sets the pod's anti-affinity, so that no two pods matching the criteria are scheduled on the same node
// at the same time. (Currently, this only validates that the user-provided spec does not define its own
// pod anti-affinity.)
func (b *etlBootstrapper) _setAntiAffinity() error {
	if b.pod.Spec.Affinity == nil {
		b.pod.Spec.Affinity = &corev1.Affinity{}
	}
	if b.pod.Spec.Affinity.PodAntiAffinity == nil {
		b.pod.Spec.Affinity.PodAntiAffinity = &corev1.PodAntiAffinity{}
	}

	reqAntiAffinities := b.pod.Spec.Affinity.PodAntiAffinity.RequiredDuringSchedulingIgnoredDuringExecution
	prefAntiAffinity := b.pod.Spec.Affinity.PodAntiAffinity.PreferredDuringSchedulingIgnoredDuringExecution

	if len(reqAntiAffinities) > 0 || len(prefAntiAffinity) > 0 {
		return cmn.NewErrETL(b.errCtx, "error in YAML spec, pod should not have any NodeAntiAffinities defined")
	}

	return nil
}

func (b *etlBootstrapper) _updPodLabels() {
	if b.pod.Labels == nil {
		b.pod.Labels = make(map[string]string, 6)
	}

	b.pod.Labels[appLabel] = "ais"
	b.pod.Labels[podNameLabel] = b.pod.GetName()
	b.pod.Labels[podNodeLabel] = k8s.NodeName
	b.pod.Labels[podTargetLabel] = core.T.SID()
	b.pod.Labels[appK8sNameLabel] = "etl"
	b.pod.Labels[appK8sComponentLabel] = "server"
}

func (b *etlBootstrapper) _setSvcLabels() {
	if b.svc.Labels == nil {
		b.svc.Labels = make(map[string]string, 4)
	}
	b.svc.Labels[appLabel] = "ais"
	b.svc.Labels[svcNameLabel] = b.svc.GetName()
	b.svc.Labels[appK8sNameLabel] = "etl"
	b.svc.Labels[appK8sComponentLabel] = "server"
}

func (b *etlBootstrapper) _updReady() {
	// NOTE: a readinessProbe is required in the ETL spec (see waitPodReady above).
	probe := b.pod.Spec.Containers[0].ReadinessProbe

	// If someone has already set these values, we don't want to touch them.
	if probe.TimeoutSeconds != 0 || probe.PeriodSeconds != 0 {
		return
	}

	// Set default values.
	probe.TimeoutSeconds = 5
	probe.PeriodSeconds = 10
}

// Sets environment variables that can be accessed inside the container.
func (b *etlBootstrapper) _setPodEnv() {
	containers := b.pod.Spec.Containers
	debug.Assert(len(containers) > 0)
	for idx := range containers {
		containers[idx].Env = append(containers[idx].Env, corev1.EnvVar{
			Name:  "AIS_TARGET_URL",
			Value: core.T.Snode().URL(cmn.NetPublic) + apc.URLPathETLObject.Join(reqSecret),
		})
		for k, v := range b.env {
			containers[idx].Env = append(containers[idx].Env, corev1.EnvVar{
				Name:  k,
				Value: v,
			})
		}
	}
	for idx := range b.pod.Spec.InitContainers {
		for k, v := range b.env {
			b.pod.Spec.InitContainers[idx].Env = append(b.pod.Spec.InitContainers[idx].Env, corev1.EnvVar{
				Name:  k,
				Value: v,
			})
		}
	}
}

// _getHost returns the host IP of the (scheduled) ETL pod.
func (b *etlBootstrapper) _getHost() (string, error) {
	client, err := k8s.GetClient()
	if err != nil {
		return "", cmn.NewErrETL(b.errCtx, err.Error())
	}
	p, err := client.Pod(b.pod.Name)
	if err != nil {
		return "", err
	}
	return p.Status.HostIP, nil
}

// _getPort returns the NodePort assigned to the ETL service.
func (b *etlBootstrapper) _getPort() (uint, error) {
	client, err := k8s.GetClient()
	if err != nil {
		return 0, cmn.NewErrETL(b.errCtx, err.Error())
	}

	s, err := client.Service(b.svc.Name)
	if err != nil {
		return 0, cmn.NewErrETL(b.errCtx, err.Error())
	}

	nodePort := int(s.Spec.Ports[0].NodePort)
	port, err := cmn.ValidatePort(nodePort)
	if err != nil {
		return 0, cmn.NewErrETL(b.errCtx, err.Error())
	}
	return uint(port), nil
}
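
// Illustrative call sequence (a sketch, not part of this file's API): the methods above are
// expected to be driven by the ETL start-up code elsewhere in this package. The exact
// orchestration, error handling, and cleanup below are assumptions, inferred only from the
// dependencies between the methods in this file; `errCtx`, `config`, `msg`, `env`, and `xid`
// are hypothetical placeholders supplied by the caller.
//
//	b := &etlBootstrapper{errCtx: errCtx, config: config, msg: msg, env: env}
//	if err := b.createPodSpec(); err != nil { /* handle */ }
//	b.createServiceSpec() // requires the prepared pod spec
//	if err := b.createEntity(k8s.Svc); err != nil { /* handle */ }
//	if err := b.createEntity(k8s.Pod); err != nil { /* handle */ }
//	if err := b.waitPodReady(); err != nil { /* handle */ }
//	if err := b.setupConnection(); err != nil { /* handle */ } // needs host IP and NodePort
//	b.setupXaction(xid)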