github.com/buildkite/agent-stack-k8s@v0.4.0/scheduler/scheduler.go (about)

     1  package scheduler
     2  
     3  import (
     4  	"context"
     5  	"encoding/json"
     6  	"fmt"
     7  	"strconv"
     8  	"strings"
     9  
    10  	"github.com/buildkite/agent-stack-k8s/api"
    11  	"github.com/buildkite/agent-stack-k8s/monitor"
    12  	"github.com/buildkite/agent/v3/clicommand"
    13  	"go.uber.org/zap"
    14  	batchv1 "k8s.io/api/batch/v1"
    15  	corev1 "k8s.io/api/core/v1"
    16  	"k8s.io/apimachinery/pkg/api/errors"
    17  	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    18  	"k8s.io/apimachinery/pkg/labels"
    19  	"k8s.io/apimachinery/pkg/selection"
    20  	"k8s.io/client-go/informers"
    21  	"k8s.io/client-go/kubernetes"
    22  	"k8s.io/utils/pointer"
    23  )
    24  
// agentTokenKey is used both as the name of the agent token environment
// variable set on the job's containers and as the key read from the agent
// token secret. AgentContainerName is the name given to the container that
// runs the agent with the "start" argument.
const (
	agentTokenKey      = "BUILDKITE_AGENT_TOKEN"
	AgentContainerName = "agent"
)
    29  
    30  func New(logger *zap.Logger, client kubernetes.Interface, cfg api.Config) *worker {
    31  	return &worker{
    32  		cfg:    cfg,
    33  		client: client,
    34  		logger: logger.Named("worker"),
    35  	}
    36  }
    37  
    38  // returns an informer factory configured to watch resources (pods, jobs) created by the scheduler
    39  func NewInformerFactory(k8s kubernetes.Interface, tags []string) (informers.SharedInformerFactory, error) {
    40  	hasTag, err := labels.NewRequirement(api.TagLabel, selection.In, api.TagsToLabels(tags))
    41  	if err != nil {
    42  		return nil, fmt.Errorf("failed to build tag label selector for job manager: %w", err)
    43  	}
    44  	hasUUID, err := labels.NewRequirement(api.UUIDLabel, selection.Exists, nil)
    45  	if err != nil {
    46  		return nil, fmt.Errorf("failed to build uuid label selector for job manager: %w", err)
    47  	}
    48  	factory := informers.NewSharedInformerFactoryWithOptions(k8s, 0, informers.WithTweakListOptions(func(opt *metav1.ListOptions) {
    49  		opt.LabelSelector = labels.NewSelector().Add(*hasTag, *hasUUID).String()
    50  	}))
    51  	return factory, nil
    52  }
    53  
// KubernetesPlugin is the configuration decoded by ParsePlugins from the
// "github.com/buildkite-plugins/kubernetes-buildkite-plugin" entry of a job's
// BUILDKITE_PLUGINS value.
//
// PodSpec, when non-nil, is used by Build as the pod spec of the job's pod
// template in place of the default single-container spec. GitEnvFrom is
// appended to the EnvFrom of the pod spec's containers and of the sidecars,
// and is used as the EnvFrom of the checkout container. Sidecars are extra
// containers appended to the pod. Metadata carries the labels and annotations
// applied to both the job and its pod template.
type KubernetesPlugin struct {
	PodSpec    *corev1.PodSpec
	GitEnvFrom []corev1.EnvFromSource
	Sidecars   []corev1.Container `json:"sidecars,omitempty"`
	Metadata   Metadata
}
    60  
// Metadata holds the annotations and labels that a KubernetesPlugin
// contributes to the generated job and its pod template.
type Metadata struct {
	Annotations map[string]string
	Labels      map[string]string
}
    65  
// worker creates Kubernetes jobs for Buildkite jobs.
//
// cfg supplies the namespace jobs are created in and is passed on to the job
// wrapper that builds each job; client is the Kubernetes API client used to
// submit jobs; logger is the base logger, scoped per job inside Create.
type worker struct {
	cfg    api.Config
	client kubernetes.Interface
	logger *zap.Logger
}
    71  
    72  func (w *worker) Create(ctx context.Context, job *monitor.Job) error {
    73  	logger := w.logger.With(zap.String("uuid", job.Uuid))
    74  	logger.Info("creating job")
    75  	jobWrapper := NewJobWrapper(w.logger, job, w.cfg).ParsePlugins()
    76  	kjob, err := jobWrapper.Build()
    77  	if err != nil {
    78  		kjob, err = jobWrapper.BuildFailureJob(err)
    79  		if err != nil {
    80  			return fmt.Errorf("failed to create job: %w", err)
    81  		}
    82  	}
    83  	_, err = w.client.BatchV1().Jobs(w.cfg.Namespace).Create(ctx, kjob, metav1.CreateOptions{})
    84  	if err != nil {
    85  		if errors.IsInvalid(err) {
    86  			kjob, err = jobWrapper.BuildFailureJob(err)
    87  			if err != nil {
    88  				return fmt.Errorf("failed to create job: %w", err)
    89  			}
    90  			_, err = w.client.BatchV1().Jobs(w.cfg.Namespace).Create(ctx, kjob, metav1.CreateOptions{})
    91  			if err != nil {
    92  				return fmt.Errorf("failed to create job: %w", err)
    93  			}
    94  			return nil
    95  		} else {
    96  			return err
    97  		}
    98  	}
    99  	return nil
   100  }
   101  
// jobWrapper accumulates the state needed to turn a monitor.Job into a
// Kubernetes Job.
//
// envMap holds the job's environment split into name/value pairs by
// ParsePlugins. err records a failure encountered while parsing plugins and
// is returned by Build. k8sPlugin is the decoded Kubernetes plugin
// configuration, and otherPlugins are the remaining plugins, which Build
// re-serializes into the BUILDKITE_PLUGINS variable passed to the containers.
type jobWrapper struct {
	logger       *zap.Logger
	job          *monitor.Job
	envMap       map[string]string
	err          error
	k8sPlugin    KubernetesPlugin
	otherPlugins []map[string]json.RawMessage
	cfg          api.Config
}
   111  
   112  func NewJobWrapper(logger *zap.Logger, job *monitor.Job, config api.Config) *jobWrapper {
   113  	return &jobWrapper{
   114  		logger: logger,
   115  		job:    job,
   116  		cfg:    config,
   117  		envMap: make(map[string]string),
   118  	}
   119  }
   120  
   121  func (w *jobWrapper) ParsePlugins() *jobWrapper {
   122  	for _, val := range w.job.Env {
   123  		parts := strings.SplitN(val, "=", 2)
   124  		w.envMap[parts[0]] = parts[1]
   125  	}
   126  	var plugins []map[string]json.RawMessage
   127  	if pluginsJson, ok := w.envMap["BUILDKITE_PLUGINS"]; ok {
   128  		if err := json.Unmarshal([]byte(pluginsJson), &plugins); err != nil {
   129  			w.logger.Debug("invalid plugin spec", zap.String("json", pluginsJson))
   130  			w.err = fmt.Errorf("failed parsing plugins: %w", err)
   131  			return w
   132  		}
   133  	}
   134  	for _, plugin := range plugins {
   135  		if len(plugin) != 1 {
   136  			w.err = fmt.Errorf("found invalid plugin: %v", plugin)
   137  			return w
   138  		}
   139  		if val, ok := plugin["github.com/buildkite-plugins/kubernetes-buildkite-plugin"]; ok {
   140  			if err := json.Unmarshal(val, &w.k8sPlugin); err != nil {
   141  				w.err = fmt.Errorf("failed parsing Kubernetes plugin: %w", err)
   142  				return w
   143  			}
   144  		} else {
   145  			for k, v := range plugin {
   146  				w.otherPlugins = append(w.otherPlugins, map[string]json.RawMessage{k: v})
   147  			}
   148  		}
   149  	}
   150  	return w
   151  }
   152  
   153  func (w *jobWrapper) Build() (*batchv1.Job, error) {
   154  	// if previous steps have failed, error immediately
   155  	if w.err != nil {
   156  		return nil, w.err
   157  	}
   158  
   159  	kjob := &batchv1.Job{}
   160  	kjob.Name = kjobName(w.job)
   161  	if w.k8sPlugin.PodSpec != nil {
   162  		kjob.Spec.Template.Spec = *w.k8sPlugin.PodSpec
   163  	} else {
   164  		kjob.Spec.Template.Spec.Containers = []corev1.Container{
   165  			{
   166  				Image:   w.cfg.Image,
   167  				Command: []string{w.job.Command},
   168  			},
   169  		}
   170  	}
   171  	if w.k8sPlugin.Metadata.Labels == nil {
   172  		w.k8sPlugin.Metadata.Labels = map[string]string{}
   173  		w.k8sPlugin.Metadata.Annotations = map[string]string{}
   174  	}
   175  	w.k8sPlugin.Metadata.Labels[api.UUIDLabel] = w.job.Uuid
   176  	w.k8sPlugin.Metadata.Labels[api.TagLabel] = api.TagToLabel(w.job.Tag)
   177  	w.k8sPlugin.Metadata.Annotations[api.BuildURLAnnotation] = w.envMap["BUILDKITE_BUILD_URL"]
   178  	kjob.Labels = w.k8sPlugin.Metadata.Labels
   179  	kjob.Spec.Template.Labels = w.k8sPlugin.Metadata.Labels
   180  	kjob.Annotations = w.k8sPlugin.Metadata.Annotations
   181  	kjob.Spec.Template.Annotations = w.k8sPlugin.Metadata.Annotations
   182  	kjob.Spec.BackoffLimit = pointer.Int32(0)
   183  	env := []corev1.EnvVar{
   184  		{
   185  			Name:  "BUILDKITE_BUILD_PATH",
   186  			Value: "/workspace/build",
   187  		}, {
   188  			Name:  "BUILDKITE_BIN_PATH",
   189  			Value: "/workspace",
   190  		}, {
   191  			Name: agentTokenKey,
   192  			ValueFrom: &corev1.EnvVarSource{
   193  				SecretKeyRef: &corev1.SecretKeySelector{
   194  					LocalObjectReference: corev1.LocalObjectReference{Name: w.cfg.AgentTokenSecret},
   195  					Key:                  agentTokenKey,
   196  				},
   197  			},
   198  		}, {
   199  			Name:  "BUILDKITE_AGENT_ACQUIRE_JOB",
   200  			Value: w.job.Uuid,
   201  		},
   202  	}
   203  	if w.otherPlugins != nil {
   204  		otherPluginsJson, err := json.Marshal(w.otherPlugins)
   205  		if err != nil {
   206  			return nil, fmt.Errorf("failed to remarshal non-k8s plugins: %w", err)
   207  		}
   208  		env = append(env, corev1.EnvVar{
   209  			Name:  "BUILDKITE_PLUGINS",
   210  			Value: string(otherPluginsJson),
   211  		})
   212  	}
   213  	for k, v := range w.envMap {
   214  		switch k {
   215  		case "BUILDKITE_COMMAND", "BUILDKITE_ARTIFACT_PATHS", "BUILDKITE_PLUGINS": //noop
   216  		default:
   217  			env = append(env, corev1.EnvVar{Name: k, Value: v})
   218  		}
   219  	}
   220  	volumeMounts := []corev1.VolumeMount{{Name: "workspace", MountPath: "/workspace"}}
   221  	const systemContainers = 1
   222  	ttl := int32(w.cfg.JobTTL.Seconds())
   223  	kjob.Spec.TTLSecondsAfterFinished = &ttl
   224  
   225  	podSpec := &kjob.Spec.Template.Spec
   226  	for i, c := range podSpec.Containers {
   227  		command := strings.Join(append(c.Command, c.Args...), " ")
   228  		c.Command = []string{"/workspace/buildkite-agent"}
   229  		c.Args = []string{"bootstrap"}
   230  		c.ImagePullPolicy = corev1.PullAlways
   231  		c.Env = append(c.Env, env...)
   232  		c.Env = append(c.Env, corev1.EnvVar{
   233  			Name:  "BUILDKITE_COMMAND",
   234  			Value: command,
   235  		}, corev1.EnvVar{
   236  			Name:  "BUILDKITE_AGENT_EXPERIMENT",
   237  			Value: "kubernetes-exec",
   238  		}, corev1.EnvVar{
   239  			Name:  "BUILDKITE_BOOTSTRAP_PHASES",
   240  			Value: "plugin,command",
   241  		}, corev1.EnvVar{
   242  			Name:  "BUILDKITE_AGENT_NAME",
   243  			Value: "buildkite",
   244  		}, corev1.EnvVar{
   245  			Name:  "BUILDKITE_CONTAINER_ID",
   246  			Value: strconv.Itoa(i + systemContainers),
   247  		}, corev1.EnvVar{
   248  			Name:  "BUILDKITE_PLUGINS_PATH",
   249  			Value: "/tmp",
   250  		}, corev1.EnvVar{
   251  			Name:  clicommand.RedactedVars.EnvVar,
   252  			Value: strings.Join(clicommand.RedactedVars.Value.Value(), ","),
   253  		})
   254  		if c.Name == "" {
   255  			c.Name = fmt.Sprintf("%s-%d", "container", i)
   256  		}
   257  		if c.WorkingDir == "" {
   258  			c.WorkingDir = "/workspace"
   259  		}
   260  		c.VolumeMounts = append(c.VolumeMounts, volumeMounts...)
   261  		c.EnvFrom = append(c.EnvFrom, w.k8sPlugin.GitEnvFrom...)
   262  		podSpec.Containers[i] = c
   263  	}
   264  
   265  	containerCount := len(podSpec.Containers) + systemContainers
   266  
   267  	for i, c := range w.k8sPlugin.Sidecars {
   268  		if c.Name == "" {
   269  			c.Name = fmt.Sprintf("%s-%d", "sidecar", i)
   270  		}
   271  		c.VolumeMounts = append(c.VolumeMounts, volumeMounts...)
   272  		c.EnvFrom = append(c.EnvFrom, w.k8sPlugin.GitEnvFrom...)
   273  		podSpec.Containers = append(podSpec.Containers, c)
   274  	}
   275  
   276  	if artifactPaths, found := w.envMap["BUILDKITE_ARTIFACT_PATHS"]; found && artifactPaths != "" {
   277  		artifactsContainer := corev1.Container{
   278  			Name:            "upload-artifacts",
   279  			Image:           w.cfg.Image,
   280  			Args:            []string{"bootstrap"},
   281  			WorkingDir:      "/workspace",
   282  			VolumeMounts:    volumeMounts,
   283  			ImagePullPolicy: corev1.PullAlways,
   284  			Env: []corev1.EnvVar{{
   285  				Name:  "BUILDKITE_AGENT_EXPERIMENT",
   286  				Value: "kubernetes-exec",
   287  			}, {
   288  				Name:  "BUILDKITE_BOOTSTRAP_PHASES",
   289  				Value: "command",
   290  			}, {
   291  				Name:  "BUILDKITE_COMMAND",
   292  				Value: "true",
   293  			}, {
   294  				Name:  "BUILDKITE_AGENT_NAME",
   295  				Value: "buildkite",
   296  			}, {
   297  				Name:  "BUILDKITE_CONTAINER_ID",
   298  				Value: strconv.Itoa(containerCount),
   299  			}, {
   300  				Name:  "BUILDKITE_ARTIFACT_PATHS",
   301  				Value: artifactPaths,
   302  			}},
   303  		}
   304  		artifactsContainer.Env = append(artifactsContainer.Env, env...)
   305  		containerCount++
   306  		podSpec.Containers = append(podSpec.Containers, artifactsContainer)
   307  	}
   308  	// agent server container
   309  	agentContainer := corev1.Container{
   310  		Name:            AgentContainerName,
   311  		Args:            []string{"start"},
   312  		Image:           w.cfg.Image,
   313  		WorkingDir:      "/workspace",
   314  		VolumeMounts:    volumeMounts,
   315  		ImagePullPolicy: corev1.PullAlways,
   316  		Env: []corev1.EnvVar{
   317  			{
   318  				Name:  "BUILDKITE_AGENT_EXPERIMENT",
   319  				Value: "kubernetes-exec",
   320  			}, {
   321  				Name:  "BUILDKITE_CONTAINER_COUNT",
   322  				Value: strconv.Itoa(containerCount),
   323  			},
   324  		},
   325  	}
   326  	agentContainer.Env = append(agentContainer.Env, env...)
   327  	// system client container(s)
   328  	checkoutContainer := corev1.Container{
   329  		Name:            "checkout",
   330  		Image:           w.cfg.Image,
   331  		Args:            []string{"bootstrap"},
   332  		WorkingDir:      "/workspace",
   333  		VolumeMounts:    volumeMounts,
   334  		ImagePullPolicy: corev1.PullAlways,
   335  		Env: []corev1.EnvVar{{
   336  			Name:  "BUILDKITE_AGENT_EXPERIMENT",
   337  			Value: "kubernetes-exec",
   338  		}, {
   339  			Name:  "BUILDKITE_BOOTSTRAP_PHASES",
   340  			Value: "checkout,command",
   341  		}, {
   342  			Name:  "BUILDKITE_AGENT_NAME",
   343  			Value: "buildkite",
   344  		}, {
   345  			Name:  "BUILDKITE_CONTAINER_ID",
   346  			Value: "0",
   347  		}, {
   348  			Name:  "BUILDKITE_COMMAND",
   349  			Value: "cp -r ~/.ssh /workspace/.ssh && chmod -R 777 /workspace",
   350  		}},
   351  		EnvFrom: w.k8sPlugin.GitEnvFrom,
   352  	}
   353  	checkoutContainer.Env = append(checkoutContainer.Env, env...)
   354  	podSpec.Containers = append(podSpec.Containers, agentContainer, checkoutContainer)
   355  	podSpec.InitContainers = append(podSpec.InitContainers, corev1.Container{
   356  		Name:            "copy-agent",
   357  		Image:           w.cfg.Image,
   358  		ImagePullPolicy: corev1.PullAlways,
   359  		Command:         []string{"cp"},
   360  		Args:            []string{"/usr/local/bin/buildkite-agent", "/usr/local/bin/ssh-env-config.sh", "/workspace"},
   361  		VolumeMounts: []corev1.VolumeMount{
   362  			{
   363  				Name:      "workspace",
   364  				MountPath: "/workspace",
   365  			},
   366  		},
   367  	})
   368  	podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{
   369  		Name: "workspace",
   370  		VolumeSource: corev1.VolumeSource{
   371  			EmptyDir: &corev1.EmptyDirVolumeSource{},
   372  		},
   373  	})
   374  	podSpec.RestartPolicy = corev1.RestartPolicyNever
   375  	return kjob, nil
   376  }
   377  
   378  func (w *jobWrapper) BuildFailureJob(err error) (*batchv1.Job, error) {
   379  	w.err = nil
   380  	w.k8sPlugin = KubernetesPlugin{
   381  		PodSpec: &corev1.PodSpec{
   382  			Containers: []corev1.Container{
   383  				{
   384  					Image:   w.cfg.Image,
   385  					Command: []string{fmt.Sprintf("echo %q && exit 1", err.Error())},
   386  				},
   387  			},
   388  		},
   389  	}
   390  	w.otherPlugins = nil
   391  	return w.Build()
   392  }
   393  
   394  func kjobName(job *monitor.Job) string {
   395  	return fmt.Sprintf("buildkite-%s", job.Uuid)
   396  }