github.com/buildkite/agent-stack-k8s@v0.4.0/scheduler/scheduler.go (about) 1 package scheduler 2 3 import ( 4 "context" 5 "encoding/json" 6 "fmt" 7 "strconv" 8 "strings" 9 10 "github.com/buildkite/agent-stack-k8s/api" 11 "github.com/buildkite/agent-stack-k8s/monitor" 12 "github.com/buildkite/agent/v3/clicommand" 13 "go.uber.org/zap" 14 batchv1 "k8s.io/api/batch/v1" 15 corev1 "k8s.io/api/core/v1" 16 "k8s.io/apimachinery/pkg/api/errors" 17 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 18 "k8s.io/apimachinery/pkg/labels" 19 "k8s.io/apimachinery/pkg/selection" 20 "k8s.io/client-go/informers" 21 "k8s.io/client-go/kubernetes" 22 "k8s.io/utils/pointer" 23 ) 24 25 const ( 26 agentTokenKey = "BUILDKITE_AGENT_TOKEN" 27 AgentContainerName = "agent" 28 ) 29 30 func New(logger *zap.Logger, client kubernetes.Interface, cfg api.Config) *worker { 31 return &worker{ 32 cfg: cfg, 33 client: client, 34 logger: logger.Named("worker"), 35 } 36 } 37 38 // returns an informer factory configured to watch resources (pods, jobs) created by the scheduler 39 func NewInformerFactory(k8s kubernetes.Interface, tags []string) (informers.SharedInformerFactory, error) { 40 hasTag, err := labels.NewRequirement(api.TagLabel, selection.In, api.TagsToLabels(tags)) 41 if err != nil { 42 return nil, fmt.Errorf("failed to build tag label selector for job manager: %w", err) 43 } 44 hasUUID, err := labels.NewRequirement(api.UUIDLabel, selection.Exists, nil) 45 if err != nil { 46 return nil, fmt.Errorf("failed to build uuid label selector for job manager: %w", err) 47 } 48 factory := informers.NewSharedInformerFactoryWithOptions(k8s, 0, informers.WithTweakListOptions(func(opt *metav1.ListOptions) { 49 opt.LabelSelector = labels.NewSelector().Add(*hasTag, *hasUUID).String() 50 })) 51 return factory, nil 52 } 53 54 type KubernetesPlugin struct { 55 PodSpec *corev1.PodSpec 56 GitEnvFrom []corev1.EnvFromSource 57 Sidecars []corev1.Container `json:"sidecars,omitempty"` 58 Metadata Metadata 59 } 60 61 type Metadata struct { 62 Annotations map[string]string 63 Labels map[string]string 64 } 65 66 type worker struct { 67 cfg api.Config 68 client kubernetes.Interface 69 logger *zap.Logger 70 } 71 72 func (w *worker) Create(ctx context.Context, job *monitor.Job) error { 73 logger := w.logger.With(zap.String("uuid", job.Uuid)) 74 logger.Info("creating job") 75 jobWrapper := NewJobWrapper(w.logger, job, w.cfg).ParsePlugins() 76 kjob, err := jobWrapper.Build() 77 if err != nil { 78 kjob, err = jobWrapper.BuildFailureJob(err) 79 if err != nil { 80 return fmt.Errorf("failed to create job: %w", err) 81 } 82 } 83 _, err = w.client.BatchV1().Jobs(w.cfg.Namespace).Create(ctx, kjob, metav1.CreateOptions{}) 84 if err != nil { 85 if errors.IsInvalid(err) { 86 kjob, err = jobWrapper.BuildFailureJob(err) 87 if err != nil { 88 return fmt.Errorf("failed to create job: %w", err) 89 } 90 _, err = w.client.BatchV1().Jobs(w.cfg.Namespace).Create(ctx, kjob, metav1.CreateOptions{}) 91 if err != nil { 92 return fmt.Errorf("failed to create job: %w", err) 93 } 94 return nil 95 } else { 96 return err 97 } 98 } 99 return nil 100 } 101 102 type jobWrapper struct { 103 logger *zap.Logger 104 job *monitor.Job 105 envMap map[string]string 106 err error 107 k8sPlugin KubernetesPlugin 108 otherPlugins []map[string]json.RawMessage 109 cfg api.Config 110 } 111 112 func NewJobWrapper(logger *zap.Logger, job *monitor.Job, config api.Config) *jobWrapper { 113 return &jobWrapper{ 114 logger: logger, 115 job: job, 116 cfg: config, 117 envMap: make(map[string]string), 118 } 119 } 120 121 func (w *jobWrapper) ParsePlugins() *jobWrapper { 122 for _, val := range w.job.Env { 123 parts := strings.SplitN(val, "=", 2) 124 w.envMap[parts[0]] = parts[1] 125 } 126 var plugins []map[string]json.RawMessage 127 if pluginsJson, ok := w.envMap["BUILDKITE_PLUGINS"]; ok { 128 if err := json.Unmarshal([]byte(pluginsJson), &plugins); err != nil { 129 w.logger.Debug("invalid plugin spec", zap.String("json", pluginsJson)) 130 w.err = fmt.Errorf("failed parsing plugins: %w", err) 131 return w 132 } 133 } 134 for _, plugin := range plugins { 135 if len(plugin) != 1 { 136 w.err = fmt.Errorf("found invalid plugin: %v", plugin) 137 return w 138 } 139 if val, ok := plugin["github.com/buildkite-plugins/kubernetes-buildkite-plugin"]; ok { 140 if err := json.Unmarshal(val, &w.k8sPlugin); err != nil { 141 w.err = fmt.Errorf("failed parsing Kubernetes plugin: %w", err) 142 return w 143 } 144 } else { 145 for k, v := range plugin { 146 w.otherPlugins = append(w.otherPlugins, map[string]json.RawMessage{k: v}) 147 } 148 } 149 } 150 return w 151 } 152 153 func (w *jobWrapper) Build() (*batchv1.Job, error) { 154 // if previous steps have failed, error immediately 155 if w.err != nil { 156 return nil, w.err 157 } 158 159 kjob := &batchv1.Job{} 160 kjob.Name = kjobName(w.job) 161 if w.k8sPlugin.PodSpec != nil { 162 kjob.Spec.Template.Spec = *w.k8sPlugin.PodSpec 163 } else { 164 kjob.Spec.Template.Spec.Containers = []corev1.Container{ 165 { 166 Image: w.cfg.Image, 167 Command: []string{w.job.Command}, 168 }, 169 } 170 } 171 if w.k8sPlugin.Metadata.Labels == nil { 172 w.k8sPlugin.Metadata.Labels = map[string]string{} 173 w.k8sPlugin.Metadata.Annotations = map[string]string{} 174 } 175 w.k8sPlugin.Metadata.Labels[api.UUIDLabel] = w.job.Uuid 176 w.k8sPlugin.Metadata.Labels[api.TagLabel] = api.TagToLabel(w.job.Tag) 177 w.k8sPlugin.Metadata.Annotations[api.BuildURLAnnotation] = w.envMap["BUILDKITE_BUILD_URL"] 178 kjob.Labels = w.k8sPlugin.Metadata.Labels 179 kjob.Spec.Template.Labels = w.k8sPlugin.Metadata.Labels 180 kjob.Annotations = w.k8sPlugin.Metadata.Annotations 181 kjob.Spec.Template.Annotations = w.k8sPlugin.Metadata.Annotations 182 kjob.Spec.BackoffLimit = pointer.Int32(0) 183 env := []corev1.EnvVar{ 184 { 185 Name: "BUILDKITE_BUILD_PATH", 186 Value: "/workspace/build", 187 }, { 188 Name: "BUILDKITE_BIN_PATH", 189 Value: "/workspace", 190 }, { 191 Name: agentTokenKey, 192 ValueFrom: &corev1.EnvVarSource{ 193 SecretKeyRef: &corev1.SecretKeySelector{ 194 LocalObjectReference: corev1.LocalObjectReference{Name: w.cfg.AgentTokenSecret}, 195 Key: agentTokenKey, 196 }, 197 }, 198 }, { 199 Name: "BUILDKITE_AGENT_ACQUIRE_JOB", 200 Value: w.job.Uuid, 201 }, 202 } 203 if w.otherPlugins != nil { 204 otherPluginsJson, err := json.Marshal(w.otherPlugins) 205 if err != nil { 206 return nil, fmt.Errorf("failed to remarshal non-k8s plugins: %w", err) 207 } 208 env = append(env, corev1.EnvVar{ 209 Name: "BUILDKITE_PLUGINS", 210 Value: string(otherPluginsJson), 211 }) 212 } 213 for k, v := range w.envMap { 214 switch k { 215 case "BUILDKITE_COMMAND", "BUILDKITE_ARTIFACT_PATHS", "BUILDKITE_PLUGINS": //noop 216 default: 217 env = append(env, corev1.EnvVar{Name: k, Value: v}) 218 } 219 } 220 volumeMounts := []corev1.VolumeMount{{Name: "workspace", MountPath: "/workspace"}} 221 const systemContainers = 1 222 ttl := int32(w.cfg.JobTTL.Seconds()) 223 kjob.Spec.TTLSecondsAfterFinished = &ttl 224 225 podSpec := &kjob.Spec.Template.Spec 226 for i, c := range podSpec.Containers { 227 command := strings.Join(append(c.Command, c.Args...), " ") 228 c.Command = []string{"/workspace/buildkite-agent"} 229 c.Args = []string{"bootstrap"} 230 c.ImagePullPolicy = corev1.PullAlways 231 c.Env = append(c.Env, env...) 232 c.Env = append(c.Env, corev1.EnvVar{ 233 Name: "BUILDKITE_COMMAND", 234 Value: command, 235 }, corev1.EnvVar{ 236 Name: "BUILDKITE_AGENT_EXPERIMENT", 237 Value: "kubernetes-exec", 238 }, corev1.EnvVar{ 239 Name: "BUILDKITE_BOOTSTRAP_PHASES", 240 Value: "plugin,command", 241 }, corev1.EnvVar{ 242 Name: "BUILDKITE_AGENT_NAME", 243 Value: "buildkite", 244 }, corev1.EnvVar{ 245 Name: "BUILDKITE_CONTAINER_ID", 246 Value: strconv.Itoa(i + systemContainers), 247 }, corev1.EnvVar{ 248 Name: "BUILDKITE_PLUGINS_PATH", 249 Value: "/tmp", 250 }, corev1.EnvVar{ 251 Name: clicommand.RedactedVars.EnvVar, 252 Value: strings.Join(clicommand.RedactedVars.Value.Value(), ","), 253 }) 254 if c.Name == "" { 255 c.Name = fmt.Sprintf("%s-%d", "container", i) 256 } 257 if c.WorkingDir == "" { 258 c.WorkingDir = "/workspace" 259 } 260 c.VolumeMounts = append(c.VolumeMounts, volumeMounts...) 261 c.EnvFrom = append(c.EnvFrom, w.k8sPlugin.GitEnvFrom...) 262 podSpec.Containers[i] = c 263 } 264 265 containerCount := len(podSpec.Containers) + systemContainers 266 267 for i, c := range w.k8sPlugin.Sidecars { 268 if c.Name == "" { 269 c.Name = fmt.Sprintf("%s-%d", "sidecar", i) 270 } 271 c.VolumeMounts = append(c.VolumeMounts, volumeMounts...) 272 c.EnvFrom = append(c.EnvFrom, w.k8sPlugin.GitEnvFrom...) 273 podSpec.Containers = append(podSpec.Containers, c) 274 } 275 276 if artifactPaths, found := w.envMap["BUILDKITE_ARTIFACT_PATHS"]; found && artifactPaths != "" { 277 artifactsContainer := corev1.Container{ 278 Name: "upload-artifacts", 279 Image: w.cfg.Image, 280 Args: []string{"bootstrap"}, 281 WorkingDir: "/workspace", 282 VolumeMounts: volumeMounts, 283 ImagePullPolicy: corev1.PullAlways, 284 Env: []corev1.EnvVar{{ 285 Name: "BUILDKITE_AGENT_EXPERIMENT", 286 Value: "kubernetes-exec", 287 }, { 288 Name: "BUILDKITE_BOOTSTRAP_PHASES", 289 Value: "command", 290 }, { 291 Name: "BUILDKITE_COMMAND", 292 Value: "true", 293 }, { 294 Name: "BUILDKITE_AGENT_NAME", 295 Value: "buildkite", 296 }, { 297 Name: "BUILDKITE_CONTAINER_ID", 298 Value: strconv.Itoa(containerCount), 299 }, { 300 Name: "BUILDKITE_ARTIFACT_PATHS", 301 Value: artifactPaths, 302 }}, 303 } 304 artifactsContainer.Env = append(artifactsContainer.Env, env...) 305 containerCount++ 306 podSpec.Containers = append(podSpec.Containers, artifactsContainer) 307 } 308 // agent server container 309 agentContainer := corev1.Container{ 310 Name: AgentContainerName, 311 Args: []string{"start"}, 312 Image: w.cfg.Image, 313 WorkingDir: "/workspace", 314 VolumeMounts: volumeMounts, 315 ImagePullPolicy: corev1.PullAlways, 316 Env: []corev1.EnvVar{ 317 { 318 Name: "BUILDKITE_AGENT_EXPERIMENT", 319 Value: "kubernetes-exec", 320 }, { 321 Name: "BUILDKITE_CONTAINER_COUNT", 322 Value: strconv.Itoa(containerCount), 323 }, 324 }, 325 } 326 agentContainer.Env = append(agentContainer.Env, env...) 327 // system client container(s) 328 checkoutContainer := corev1.Container{ 329 Name: "checkout", 330 Image: w.cfg.Image, 331 Args: []string{"bootstrap"}, 332 WorkingDir: "/workspace", 333 VolumeMounts: volumeMounts, 334 ImagePullPolicy: corev1.PullAlways, 335 Env: []corev1.EnvVar{{ 336 Name: "BUILDKITE_AGENT_EXPERIMENT", 337 Value: "kubernetes-exec", 338 }, { 339 Name: "BUILDKITE_BOOTSTRAP_PHASES", 340 Value: "checkout,command", 341 }, { 342 Name: "BUILDKITE_AGENT_NAME", 343 Value: "buildkite", 344 }, { 345 Name: "BUILDKITE_CONTAINER_ID", 346 Value: "0", 347 }, { 348 Name: "BUILDKITE_COMMAND", 349 Value: "cp -r ~/.ssh /workspace/.ssh && chmod -R 777 /workspace", 350 }}, 351 EnvFrom: w.k8sPlugin.GitEnvFrom, 352 } 353 checkoutContainer.Env = append(checkoutContainer.Env, env...) 354 podSpec.Containers = append(podSpec.Containers, agentContainer, checkoutContainer) 355 podSpec.InitContainers = append(podSpec.InitContainers, corev1.Container{ 356 Name: "copy-agent", 357 Image: w.cfg.Image, 358 ImagePullPolicy: corev1.PullAlways, 359 Command: []string{"cp"}, 360 Args: []string{"/usr/local/bin/buildkite-agent", "/usr/local/bin/ssh-env-config.sh", "/workspace"}, 361 VolumeMounts: []corev1.VolumeMount{ 362 { 363 Name: "workspace", 364 MountPath: "/workspace", 365 }, 366 }, 367 }) 368 podSpec.Volumes = append(podSpec.Volumes, corev1.Volume{ 369 Name: "workspace", 370 VolumeSource: corev1.VolumeSource{ 371 EmptyDir: &corev1.EmptyDirVolumeSource{}, 372 }, 373 }) 374 podSpec.RestartPolicy = corev1.RestartPolicyNever 375 return kjob, nil 376 } 377 378 func (w *jobWrapper) BuildFailureJob(err error) (*batchv1.Job, error) { 379 w.err = nil 380 w.k8sPlugin = KubernetesPlugin{ 381 PodSpec: &corev1.PodSpec{ 382 Containers: []corev1.Container{ 383 { 384 Image: w.cfg.Image, 385 Command: []string{fmt.Sprintf("echo %q && exit 1", err.Error())}, 386 }, 387 }, 388 }, 389 } 390 w.otherPlugins = nil 391 return w.Build() 392 } 393 394 func kjobName(job *monitor.Job) string { 395 return fmt.Sprintf("buildkite-%s", job.Uuid) 396 }