github.com/buildkite/agent-stack-k8s@v0.4.0/integration/integration_test.go (about) 1 package integration 2 3 import ( 4 "bytes" 5 "context" 6 "embed" 7 "fmt" 8 "log" 9 "net/http" 10 "os" 11 "strconv" 12 "strings" 13 "sync" 14 "testing" 15 "text/template" 16 "time" 17 18 "github.com/Khan/genqlient/graphql" 19 "github.com/buildkite/agent-stack-k8s/api" 20 "github.com/buildkite/agent-stack-k8s/cmd/controller" 21 "github.com/buildkite/go-buildkite/v3/buildkite" 22 "github.com/buildkite/roko" 23 "github.com/stretchr/testify/assert" 24 "github.com/stretchr/testify/require" 25 "go.uber.org/zap" 26 "go.uber.org/zap/zaptest" 27 v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 28 "k8s.io/apimachinery/pkg/labels" 29 "k8s.io/apimachinery/pkg/selection" 30 "k8s.io/client-go/kubernetes" 31 restconfig "sigs.k8s.io/controller-runtime/pkg/client/config" 32 ) 33 34 const ( 35 repoHTTP = "https://github.com/buildkite/agent-stack-k8s" 36 repoSSH = "git@github.com:buildkite/agent-stack-k8s" 37 branch = "v2" 38 ) 39 40 var ( 41 preservePipelines bool 42 deleteOrphanedPipelines bool 43 cfg api.Config 44 45 //go:embed fixtures/* 46 fixtures embed.FS 47 ) 48 49 // hacks to make --config work 50 func TestMain(m *testing.M) { 51 if err := os.Chdir(".."); err != nil { 52 log.Fatal(err) 53 } 54 cmd := controller.New() 55 cmd.Flags().BoolVar(&preservePipelines, "preserve-pipelines", false, "preserve pipelines created by tests") 56 cmd.Flags().BoolVar(&deleteOrphanedPipelines, "delete-orphaned-pipelines", false, "delete all pipelines matching agent-k8s-*") 57 var err error 58 cfg, err = controller.ParseConfig(cmd, os.Args[1:]) 59 if err != nil { 60 log.Fatal(err) 61 } 62 if err := os.Chdir("integration"); err != nil { 63 log.Fatal(err) 64 } 65 for i, v := range os.Args { 66 if strings.Contains(v, "test") { 67 os.Args[i] = v 68 } else { 69 os.Args[i] = "" 70 } 71 } 72 os.Exit(m.Run()) 73 } 74 75 func TestWalkingSkeleton(t *testing.T) { 76 tc := testcase{ 77 T: t, 78 Fixture: "helloworld.yaml", 79 Repo: repoHTTP, 80 GraphQL: api.NewClient(cfg.BuildkiteToken), 81 }.Init() 82 ctx := context.Background() 83 pipelineID := tc.CreatePipeline(ctx) 84 tc.StartController(ctx, cfg) 85 build := tc.TriggerBuild(ctx, pipelineID) 86 tc.AssertSuccess(ctx, build) 87 tc.AssertLogsContain(build, "Buildkite Agent Stack for Kubernetes") 88 tc.AssertArtifactsContain(build, "README.md", "CODE_OF_CONDUCT.md") 89 tc.AssertMetadata(ctx, map[string]string{"some-annotation": "cool"}, map[string]string{"some-label": "wow"}) 90 } 91 92 func TestSSHRepoClone(t *testing.T) { 93 tc := testcase{ 94 T: t, 95 Fixture: "secretref.yaml", 96 Repo: repoSSH, 97 GraphQL: api.NewClient(cfg.BuildkiteToken), 98 }.Init() 99 100 ctx := context.Background() 101 _, err := tc.Kubernetes.CoreV1().Secrets(cfg.Namespace).Get(ctx, "agent-stack-k8s", v1.GetOptions{}) 102 require.NoError(t, err, "agent-stack-k8s secret must exist") 103 104 pipelineID := tc.CreatePipeline(ctx) 105 tc.StartController(ctx, cfg) 106 build := tc.TriggerBuild(ctx, pipelineID) 107 tc.AssertSuccess(ctx, build) 108 } 109 110 func TestPluginCloneFailsTests(t *testing.T) { 111 tc := testcase{ 112 T: t, 113 Fixture: "unknown-plugin.yaml", 114 Repo: repoHTTP, 115 GraphQL: api.NewClient(cfg.BuildkiteToken), 116 }.Init() 117 118 ctx := context.Background() 119 120 pipelineID := tc.CreatePipeline(ctx) 121 tc.StartController(ctx, cfg) 122 build := tc.TriggerBuild(ctx, pipelineID) 123 tc.AssertFail(ctx, build) 124 } 125 126 func TestMaxInFlightLimited(t *testing.T) { 127 tc := testcase{ 128 T: t, 129 Fixture: "parallel.yaml", 130 Repo: repoHTTP, 131 GraphQL: api.NewClient(cfg.BuildkiteToken), 132 }.Init() 133 134 ctx := context.Background() 135 136 pipelineID := tc.CreatePipeline(ctx) 137 cfg := cfg 138 cfg.MaxInFlight = 1 139 tc.StartController(ctx, cfg) 140 buildID := tc.TriggerBuild(ctx, pipelineID).Number 141 142 for { 143 build, _, err := tc.Buildkite.Builds.Get(cfg.Org, tc.PipelineName, fmt.Sprintf("%d", buildID), nil) 144 require.NoError(t, err) 145 if *build.State == "running" { 146 require.LessOrEqual(t, *build.Pipeline.RunningJobsCount, cfg.MaxInFlight) 147 } else if *build.State == "passed" { 148 break 149 } else if *build.State == "scheduled" { 150 t.Log("waiting for build to start") 151 time.Sleep(time.Second) 152 continue 153 } else { 154 t.Fatalf("unexpected build state: %v", *build.State) 155 } 156 } 157 } 158 159 func TestMaxInFlightUnlimited(t *testing.T) { 160 tc := testcase{ 161 T: t, 162 Fixture: "parallel.yaml", 163 Repo: repoHTTP, 164 GraphQL: api.NewClient(cfg.BuildkiteToken), 165 }.Init() 166 167 ctx := context.Background() 168 169 pipelineID := tc.CreatePipeline(ctx) 170 cfg := cfg 171 cfg.MaxInFlight = 0 172 tc.StartController(ctx, cfg) 173 buildID := tc.TriggerBuild(ctx, pipelineID).Number 174 175 var maxRunningJobs int 176 for { 177 build, _, err := tc.Buildkite.Builds.Get(cfg.Org, tc.PipelineName, fmt.Sprintf("%d", buildID), nil) 178 require.NoError(t, err) 179 if *build.State == "running" { 180 var runningJobs int 181 for _, job := range build.Jobs { 182 if *job.State == "running" { 183 runningJobs++ 184 } 185 } 186 t.Logf("running, runningJobs: %d", runningJobs) 187 maxRunningJobs = maxOf(maxRunningJobs, runningJobs) 188 } else if *build.State == "passed" { 189 require.Equal(t, 4, maxRunningJobs) // all jobs should have run at once 190 break 191 } else if *build.State == "scheduled" { 192 t.Log("waiting for build to start") 193 } else { 194 t.Fatalf("unexpected build state: %v", *build.State) 195 } 196 } 197 } 198 199 func TestSidecars(t *testing.T) { 200 tc := testcase{ 201 T: t, 202 Fixture: "sidecars.yaml", 203 Repo: repoHTTP, 204 GraphQL: api.NewClient(cfg.BuildkiteToken), 205 }.Init() 206 ctx := context.Background() 207 pipelineID := tc.CreatePipeline(ctx) 208 tc.StartController(ctx, cfg) 209 build := tc.TriggerBuild(ctx, pipelineID) 210 tc.AssertSuccess(ctx, build) 211 tc.AssertLogsContain(build, "Welcome to nginx!") 212 } 213 214 func TestInvalidPodSpec(t *testing.T) { 215 tc := testcase{ 216 T: t, 217 Fixture: "invalid.yaml", 218 Repo: repoHTTP, 219 GraphQL: api.NewClient(cfg.BuildkiteToken), 220 }.Init() 221 ctx := context.Background() 222 pipelineID := tc.CreatePipeline(ctx) 223 tc.StartController(ctx, cfg) 224 build := tc.TriggerBuild(ctx, pipelineID) 225 tc.AssertFail(ctx, build) 226 tc.AssertLogsContain(build, `is invalid: spec.template.spec.containers[0].volumeMounts[0].name: Not found: "this-doesnt-exist"`) 227 } 228 229 func TestInvalidPodJSON(t *testing.T) { 230 tc := testcase{ 231 T: t, 232 Fixture: "invalid2.yaml", 233 Repo: repoHTTP, 234 GraphQL: api.NewClient(cfg.BuildkiteToken), 235 }.Init() 236 ctx := context.Background() 237 pipelineID := tc.CreatePipeline(ctx) 238 tc.StartController(ctx, cfg) 239 build := tc.TriggerBuild(ctx, pipelineID) 240 tc.AssertFail(ctx, build) 241 tc.AssertLogsContain(build, `failed parsing Kubernetes plugin: json: cannot unmarshal number into Go struct field EnvVar.PodSpec.containers.env.value of type string`) 242 } 243 244 func maxOf(x, y int) int { 245 if x < y { 246 return y 247 } 248 return x 249 } 250 251 func TestCleanupOrphanedPipelines(t *testing.T) { 252 if !deleteOrphanedPipelines { 253 t.Skip("not cleaning orphaned pipelines") 254 } 255 ctx := context.Background() 256 graphqlClient := api.NewClient(cfg.BuildkiteToken) 257 258 pipelines, err := api.SearchPipelines(ctx, graphqlClient, cfg.Org, "agent-k8s-", 100) 259 require.NoError(t, err) 260 var wg sync.WaitGroup 261 wg.Add(len(pipelines.Organization.Pipelines.Edges)) 262 for _, pipeline := range pipelines.Organization.Pipelines.Edges { 263 pipeline := pipeline // prevent loop variable capture 264 t.Run(pipeline.Node.Name, func(t *testing.T) { 265 builds, err := api.GetBuilds(ctx, graphqlClient, fmt.Sprintf("%s/%s", cfg.Org, pipeline.Node.Name), []api.BuildStates{api.BuildStatesRunning}, 100) 266 require.NoError(t, err) 267 for _, build := range builds.Pipeline.Builds.Edges { 268 _, err = api.BuildCancel(ctx, graphqlClient, api.BuildCancelInput{Id: build.Node.Id}) 269 assert.NoError(t, err) 270 } 271 tc := testcase{ 272 T: t, 273 GraphQL: api.NewClient(cfg.BuildkiteToken), 274 }.Init() 275 tc.PipelineName = pipeline.Node.Name 276 tc.deletePipeline(context.Background()) 277 }) 278 } 279 } 280 281 type testcase struct { 282 *testing.T 283 Logger *zap.Logger 284 Fixture string 285 Repo string 286 GraphQL graphql.Client 287 Kubernetes kubernetes.Interface 288 Buildkite *buildkite.Client 289 PipelineName string 290 } 291 292 func (t testcase) Init() testcase { 293 t.Helper() 294 t.Parallel() 295 296 t.PipelineName = fmt.Sprintf("agent-k8s-%s-%d", strings.ToLower(t.Name()), time.Now().UnixNano()) 297 t.Logger = zaptest.NewLogger(t).Named(t.Name()) 298 299 clientConfig, err := restconfig.GetConfig() 300 require.NoError(t, err) 301 clientset, err := kubernetes.NewForConfig(clientConfig) 302 require.NoError(t, err) 303 t.Kubernetes = clientset 304 config, err := buildkite.NewTokenConfig(cfg.BuildkiteToken, false) 305 require.NoError(t, err) 306 307 t.Buildkite = buildkite.NewClient(config.Client()) 308 309 return t 310 } 311 312 func (t testcase) CreatePipeline(ctx context.Context) string { 313 t.Helper() 314 315 tpl, err := template.ParseFS(fixtures, fmt.Sprintf("fixtures/%s", t.Fixture)) 316 require.NoError(t, err) 317 318 var steps bytes.Buffer 319 require.NoError(t, tpl.Execute(&steps, map[string]string{ 320 "queue": t.PipelineName, 321 })) 322 pipeline, _, err := t.Buildkite.Pipelines.Create(cfg.Org, &buildkite.CreatePipeline{ 323 Name: t.PipelineName, 324 Repository: t.Repo, 325 ProviderSettings: &buildkite.GitHubSettings{ 326 TriggerMode: strPtr("none"), 327 }, 328 Configuration: steps.String(), 329 }) 330 require.NoError(t, err) 331 332 if !preservePipelines { 333 t.deletePipeline(ctx) 334 } 335 336 return *pipeline.GraphQLID 337 } 338 339 func (t testcase) StartController(ctx context.Context, cfg api.Config) { 340 t.Helper() 341 342 runCtx, cancel := context.WithCancel(ctx) 343 EnsureCleanup(t.T, cancel) 344 345 cfg.Tags = []string{fmt.Sprintf("queue=%s", t.PipelineName)} 346 cfg.Debug = true 347 go controller.Run(runCtx, t.Kubernetes, cfg) 348 } 349 350 func (t testcase) TriggerBuild(ctx context.Context, pipelineID string) api.Build { 351 t.Helper() 352 353 // trigger build 354 createBuild, err := api.BuildCreate(ctx, t.GraphQL, api.BuildCreateInput{ 355 PipelineID: pipelineID, 356 Commit: "HEAD", 357 Branch: branch, 358 }) 359 require.NoError(t, err) 360 EnsureCleanup(t.T, func() { 361 if _, err := api.BuildCancel(ctx, t.GraphQL, api.BuildCancelInput{ 362 Id: createBuild.BuildCreate.Build.Id, 363 }); err != nil { 364 if ignorableError(err) { 365 return 366 } 367 t.Logf("failed to cancel build: %v", err) 368 } 369 }) 370 build := createBuild.BuildCreate.Build 371 require.GreaterOrEqual(t, len(build.Jobs.Edges), 1) 372 node := build.Jobs.Edges[0].Node 373 _, ok := node.(*api.JobJobTypeCommand) 374 require.True(t, ok) 375 376 return build.Build 377 } 378 379 func (t testcase) AssertSuccess(ctx context.Context, build api.Build) { 380 t.Helper() 381 require.Equal(t, api.BuildStatesPassed, t.waitForBuild(ctx, build)) 382 } 383 384 func (t testcase) AssertLogsContain(build api.Build, content string) { 385 t.Helper() 386 config, err := buildkite.NewTokenConfig(cfg.BuildkiteToken, false) 387 require.NoError(t, err) 388 389 client := buildkite.NewClient(config.Client()) 390 job := build.Jobs.Edges[0].Node.(*api.JobJobTypeCommand) 391 logs, _, err := client.Jobs.GetJobLog(cfg.Org, t.PipelineName, strconv.Itoa(build.Number), job.Uuid) 392 require.NoError(t, err) 393 require.NotNil(t, logs.Content) 394 require.Contains(t, *logs.Content, content) 395 396 } 397 func (t testcase) AssertArtifactsContain(build api.Build, expected ...string) { 398 t.Helper() 399 config, err := buildkite.NewTokenConfig(cfg.BuildkiteToken, false) 400 require.NoError(t, err) 401 client := buildkite.NewClient(config.Client()) 402 403 artifacts, _, err := client.Artifacts.ListByBuild(cfg.Org, t.PipelineName, strconv.Itoa(build.Number), nil) 404 require.NoError(t, err) 405 require.Len(t, artifacts, 2) 406 filenames := []string{*artifacts[0].Filename, *artifacts[1].Filename} 407 for _, filename := range expected { 408 require.Contains(t, filenames, filename) 409 } 410 } 411 412 func (t testcase) AssertFail(ctx context.Context, build api.Build) { 413 t.Helper() 414 415 require.Equal(t, api.BuildStatesFailed, t.waitForBuild(ctx, build)) 416 } 417 418 func (t testcase) waitForBuild(ctx context.Context, build api.Build) api.BuildStates { 419 t.Helper() 420 421 for { 422 getBuild, err := api.GetBuild(ctx, t.GraphQL, build.Uuid) 423 require.NoError(t, err) 424 switch getBuild.Build.State { 425 case api.BuildStatesPassed, api.BuildStatesFailed, api.BuildStatesCanceled, api.BuildStatesCanceling: 426 return getBuild.Build.State 427 case api.BuildStatesScheduled, api.BuildStatesRunning: 428 t.Logger.Debug("sleeping", zap.Any("build state", getBuild.Build.State)) 429 time.Sleep(time.Second) 430 default: 431 t.Errorf("unknown build state %q", getBuild.Build.State) 432 return getBuild.Build.State 433 } 434 } 435 } 436 437 func (t testcase) AssertMetadata(ctx context.Context, annotations, labelz map[string]string) { 438 t.Helper() 439 440 tagReq, err := labels.NewRequirement(api.TagLabel, selection.Equals, []string{fmt.Sprintf("queue_%s", t.PipelineName)}) 441 require.NoError(t, err) 442 selector := labels.NewSelector().Add(*tagReq) 443 444 jobs, err := t.Kubernetes.BatchV1().Jobs(cfg.Namespace).List(ctx, v1.ListOptions{LabelSelector: selector.String()}) 445 require.NoError(t, err) 446 require.Len(t, jobs.Items, 1) 447 for k, v := range annotations { 448 require.Equal(t, jobs.Items[0].ObjectMeta.Annotations[k], v) 449 require.Equal(t, jobs.Items[0].Spec.Template.Annotations[k], v) 450 } 451 for k, v := range labelz { 452 require.Equal(t, jobs.Items[0].ObjectMeta.Labels[k], v) 453 require.Equal(t, jobs.Items[0].Spec.Template.Labels[k], v) 454 } 455 } 456 457 func strPtr(p string) *string { 458 return &p 459 } 460 461 func ignorableError(err error) bool { 462 reasons := []string{ 463 "already finished", 464 "already being canceled", 465 "already been canceled", 466 "No build found", 467 } 468 for _, reason := range reasons { 469 if strings.Contains(err.Error(), reason) { 470 return true 471 } 472 } 473 return false 474 } 475 476 func (t testcase) deletePipeline(ctx context.Context) { 477 t.Helper() 478 479 EnsureCleanup(t.T, func() { 480 err := roko.NewRetrier( 481 roko.WithMaxAttempts(10), 482 roko.WithStrategy(roko.Exponential(time.Second, 5*time.Second)), 483 ).DoWithContext(ctx, func(r *roko.Retrier) error { 484 resp, err := t.Buildkite.Pipelines.Delete(cfg.Org, t.PipelineName) 485 if err != nil { 486 if resp.StatusCode == http.StatusNotFound { 487 return nil 488 } 489 t.Logf("waiting for build to be canceled on pipeline %s", t.PipelineName) 490 return err 491 } 492 return nil 493 }) 494 if err != nil { 495 t.Logf("failed to cleanup pipeline %s: %v", t.PipelineName, err) 496 return 497 } 498 t.Logf("deleted pipeline! %s", t.PipelineName) 499 }) 500 }