github.com/buildkite/agent-stack-k8s@v0.4.0/integration/integration_test.go (about)

     1  package integration
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"embed"
     7  	"fmt"
     8  	"log"
     9  	"net/http"
    10  	"os"
    11  	"strconv"
    12  	"strings"
    13  	"sync"
    14  	"testing"
    15  	"text/template"
    16  	"time"
    17  
    18  	"github.com/Khan/genqlient/graphql"
    19  	"github.com/buildkite/agent-stack-k8s/api"
    20  	"github.com/buildkite/agent-stack-k8s/cmd/controller"
    21  	"github.com/buildkite/go-buildkite/v3/buildkite"
    22  	"github.com/buildkite/roko"
    23  	"github.com/stretchr/testify/assert"
    24  	"github.com/stretchr/testify/require"
    25  	"go.uber.org/zap"
    26  	"go.uber.org/zap/zaptest"
    27  	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    28  	"k8s.io/apimachinery/pkg/labels"
    29  	"k8s.io/apimachinery/pkg/selection"
    30  	"k8s.io/client-go/kubernetes"
    31  	restconfig "sigs.k8s.io/controller-runtime/pkg/client/config"
    32  )
    33  
    34  const (
    35  	repoHTTP = "https://github.com/buildkite/agent-stack-k8s"
    36  	repoSSH  = "git@github.com:buildkite/agent-stack-k8s"
    37  	branch   = "v2"
    38  )
    39  
    40  var (
    41  	preservePipelines       bool
    42  	deleteOrphanedPipelines bool
    43  	cfg                     api.Config
    44  
    45  	//go:embed fixtures/*
    46  	fixtures embed.FS
    47  )
    48  
    49  // hacks to make --config work
    50  func TestMain(m *testing.M) {
    51  	if err := os.Chdir(".."); err != nil {
    52  		log.Fatal(err)
    53  	}
    54  	cmd := controller.New()
    55  	cmd.Flags().BoolVar(&preservePipelines, "preserve-pipelines", false, "preserve pipelines created by tests")
    56  	cmd.Flags().BoolVar(&deleteOrphanedPipelines, "delete-orphaned-pipelines", false, "delete all pipelines matching agent-k8s-*")
    57  	var err error
    58  	cfg, err = controller.ParseConfig(cmd, os.Args[1:])
    59  	if err != nil {
    60  		log.Fatal(err)
    61  	}
    62  	if err := os.Chdir("integration"); err != nil {
    63  		log.Fatal(err)
    64  	}
    65  	for i, v := range os.Args {
    66  		if strings.Contains(v, "test") {
    67  			os.Args[i] = v
    68  		} else {
    69  			os.Args[i] = ""
    70  		}
    71  	}
    72  	os.Exit(m.Run())
    73  }
    74  
    75  func TestWalkingSkeleton(t *testing.T) {
    76  	tc := testcase{
    77  		T:       t,
    78  		Fixture: "helloworld.yaml",
    79  		Repo:    repoHTTP,
    80  		GraphQL: api.NewClient(cfg.BuildkiteToken),
    81  	}.Init()
    82  	ctx := context.Background()
    83  	pipelineID := tc.CreatePipeline(ctx)
    84  	tc.StartController(ctx, cfg)
    85  	build := tc.TriggerBuild(ctx, pipelineID)
    86  	tc.AssertSuccess(ctx, build)
    87  	tc.AssertLogsContain(build, "Buildkite Agent Stack for Kubernetes")
    88  	tc.AssertArtifactsContain(build, "README.md", "CODE_OF_CONDUCT.md")
    89  	tc.AssertMetadata(ctx, map[string]string{"some-annotation": "cool"}, map[string]string{"some-label": "wow"})
    90  }
    91  
    92  func TestSSHRepoClone(t *testing.T) {
    93  	tc := testcase{
    94  		T:       t,
    95  		Fixture: "secretref.yaml",
    96  		Repo:    repoSSH,
    97  		GraphQL: api.NewClient(cfg.BuildkiteToken),
    98  	}.Init()
    99  
   100  	ctx := context.Background()
   101  	_, err := tc.Kubernetes.CoreV1().Secrets(cfg.Namespace).Get(ctx, "agent-stack-k8s", v1.GetOptions{})
   102  	require.NoError(t, err, "agent-stack-k8s secret must exist")
   103  
   104  	pipelineID := tc.CreatePipeline(ctx)
   105  	tc.StartController(ctx, cfg)
   106  	build := tc.TriggerBuild(ctx, pipelineID)
   107  	tc.AssertSuccess(ctx, build)
   108  }
   109  
   110  func TestPluginCloneFailsTests(t *testing.T) {
   111  	tc := testcase{
   112  		T:       t,
   113  		Fixture: "unknown-plugin.yaml",
   114  		Repo:    repoHTTP,
   115  		GraphQL: api.NewClient(cfg.BuildkiteToken),
   116  	}.Init()
   117  
   118  	ctx := context.Background()
   119  
   120  	pipelineID := tc.CreatePipeline(ctx)
   121  	tc.StartController(ctx, cfg)
   122  	build := tc.TriggerBuild(ctx, pipelineID)
   123  	tc.AssertFail(ctx, build)
   124  }
   125  
   126  func TestMaxInFlightLimited(t *testing.T) {
   127  	tc := testcase{
   128  		T:       t,
   129  		Fixture: "parallel.yaml",
   130  		Repo:    repoHTTP,
   131  		GraphQL: api.NewClient(cfg.BuildkiteToken),
   132  	}.Init()
   133  
   134  	ctx := context.Background()
   135  
   136  	pipelineID := tc.CreatePipeline(ctx)
   137  	cfg := cfg
   138  	cfg.MaxInFlight = 1
   139  	tc.StartController(ctx, cfg)
   140  	buildID := tc.TriggerBuild(ctx, pipelineID).Number
   141  
   142  	for {
   143  		build, _, err := tc.Buildkite.Builds.Get(cfg.Org, tc.PipelineName, fmt.Sprintf("%d", buildID), nil)
   144  		require.NoError(t, err)
   145  		if *build.State == "running" {
   146  			require.LessOrEqual(t, *build.Pipeline.RunningJobsCount, cfg.MaxInFlight)
   147  		} else if *build.State == "passed" {
   148  			break
   149  		} else if *build.State == "scheduled" {
   150  			t.Log("waiting for build to start")
   151  			time.Sleep(time.Second)
   152  			continue
   153  		} else {
   154  			t.Fatalf("unexpected build state: %v", *build.State)
   155  		}
   156  	}
   157  }
   158  
   159  func TestMaxInFlightUnlimited(t *testing.T) {
   160  	tc := testcase{
   161  		T:       t,
   162  		Fixture: "parallel.yaml",
   163  		Repo:    repoHTTP,
   164  		GraphQL: api.NewClient(cfg.BuildkiteToken),
   165  	}.Init()
   166  
   167  	ctx := context.Background()
   168  
   169  	pipelineID := tc.CreatePipeline(ctx)
   170  	cfg := cfg
   171  	cfg.MaxInFlight = 0
   172  	tc.StartController(ctx, cfg)
   173  	buildID := tc.TriggerBuild(ctx, pipelineID).Number
   174  
   175  	var maxRunningJobs int
   176  	for {
   177  		build, _, err := tc.Buildkite.Builds.Get(cfg.Org, tc.PipelineName, fmt.Sprintf("%d", buildID), nil)
   178  		require.NoError(t, err)
   179  		if *build.State == "running" {
   180  			var runningJobs int
   181  			for _, job := range build.Jobs {
   182  				if *job.State == "running" {
   183  					runningJobs++
   184  				}
   185  			}
   186  			t.Logf("running, runningJobs: %d", runningJobs)
   187  			maxRunningJobs = maxOf(maxRunningJobs, runningJobs)
   188  		} else if *build.State == "passed" {
   189  			require.Equal(t, 4, maxRunningJobs) // all jobs should have run at once
   190  			break
   191  		} else if *build.State == "scheduled" {
   192  			t.Log("waiting for build to start")
   193  		} else {
   194  			t.Fatalf("unexpected build state: %v", *build.State)
   195  		}
   196  	}
   197  }
   198  
   199  func TestSidecars(t *testing.T) {
   200  	tc := testcase{
   201  		T:       t,
   202  		Fixture: "sidecars.yaml",
   203  		Repo:    repoHTTP,
   204  		GraphQL: api.NewClient(cfg.BuildkiteToken),
   205  	}.Init()
   206  	ctx := context.Background()
   207  	pipelineID := tc.CreatePipeline(ctx)
   208  	tc.StartController(ctx, cfg)
   209  	build := tc.TriggerBuild(ctx, pipelineID)
   210  	tc.AssertSuccess(ctx, build)
   211  	tc.AssertLogsContain(build, "Welcome to nginx!")
   212  }
   213  
   214  func TestInvalidPodSpec(t *testing.T) {
   215  	tc := testcase{
   216  		T:       t,
   217  		Fixture: "invalid.yaml",
   218  		Repo:    repoHTTP,
   219  		GraphQL: api.NewClient(cfg.BuildkiteToken),
   220  	}.Init()
   221  	ctx := context.Background()
   222  	pipelineID := tc.CreatePipeline(ctx)
   223  	tc.StartController(ctx, cfg)
   224  	build := tc.TriggerBuild(ctx, pipelineID)
   225  	tc.AssertFail(ctx, build)
   226  	tc.AssertLogsContain(build, `is invalid: spec.template.spec.containers[0].volumeMounts[0].name: Not found: "this-doesnt-exist"`)
   227  }
   228  
   229  func TestInvalidPodJSON(t *testing.T) {
   230  	tc := testcase{
   231  		T:       t,
   232  		Fixture: "invalid2.yaml",
   233  		Repo:    repoHTTP,
   234  		GraphQL: api.NewClient(cfg.BuildkiteToken),
   235  	}.Init()
   236  	ctx := context.Background()
   237  	pipelineID := tc.CreatePipeline(ctx)
   238  	tc.StartController(ctx, cfg)
   239  	build := tc.TriggerBuild(ctx, pipelineID)
   240  	tc.AssertFail(ctx, build)
   241  	tc.AssertLogsContain(build, `failed parsing Kubernetes plugin: json: cannot unmarshal number into Go struct field EnvVar.PodSpec.containers.env.value of type string`)
   242  }
   243  
   244  func maxOf(x, y int) int {
   245  	if x < y {
   246  		return y
   247  	}
   248  	return x
   249  }
   250  
   251  func TestCleanupOrphanedPipelines(t *testing.T) {
   252  	if !deleteOrphanedPipelines {
   253  		t.Skip("not cleaning orphaned pipelines")
   254  	}
   255  	ctx := context.Background()
   256  	graphqlClient := api.NewClient(cfg.BuildkiteToken)
   257  
   258  	pipelines, err := api.SearchPipelines(ctx, graphqlClient, cfg.Org, "agent-k8s-", 100)
   259  	require.NoError(t, err)
   260  	var wg sync.WaitGroup
   261  	wg.Add(len(pipelines.Organization.Pipelines.Edges))
   262  	for _, pipeline := range pipelines.Organization.Pipelines.Edges {
   263  		pipeline := pipeline // prevent loop variable capture
   264  		t.Run(pipeline.Node.Name, func(t *testing.T) {
   265  			builds, err := api.GetBuilds(ctx, graphqlClient, fmt.Sprintf("%s/%s", cfg.Org, pipeline.Node.Name), []api.BuildStates{api.BuildStatesRunning}, 100)
   266  			require.NoError(t, err)
   267  			for _, build := range builds.Pipeline.Builds.Edges {
   268  				_, err = api.BuildCancel(ctx, graphqlClient, api.BuildCancelInput{Id: build.Node.Id})
   269  				assert.NoError(t, err)
   270  			}
   271  			tc := testcase{
   272  				T:       t,
   273  				GraphQL: api.NewClient(cfg.BuildkiteToken),
   274  			}.Init()
   275  			tc.PipelineName = pipeline.Node.Name
   276  			tc.deletePipeline(context.Background())
   277  		})
   278  	}
   279  }
   280  
   281  type testcase struct {
   282  	*testing.T
   283  	Logger       *zap.Logger
   284  	Fixture      string
   285  	Repo         string
   286  	GraphQL      graphql.Client
   287  	Kubernetes   kubernetes.Interface
   288  	Buildkite    *buildkite.Client
   289  	PipelineName string
   290  }
   291  
   292  func (t testcase) Init() testcase {
   293  	t.Helper()
   294  	t.Parallel()
   295  
   296  	t.PipelineName = fmt.Sprintf("agent-k8s-%s-%d", strings.ToLower(t.Name()), time.Now().UnixNano())
   297  	t.Logger = zaptest.NewLogger(t).Named(t.Name())
   298  
   299  	clientConfig, err := restconfig.GetConfig()
   300  	require.NoError(t, err)
   301  	clientset, err := kubernetes.NewForConfig(clientConfig)
   302  	require.NoError(t, err)
   303  	t.Kubernetes = clientset
   304  	config, err := buildkite.NewTokenConfig(cfg.BuildkiteToken, false)
   305  	require.NoError(t, err)
   306  
   307  	t.Buildkite = buildkite.NewClient(config.Client())
   308  
   309  	return t
   310  }
   311  
   312  func (t testcase) CreatePipeline(ctx context.Context) string {
   313  	t.Helper()
   314  
   315  	tpl, err := template.ParseFS(fixtures, fmt.Sprintf("fixtures/%s", t.Fixture))
   316  	require.NoError(t, err)
   317  
   318  	var steps bytes.Buffer
   319  	require.NoError(t, tpl.Execute(&steps, map[string]string{
   320  		"queue": t.PipelineName,
   321  	}))
   322  	pipeline, _, err := t.Buildkite.Pipelines.Create(cfg.Org, &buildkite.CreatePipeline{
   323  		Name:       t.PipelineName,
   324  		Repository: t.Repo,
   325  		ProviderSettings: &buildkite.GitHubSettings{
   326  			TriggerMode: strPtr("none"),
   327  		},
   328  		Configuration: steps.String(),
   329  	})
   330  	require.NoError(t, err)
   331  
   332  	if !preservePipelines {
   333  		t.deletePipeline(ctx)
   334  	}
   335  
   336  	return *pipeline.GraphQLID
   337  }
   338  
   339  func (t testcase) StartController(ctx context.Context, cfg api.Config) {
   340  	t.Helper()
   341  
   342  	runCtx, cancel := context.WithCancel(ctx)
   343  	EnsureCleanup(t.T, cancel)
   344  
   345  	cfg.Tags = []string{fmt.Sprintf("queue=%s", t.PipelineName)}
   346  	cfg.Debug = true
   347  	go controller.Run(runCtx, t.Kubernetes, cfg)
   348  }
   349  
   350  func (t testcase) TriggerBuild(ctx context.Context, pipelineID string) api.Build {
   351  	t.Helper()
   352  
   353  	// trigger build
   354  	createBuild, err := api.BuildCreate(ctx, t.GraphQL, api.BuildCreateInput{
   355  		PipelineID: pipelineID,
   356  		Commit:     "HEAD",
   357  		Branch:     branch,
   358  	})
   359  	require.NoError(t, err)
   360  	EnsureCleanup(t.T, func() {
   361  		if _, err := api.BuildCancel(ctx, t.GraphQL, api.BuildCancelInput{
   362  			Id: createBuild.BuildCreate.Build.Id,
   363  		}); err != nil {
   364  			if ignorableError(err) {
   365  				return
   366  			}
   367  			t.Logf("failed to cancel build: %v", err)
   368  		}
   369  	})
   370  	build := createBuild.BuildCreate.Build
   371  	require.GreaterOrEqual(t, len(build.Jobs.Edges), 1)
   372  	node := build.Jobs.Edges[0].Node
   373  	_, ok := node.(*api.JobJobTypeCommand)
   374  	require.True(t, ok)
   375  
   376  	return build.Build
   377  }
   378  
   379  func (t testcase) AssertSuccess(ctx context.Context, build api.Build) {
   380  	t.Helper()
   381  	require.Equal(t, api.BuildStatesPassed, t.waitForBuild(ctx, build))
   382  }
   383  
   384  func (t testcase) AssertLogsContain(build api.Build, content string) {
   385  	t.Helper()
   386  	config, err := buildkite.NewTokenConfig(cfg.BuildkiteToken, false)
   387  	require.NoError(t, err)
   388  
   389  	client := buildkite.NewClient(config.Client())
   390  	job := build.Jobs.Edges[0].Node.(*api.JobJobTypeCommand)
   391  	logs, _, err := client.Jobs.GetJobLog(cfg.Org, t.PipelineName, strconv.Itoa(build.Number), job.Uuid)
   392  	require.NoError(t, err)
   393  	require.NotNil(t, logs.Content)
   394  	require.Contains(t, *logs.Content, content)
   395  
   396  }
   397  func (t testcase) AssertArtifactsContain(build api.Build, expected ...string) {
   398  	t.Helper()
   399  	config, err := buildkite.NewTokenConfig(cfg.BuildkiteToken, false)
   400  	require.NoError(t, err)
   401  	client := buildkite.NewClient(config.Client())
   402  
   403  	artifacts, _, err := client.Artifacts.ListByBuild(cfg.Org, t.PipelineName, strconv.Itoa(build.Number), nil)
   404  	require.NoError(t, err)
   405  	require.Len(t, artifacts, 2)
   406  	filenames := []string{*artifacts[0].Filename, *artifacts[1].Filename}
   407  	for _, filename := range expected {
   408  		require.Contains(t, filenames, filename)
   409  	}
   410  }
   411  
   412  func (t testcase) AssertFail(ctx context.Context, build api.Build) {
   413  	t.Helper()
   414  
   415  	require.Equal(t, api.BuildStatesFailed, t.waitForBuild(ctx, build))
   416  }
   417  
   418  func (t testcase) waitForBuild(ctx context.Context, build api.Build) api.BuildStates {
   419  	t.Helper()
   420  
   421  	for {
   422  		getBuild, err := api.GetBuild(ctx, t.GraphQL, build.Uuid)
   423  		require.NoError(t, err)
   424  		switch getBuild.Build.State {
   425  		case api.BuildStatesPassed, api.BuildStatesFailed, api.BuildStatesCanceled, api.BuildStatesCanceling:
   426  			return getBuild.Build.State
   427  		case api.BuildStatesScheduled, api.BuildStatesRunning:
   428  			t.Logger.Debug("sleeping", zap.Any("build state", getBuild.Build.State))
   429  			time.Sleep(time.Second)
   430  		default:
   431  			t.Errorf("unknown build state %q", getBuild.Build.State)
   432  			return getBuild.Build.State
   433  		}
   434  	}
   435  }
   436  
   437  func (t testcase) AssertMetadata(ctx context.Context, annotations, labelz map[string]string) {
   438  	t.Helper()
   439  
   440  	tagReq, err := labels.NewRequirement(api.TagLabel, selection.Equals, []string{fmt.Sprintf("queue_%s", t.PipelineName)})
   441  	require.NoError(t, err)
   442  	selector := labels.NewSelector().Add(*tagReq)
   443  
   444  	jobs, err := t.Kubernetes.BatchV1().Jobs(cfg.Namespace).List(ctx, v1.ListOptions{LabelSelector: selector.String()})
   445  	require.NoError(t, err)
   446  	require.Len(t, jobs.Items, 1)
   447  	for k, v := range annotations {
   448  		require.Equal(t, jobs.Items[0].ObjectMeta.Annotations[k], v)
   449  		require.Equal(t, jobs.Items[0].Spec.Template.Annotations[k], v)
   450  	}
   451  	for k, v := range labelz {
   452  		require.Equal(t, jobs.Items[0].ObjectMeta.Labels[k], v)
   453  		require.Equal(t, jobs.Items[0].Spec.Template.Labels[k], v)
   454  	}
   455  }
   456  
   457  func strPtr(p string) *string {
   458  	return &p
   459  }
   460  
   461  func ignorableError(err error) bool {
   462  	reasons := []string{
   463  		"already finished",
   464  		"already being canceled",
   465  		"already been canceled",
   466  		"No build found",
   467  	}
   468  	for _, reason := range reasons {
   469  		if strings.Contains(err.Error(), reason) {
   470  			return true
   471  		}
   472  	}
   473  	return false
   474  }
   475  
   476  func (t testcase) deletePipeline(ctx context.Context) {
   477  	t.Helper()
   478  
   479  	EnsureCleanup(t.T, func() {
   480  		err := roko.NewRetrier(
   481  			roko.WithMaxAttempts(10),
   482  			roko.WithStrategy(roko.Exponential(time.Second, 5*time.Second)),
   483  		).DoWithContext(ctx, func(r *roko.Retrier) error {
   484  			resp, err := t.Buildkite.Pipelines.Delete(cfg.Org, t.PipelineName)
   485  			if err != nil {
   486  				if resp.StatusCode == http.StatusNotFound {
   487  					return nil
   488  				}
   489  				t.Logf("waiting for build to be canceled on pipeline %s", t.PipelineName)
   490  				return err
   491  			}
   492  			return nil
   493  		})
   494  		if err != nil {
   495  			t.Logf("failed to cleanup pipeline %s: %v", t.PipelineName, err)
   496  			return
   497  		}
   498  		t.Logf("deleted pipeline! %s", t.PipelineName)
   499  	})
   500  }