github.com/hernad/nomad@v1.6.112/e2e/vaultsecrets/vaultsecrets.go

github.com/hernad/nomad@v1.6.112/e2e/vaultsecrets/vaultsecrets.go (about)

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package vaultsecrets
     5  
     6  import (
     7  	"context"
     8  	"fmt"
     9  	"io"
    10  	"os"
    11  	"os/exec"
    12  	"regexp"
    13  	"strings"
    14  	"time"
    15  
    16  	e2e "github.com/hernad/nomad/e2e/e2eutil"
    17  	"github.com/hernad/nomad/e2e/framework"
    18  	"github.com/hernad/nomad/helper/uuid"
    19  	"github.com/hernad/nomad/testutil"
    20  )
    21  
// ns is the namespace argument passed to the e2e helpers used in this file
// (WaitForAllocStatusExpected, AllocExec, AllocsForJob). It is left empty;
// presumably that selects the default namespace -- confirm against e2eutil.
const ns = ""
    23  
// VaultSecretsTest is the e2e test case for jobs that consume Vault secrets.
// Besides the embedded framework.TC it tracks the Vault mounts, jobs and
// policies created by a test so that AfterEach can remove them again.
type VaultSecretsTest struct {
	framework.TC
	// secretsPath is the mount path of the kv-v2 secrets engine enabled by
	// TestVaultSecrets; AfterEach disables it.
	secretsPath string
	// pkiPath is the mount path of the pki secrets engine enabled by
	// TestVaultSecrets; AfterEach disables it.
	pkiPath     string
	// jobIDs lists the jobs registered by the test; AfterEach stops and
	// purges each of them.
	jobIDs      []string
	// policies lists the Vault policies written by the test; AfterEach
	// deletes each of them.
	policies    []string
}
    31  
    32  func init() {
    33  	framework.AddSuites(&framework.TestSuite{
    34  		Component:   "VaultSecrets",
    35  		CanRunLocal: true,
    36  		Consul:      true,
    37  		Vault:       true,
    38  		Cases: []framework.TestCase{
    39  			new(VaultSecretsTest),
    40  		},
    41  	})
    42  }
    43  
    44  func (tc *VaultSecretsTest) BeforeAll(f *framework.F) {
    45  	e2e.WaitForLeader(f.T(), tc.Nomad())
    46  	e2e.WaitForNodesReady(f.T(), tc.Nomad(), 1)
    47  }
    48  
    49  func (tc *VaultSecretsTest) AfterEach(f *framework.F) {
    50  	if os.Getenv("NOMAD_TEST_SKIPCLEANUP") == "1" {
    51  		return
    52  	}
    53  
    54  	for _, id := range tc.jobIDs {
    55  		_, err := e2e.Command("nomad", "job", "stop", "-purge", id)
    56  		f.Assert().NoError(err, "could not clean up job", id)
    57  	}
    58  	tc.jobIDs = []string{}
    59  
    60  	for _, policy := range tc.policies {
    61  		_, err := e2e.Command("vault", "policy", "delete", policy)
    62  		f.Assert().NoError(err, "could not clean up vault policy", policy)
    63  	}
    64  	tc.policies = []string{}
    65  
    66  	// disabling the secrets engines will wipe all the secrets as well
    67  	_, err := e2e.Command("vault", "secrets", "disable", tc.secretsPath)
    68  	f.Assert().NoError(err)
    69  	_, err = e2e.Command("vault", "secrets", "disable", tc.pkiPath)
    70  	f.Assert().NoError(err)
    71  
    72  	_, err = e2e.Command("nomad", "system", "gc")
    73  	f.NoError(err)
    74  }
    75  
// TestVaultSecrets checks that a job's access to Vault secrets follows its
// Vault policy. The scenario runs in this order:
//
//  1. enable and populate per-test kv-v2 and pki secrets engines;
//  2. register the job under a policy without access and expect its
//     allocation to stay pending with a "Missing: vault.read(...)" message in
//     the alloc status output;
//  3. overwrite the policy with a working one, re-register the job and expect
//     the certificate and the KV secret to be rendered into /secrets;
//  4. update the KV secret, wait out a 60s window, then check that
//     VAULT_TOKEN is unchanged, the certificate content has changed and the
//     KV secret file still holds the original value.
//
// Mount paths, the job ID and the policy ID are recorded on tc so that
// AfterEach can clean them up.
func (tc *VaultSecretsTest) TestVaultSecrets(f *framework.F) {

	// use a random suffix to encapsulate test keys, policies, etc.
	// for cleanup from vault
	testID := uuid.Generate()[0:8]
	jobID := "test-vault-secrets-" + testID
	tc.secretsPath = "secrets-" + testID
	tc.pkiPath = "pki-" + testID
	secretValue := uuid.Generate()
	secretKey := tc.secretsPath + "/data/myapp"
	pkiCertIssue := tc.pkiPath + "/issue/nomad"
	policyID := "access-secrets-" + testID
	// index is incremented before every runJob call, which substitutes it for
	// the DEPLOYNUMBER placeholder in the jobspec
	index := 0
	// wait settings shared by the polling loops below
	wc := &e2e.WaitConfig{Retries: 500}
	interval, retries := wc.OrDefault()

	setupCmds := []string{

		// configure KV secrets engine
		// Note: the secret key is written to 'secrets-###/myapp' but the kv2 API
		// for Vault implicitly turns that into 'secrets-###/data/myapp' so we
		// need to use the longer path for everything other than kv put/get
		fmt.Sprintf("vault secrets enable -path=%s kv-v2", tc.secretsPath),
		fmt.Sprintf("vault kv put %s/myapp key=%s", tc.secretsPath, secretValue),
		fmt.Sprintf("vault secrets tune -max-lease-ttl=1m %s", tc.secretsPath),

		// configure PKI secrets engine
		fmt.Sprintf("vault secrets enable -path=%s pki", tc.pkiPath),
		fmt.Sprintf("vault write %s/root/generate/internal "+
			"common_name=service.consul ttl=1h", tc.pkiPath),
		fmt.Sprintf("vault write %s/roles/nomad "+
			"allowed_domains=service.consul "+
			"allow_subdomains=true "+
			"generate_lease=true "+
			"max_ttl=1m", tc.pkiPath),
		fmt.Sprintf("vault secrets tune -max-lease-ttl=1m %s", tc.pkiPath),
	}

	// each command line is split on single spaces, so no argument above may
	// itself contain a space
	for _, setupCmd := range setupCmds {
		cmd := strings.Split(setupCmd, " ")
		out, err := e2e.Command(cmd[0], cmd[1:]...)
		f.NoError(err, fmt.Sprintf("error for %q:\n%s", setupCmd, out))
	}

	// we can't set an empty policy in our job, so write a bogus policy that
	// doesn't have access to any of the paths we're using
	out, err := writePolicy(policyID, "./vaultsecrets/input/policy-bad.hcl", testID)
	f.NoError(err, out)
	tc.policies = append(tc.policies, policyID)

	index++
	err = runJob(jobID, testID, index)
	f.NoError(err, "could not register job")
	tc.jobIDs = append(tc.jobIDs, jobID)

	// job doesn't have access to secrets, so they can't start
	err = e2e.WaitForAllocStatusExpected(jobID, ns, []string{"pending"})
	f.NoError(err, "expected pending allocation")

	// we should get a task event about why they can't start
	// (the closing parenthesis is left off; only this prefix is searched for
	// with strings.Contains below)
	expect := fmt.Sprintf("Missing: vault.read(%s), vault.write(%s", secretKey, pkiCertIssue)

	allocID, err := latestAllocID(jobID)
	f.NoError(err)

	// poll `nomad alloc status` until the expected message shows up; the error
	// built on every attempt is handed to the failure callback if retries run
	// out
	testutil.WaitForResultRetries(retries, func() (bool, error) {
		time.Sleep(interval)
		out, err := e2e.Command("nomad", "alloc", "status", allocID)
		f.NoError(err, "could not get allocation status")
		return strings.Contains(out, expect),
			fmt.Errorf("expected '%s', got\n%v", expect, out)
	}, func(e error) {
		f.NoError(e)
	})

	// write a working policy and redeploy
	// (same policyID, so the bogus policy is overwritten and tc.policies
	// needs no new entry)
	out, err = writePolicy(policyID, "./vaultsecrets/input/policy-good.hcl", testID)
	f.NoError(err, out)
	index++
	err = runJob(jobID, testID, index)
	f.NoError(err, "could not register job")

	// record the rough start of vault token TTL window, so that we don't have
	// to wait excessively later on
	ttlStart := time.Now()

	// job should be now unblocked
	err = e2e.WaitForAllocStatusExpected(jobID, ns, []string{"running", "complete"})
	f.NoError(err, "expected running allocation")

	allocID, err = latestAllocID(jobID)
	f.NoError(err)

	// keep the first rendered certificate so the later check can detect that
	// its content changed
	renderedCert, err := waitForAllocSecret(allocID, "task", "/secrets/certificate.crt",
		func(out string) bool {
			return strings.Contains(out, "BEGIN CERTIFICATE")
		}, wc)
	f.NoError(err)

	_, err = waitForAllocSecret(allocID, "task", "/secrets/access.key",
		func(out string) bool {
			return strings.Contains(out, secretValue)
		}, wc)
	f.NoError(err)

	var re = regexp.MustCompile(`VAULT_TOKEN=(.*)`)

	// check vault token was written and save it for later comparison
	out, err = e2e.AllocExec(allocID, "task", "env", ns, nil)
	f.NoError(err)
	match := re.FindStringSubmatch(out)
	f.NotNil(match, fmt.Errorf("could not find VAULT_TOKEN, got:%v\n", out))
	taskToken := match[1]

	// Update secret
	out, err = e2e.Command("vault", "kv", "put",
		fmt.Sprintf("%s/myapp", tc.secretsPath), "key=UPDATED")
	f.NoError(err, out)

	// sleep for whatever is left of a 60s window measured from ttlStart; 60s
	// matches the 1m max-lease-ttl / max_ttl configured during setup
	elapsed := time.Since(ttlStart)
	time.Sleep((time.Second * 60) - elapsed)

	// tokens will not be updated
	out, err = e2e.AllocExec(allocID, "task", "env", ns, nil)
	f.NoError(err)
	match = re.FindStringSubmatch(out)
	f.NotNil(match, fmt.Errorf("could not find VAULT_TOKEN, got:%v\n", out))
	f.Equal(taskToken, match[1])

	// cert will be renewed
	_, err = waitForAllocSecret(allocID, "task", "/secrets/certificate.crt",
		func(out string) bool {
			return strings.Contains(out, "BEGIN CERTIFICATE") &&
				out != renderedCert
		}, wc)
	f.NoError(err)

	// secret will *not* be renewed because it doesn't have a lease to expire
	// (the file is expected to still contain the original secretValue even
	// though the key was overwritten with "UPDATED" above)
	_, err = waitForAllocSecret(allocID, "task", "/secrets/access.key",
		func(out string) bool {
			return strings.Contains(out, secretValue)
		}, wc)
	f.NoError(err)

}
   221  
   222  // We need to namespace the keys in the policy, so read it in and replace the
   223  // values of the policy names
   224  func writePolicy(policyID, policyPath, testID string) (string, error) {
   225  	raw, err := os.ReadFile(policyPath)
   226  	if err != nil {
   227  		return "", err
   228  	}
   229  	policyDoc := string(raw)
   230  	policyDoc = strings.ReplaceAll(policyDoc, "TESTID", testID)
   231  
   232  	ctx, cancel := context.WithTimeout(context.Background(), time.Second*10)
   233  	defer cancel()
   234  	cmd := exec.CommandContext(ctx, "vault", "policy", "write", policyID, "-")
   235  	stdin, err := cmd.StdinPipe()
   236  	if err != nil {
   237  		return "", err
   238  	}
   239  
   240  	go func() {
   241  		defer stdin.Close()
   242  		io.WriteString(stdin, policyDoc)
   243  	}()
   244  
   245  	out, err := cmd.CombinedOutput()
   246  	return string(out), err
   247  }
   248  
   249  // We need to namespace the vault paths in the job, so parse it
   250  // and replace the values of the template and vault fields
   251  func runJob(jobID, testID string, index int) error {
   252  
   253  	raw, err := os.ReadFile("./vaultsecrets/input/secrets.nomad")
   254  	if err != nil {
   255  		return err
   256  	}
   257  	jobspec := string(raw)
   258  	jobspec = strings.ReplaceAll(jobspec, "TESTID", testID)
   259  	jobspec = strings.ReplaceAll(jobspec, "DEPLOYNUMBER", string(rune(index)))
   260  
   261  	return e2e.RegisterFromJobspec(jobID, jobspec)
   262  }
   263  
   264  // waitForAllocSecret is similar to e2e.WaitForAllocFile but uses `alloc exec`
   265  // to be able to read the secrets dir, which is not available to `alloc fs`
   266  func waitForAllocSecret(allocID, taskID, path string, test func(string) bool, wc *e2e.WaitConfig) (string, error) {
   267  	var err error
   268  	var out string
   269  	interval, retries := wc.OrDefault()
   270  
   271  	testutil.WaitForResultRetries(retries, func() (bool, error) {
   272  		time.Sleep(interval)
   273  		out, err = e2e.Command("nomad", "alloc", "exec", "-task", taskID, allocID, "cat", path)
   274  		if err != nil {
   275  			return false, fmt.Errorf("could not get file %q from allocation %q: %v",
   276  				path, allocID, err)
   277  		}
   278  		return test(out),
   279  			fmt.Errorf("test for file content failed: got\n%#v", out)
   280  	}, func(e error) {
   281  		err = e
   282  	})
   283  	return out, err
   284  }
   285  
   286  // this will always be sorted
   287  func latestAllocID(jobID string) (string, error) {
   288  	allocs, err := e2e.AllocsForJob(jobID, ns)
   289  	if err != nil {
   290  		return "", err
   291  	}
   292  	return allocs[0]["ID"], nil
   293  }