package consul

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"strings"
	"time"

	capi "github.com/hashicorp/consul/api"
	napi "github.com/hashicorp/nomad/api"
	"github.com/hashicorp/nomad/e2e/e2eutil"
	"github.com/hashicorp/nomad/e2e/framework"
	"github.com/hashicorp/nomad/helper/uuid"
	"github.com/stretchr/testify/require"
)

// ScriptChecksE2ETest is the e2e test suite covering Consul script checks
// registered by Nomad jobs. jobIds accumulates every job ID registered by
// the test cases so AfterEach can deregister and garbage-collect them.
type ScriptChecksE2ETest struct {
	framework.TC
	jobIds []string
}

// BeforeAll gates the suite on a usable cluster: a leader must be elected
// and at least one client node must be in the ready state before any test
// case runs.
func (tc *ScriptChecksE2ETest) BeforeAll(f *framework.F) {
	// Ensure cluster has leader before running tests
	e2eutil.WaitForLeader(f.T(), tc.Nomad())
	// Ensure that we have at least 1 client node in ready state
	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}

// TestGroupScriptCheck runs a job with a single task group with several services
// and associated script checks. It updates, stops, etc. the job to verify
// that script checks are re-registered as expected.
34 func (tc *ScriptChecksE2ETest) TestGroupScriptCheck(f *framework.F) { 35 r := require.New(f.T()) 36 37 nomadClient := tc.Nomad() 38 consulClient := tc.Consul() 39 40 jobId := "checks_group" + uuid.Short() 41 tc.jobIds = append(tc.jobIds, jobId) 42 43 // Job run: verify that checks were registered in Consul 44 allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), 45 nomadClient, "consul/input/checks_group.nomad", jobId, "") 46 r.Equal(1, len(allocs)) 47 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing) 48 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning) 49 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical) 50 51 // Check in warning state becomes healthy after check passes 52 _, _, err := exec(nomadClient, allocs, 53 []string{"/bin/sh", "-c", "touch /tmp/${NOMAD_ALLOC_ID}-alive-2b"}) 54 r.NoError(err) 55 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthPassing) 56 57 // Job update: verify checks are re-registered in Consul 58 allocs = e2eutil.RegisterAndWaitForAllocs(f.T(), 59 nomadClient, "consul/input/checks_group_update.nomad", jobId, "") 60 r.Equal(1, len(allocs)) 61 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing) 62 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthPassing) 63 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical) 64 65 // Verify we don't have any linger script checks running on the client 66 out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"}) 67 r.NoError(err) 68 running := strings.Split(strings.TrimSpace(out.String()), "\n") 69 r.LessOrEqual(len(running), 2) // task itself + 1 check == 2 70 71 // Clean job stop: verify that checks were deregistered in Consul 72 _, _, err = 
nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop 73 r.NoError(err) 74 e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-1") 75 e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-2") 76 e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-3") 77 78 // Restore for next test 79 allocs = e2eutil.RegisterAndWaitForAllocs(f.T(), 80 nomadClient, "consul/input/checks_group.nomad", jobId, "") 81 r.Equal(2, len(allocs)) 82 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing) 83 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning) 84 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical) 85 86 // Crash a task: verify that checks become healthy again 87 _, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"}) 88 if err != nil && err.Error() != "plugin is shut down" { 89 r.FailNow("unexpected error: %v", err) 90 } 91 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing) 92 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning) 93 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical) 94 95 // TODO(tgross) ... 96 // Restart client: verify that checks are re-registered 97 } 98 99 // TestTaskScriptCheck runs a job with a single task with several services 100 // and associated script checks. It updates, stops, etc. the job to verify 101 // that script checks are re-registered as expected. 
102 func (tc *ScriptChecksE2ETest) TestTaskScriptCheck(f *framework.F) { 103 r := require.New(f.T()) 104 105 nomadClient := tc.Nomad() 106 consulClient := tc.Consul() 107 108 jobId := "checks_task" + uuid.Short() 109 tc.jobIds = append(tc.jobIds, jobId) 110 111 // Job run: verify that checks were registered in Consul 112 allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), 113 nomadClient, "consul/input/checks_task.nomad", jobId, "") 114 r.Equal(1, len(allocs)) 115 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing) 116 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning) 117 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical) 118 119 // Check in warning state becomes healthy after check passes 120 _, _, err := exec(nomadClient, allocs, 121 []string{"/bin/sh", "-c", "touch ${NOMAD_TASK_DIR}/alive-2b"}) 122 r.NoError(err) 123 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthPassing) 124 125 // Job update: verify checks are re-registered in Consul 126 allocs = e2eutil.RegisterAndWaitForAllocs(f.T(), 127 nomadClient, "consul/input/checks_task_update.nomad", jobId, "") 128 r.Equal(1, len(allocs)) 129 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing) 130 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthPassing) 131 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical) 132 133 // Verify we don't have any linger script checks running on the client 134 out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"}) 135 r.NoError(err) 136 running := strings.Split(strings.TrimSpace(out.String()), "\n") 137 r.LessOrEqual(len(running), 2) // task itself + 1 check == 2 138 139 // Clean job stop: verify that checks were deregistered in Consul 140 _, _, err = 
nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop 141 r.NoError(err) 142 e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-1") 143 e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-2") 144 e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-3") 145 146 // Restore for next test 147 allocs = e2eutil.RegisterAndWaitForAllocs(f.T(), 148 nomadClient, "consul/input/checks_task.nomad", jobId, "") 149 r.Equal(2, len(allocs)) 150 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing) 151 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning) 152 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical) 153 154 // Crash a task: verify that checks become healthy again 155 _, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"}) 156 if err != nil && err.Error() != "plugin is shut down" { 157 r.FailNow("unexpected error: %v", err) 158 } 159 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing) 160 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning) 161 e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical) 162 163 // TODO(tgross) ... 
164 // Restart client: verify that checks are re-registered 165 } 166 167 func (tc *ScriptChecksE2ETest) AfterEach(f *framework.F) { 168 r := require.New(f.T()) 169 170 nomadClient := tc.Nomad() 171 jobs := nomadClient.Jobs() 172 // Stop all jobs in test 173 for _, id := range tc.jobIds { 174 _, _, err := jobs.Deregister(id, true, nil) 175 r.NoError(err) 176 } 177 // Garbage collect 178 r.NoError(nomadClient.System().GarbageCollect()) 179 } 180 181 func exec(client *napi.Client, allocs []*napi.AllocationListStub, command []string) (bytes.Buffer, bytes.Buffer, error) { 182 ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second) 183 defer cancelFn() 184 185 // we're getting a list of from the registration call here but 186 // one of them might be stopped or stopping, which will return 187 // an error if we try to exec into it. 188 var alloc *napi.Allocation 189 for _, stub := range allocs { 190 if stub.DesiredStatus == "run" { 191 alloc = &napi.Allocation{ 192 ID: stub.ID, 193 Namespace: stub.Namespace, 194 NodeID: stub.NodeID, 195 } 196 } 197 } 198 var stdout, stderr bytes.Buffer 199 if alloc == nil { 200 return stdout, stderr, fmt.Errorf("no allocation ready for exec") 201 } 202 _, err := client.Allocations().Exec(ctx, 203 alloc, "test", false, 204 command, 205 os.Stdin, &stdout, &stderr, 206 make(chan napi.TerminalSize), nil) 207 return stdout, stderr, err 208 }