github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/e2e/consul/script_checks.go (about) 1 package consul 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "os" 8 "strings" 9 "time" 10 11 capi "github.com/hashicorp/consul/api" 12 "github.com/hashicorp/nomad/api" 13 "github.com/hashicorp/nomad/e2e/e2eutil" 14 "github.com/hashicorp/nomad/e2e/framework" 15 "github.com/hashicorp/nomad/helper/uuid" 16 "github.com/stretchr/testify/require" 17 ) 18 19 type ScriptChecksE2ETest struct { 20 framework.TC 21 jobIds []string 22 } 23 24 func (tc *ScriptChecksE2ETest) BeforeAll(f *framework.F) { 25 // Ensure cluster has leader before running tests 26 e2eutil.WaitForLeader(f.T(), tc.Nomad()) 27 // Ensure that we have at least 1 client node in ready state 28 e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1) 29 } 30 31 // TestGroupScriptCheck runs a job with a single task group with several services 32 // and associated script checks. It updates, stops, etc. the job to verify 33 // that script checks are re-registered as expected. 34 func (tc *ScriptChecksE2ETest) TestGroupScriptCheck(f *framework.F) { 35 nomadClient := tc.Nomad() 36 uuid := uuid.Generate() 37 require := require.New(f.T()) 38 consulClient := tc.Consul() 39 40 jobId := "checks_group" + uuid[0:8] 41 tc.jobIds = append(tc.jobIds, jobId) 42 43 // Job run: verify that checks were registered in Consul 44 allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), 45 nomadClient, "consul/input/checks_group.nomad", jobId, "") 46 require.Equal(1, len(allocs)) 47 e2eutil.RequireConsulStatus(require, consulClient, "group-service-1", capi.HealthPassing) 48 e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthWarning) 49 e2eutil.RequireConsulStatus(require, consulClient, "group-service-3", capi.HealthCritical) 50 51 // Check in warning state becomes healthy after check passes 52 _, _, err := exec(nomadClient, allocs, 53 []string{"/bin/sh", "-c", "touch /tmp/${NOMAD_ALLOC_ID}-alive-2b"}) 54 require.NoError(err) 55 e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthPassing) 56 57 // Job update: verify checks are re-registered in Consul 58 allocs = e2eutil.RegisterAndWaitForAllocs(f.T(), 59 nomadClient, "consul/input/checks_group_update.nomad", jobId, "") 60 require.Equal(1, len(allocs)) 61 e2eutil.RequireConsulStatus(require, consulClient, "group-service-1", capi.HealthPassing) 62 e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthPassing) 63 e2eutil.RequireConsulStatus(require, consulClient, "group-service-3", capi.HealthCritical) 64 65 // Verify we don't have any linger script checks running on the client 66 out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"}) 67 require.NoError(err) 68 running := strings.Split(strings.TrimSpace(out.String()), "\n") 69 require.LessOrEqual(len(running), 2) // task itself + 1 check == 2 70 71 // Clean job stop: verify that checks were deregistered in Consul 72 nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop 73 e2eutil.RequireConsulDeregistered(require, consulClient, "group-service-1") 74 e2eutil.RequireConsulDeregistered(require, consulClient, "group-service-2") 75 e2eutil.RequireConsulDeregistered(require, consulClient, "group-service-3") 76 77 // Restore for next test 78 allocs = e2eutil.RegisterAndWaitForAllocs(f.T(), 79 nomadClient, "consul/input/checks_group.nomad", jobId, "") 80 require.Equal(2, len(allocs)) 81 e2eutil.RequireConsulStatus(require, consulClient, "group-service-1", capi.HealthPassing) 82 e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthWarning) 83 e2eutil.RequireConsulStatus(require, consulClient, "group-service-3", capi.HealthCritical) 84 85 // Crash a task: verify that checks become healthy again 86 _, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"}) 87 if err != nil && err.Error() != "plugin is shut down" { 88 require.FailNow("unexpected error: %v", err) 89 } 90 e2eutil.RequireConsulStatus(require, consulClient, "group-service-1", capi.HealthPassing) 91 e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthWarning) 92 e2eutil.RequireConsulStatus(require, consulClient, "group-service-3", capi.HealthCritical) 93 94 // TODO(tgross) ... 95 // Restart client: verify that checks are re-registered 96 } 97 98 // TestTaskScriptCheck runs a job with a single task with several services 99 // and associated script checks. It updates, stops, etc. the job to verify 100 // that script checks are re-registered as expected. 101 func (tc *ScriptChecksE2ETest) TestTaskScriptCheck(f *framework.F) { 102 nomadClient := tc.Nomad() 103 uuid := uuid.Generate() 104 require := require.New(f.T()) 105 consulClient := tc.Consul() 106 107 jobId := "checks_task" + uuid[0:8] 108 tc.jobIds = append(tc.jobIds, jobId) 109 110 // Job run: verify that checks were registered in Consul 111 allocs := e2eutil.RegisterAndWaitForAllocs(f.T(), 112 nomadClient, "consul/input/checks_task.nomad", jobId, "") 113 require.Equal(1, len(allocs)) 114 e2eutil.RequireConsulStatus(require, consulClient, "task-service-1", capi.HealthPassing) 115 e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthWarning) 116 e2eutil.RequireConsulStatus(require, consulClient, "task-service-3", capi.HealthCritical) 117 118 // Check in warning state becomes healthy after check passes 119 _, _, err := exec(nomadClient, allocs, 120 []string{"/bin/sh", "-c", "touch ${NOMAD_TASK_DIR}/alive-2b"}) 121 require.NoError(err) 122 e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthPassing) 123 124 // Job update: verify checks are re-registered in Consul 125 allocs = e2eutil.RegisterAndWaitForAllocs(f.T(), 126 nomadClient, "consul/input/checks_task_update.nomad", jobId, "") 127 require.Equal(1, len(allocs)) 128 e2eutil.RequireConsulStatus(require, consulClient, "task-service-1", capi.HealthPassing) 129 e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthPassing) 130 e2eutil.RequireConsulStatus(require, consulClient, "task-service-3", capi.HealthCritical) 131 132 // Verify we don't have any linger script checks running on the client 133 out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"}) 134 require.NoError(err) 135 running := strings.Split(strings.TrimSpace(out.String()), "\n") 136 require.LessOrEqual(len(running), 2) // task itself + 1 check == 2 137 138 // Clean job stop: verify that checks were deregistered in Consul 139 nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop 140 e2eutil.RequireConsulDeregistered(require, consulClient, "task-service-1") 141 e2eutil.RequireConsulDeregistered(require, consulClient, "task-service-2") 142 e2eutil.RequireConsulDeregistered(require, consulClient, "task-service-3") 143 144 // Restore for next test 145 allocs = e2eutil.RegisterAndWaitForAllocs(f.T(), 146 nomadClient, "consul/input/checks_task.nomad", jobId, "") 147 require.Equal(2, len(allocs)) 148 e2eutil.RequireConsulStatus(require, consulClient, "task-service-1", capi.HealthPassing) 149 e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthWarning) 150 e2eutil.RequireConsulStatus(require, consulClient, "task-service-3", capi.HealthCritical) 151 152 // Crash a task: verify that checks become healthy again 153 _, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"}) 154 if err != nil && err.Error() != "plugin is shut down" { 155 require.FailNow("unexpected error: %v", err) 156 } 157 e2eutil.RequireConsulStatus(require, consulClient, "task-service-1", capi.HealthPassing) 158 e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthWarning) 159 e2eutil.RequireConsulStatus(require, consulClient, "task-service-3", capi.HealthCritical) 160 161 // TODO(tgross) ... 162 // Restart client: verify that checks are re-registered 163 } 164 165 func (tc *ScriptChecksE2ETest) AfterEach(f *framework.F) { 166 nomadClient := tc.Nomad() 167 jobs := nomadClient.Jobs() 168 // Stop all jobs in test 169 for _, id := range tc.jobIds { 170 jobs.Deregister(id, true, nil) 171 } 172 // Garbage collect 173 nomadClient.System().GarbageCollect() 174 } 175 176 func exec(client *api.Client, allocs []*api.AllocationListStub, command []string) (bytes.Buffer, bytes.Buffer, error) { 177 ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second) 178 defer cancelFn() 179 180 // we're getting a list of from the registration call here but 181 // one of them might be stopped or stopping, which will return 182 // an error if we try to exec into it. 183 var alloc *api.Allocation 184 for _, stub := range allocs { 185 if stub.DesiredStatus == "run" { 186 alloc = &api.Allocation{ 187 ID: stub.ID, 188 Namespace: stub.Namespace, 189 NodeID: stub.NodeID, 190 } 191 } 192 } 193 var stdout, stderr bytes.Buffer 194 if alloc == nil { 195 return stdout, stderr, fmt.Errorf("no allocation ready for exec") 196 } 197 _, err := client.Allocations().Exec(ctx, 198 alloc, "test", false, 199 command, 200 os.Stdin, &stdout, &stderr, 201 make(chan api.TerminalSize), nil) 202 return stdout, stderr, err 203 }