github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/e2e/consul/script_checks.go (about)

     1  package consul
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"os"
     8  	"strings"
     9  	"time"
    10  
    11  	capi "github.com/hashicorp/consul/api"
    12  	napi "github.com/hashicorp/nomad/api"
    13  	"github.com/hashicorp/nomad/e2e/e2eutil"
    14  	"github.com/hashicorp/nomad/e2e/framework"
    15  	"github.com/hashicorp/nomad/helper/uuid"
    16  	"github.com/stretchr/testify/require"
    17  )
    18  
// ScriptChecksE2ETest is the e2e test suite exercising Consul script
// checks registered by Nomad group- and task-level services.
type ScriptChecksE2ETest struct {
	framework.TC
	// jobIds records every job registered by a test so AfterEach can
	// deregister and garbage-collect them.
	jobIds []string
}
    23  
    24  func (tc *ScriptChecksE2ETest) BeforeAll(f *framework.F) {
    25  	// Ensure cluster has leader before running tests
    26  	e2eutil.WaitForLeader(f.T(), tc.Nomad())
    27  	// Ensure that we have at least 1 client node in ready state
    28  	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
    29  }
    30  
    31  // TestGroupScriptCheck runs a job with a single task group with several services
    32  // and associated script checks. It updates, stops, etc. the job to verify
    33  // that script checks are re-registered as expected.
    34  func (tc *ScriptChecksE2ETest) TestGroupScriptCheck(f *framework.F) {
    35  	r := require.New(f.T())
    36  
    37  	nomadClient := tc.Nomad()
    38  	consulClient := tc.Consul()
    39  
    40  	jobId := "checks_group" + uuid.Short()
    41  	tc.jobIds = append(tc.jobIds, jobId)
    42  
    43  	// Job run: verify that checks were registered in Consul
    44  	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
    45  		nomadClient, "consul/input/checks_group.nomad", jobId, "")
    46  	r.Equal(1, len(allocs))
    47  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
    48  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
    49  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)
    50  
    51  	// Check in warning state becomes healthy after check passes
    52  	_, _, err := exec(nomadClient, allocs,
    53  		[]string{"/bin/sh", "-c", "touch /tmp/${NOMAD_ALLOC_ID}-alive-2b"})
    54  	r.NoError(err)
    55  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthPassing)
    56  
    57  	// Job update: verify checks are re-registered in Consul
    58  	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
    59  		nomadClient, "consul/input/checks_group_update.nomad", jobId, "")
    60  	r.Equal(1, len(allocs))
    61  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
    62  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthPassing)
    63  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)
    64  
    65  	// Verify we don't have any linger script checks running on the client
    66  	out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"})
    67  	r.NoError(err)
    68  	running := strings.Split(strings.TrimSpace(out.String()), "\n")
    69  	r.LessOrEqual(len(running), 2) // task itself + 1 check == 2
    70  
    71  	// Clean job stop: verify that checks were deregistered in Consul
    72  	_, _, err = nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop
    73  	r.NoError(err)
    74  	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-1")
    75  	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-2")
    76  	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-3")
    77  
    78  	// Restore for next test
    79  	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
    80  		nomadClient, "consul/input/checks_group.nomad", jobId, "")
    81  	r.Equal(2, len(allocs))
    82  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
    83  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
    84  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)
    85  
    86  	// Crash a task: verify that checks become healthy again
    87  	_, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"})
    88  	if err != nil && err.Error() != "plugin is shut down" {
    89  		r.FailNow("unexpected error: %v", err)
    90  	}
    91  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
    92  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
    93  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)
    94  
    95  	// TODO(tgross) ...
    96  	// Restart client: verify that checks are re-registered
    97  }
    98  
    99  // TestTaskScriptCheck runs a job with a single task with several services
   100  // and associated script checks. It updates, stops, etc. the job to verify
   101  // that script checks are re-registered as expected.
   102  func (tc *ScriptChecksE2ETest) TestTaskScriptCheck(f *framework.F) {
   103  	r := require.New(f.T())
   104  
   105  	nomadClient := tc.Nomad()
   106  	consulClient := tc.Consul()
   107  
   108  	jobId := "checks_task" + uuid.Short()
   109  	tc.jobIds = append(tc.jobIds, jobId)
   110  
   111  	// Job run: verify that checks were registered in Consul
   112  	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
   113  		nomadClient, "consul/input/checks_task.nomad", jobId, "")
   114  	r.Equal(1, len(allocs))
   115  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
   116  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
   117  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)
   118  
   119  	// Check in warning state becomes healthy after check passes
   120  	_, _, err := exec(nomadClient, allocs,
   121  		[]string{"/bin/sh", "-c", "touch ${NOMAD_TASK_DIR}/alive-2b"})
   122  	r.NoError(err)
   123  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthPassing)
   124  
   125  	// Job update: verify checks are re-registered in Consul
   126  	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
   127  		nomadClient, "consul/input/checks_task_update.nomad", jobId, "")
   128  	r.Equal(1, len(allocs))
   129  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
   130  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthPassing)
   131  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)
   132  
   133  	// Verify we don't have any linger script checks running on the client
   134  	out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"})
   135  	r.NoError(err)
   136  	running := strings.Split(strings.TrimSpace(out.String()), "\n")
   137  	r.LessOrEqual(len(running), 2) // task itself + 1 check == 2
   138  
   139  	// Clean job stop: verify that checks were deregistered in Consul
   140  	_, _, err = nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop
   141  	r.NoError(err)
   142  	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-1")
   143  	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-2")
   144  	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-3")
   145  
   146  	// Restore for next test
   147  	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
   148  		nomadClient, "consul/input/checks_task.nomad", jobId, "")
   149  	r.Equal(2, len(allocs))
   150  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
   151  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
   152  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)
   153  
   154  	// Crash a task: verify that checks become healthy again
   155  	_, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"})
   156  	if err != nil && err.Error() != "plugin is shut down" {
   157  		r.FailNow("unexpected error: %v", err)
   158  	}
   159  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
   160  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
   161  	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)
   162  
   163  	// TODO(tgross) ...
   164  	// Restart client: verify that checks are re-registered
   165  }
   166  
   167  func (tc *ScriptChecksE2ETest) AfterEach(f *framework.F) {
   168  	r := require.New(f.T())
   169  
   170  	nomadClient := tc.Nomad()
   171  	jobs := nomadClient.Jobs()
   172  	// Stop all jobs in test
   173  	for _, id := range tc.jobIds {
   174  		_, _, err := jobs.Deregister(id, true, nil)
   175  		r.NoError(err)
   176  	}
   177  	// Garbage collect
   178  	r.NoError(nomadClient.System().GarbageCollect())
   179  }
   180  
   181  func exec(client *napi.Client, allocs []*napi.AllocationListStub, command []string) (bytes.Buffer, bytes.Buffer, error) {
   182  	ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second)
   183  	defer cancelFn()
   184  
   185  	// we're getting a list of from the registration call here but
   186  	// one of them might be stopped or stopping, which will return
   187  	// an error if we try to exec into it.
   188  	var alloc *napi.Allocation
   189  	for _, stub := range allocs {
   190  		if stub.DesiredStatus == "run" {
   191  			alloc = &napi.Allocation{
   192  				ID:        stub.ID,
   193  				Namespace: stub.Namespace,
   194  				NodeID:    stub.NodeID,
   195  			}
   196  		}
   197  	}
   198  	var stdout, stderr bytes.Buffer
   199  	if alloc == nil {
   200  		return stdout, stderr, fmt.Errorf("no allocation ready for exec")
   201  	}
   202  	_, err := client.Allocations().Exec(ctx,
   203  		alloc, "test", false,
   204  		command,
   205  		os.Stdin, &stdout, &stderr,
   206  		make(chan napi.TerminalSize), nil)
   207  	return stdout, stderr, err
   208  }