github.com/uchennaokeke444/nomad@v0.11.8/e2e/consul/script_checks.go (about)

     1  package consul
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"os"
     8  	"strings"
     9  	"time"
    10  
    11  	capi "github.com/hashicorp/consul/api"
    12  	"github.com/hashicorp/nomad/api"
    13  	"github.com/hashicorp/nomad/e2e/e2eutil"
    14  	"github.com/hashicorp/nomad/e2e/framework"
    15  	"github.com/hashicorp/nomad/helper/uuid"
    16  	"github.com/stretchr/testify/require"
    17  )
    18  
// ScriptChecksE2ETest is the e2e test case that exercises Consul script
// checks. It embeds framework.TC and keeps track of the jobs its tests
// register.
type ScriptChecksE2ETest struct {
	framework.TC
	// jobIds collects the ID of every job a test registers so that
	// AfterEach can deregister them.
	jobIds []string
}
    23  
    24  func (tc *ScriptChecksE2ETest) BeforeAll(f *framework.F) {
    25  	// Ensure cluster has leader before running tests
    26  	e2eutil.WaitForLeader(f.T(), tc.Nomad())
    27  	// Ensure that we have at least 1 client node in ready state
    28  	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
    29  }
    30  
    31  // TestGroupScriptCheck runs a job with a single task group with several services
    32  // and associated script checks. It updates, stops, etc. the job to verify
    33  // that script checks are re-registered as expected.
    34  func (tc *ScriptChecksE2ETest) TestGroupScriptCheck(f *framework.F) {
    35  	nomadClient := tc.Nomad()
    36  	uuid := uuid.Generate()
    37  	require := require.New(f.T())
    38  	consulClient := tc.Consul()
    39  
    40  	jobId := "checks_group" + uuid[0:8]
    41  	tc.jobIds = append(tc.jobIds, jobId)
    42  
    43  	// Job run: verify that checks were registered in Consul
    44  	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
    45  		nomadClient, "consul/input/checks_group.nomad", jobId, "")
    46  	require.Equal(1, len(allocs))
    47  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-1", capi.HealthPassing)
    48  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthWarning)
    49  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-3", capi.HealthCritical)
    50  
    51  	// Check in warning state becomes healthy after check passes
    52  	_, _, err := exec(nomadClient, allocs,
    53  		[]string{"/bin/sh", "-c", "touch /tmp/${NOMAD_ALLOC_ID}-alive-2b"})
    54  	require.NoError(err)
    55  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthPassing)
    56  
    57  	// Job update: verify checks are re-registered in Consul
    58  	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
    59  		nomadClient, "consul/input/checks_group_update.nomad", jobId, "")
    60  	require.Equal(1, len(allocs))
    61  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-1", capi.HealthPassing)
    62  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthPassing)
    63  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-3", capi.HealthCritical)
    64  
    65  	// Verify we don't have any linger script checks running on the client
    66  	out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"})
    67  	require.NoError(err)
    68  	running := strings.Split(strings.TrimSpace(out.String()), "\n")
    69  	require.LessOrEqual(len(running), 2) // task itself + 1 check == 2
    70  
    71  	// Clean job stop: verify that checks were deregistered in Consul
    72  	nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop
    73  	e2eutil.RequireConsulDeregistered(require, consulClient, "group-service-1")
    74  	e2eutil.RequireConsulDeregistered(require, consulClient, "group-service-2")
    75  	e2eutil.RequireConsulDeregistered(require, consulClient, "group-service-3")
    76  
    77  	// Restore for next test
    78  	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
    79  		nomadClient, "consul/input/checks_group.nomad", jobId, "")
    80  	require.Equal(2, len(allocs))
    81  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-1", capi.HealthPassing)
    82  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthWarning)
    83  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-3", capi.HealthCritical)
    84  
    85  	// Crash a task: verify that checks become healthy again
    86  	_, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"})
    87  	if err != nil && err.Error() != "plugin is shut down" {
    88  		require.FailNow("unexpected error: %v", err)
    89  	}
    90  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-1", capi.HealthPassing)
    91  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-2", capi.HealthWarning)
    92  	e2eutil.RequireConsulStatus(require, consulClient, "group-service-3", capi.HealthCritical)
    93  
    94  	// TODO(tgross) ...
    95  	// Restart client: verify that checks are re-registered
    96  }
    97  
    98  // TestTaskScriptCheck runs a job with a single task with several services
    99  // and associated script checks. It updates, stops, etc. the job to verify
   100  // that script checks are re-registered as expected.
   101  func (tc *ScriptChecksE2ETest) TestTaskScriptCheck(f *framework.F) {
   102  	nomadClient := tc.Nomad()
   103  	uuid := uuid.Generate()
   104  	require := require.New(f.T())
   105  	consulClient := tc.Consul()
   106  
   107  	jobId := "checks_task" + uuid[0:8]
   108  	tc.jobIds = append(tc.jobIds, jobId)
   109  
   110  	// Job run: verify that checks were registered in Consul
   111  	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
   112  		nomadClient, "consul/input/checks_task.nomad", jobId, "")
   113  	require.Equal(1, len(allocs))
   114  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-1", capi.HealthPassing)
   115  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthWarning)
   116  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-3", capi.HealthCritical)
   117  
   118  	// Check in warning state becomes healthy after check passes
   119  	_, _, err := exec(nomadClient, allocs,
   120  		[]string{"/bin/sh", "-c", "touch ${NOMAD_TASK_DIR}/alive-2b"})
   121  	require.NoError(err)
   122  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthPassing)
   123  
   124  	// Job update: verify checks are re-registered in Consul
   125  	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
   126  		nomadClient, "consul/input/checks_task_update.nomad", jobId, "")
   127  	require.Equal(1, len(allocs))
   128  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-1", capi.HealthPassing)
   129  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthPassing)
   130  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-3", capi.HealthCritical)
   131  
   132  	// Verify we don't have any linger script checks running on the client
   133  	out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"})
   134  	require.NoError(err)
   135  	running := strings.Split(strings.TrimSpace(out.String()), "\n")
   136  	require.LessOrEqual(len(running), 2) // task itself + 1 check == 2
   137  
   138  	// Clean job stop: verify that checks were deregistered in Consul
   139  	nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop
   140  	e2eutil.RequireConsulDeregistered(require, consulClient, "task-service-1")
   141  	e2eutil.RequireConsulDeregistered(require, consulClient, "task-service-2")
   142  	e2eutil.RequireConsulDeregistered(require, consulClient, "task-service-3")
   143  
   144  	// Restore for next test
   145  	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
   146  		nomadClient, "consul/input/checks_task.nomad", jobId, "")
   147  	require.Equal(2, len(allocs))
   148  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-1", capi.HealthPassing)
   149  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthWarning)
   150  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-3", capi.HealthCritical)
   151  
   152  	// Crash a task: verify that checks become healthy again
   153  	_, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"})
   154  	if err != nil && err.Error() != "plugin is shut down" {
   155  		require.FailNow("unexpected error: %v", err)
   156  	}
   157  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-1", capi.HealthPassing)
   158  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-2", capi.HealthWarning)
   159  	e2eutil.RequireConsulStatus(require, consulClient, "task-service-3", capi.HealthCritical)
   160  
   161  	// TODO(tgross) ...
   162  	// Restart client: verify that checks are re-registered
   163  }
   164  
   165  func (tc *ScriptChecksE2ETest) AfterEach(f *framework.F) {
   166  	nomadClient := tc.Nomad()
   167  	jobs := nomadClient.Jobs()
   168  	// Stop all jobs in test
   169  	for _, id := range tc.jobIds {
   170  		jobs.Deregister(id, true, nil)
   171  	}
   172  	// Garbage collect
   173  	nomadClient.System().GarbageCollect()
   174  }
   175  
   176  func exec(client *api.Client, allocs []*api.AllocationListStub, command []string) (bytes.Buffer, bytes.Buffer, error) {
   177  	ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second)
   178  	defer cancelFn()
   179  
   180  	// we're getting a list of from the registration call here but
   181  	// one of them might be stopped or stopping, which will return
   182  	// an error if we try to exec into it.
   183  	var alloc *api.Allocation
   184  	for _, stub := range allocs {
   185  		if stub.DesiredStatus == "run" {
   186  			alloc = &api.Allocation{
   187  				ID:        stub.ID,
   188  				Namespace: stub.Namespace,
   189  				NodeID:    stub.NodeID,
   190  			}
   191  		}
   192  	}
   193  	var stdout, stderr bytes.Buffer
   194  	if alloc == nil {
   195  		return stdout, stderr, fmt.Errorf("no allocation ready for exec")
   196  	}
   197  	_, err := client.Allocations().Exec(ctx,
   198  		alloc, "test", false,
   199  		command,
   200  		os.Stdin, &stdout, &stderr,
   201  		make(chan api.TerminalSize), nil)
   202  	return stdout, stderr, err
   203  }