github.com/hernad/nomad@v1.6.112/e2e/consul/script_checks.go (about)

// Copyright (c) HashiCorp, Inc.
// SPDX-License-Identifier: MPL-2.0

package consul

import (
	"bytes"
	"context"
	"fmt"
	"os"
	"strings"
	"time"

	capi "github.com/hashicorp/consul/api"
	napi "github.com/hernad/nomad/api"
	"github.com/hernad/nomad/e2e/e2eutil"
	"github.com/hernad/nomad/e2e/framework"
	"github.com/hernad/nomad/helper/uuid"
	"github.com/stretchr/testify/require"
)

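// ScriptChecksE2ETest tests that Nomad registers, updates, and deregisters
// Consul script checks for both group and task services. It tracks the job
// IDs it creates so AfterEach can clean them up.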
type ScriptChecksE2ETest struct {
	framework.TC
	jobIds []string
}

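// BeforeAll ensures the cluster has an elected leader and at least one
// client node in the ready state before any tests run.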
func (tc *ScriptChecksE2ETest) BeforeAll(f *framework.F) {
	// Ensure cluster has leader before running tests
	e2eutil.WaitForLeader(f.T(), tc.Nomad())
	// Ensure that we have at least 1 client node in ready state
	e2eutil.WaitForNodesReady(f.T(), tc.Nomad(), 1)
}

// TestGroupScriptCheck runs a job with a single task group with several services
// and associated script checks. It updates, stops, etc. the job to verify
// that script checks are re-registered as expected.
func (tc *ScriptChecksE2ETest) TestGroupScriptCheck(f *framework.F) {
	r := require.New(f.T())

	nomadClient := tc.Nomad()
	consulClient := tc.Consul()

	jobId := "checks_group" + uuid.Short()
	tc.jobIds = append(tc.jobIds, jobId)

	// Job run: verify that checks were registered in Consul
	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_group.nomad", jobId, "")
	r.Equal(1, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)

	// Check in warning state becomes healthy after check passes
	_, _, err := exec(nomadClient, allocs,
		[]string{"/bin/sh", "-c", "touch /tmp/${NOMAD_ALLOC_ID}-alive-2b"})
	r.NoError(err)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthPassing)

	// Job update: verify checks are re-registered in Consul
	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_group_update.nomad", jobId, "")
	r.Equal(1, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)

	// Verify we don't have any lingering script checks running on the client
	out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"})
	r.NoError(err)
	running := strings.Split(strings.TrimSpace(out.String()), "\n")
	r.LessOrEqual(len(running), 2) // task itself + 1 check == 2

	// Clean job stop: verify that checks were deregistered in Consul
	_, _, err = nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop
	r.NoError(err)
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-1")
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-2")
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "group-service-3")

	// Restore for next test
	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_group.nomad", jobId, "")
	r.Equal(2, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)

	// Crash a task: verify that checks return to their original state once the task restarts
	_, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"})
	if err != nil && err.Error() != "plugin is shut down" {
		r.FailNow(fmt.Sprintf("unexpected error: %v", err))
	}
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "group-service-3", capi.HealthCritical)

	// TODO(tgross) ...
	// Restart client: verify that checks are re-registered
}

// TestTaskScriptCheck runs a job with a single task with several services
// and associated script checks. It updates, stops, etc. the job to verify
// that script checks are re-registered as expected.
func (tc *ScriptChecksE2ETest) TestTaskScriptCheck(f *framework.F) {
	r := require.New(f.T())

	nomadClient := tc.Nomad()
	consulClient := tc.Consul()

	jobId := "checks_task" + uuid.Short()
	tc.jobIds = append(tc.jobIds, jobId)

	// Job run: verify that checks were registered in Consul
	allocs := e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_task.nomad", jobId, "")
	r.Equal(1, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)

	// Check in warning state becomes healthy after check passes
	_, _, err := exec(nomadClient, allocs,
		[]string{"/bin/sh", "-c", "touch ${NOMAD_TASK_DIR}/alive-2b"})
	r.NoError(err)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthPassing)

	// Job update: verify checks are re-registered in Consul
	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_task_update.nomad", jobId, "")
	r.Equal(1, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)

	// Verify we don't have any lingering script checks running on the client
	out, _, err := exec(nomadClient, allocs, []string{"pgrep", "sleep"})
	r.NoError(err)
	running := strings.Split(strings.TrimSpace(out.String()), "\n")
	r.LessOrEqual(len(running), 2) // task itself + 1 check == 2

	// Clean job stop: verify that checks were deregistered in Consul
	_, _, err = nomadClient.Jobs().Deregister(jobId, false, nil) // nomad job stop
	r.NoError(err)
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-1")
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-2")
	e2eutil.RequireConsulDeregistered(r, consulClient, consulNamespace, "task-service-3")

	// Restore for next test
	allocs = e2eutil.RegisterAndWaitForAllocs(f.T(),
		nomadClient, "consul/input/checks_task.nomad", jobId, "")
	r.Equal(2, len(allocs))
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)

	// Crash a task: verify that checks return to their original state once the task restarts
	_, _, err = exec(nomadClient, allocs, []string{"pkill", "sleep"})
	if err != nil && err.Error() != "plugin is shut down" {
		r.FailNow(fmt.Sprintf("unexpected error: %v", err))
	}
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-1", capi.HealthPassing)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-2", capi.HealthWarning)
	e2eutil.RequireConsulStatus(r, consulClient, consulNamespace, "task-service-3", capi.HealthCritical)

	// TODO(tgross) ...
	// Restart client: verify that checks are re-registered
}

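// AfterEach stops and purges every job registered during the test, then
// garbage-collects the cluster.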
func (tc *ScriptChecksE2ETest) AfterEach(f *framework.F) {
	r := require.New(f.T())

	nomadClient := tc.Nomad()
	jobs := nomadClient.Jobs()
	// Stop and purge all jobs started by the test
	for _, id := range tc.jobIds {
		_, _, err := jobs.Deregister(id, true, nil)
		r.NoError(err)
	}
	// Garbage collect
	r.NoError(nomadClient.System().GarbageCollect())
}

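// exec runs the given command inside a running allocation from the list,
// returning captured stdout and stderr.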
func exec(client *napi.Client, allocs []*napi.AllocationListStub, command []string) (bytes.Buffer, bytes.Buffer, error) {
	ctx, cancelFn := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancelFn()

	// we get a list of allocs from the registration call here, but one of
	// them might be stopped or stopping, which will return an error if we
	// try to exec into it.
	var alloc *napi.Allocation
	for _, stub := range allocs {
		if stub.DesiredStatus == "run" {
			alloc = &napi.Allocation{
				ID:        stub.ID,
				Namespace: stub.Namespace,
				NodeID:    stub.NodeID,
			}
		}
	}
	var stdout, stderr bytes.Buffer
	if alloc == nil {
		return stdout, stderr, fmt.Errorf("no allocation ready for exec")
	}
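	// Run the command in the "test" task without a TTY; the terminal size
	// channel is required by the API signature but unused here.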
	_, err := client.Allocations().Exec(ctx,
		alloc, "test", false,
		command,
		os.Stdin, &stdout, &stderr,
		make(chan napi.TerminalSize), nil)
	return stdout, stderr, err
}