github.com/ilhicas/nomad@v0.11.8/command/agent/testagent.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	"math/rand"
     8  	"net/http"
     9  	"net/http/httptest"
    10  	"os"
    11  	"path/filepath"
    12  	"strings"
    13  	"time"
    14  
    15  	testing "github.com/mitchellh/go-testing-interface"
    16  
    17  	metrics "github.com/armon/go-metrics"
    18  	"github.com/hashicorp/go-hclog"
    19  	"github.com/hashicorp/nomad/api"
    20  	"github.com/hashicorp/nomad/client/fingerprint"
    21  	"github.com/hashicorp/nomad/helper/freeport"
    22  	"github.com/hashicorp/nomad/helper/testlog"
    23  	"github.com/hashicorp/nomad/nomad"
    24  	"github.com/hashicorp/nomad/nomad/mock"
    25  	"github.com/hashicorp/nomad/nomad/structs"
    26  	sconfig "github.com/hashicorp/nomad/nomad/structs/config"
    27  	"github.com/hashicorp/nomad/testutil"
    28  )
    29  
    30  func init() {
    31  	rand.Seed(time.Now().UnixNano()) // seed random number generator
    32  }
    33  
// TempDir defines the base dir for temporary directories. It is initialised
// from os.TempDir(); Start creates an agent's temporary data directory
// beneath it when Config.DataDir is not set.
var TempDir = os.TempDir()
    36  
// TestAgent encapsulates an Agent with a default configuration and startup
// procedure suitable for testing. It manages a temporary data directory which
// is removed after shutdown.
type TestAgent struct {
	// T is the testing object. It is used to report fatal errors, to log
	// retry information and as the target of the default log writer.
	T testing.T

	// Name is an optional name of the agent. When set it is used as the
	// node name of the default configuration, as the prefix of the
	// temporary data directory name and as a prefix in log messages.
	Name string

	// ConfigCallback is an optional callback that allows modification of the
	// configuration before the agent is started. It is invoked by config(),
	// i.e. only when no explicit Config was supplied.
	ConfigCallback func(*Config)

	// Config is the agent configuration. If Config is nil then
	// Start() builds a default one via config(). If Config.DataDir is
	// set then it is the callers responsibility to clean up the data
	// directory. Otherwise, a temporary data directory is created and
	// removed when Shutdown() is called.
	Config *Config

	// LogOutput is the sink for the logs. If nil, start() replaces it
	// with a writer obtained from testlog.NewWriter(T).
	LogOutput io.Writer

	// DataDir is the data directory which is used when Config.DataDir
	// is not set. It is created automatically and removed when
	// Shutdown() is called.
	DataDir string

	// Key is the optional encryption key for the keyring. When non-empty,
	// Start() writes it to the serf keyring file inside the data directory.
	Key string

	// Server is a reference to the started HTTP endpoint.
	// It is valid after Start().
	Server *HTTPServer

	// Agent is the embedded Nomad agent.
	// It is valid after Start().
	*Agent

	// RootToken is auto-bootstrapped if ACLs are enabled. Start() only
	// sets it when the server is enabled and ACL.PolicyTTL is non-zero.
	RootToken *structs.ACLToken

	// ports that are reserved through freeport that must be returned at
	// the end of a test, done when Shutdown() is called. Every call to
	// pickRandomPorts appends to this slice.
	ports []int

	// Enterprise specifies if the agent is enterprise or not.
	// NewTestAgent initialises it from EnterpriseTestAgent.
	Enterprise bool
}
    88  
    89  // NewTestAgent returns a started agent with the given name and
    90  // configuration. The caller should call Shutdown() to stop the agent and
    91  // remove temporary directories.
    92  func NewTestAgent(t testing.T, name string, configCallback func(*Config)) *TestAgent {
    93  	a := &TestAgent{
    94  		T:              t,
    95  		Name:           name,
    96  		ConfigCallback: configCallback,
    97  		Enterprise:     EnterpriseTestAgent,
    98  	}
    99  
   100  	a.Start()
   101  	return a
   102  }
   103  
   104  // Start starts a test agent.
   105  func (a *TestAgent) Start() *TestAgent {
   106  	if a.Agent != nil {
   107  		a.T.Fatalf("TestAgent already started")
   108  	}
   109  	if a.Config == nil {
   110  		a.Config = a.config()
   111  	}
   112  	if a.Config.DataDir == "" {
   113  		name := "agent"
   114  		if a.Name != "" {
   115  			name = a.Name + "-agent"
   116  		}
   117  		name = strings.Replace(name, "/", "_", -1)
   118  		d, err := ioutil.TempDir(TempDir, name)
   119  		if err != nil {
   120  			a.T.Fatalf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err)
   121  		}
   122  		a.DataDir = d
   123  		a.Config.DataDir = d
   124  		a.Config.NomadConfig.DataDir = d
   125  	}
   126  
   127  	i := 10
   128  
   129  RETRY:
   130  	i--
   131  	a.pickRandomPorts(a.Config)
   132  	if a.Config.NodeName == "" {
   133  		a.Config.NodeName = fmt.Sprintf("Node %d", a.Config.Ports.RPC)
   134  	}
   135  
   136  	// write the keyring
   137  	if a.Key != "" {
   138  		writeKey := func(key, filename string) {
   139  			path := filepath.Join(a.Config.DataDir, filename)
   140  			if err := initKeyring(path, key); err != nil {
   141  				a.T.Fatalf("Error creating keyring %s: %s", path, err)
   142  			}
   143  		}
   144  		writeKey(a.Key, serfKeyring)
   145  	}
   146  
   147  	// we need the err var in the next exit condition
   148  	agent, err := a.start()
   149  	if err == nil {
   150  		a.Agent = agent
   151  	} else if i == 0 {
   152  		a.T.Fatalf("%s: Error starting agent: %v", a.Name, err)
   153  	} else {
   154  
   155  		if agent != nil {
   156  			agent.Shutdown()
   157  		}
   158  		wait := time.Duration(rand.Int31n(2000)) * time.Millisecond
   159  		a.T.Logf("%s: retrying in %v", a.Name, wait)
   160  		time.Sleep(wait)
   161  
   162  		// Clean out the data dir if we are responsible for it before we
   163  		// try again, since the old ports may have gotten written to
   164  		// the data dir, such as in the Raft configuration.
   165  		if a.DataDir != "" {
   166  			if err := os.RemoveAll(a.DataDir); err != nil {
   167  				a.T.Fatalf("%s: Error resetting data dir: %v", a.Name, err)
   168  			}
   169  		}
   170  
   171  		goto RETRY
   172  	}
   173  
   174  	failed := false
   175  	if a.Config.NomadConfig.BootstrapExpect == 1 && a.Config.Server.Enabled {
   176  		testutil.WaitForResult(func() (bool, error) {
   177  			args := &structs.GenericRequest{}
   178  			var leader string
   179  			err := a.RPC("Status.Leader", args, &leader)
   180  			return leader != "", err
   181  		}, func(err error) {
   182  			a.T.Logf("failed to find leader: %v", err)
   183  			failed = true
   184  		})
   185  	} else {
   186  		testutil.WaitForResult(func() (bool, error) {
   187  			req, _ := http.NewRequest("GET", "/v1/agent/self", nil)
   188  			resp := httptest.NewRecorder()
   189  			_, err := a.Server.AgentSelfRequest(resp, req)
   190  			return err == nil && resp.Code == 200, err
   191  		}, func(err error) {
   192  			a.T.Logf("failed to find leader: %v", err)
   193  			failed = true
   194  		})
   195  	}
   196  	if failed {
   197  		a.Agent.Shutdown()
   198  		if i == 0 {
   199  			a.T.Fatalf("ran out of retries trying to start test agent")
   200  		}
   201  		goto RETRY
   202  	}
   203  
   204  	// Check if ACLs enabled. Use special value of PolicyTTL 0s
   205  	// to do a bypass of this step. This is so we can test bootstrap
   206  	// without having to pass down a special flag.
   207  	if a.Config.ACL.Enabled && a.Config.Server.Enabled && a.Config.ACL.PolicyTTL != 0 {
   208  		a.RootToken = mock.ACLManagementToken()
   209  		state := a.Agent.server.State()
   210  		if err := state.BootstrapACLTokens(1, 0, a.RootToken); err != nil {
   211  			a.T.Fatalf("token bootstrap failed: %v", err)
   212  		}
   213  	}
   214  	return a
   215  }
   216  
   217  func (a *TestAgent) start() (*Agent, error) {
   218  	if a.LogOutput == nil {
   219  		a.LogOutput = testlog.NewWriter(a.T)
   220  	}
   221  
   222  	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
   223  	metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm)
   224  
   225  	if inm == nil {
   226  		return nil, fmt.Errorf("unable to set up in memory metrics needed for agent initialization")
   227  	}
   228  
   229  	logger := hclog.NewInterceptLogger(&hclog.LoggerOptions{
   230  		Name:       "agent",
   231  		Level:      hclog.LevelFromString(a.Config.LogLevel),
   232  		Output:     a.LogOutput,
   233  		JSONFormat: a.Config.LogJson,
   234  	})
   235  
   236  	agent, err := NewAgent(a.Config, logger, a.LogOutput, inm)
   237  	if err != nil {
   238  		return nil, err
   239  	}
   240  
   241  	// Setup the HTTP server
   242  	http, err := NewHTTPServer(agent, a.Config)
   243  	if err != nil {
   244  		return agent, err
   245  	}
   246  
   247  	a.Server = http
   248  	return agent, nil
   249  }
   250  
   251  // Shutdown stops the agent and removes the data directory if it is
   252  // managed by the test agent.
   253  func (a *TestAgent) Shutdown() error {
   254  	defer freeport.Return(a.ports)
   255  
   256  	defer func() {
   257  		if a.DataDir != "" {
   258  			os.RemoveAll(a.DataDir)
   259  		}
   260  	}()
   261  
   262  	// shutdown agent before endpoints
   263  	ch := make(chan error, 1)
   264  	go func() {
   265  		defer close(ch)
   266  		a.Server.Shutdown()
   267  		ch <- a.Agent.Shutdown()
   268  	}()
   269  
   270  	select {
   271  	case err := <-ch:
   272  		return err
   273  	case <-time.After(1 * time.Minute):
   274  		return fmt.Errorf("timed out while shutting down test agent")
   275  	}
   276  }
   277  
   278  func (a *TestAgent) HTTPAddr() string {
   279  	if a.Server == nil {
   280  		return ""
   281  	}
   282  	proto := "http://"
   283  	if a.Config.TLSConfig != nil && a.Config.TLSConfig.EnableHTTP {
   284  		proto = "https://"
   285  	}
   286  	return proto + a.Server.Addr
   287  }
   288  
   289  func (a *TestAgent) Client() *api.Client {
   290  	conf := api.DefaultConfig()
   291  	conf.Address = a.HTTPAddr()
   292  	c, err := api.NewClient(conf)
   293  	if err != nil {
   294  		a.T.Fatalf("Error creating Nomad API client: %s", err)
   295  	}
   296  	return c
   297  }
   298  
   299  // pickRandomPorts selects random ports from fixed size random blocks of
   300  // ports. This does not eliminate the chance for port conflict but
   301  // reduces it significantly with little overhead. Furthermore, asking
   302  // the kernel for a random port by binding to port 0 prolongs the test
   303  // execution (in our case +20sec) while also not fully eliminating the
   304  // chance of port conflicts for concurrently executed test binaries.
   305  // Instead of relying on one set of ports to be sufficient we retry
   306  // starting the agent with different ports on port conflict.
   307  func (a *TestAgent) pickRandomPorts(c *Config) {
   308  	ports := freeport.MustTake(3)
   309  	a.ports = append(a.ports, ports...)
   310  
   311  	c.Ports.HTTP = ports[0]
   312  	c.Ports.RPC = ports[1]
   313  	c.Ports.Serf = ports[2]
   314  
   315  	// Clear out the advertise addresses such that through retries we
   316  	// re-normalize the addresses correctly instead of using the values from the
   317  	// last port selection that had a port conflict.
   318  	if c.AdvertiseAddrs != nil {
   319  		c.AdvertiseAddrs.HTTP = ""
   320  		c.AdvertiseAddrs.RPC = ""
   321  		c.AdvertiseAddrs.Serf = ""
   322  	}
   323  
   324  	if err := c.normalizeAddrs(); err != nil {
   325  		a.T.Fatalf("error normalizing config: %v", err)
   326  	}
   327  }
   328  
   329  // TestConfig returns a unique default configuration for testing an
   330  // agent.
   331  func (a *TestAgent) config() *Config {
   332  	conf := DevConfig(nil)
   333  
   334  	// Customize the server configuration
   335  	config := nomad.DefaultConfig()
   336  	conf.NomadConfig = config
   337  
   338  	// Set the name
   339  	conf.NodeName = a.Name
   340  
   341  	// Bind and set ports
   342  	conf.BindAddr = "127.0.0.1"
   343  
   344  	conf.Consul = sconfig.DefaultConsulConfig()
   345  	conf.Vault.Enabled = new(bool)
   346  
   347  	// Tighten the Serf timing
   348  	config.SerfConfig.MemberlistConfig.SuspicionMult = 2
   349  	config.SerfConfig.MemberlistConfig.RetransmitMult = 2
   350  	config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond
   351  	config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond
   352  	config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond
   353  
   354  	// Tighten the Raft timing
   355  	config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond
   356  	config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond
   357  	config.RaftConfig.ElectionTimeout = 40 * time.Millisecond
   358  	config.RaftTimeout = 500 * time.Millisecond
   359  
   360  	// Tighten the autopilot timing
   361  	config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond
   362  	config.ServerHealthInterval = 50 * time.Millisecond
   363  	config.AutopilotInterval = 100 * time.Millisecond
   364  
   365  	// Tighten the fingerprinter timeouts
   366  	if conf.Client.Options == nil {
   367  		conf.Client.Options = make(map[string]string)
   368  	}
   369  	conf.Client.Options[fingerprint.TightenNetworkTimeoutsConfig] = "true"
   370  
   371  	if a.ConfigCallback != nil {
   372  		a.ConfigCallback(conf)
   373  	}
   374  
   375  	return conf
   376  }