github.com/smithx10/nomad@v0.9.1-rc1/command/agent/testagent.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	"math/rand"
     8  	"net/http"
     9  	"net/http/httptest"
    10  	"os"
    11  	"path/filepath"
    12  	"runtime"
    13  	"strings"
    14  	"time"
    15  
    16  	testing "github.com/mitchellh/go-testing-interface"
    17  
    18  	metrics "github.com/armon/go-metrics"
    19  	"github.com/hashicorp/consul/lib/freeport"
    20  	"github.com/hashicorp/go-hclog"
    21  	"github.com/hashicorp/nomad/api"
    22  	"github.com/hashicorp/nomad/client/fingerprint"
    23  	"github.com/hashicorp/nomad/helper/testlog"
    24  	"github.com/hashicorp/nomad/nomad"
    25  	"github.com/hashicorp/nomad/nomad/mock"
    26  	"github.com/hashicorp/nomad/nomad/structs"
    27  	sconfig "github.com/hashicorp/nomad/nomad/structs/config"
    28  	"github.com/hashicorp/nomad/testutil"
    29  )
    30  
    31  func init() {
    32  	rand.Seed(time.Now().UnixNano()) // seed random number generator
    33  }
    34  
// TempDir defines the base dir for temporary directories. Start passes it to
// ioutil.TempDir as the parent of the data directory it creates when the
// configuration does not name one. It defaults to os.TempDir().
var TempDir = os.TempDir()
    37  
// TestAgent encapsulates an Agent with a default configuration and startup
// procedure suitable for testing. It manages a temporary data directory which
// is removed after shutdown.
type TestAgent struct {
	// T is the testing object. It is used for logging and for failing the
	// test when the agent cannot be set up.
	T testing.T

	// Name is an optional name of the agent. When set it is used as a prefix
	// of the temporary data directory name, in log messages, and as the node
	// name of the default configuration.
	Name string

	// ConfigCallback is an optional callback that allows modification of the
	// configuration before the agent is started. It is only invoked on the
	// default configuration, i.e. when Config is nil at Start().
	ConfigCallback func(*Config)

	// Config is the agent configuration. If Config is nil then the default
	// test configuration is built when Start() is called. If Config.DataDir
	// is set then it is the callers responsibility to clean up the data
	// directory. Otherwise, a temporary data directory is created and
	// removed when Shutdown() is called.
	Config *Config

	// LogOutput is the sink for the logs. If nil, starting the agent
	// installs a writer obtained from testlog.NewWriter(T).
	LogOutput io.Writer

	// DataDir is the data directory which is used when Config.DataDir
	// is not set. It is created automatically and removed when
	// Shutdown() is called. It stays empty when the caller supplied
	// Config.DataDir.
	DataDir string

	// Key is the optional encryption key for the keyring. When non-empty it
	// is written to the serf keyring file inside the data directory before
	// the agent is started.
	Key string

	// Server is a reference to the started HTTP endpoint.
	// It is valid after Start().
	Server *HTTPServer

	// Agent is the embedded Nomad agent.
	// It is valid after Start().
	*Agent

	// RootToken is auto-bootstrapped if ACLs are enabled. Start() only sets
	// it when the server is enabled as well and ACL.PolicyTTL is non-zero.
	RootToken *structs.ACLToken
}
    82  
    83  // NewTestAgent returns a started agent with the given name and
    84  // configuration. The caller should call Shutdown() to stop the agent and
    85  // remove temporary directories.
    86  func NewTestAgent(t testing.T, name string, configCallback func(*Config)) *TestAgent {
    87  	a := &TestAgent{
    88  		T:              t,
    89  		Name:           name,
    90  		ConfigCallback: configCallback,
    91  	}
    92  
    93  	a.Start()
    94  	return a
    95  }
    96  
    97  // Start starts a test agent.
    98  func (a *TestAgent) Start() *TestAgent {
    99  	if a.Agent != nil {
   100  		a.T.Fatalf("TestAgent already started")
   101  	}
   102  	if a.Config == nil {
   103  		a.Config = a.config()
   104  	}
   105  	if a.Config.DataDir == "" {
   106  		name := "agent"
   107  		if a.Name != "" {
   108  			name = a.Name + "-agent"
   109  		}
   110  		name = strings.Replace(name, "/", "_", -1)
   111  		d, err := ioutil.TempDir(TempDir, name)
   112  		if err != nil {
   113  			a.T.Fatalf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err)
   114  		}
   115  		a.DataDir = d
   116  		a.Config.DataDir = d
   117  		a.Config.NomadConfig.DataDir = d
   118  	}
   119  
   120  	i := 10
   121  
   122  RETRY:
   123  	for ; i >= 0; i-- {
   124  		a.pickRandomPorts(a.Config)
   125  		if a.Config.NodeName == "" {
   126  			a.Config.NodeName = fmt.Sprintf("Node %d", a.Config.Ports.RPC)
   127  		}
   128  
   129  		// write the keyring
   130  		if a.Key != "" {
   131  			writeKey := func(key, filename string) {
   132  				path := filepath.Join(a.Config.DataDir, filename)
   133  				if err := initKeyring(path, key); err != nil {
   134  					a.T.Fatalf("Error creating keyring %s: %s", path, err)
   135  				}
   136  			}
   137  			writeKey(a.Key, serfKeyring)
   138  		}
   139  
   140  		// we need the err var in the next exit condition
   141  		if agent, err := a.start(); err == nil {
   142  			a.Agent = agent
   143  			break
   144  		} else if i == 0 {
   145  			a.T.Logf("%s: Error starting agent: %v", a.Name, err)
   146  			runtime.Goexit()
   147  		} else {
   148  			if agent != nil {
   149  				agent.Shutdown()
   150  			}
   151  			wait := time.Duration(rand.Int31n(2000)) * time.Millisecond
   152  			a.T.Logf("%s: retrying in %v", a.Name, wait)
   153  			time.Sleep(wait)
   154  		}
   155  
   156  		// Clean out the data dir if we are responsible for it before we
   157  		// try again, since the old ports may have gotten written to
   158  		// the data dir, such as in the Raft configuration.
   159  		if a.DataDir != "" {
   160  			if err := os.RemoveAll(a.DataDir); err != nil {
   161  				a.T.Logf("%s: Error resetting data dir: %v", a.Name, err)
   162  				runtime.Goexit()
   163  			}
   164  		}
   165  	}
   166  
   167  	failed := false
   168  	if a.Config.NomadConfig.Bootstrap && a.Config.Server.Enabled {
   169  		testutil.WaitForResult(func() (bool, error) {
   170  			args := &structs.GenericRequest{}
   171  			var leader string
   172  			err := a.RPC("Status.Leader", args, &leader)
   173  			return leader != "", err
   174  		}, func(err error) {
   175  			a.T.Logf("failed to find leader: %v", err)
   176  			failed = true
   177  		})
   178  	} else {
   179  		testutil.WaitForResult(func() (bool, error) {
   180  			req, _ := http.NewRequest("GET", "/v1/agent/self", nil)
   181  			resp := httptest.NewRecorder()
   182  			_, err := a.Server.AgentSelfRequest(resp, req)
   183  			return err == nil && resp.Code == 200, err
   184  		}, func(err error) {
   185  			a.T.Logf("failed to find leader: %v", err)
   186  			failed = true
   187  		})
   188  	}
   189  	if failed {
   190  		a.Agent.Shutdown()
   191  		goto RETRY
   192  	}
   193  
   194  	// Check if ACLs enabled. Use special value of PolicyTTL 0s
   195  	// to do a bypass of this step. This is so we can test bootstrap
   196  	// without having to pass down a special flag.
   197  	if a.Config.ACL.Enabled && a.Config.Server.Enabled && a.Config.ACL.PolicyTTL != 0 {
   198  		a.RootToken = mock.ACLManagementToken()
   199  		state := a.Agent.server.State()
   200  		if err := state.BootstrapACLTokens(1, 0, a.RootToken); err != nil {
   201  			a.T.Fatalf("token bootstrap failed: %v", err)
   202  		}
   203  	}
   204  	return a
   205  }
   206  
   207  func (a *TestAgent) start() (*Agent, error) {
   208  	if a.LogOutput == nil {
   209  		a.LogOutput = testlog.NewWriter(a.T)
   210  	}
   211  
   212  	inm := metrics.NewInmemSink(10*time.Second, time.Minute)
   213  	metrics.NewGlobal(metrics.DefaultConfig("service-name"), inm)
   214  
   215  	if inm == nil {
   216  		return nil, fmt.Errorf("unable to set up in memory metrics needed for agent initialization")
   217  	}
   218  
   219  	logger := hclog.New(&hclog.LoggerOptions{
   220  		Name:       "agent",
   221  		Level:      hclog.LevelFromString(a.Config.LogLevel),
   222  		Output:     a.LogOutput,
   223  		JSONFormat: a.Config.LogJson,
   224  	})
   225  
   226  	agent, err := NewAgent(a.Config, logger, a.LogOutput, inm)
   227  	if err != nil {
   228  		return nil, err
   229  	}
   230  
   231  	// Setup the HTTP server
   232  	http, err := NewHTTPServer(agent, a.Config)
   233  	if err != nil {
   234  		return agent, err
   235  	}
   236  
   237  	a.Server = http
   238  	return agent, nil
   239  }
   240  
   241  // Shutdown stops the agent and removes the data directory if it is
   242  // managed by the test agent.
   243  func (a *TestAgent) Shutdown() error {
   244  	defer func() {
   245  		if a.DataDir != "" {
   246  			os.RemoveAll(a.DataDir)
   247  		}
   248  	}()
   249  
   250  	// shutdown agent before endpoints
   251  	ch := make(chan error, 1)
   252  	go func() {
   253  		defer close(ch)
   254  		a.Server.Shutdown()
   255  		ch <- a.Agent.Shutdown()
   256  	}()
   257  
   258  	select {
   259  	case err := <-ch:
   260  		return err
   261  	case <-time.After(1 * time.Minute):
   262  		return fmt.Errorf("timed out while shutting down test agent")
   263  	}
   264  }
   265  
   266  func (a *TestAgent) HTTPAddr() string {
   267  	if a.Server == nil {
   268  		return ""
   269  	}
   270  	return "http://" + a.Server.Addr
   271  }
   272  
   273  func (a *TestAgent) Client() *api.Client {
   274  	conf := api.DefaultConfig()
   275  	conf.Address = a.HTTPAddr()
   276  	c, err := api.NewClient(conf)
   277  	if err != nil {
   278  		a.T.Fatalf("Error creating Nomad API client: %s", err)
   279  	}
   280  	return c
   281  }
   282  
   283  // pickRandomPorts selects random ports from fixed size random blocks of
   284  // ports. This does not eliminate the chance for port conflict but
   285  // reduces it significantly with little overhead. Furthermore, asking
   286  // the kernel for a random port by binding to port 0 prolongs the test
   287  // execution (in our case +20sec) while also not fully eliminating the
   288  // chance of port conflicts for concurrently executed test binaries.
   289  // Instead of relying on one set of ports to be sufficient we retry
   290  // starting the agent with different ports on port conflict.
   291  func (a *TestAgent) pickRandomPorts(c *Config) {
   292  	ports := freeport.GetT(a.T, 3)
   293  	c.Ports.HTTP = ports[0]
   294  	c.Ports.RPC = ports[1]
   295  	c.Ports.Serf = ports[2]
   296  
   297  	// Clear out the advertise addresses such that through retries we
   298  	// re-normalize the addresses correctly instead of using the values from the
   299  	// last port selection that had a port conflict.
   300  	if c.AdvertiseAddrs != nil {
   301  		c.AdvertiseAddrs.HTTP = ""
   302  		c.AdvertiseAddrs.RPC = ""
   303  		c.AdvertiseAddrs.Serf = ""
   304  	}
   305  
   306  	if err := c.normalizeAddrs(); err != nil {
   307  		a.T.Fatalf("error normalizing config: %v", err)
   308  	}
   309  }
   310  
   311  // TestConfig returns a unique default configuration for testing an
   312  // agent.
   313  func (a *TestAgent) config() *Config {
   314  	conf := DevConfig()
   315  
   316  	// Customize the server configuration
   317  	config := nomad.DefaultConfig()
   318  	conf.NomadConfig = config
   319  
   320  	// Set the name
   321  	conf.NodeName = a.Name
   322  
   323  	// Bind and set ports
   324  	conf.BindAddr = "127.0.0.1"
   325  
   326  	conf.Consul = sconfig.DefaultConsulConfig()
   327  	conf.Vault.Enabled = new(bool)
   328  
   329  	// Tighten the Serf timing
   330  	config.SerfConfig.MemberlistConfig.SuspicionMult = 2
   331  	config.SerfConfig.MemberlistConfig.RetransmitMult = 2
   332  	config.SerfConfig.MemberlistConfig.ProbeTimeout = 50 * time.Millisecond
   333  	config.SerfConfig.MemberlistConfig.ProbeInterval = 100 * time.Millisecond
   334  	config.SerfConfig.MemberlistConfig.GossipInterval = 100 * time.Millisecond
   335  
   336  	// Tighten the Raft timing
   337  	config.RaftConfig.LeaderLeaseTimeout = 20 * time.Millisecond
   338  	config.RaftConfig.HeartbeatTimeout = 40 * time.Millisecond
   339  	config.RaftConfig.ElectionTimeout = 40 * time.Millisecond
   340  	config.RaftConfig.StartAsLeader = true
   341  	config.RaftTimeout = 500 * time.Millisecond
   342  
   343  	// Tighten the autopilot timing
   344  	config.AutopilotConfig.ServerStabilizationTime = 100 * time.Millisecond
   345  	config.ServerHealthInterval = 50 * time.Millisecond
   346  	config.AutopilotInterval = 100 * time.Millisecond
   347  
   348  	// Bootstrap ourselves
   349  	config.Bootstrap = true
   350  	config.BootstrapExpect = 1
   351  
   352  	// Tighten the fingerprinter timeouts
   353  	if conf.Client.Options == nil {
   354  		conf.Client.Options = make(map[string]string)
   355  	}
   356  	conf.Client.Options[fingerprint.TightenNetworkTimeoutsConfig] = "true"
   357  
   358  	if a.ConfigCallback != nil {
   359  		a.ConfigCallback(conf)
   360  	}
   361  
   362  	return conf
   363  }