github.com/sl1pm4t/consul@v1.4.5-0.20190325224627-74c31c540f9c/agent/testagent.go (about)

     1  package agent
     2  
     3  import (
     4  	"fmt"
     5  	"io"
     6  	"io/ioutil"
     7  	"log"
     8  	"math/rand"
     9  	"net/http"
    10  	"net/http/httptest"
    11  	"os"
    12  	"path/filepath"
    13  	"strconv"
    14  	"strings"
    15  	"testing"
    16  	"time"
    17  
    18  	metrics "github.com/armon/go-metrics"
    19  	uuid "github.com/hashicorp/go-uuid"
    20  
    21  	"github.com/hashicorp/consul/agent/config"
    22  	"github.com/hashicorp/consul/agent/connect"
    23  	"github.com/hashicorp/consul/agent/consul"
    24  	"github.com/hashicorp/consul/agent/structs"
    25  	"github.com/hashicorp/consul/api"
    26  	"github.com/hashicorp/consul/lib/freeport"
    27  	"github.com/hashicorp/consul/logger"
    28  	"github.com/hashicorp/consul/testutil/retry"
    29  
    30  	"github.com/stretchr/testify/require"
    31  )
    32  
    33  func init() {
    34  	rand.Seed(time.Now().UnixNano()) // seed random number generator
    35  }
    36  
// TempDir defines the base dir for temporary directories. It defaults to
// os.TempDir() and is the parent directory in which TestAgent.Start creates
// the per-agent data directory.
var TempDir = os.TempDir()
    39  
// TestAgent encapsulates an Agent with a default configuration and
// startup procedure suitable for testing. Errors during creation or
// startup fail the test instead of being returned (or are raised as a
// panic when ExpectConfigError is set). Start creates a temporary data
// directory when DataDir is empty.
//
// NOTE(review): Shutdown currently does not remove that directory; the
// removal code there is commented out.
type TestAgent struct {
	// Name is an optional name of the agent.
	Name string

	// HCL is an optional configuration fragment in HCL format. Start passes
	// it to TestConfig as an additional configuration source.
	HCL string

	// ExpectConfigError can be set to prevent the agent retrying Start on errors
	// and eventually blowing up with runtime.Goexit. This enables tests to assert
	// that some specific bit of config actually does prevent startup entirely in
	// a reasonable way without reproducing a lot of the boilerplate here.
	ExpectConfigError bool

	// Config is the agent configuration.
	//
	// NOTE(review): Start assigns this field from TestConfig() on every
	// start attempt, so a value set by the caller before Start is replaced.
	Config *config.RuntimeConfig

	// LogOutput is the sink for the logs. If nil, logs are written
	// to os.Stderr.
	LogOutput io.Writer

	// LogWriter is used for streaming logs.
	LogWriter *logger.LogWriter

	// DataDir is an optional data directory. When it is empty, Start
	// creates a temporary directory under TempDir and hands it to the
	// agent through a data_dir configuration fragment; this field itself
	// is not updated. When it is non-empty, Start removes the directory
	// with os.RemoveAll before retrying a failed start.
	DataDir string

	// Key is the optional encryption key for the LAN and WAN keyring.
	Key string

	// UseTLS, if true, will disable the HTTP port and enable the HTTPS
	// one.
	UseTLS bool

	// dns is a reference to the first started DNS endpoint.
	// It is valid after Start().
	dns *DNSServer

	// srv is a reference to the first started HTTP endpoint.
	// It is valid after Start().
	srv *HTTPServer

	// Agent is the embedded consul agent.
	// It is valid after Start().
	*Agent
}
    94  
    95  // NewTestAgent returns a started agent with the given name and
    96  // configuration. It fails the test if the Agent could not be started. The
    97  // caller should call Shutdown() to stop the agent and remove temporary
    98  // directories.
    99  func NewTestAgent(t *testing.T, name string, hcl string) *TestAgent {
   100  	a := &TestAgent{Name: name, HCL: hcl}
   101  	a.Start(t)
   102  	return a
   103  }
   104  
   105  func NewUnstartedAgent(t *testing.T, name string, hcl string) (*Agent, error) {
   106  	c := TestConfig(config.Source{Name: name, Format: "hcl", Data: hcl})
   107  	a, err := New(c)
   108  	if err != nil {
   109  		return nil, err
   110  	}
   111  	return a, nil
   112  }
   113  
   114  // Start starts a test agent. It fails the test if the agent could not be started.
   115  func (a *TestAgent) Start(t *testing.T) *TestAgent {
   116  	require := require.New(t)
   117  	require.Nil(a.Agent, "TestAgent already started")
   118  	var hclDataDir string
   119  	if a.DataDir == "" {
   120  		name := "agent"
   121  		if a.Name != "" {
   122  			name = a.Name + "-agent"
   123  		}
   124  		name = strings.Replace(name, "/", "_", -1)
   125  		d, err := ioutil.TempDir(TempDir, name)
   126  		require.NoError(err, fmt.Sprintf("Error creating data dir %s: %s", filepath.Join(TempDir, name), err))
   127  		hclDataDir = `data_dir = "` + d + `"`
   128  	}
   129  	id := NodeID()
   130  
   131  	for i := 10; i >= 0; i-- {
   132  		a.Config = TestConfig(
   133  			randomPortsSource(a.UseTLS),
   134  			config.Source{Name: a.Name, Format: "hcl", Data: a.HCL},
   135  			config.Source{Name: a.Name + ".data_dir", Format: "hcl", Data: hclDataDir},
   136  		)
   137  
   138  		// write the keyring
   139  		if a.Key != "" {
   140  			writeKey := func(key, filename string) {
   141  				path := filepath.Join(a.Config.DataDir, filename)
   142  				err := initKeyring(path, key)
   143  				require.NoError(err, fmt.Sprintf("Error creating keyring %s: %s", path, err))
   144  			}
   145  			writeKey(a.Key, SerfLANKeyring)
   146  			writeKey(a.Key, SerfWANKeyring)
   147  		}
   148  
   149  		agent, err := New(a.Config)
   150  		require.NoError(err, fmt.Sprintf("Error creating agent: %s", err))
   151  
   152  		logOutput := a.LogOutput
   153  		if logOutput == nil {
   154  			logOutput = os.Stderr
   155  		}
   156  		agent.LogOutput = logOutput
   157  		agent.LogWriter = a.LogWriter
   158  		agent.logger = log.New(logOutput, a.Name+" - ", log.LstdFlags|log.Lmicroseconds)
   159  		agent.MemSink = metrics.NewInmemSink(1*time.Second, time.Minute)
   160  
   161  		// we need the err var in the next exit condition
   162  		if err := agent.Start(); err == nil {
   163  			a.Agent = agent
   164  			break
   165  		} else if i == 0 {
   166  			require.Fail("%s %s Error starting agent: %s", id, a.Name, err)
   167  		} else if a.ExpectConfigError {
   168  			// Panic the error since this can be caught if needed. Pretty gross way to
   169  			// detect errors but enough for now and this is a tiny edge case that I'd
   170  			// otherwise not have a way to test at all...
   171  			panic(err)
   172  		} else {
   173  			agent.ShutdownAgent()
   174  			agent.ShutdownEndpoints()
   175  			wait := time.Duration(rand.Int31n(2000)) * time.Millisecond
   176  			fmt.Println(id, a.Name, "retrying in", wait)
   177  			time.Sleep(wait)
   178  		}
   179  
   180  		// Clean out the data dir if we are responsible for it before we
   181  		// try again, since the old ports may have gotten written to
   182  		// the data dir, such as in the Raft configuration.
   183  		if a.DataDir != "" {
   184  			if err := os.RemoveAll(a.DataDir); err != nil {
   185  				require.Fail("%s %s Error resetting data dir: %s", id, a.Name, err)
   186  			}
   187  		}
   188  	}
   189  
   190  	// Start the anti-entropy syncer
   191  	a.Agent.StartSync()
   192  
   193  	var out structs.IndexedNodes
   194  	retry.Run(t, func(r *retry.R) {
   195  		if len(a.httpServers) == 0 {
   196  			r.Fatal(a.Name, "waiting for server")
   197  		}
   198  		if a.Config.Bootstrap && a.Config.ServerMode {
   199  			// Ensure we have a leader and a node registration.
   200  			args := &structs.DCSpecificRequest{
   201  				Datacenter: a.Config.Datacenter,
   202  				QueryOptions: structs.QueryOptions{
   203  					MinQueryIndex: out.Index,
   204  					MaxQueryTime:  25 * time.Millisecond,
   205  				},
   206  			}
   207  			if err := a.RPC("Catalog.ListNodes", args, &out); err != nil {
   208  				r.Fatal(a.Name, "Catalog.ListNodes failed:", err)
   209  			}
   210  			if !out.QueryMeta.KnownLeader {
   211  				r.Fatal(a.Name, "No leader")
   212  			}
   213  			if out.Index == 0 {
   214  				r.Fatal(a.Name, ": Consul index is 0")
   215  			}
   216  		} else {
   217  			req, _ := http.NewRequest("GET", "/v1/agent/self", nil)
   218  			resp := httptest.NewRecorder()
   219  			_, err := a.httpServers[0].AgentSelf(resp, req)
   220  			if err != nil || resp.Code != 200 {
   221  				r.Fatal(a.Name, "failed OK response", err)
   222  			}
   223  		}
   224  	})
   225  	a.dns = a.dnsServers[0]
   226  	a.srv = a.httpServers[0]
   227  	return a
   228  }
   229  
// Shutdown stops the agent and then its endpoints, returning the error
// reported by ShutdownAgent. It does not remove the data directory; the
// removal that used to live here is disabled, see the note below.
func (a *TestAgent) Shutdown() error {
	/* Removed this because it was breaking persistence tests where we would
	persist a service and load it through a new agent with the same data-dir.
	Not sure if we still need this for other things, everywhere we manually make
	a data dir we already do 'defer os.RemoveAll()'
	defer func() {
		if a.DataDir != "" {
			os.RemoveAll(a.DataDir)
		}
	}()*/

	// shutdown agent before endpoints: the deferred ShutdownEndpoints call
	// only runs once ShutdownAgent has returned.
	defer a.Agent.ShutdownEndpoints()
	return a.Agent.ShutdownAgent()
}
   247  
   248  func (a *TestAgent) DNSAddr() string {
   249  	if a.dns == nil {
   250  		return ""
   251  	}
   252  	return a.dns.Addr
   253  }
   254  
   255  func (a *TestAgent) HTTPAddr() string {
   256  	if a.srv == nil {
   257  		return ""
   258  	}
   259  	return a.srv.Addr
   260  }
   261  
   262  func (a *TestAgent) SegmentAddr(name string) string {
   263  	if server, ok := a.Agent.delegate.(*consul.Server); ok {
   264  		return server.LANSegmentAddr(name)
   265  	}
   266  	return ""
   267  }
   268  
   269  func (a *TestAgent) Client() *api.Client {
   270  	conf := api.DefaultConfig()
   271  	conf.Address = a.HTTPAddr()
   272  	c, err := api.NewClient(conf)
   273  	if err != nil {
   274  		panic(fmt.Sprintf("Error creating consul API client: %s", err))
   275  	}
   276  	return c
   277  }
   278  
   279  // DNSDisableCompression disables compression for all started DNS servers.
   280  func (a *TestAgent) DNSDisableCompression(b bool) {
   281  	for _, srv := range a.dnsServers {
   282  		srv.disableCompression.Store(b)
   283  	}
   284  }
   285  
   286  func (a *TestAgent) consulConfig() *consul.Config {
   287  	c, err := a.Agent.consulConfig()
   288  	if err != nil {
   289  		panic(err)
   290  	}
   291  	return c
   292  }
   293  
   294  // pickRandomPorts selects random ports from fixed size random blocks of
   295  // ports. This does not eliminate the chance for port conflict but
   296  // reduces it significantly with little overhead. Furthermore, asking
   297  // the kernel for a random port by binding to port 0 prolongs the test
   298  // execution (in our case +20sec) while also not fully eliminating the
   299  // chance of port conflicts for concurrently executed test binaries.
   300  // Instead of relying on one set of ports to be sufficient we retry
   301  // starting the agent with different ports on port conflict.
   302  func randomPortsSource(tls bool) config.Source {
   303  	ports := freeport.Get(6)
   304  	if tls {
   305  		ports[1] = -1
   306  	} else {
   307  		ports[2] = -1
   308  	}
   309  	return config.Source{
   310  		Name:   "ports",
   311  		Format: "hcl",
   312  		Data: `
   313  			ports = {
   314  				dns = ` + strconv.Itoa(ports[0]) + `
   315  				http = ` + strconv.Itoa(ports[1]) + `
   316  				https = ` + strconv.Itoa(ports[2]) + `
   317  				serf_lan = ` + strconv.Itoa(ports[3]) + `
   318  				serf_wan = ` + strconv.Itoa(ports[4]) + `
   319  				server = ` + strconv.Itoa(ports[5]) + `
   320  			}
   321  		`,
   322  	}
   323  }
   324  
   325  func NodeID() string {
   326  	id, err := uuid.GenerateUUID()
   327  	if err != nil {
   328  		panic(err)
   329  	}
   330  	return id
   331  }
   332  
   333  // TestConfig returns a unique default configuration for testing an
   334  // agent.
   335  func TestConfig(sources ...config.Source) *config.RuntimeConfig {
   336  	nodeID := NodeID()
   337  	testsrc := config.Source{
   338  		Name:   "test",
   339  		Format: "hcl",
   340  		Data: `
   341  			bind_addr = "127.0.0.1"
   342  			advertise_addr = "127.0.0.1"
   343  			datacenter = "dc1"
   344  			bootstrap = true
   345  			server = true
   346  			node_id = "` + nodeID + `"
   347  			node_name = "Node ` + nodeID + `"
   348  			connect {
   349  				enabled = true
   350  				ca_config {
   351  					cluster_id = "` + connect.TestClusterID + `"
   352  				}
   353  			}
   354  			performance {
   355  				raft_multiplier = 1
   356  			}
   357  		`,
   358  	}
   359  
   360  	b, err := config.NewBuilder(config.Flags{})
   361  	if err != nil {
   362  		panic("NewBuilder failed: " + err.Error())
   363  	}
   364  	b.Head = append(b.Head, testsrc)
   365  	b.Tail = append(b.Tail, config.DefaultConsulSource(), config.DevConsulSource())
   366  	b.Tail = append(b.Tail, sources...)
   367  
   368  	cfg, err := b.BuildAndValidate()
   369  	if err != nil {
   370  		panic("Error building config: " + err.Error())
   371  	}
   372  
   373  	for _, w := range b.Warnings {
   374  		fmt.Println("WARNING:", w)
   375  	}
   376  
   377  	// Disable connect proxy execution since it causes all kinds of problems with
   378  	// self-executing tests etc.
   379  	cfg.ConnectTestDisableManagedProxies = true
   380  	// Effectively disables the delay after root rotation before requesting CSRs
   381  	// to make test deterministic. 0 results in default jitter being applied but a
   382  	// tiny delay is effectively thre same.
   383  	cfg.ConnectTestCALeafRootChangeSpread = 1 * time.Nanosecond
   384  
   385  	return &cfg
   386  }
   387  
   388  // TestACLConfig returns a default configuration for testing an agent
   389  // with ACLs.
   390  func TestACLConfig() string {
   391  	return `
   392  		acl_datacenter = "dc1"
   393  		acl_default_policy = "deny"
   394  		acl_master_token = "root"
   395  		acl_agent_token = "root"
   396  		acl_agent_master_token = "towel"
   397  		acl_enforce_version_8 = true
   398  	`
   399  }