github.com/hernad/nomad@v1.6.112/command/operator_debug.go

     1  // Copyright (c) HashiCorp, Inc.
     2  // SPDX-License-Identifier: MPL-2.0
     3  
     4  package command
     5  
     6  import (
     7  	"archive/tar"
     8  	"compress/gzip"
     9  	"context"
    10  	"crypto/tls"
    11  	"encoding/json"
    12  	"errors"
    13  	"flag"
    14  	"fmt"
    15  	"html/template"
    16  	"io"
    17  	"net/http"
    18  	"os"
    19  	"os/signal"
    20  	"path/filepath"
    21  	"strconv"
    22  	"strings"
    23  	"syscall"
    24  	"time"
    25  
    26  	"github.com/hashicorp/go-cleanhttp"
    27  	"github.com/hashicorp/go-multierror"
    28  	goversion "github.com/hashicorp/go-version"
    29  	"github.com/hernad/nomad/api"
    30  	"github.com/hernad/nomad/api/contexts"
    31  	"github.com/hernad/nomad/helper"
    32  	"github.com/hernad/nomad/helper/escapingfs"
    33  	"github.com/hernad/nomad/version"
    34  	"github.com/posener/complete"
    35  	"golang.org/x/exp/maps"
    36  	"golang.org/x/exp/slices"
    37  )
    38  
    39  type OperatorDebugCommand struct {
    40  	Meta
    41  
    42  	timestamp     string
    43  	collectDir    string
    44  	duration      time.Duration
    45  	interval      time.Duration
    46  	pprofInterval time.Duration
    47  	pprofDuration time.Duration
    48  	logLevel      string
    49  	maxNodes      int
    50  	nodeClass     string
    51  	nodeIDs       []string
    52  	serverIDs     []string
    53  	topics        map[api.Topic][]string
    54  	index         uint64
    55  	consul        *external
    56  	vault         *external
    57  	manifest      []string
    58  	ctx           context.Context
    59  	cancel        context.CancelFunc
    60  	opts          *api.QueryOptions
    61  	verbose       bool
    62  	members       *api.ServerMembers
    63  	nodes         []*api.NodeListStub
    64  }
    65  
    66  const (
    67  	userAgent                     = "nomad operator debug"
    68  	clusterDir                    = "cluster"
    69  	clientDir                     = "client"
    70  	serverDir                     = "server"
    71  	intervalDir                   = "interval"
    72  	minimumVersionPprofConstraint = ">= 0.11.0, <= 0.11.2"
    73  )
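        // Note: despite its name, minimumVersionPprofConstraint describes the
        // *broken* version range. checkVersion (defined elsewhere in this
        // package) is expected to return an error for agent versions that fall
        // inside it, so pprof capture can be skipped on 0.11.0-0.11.2, where
        // the threadcreate profile panics (see collectPeriodicPprofs below).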
    74  
    75  func (c *OperatorDebugCommand) Help() string {
    76  	helpText := `
    77  Usage: nomad operator debug [options]
    78  
    79    Build an archive containing Nomad cluster configuration and state, and Consul
    80    and Vault status. Include logs and pprof profiles for selected servers and
    81    client nodes.
    82  
    83    If ACLs are enabled, this command will require a token with the 'node:read'
    84    capability to run. In order to collect information, the token will also
    85    require the 'agent:read' and 'operator:read' capabilities, as well as the
    86    'list-jobs' capability for all namespaces. To collect pprof profiles, the
    87    token will also require 'agent:write', or the agent must have the
    88    enable_debug configuration option set to true.
    89  
    90    If event stream capture is enabled, the Job, Allocation, Deployment,
    91    and Evaluation topics require 'namespace:read-job' capabilities, and
    92    the Node topic requires 'node:read'. A 'management' token is required
    93    to capture ACLToken, ACLPolicy, or all events.
    94  
    95  General Options:
    96  
    97    ` + generalOptionsUsage(usageOptsDefault|usageOptsNoNamespace) + `
    98  
    99  Consul Options:
   100  
   101    -consul-http-addr=<addr>
   102      The address and port of the Consul HTTP agent. Overrides the
   103      CONSUL_HTTP_ADDR environment variable.
   104  
   105    -consul-token=<token>
   106      Token used to query Consul. Overrides the CONSUL_HTTP_TOKEN environment
   107      variable and the Consul token file.
   108  
   109    -consul-token-file=<path>
   110      Path to the Consul token file. Overrides the CONSUL_HTTP_TOKEN_FILE
   111      environment variable.
   112  
   113    -consul-client-cert=<path>
   114      Path to the Consul client cert file. Overrides the CONSUL_CLIENT_CERT
   115      environment variable.
   116  
   117    -consul-client-key=<path>
   118      Path to the Consul client key file. Overrides the CONSUL_CLIENT_KEY
   119      environment variable.
   120  
   121    -consul-ca-cert=<path>
   122      Path to a CA file to use with Consul. Overrides the CONSUL_CACERT
   123      environment variable and the Consul CA path.
   124  
   125    -consul-ca-path=<path>
   126      Path to a directory of PEM encoded CA cert files to verify the Consul
   127      certificate. Overrides the CONSUL_CAPATH environment variable.
   128  
   129  Vault Options:
   130  
   131    -vault-address=<addr>
   132      The address and port of the Vault HTTP agent. Overrides the VAULT_ADDR
   133      environment variable.
   134  
   135    -vault-token=<token>
   136      Token used to query Vault. Overrides the VAULT_TOKEN environment
   137      variable.
   138  
   139    -vault-client-cert=<path>
   140      Path to the Vault client cert file. Overrides the VAULT_CLIENT_CERT
   141      environment variable.
   142  
   143    -vault-client-key=<path>
   144      Path to the Vault client key file. Overrides the VAULT_CLIENT_KEY
   145      environment variable.
   146  
   147    -vault-ca-cert=<path>
   148      Path to a CA file to use with Vault. Overrides the VAULT_CACERT
   149      environment variable and the Vault CA path.
   150  
   151    -vault-ca-path=<path>
   152      Path to a directory of PEM encoded CA cert files to verify the Vault
   153      certificate. Overrides the VAULT_CAPATH environment variable.
   154  
   155  Debug Options:
   156  
   157    -duration=<duration>
   158      Set the duration of the debug capture. Logs will be captured from specified servers and
   159      nodes at "log-level". Defaults to 2m.
   160  
   161    -event-index=<index>
   162      Specifies the index to start streaming events from. If the requested index
   163      is no longer in the buffer, the stream will start at the next available index.
   164      Defaults to 0.
   165  
   166    -event-topic=<Allocation,Evaluation,Job,Node,*>:<filter>
   167      Enable event stream capture, filtered by comma delimited list of topic filters.
   168      Examples:
   169        "all" or "*:*" for all events
   170        "Evaluation" or "Evaluation:*" for all evaluation events
   171        "*:example" for all events related to the job "example"
   172      Defaults to "none" (disabled).
   173  
   174    -interval=<interval>
   175      The interval between snapshots of the Nomad state. Set interval equal to
   176      duration to capture a single snapshot. Defaults to 30s.
   177  
   178    -log-level=<level>
   179      The log level to monitor. Defaults to DEBUG.
   180  
   181    -max-nodes=<count>
   182      Cap the maximum number of client nodes included in the capture. Defaults
   183      to 10, set to 0 for unlimited.
   184  
   185    -node-id=<node1>,<node2>
   186      Comma separated list of Nomad client node ids to monitor for logs, API
   187      outputs, and pprof profiles. Accepts id prefixes, and "all" to select all
   188      nodes (up to count = max-nodes). Defaults to "all".
   189  
   190    -node-class=<node-class>
   191      Filter client nodes based on node class.
   192  
   193    -pprof-duration=<duration>
   194      Duration for pprof collection. Defaults to 1s or -duration, whichever is less.
   195  
   196    -pprof-interval=<pprof-interval>
   197      The interval between pprof collections. Set this interval equal to
   198      -pprof-duration to capture a single snapshot. Defaults to 250ms or
   199      -pprof-duration, whichever is less.
   200  
   201    -server-id=<server1>,<server2>
   202      Comma separated list of Nomad server names to monitor for logs, API
   203      outputs, and pprof profiles. Accepts server names, "leader", or "all".
   204      Defaults to "all".
   205  
   206    -stale=<true|false>
   207      If "false", the default, get membership data from the cluster leader. If
   208      the cluster is in an outage unable to establish leadership, it may be
   209      necessary to get the configuration from a non-leader server.
   210  
   211    -output=<path>
   212      Path to the parent directory of the output directory. If specified, no
   213      archive is built. Defaults to the current directory.
   214  
   215    -verbose
   216      Enable verbose output.
   217  `
   218  	return strings.TrimSpace(helpText)
   219  }
   220  
   221  func (c *OperatorDebugCommand) Synopsis() string {
   222  	return "Build a debug archive"
   223  }
   224  
   225  func (c *OperatorDebugCommand) AutocompleteFlags() complete.Flags {
   226  	return mergeAutocompleteFlags(c.Meta.AutocompleteFlags(FlagSetClient),
   227  		complete.Flags{
   228  			"-duration":       complete.PredictAnything,
   229  			"-event-index":    complete.PredictAnything,
   230  			"-event-topic":    complete.PredictAnything,
   231  			"-interval":       complete.PredictAnything,
   232  			"-log-level":      complete.PredictSet("TRACE", "DEBUG", "INFO", "WARN", "ERROR"),
   233  			"-max-nodes":      complete.PredictAnything,
   234  			"-node-class":     NodeClassPredictor(c.Client),
   235  			"-node-id":        NodePredictor(c.Client),
   236  			"-server-id":      ServerPredictor(c.Client),
   237  			"-output":         complete.PredictDirs("*"),
   238  			"-pprof-duration": complete.PredictAnything,
   239  			"-consul-token":   complete.PredictAnything,
   240  			"-vault-token":    complete.PredictAnything,
   241  			"-verbose":        complete.PredictAnything,
   242  		})
   243  }
   244  
   245  func (c *OperatorDebugCommand) AutocompleteArgs() complete.Predictor {
   246  	return complete.PredictNothing
   247  }
   248  
   249  // NodePredictor returns a client node predictor
   250  func NodePredictor(factory ApiClientFactory) complete.Predictor {
   251  	return complete.PredictFunc(func(a complete.Args) []string {
   252  		client, err := factory()
   253  		if err != nil {
   254  			return nil
   255  		}
   256  
   257  		// note we can't use the -stale flag here because we're in the
   258  		// predictor, but a stale query should be safe for prediction;
   259  		// we also can't use region forwarding because we can't rely
   260  		// on the server being up
   261  		resp, _, err := client.Search().PrefixSearch(
   262  			a.Last, contexts.Nodes, &api.QueryOptions{AllowStale: true})
   263  		if err != nil {
   264  			return []string{}
   265  		}
   266  		return resp.Matches[contexts.Nodes]
   267  	})
   268  }
   269  
   270  // NodeClassPredictor returns a client node class predictor
   271  // TODO dmay: Consider API options for node class filtering
   272  func NodeClassPredictor(factory ApiClientFactory) complete.Predictor {
   273  	return complete.PredictFunc(func(a complete.Args) []string {
   274  		client, err := factory()
   275  		if err != nil {
   276  			return nil
   277  		}
   278  
   279  		// note we can't use the -stale flag here because we're in the
   280  		// predictor, but a stale query should be safe for prediction;
   281  		// we also can't use region forwarding because we can't rely
   282  		// on the server being up
   283  		nodes, _, err := client.Nodes().List(&api.QueryOptions{AllowStale: true})
   284  		if err != nil {
   285  			return []string{}
   286  		}
   287  
   288  		// Build map of unique node classes across all nodes
   289  		classes := make(map[string]bool)
   290  		for _, node := range nodes {
   291  			classes[node.NodeClass] = true
   292  		}
   293  
   294  		// Iterate over node classes looking for match
   295  		filtered := []string{}
   296  		for class := range classes {
   297  			if strings.HasPrefix(class, a.Last) {
   298  				filtered = append(filtered, class)
   299  			}
   300  		}
   301  
   302  		return filtered
   303  	})
   304  }
   305  
   306  // ServerPredictor returns a server member predictor
   307  // TODO dmay: Consider API options for server member filtering
   308  func ServerPredictor(factory ApiClientFactory) complete.Predictor {
   309  	return complete.PredictFunc(func(a complete.Args) []string {
   310  		client, err := factory()
   311  		if err != nil {
   312  			return nil
   313  		}
   314  
   315  		// note we can't use the -stale flag here because we're in the
   316  		// predictor, but a stale query should be safe for prediction;
   317  		// we also can't use region forwarding because we can't rely
   318  		// on the server being up
   319  		members, err := client.Agent().MembersOpts(&api.QueryOptions{AllowStale: true})
   320  		if err != nil {
   321  			return []string{}
   322  		}
   323  
   324  		// Iterate over server members looking for match
   325  		filtered := []string{}
   326  		for _, member := range members.Members {
   327  			if strings.HasPrefix(member.Name, a.Last) {
   328  				filtered = append(filtered, member.Name)
   329  			}
   330  		}
   331  
   332  		return filtered
   333  	})
   334  }
   335  
   336  // queryOpts returns a copy of the shared api.QueryOptions so
   337  // that api package methods can safely modify the options
   338  func (c *OperatorDebugCommand) queryOpts() *api.QueryOptions {
   339  	qo := new(api.QueryOptions)
   340  	*qo = *c.opts
   341  	qo.Params = maps.Clone(c.opts.Params)
   342  	return qo
   343  }
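        // A minimal sketch of why the copy matters: the monitor and event
        // stream collectors run in goroutines and may mutate the options they
        // receive, so each caller gets a fresh copy with a cloned Params map:
        //
        //	qo := c.queryOpts()
        //	qo.Params["log_level"] = "TRACE" // hypothetical mutation; c.opts.Params is untouched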
   344  
   345  func (c *OperatorDebugCommand) Name() string { return "debug" }
   346  
   347  func (c *OperatorDebugCommand) Run(args []string) int {
   348  	flags := c.Meta.FlagSet(c.Name(), FlagSetClient)
   349  	flags.Usage = func() { c.Ui.Output(c.Help()) }
   350  
   351  	var duration, interval, pprofInterval, output, pprofDuration, eventTopic string
   352  	var eventIndex int64
   353  	var nodeIDs, serverIDs string
   354  	var allowStale bool
   355  
   356  	flags.StringVar(&duration, "duration", "2m", "")
   357  	flags.Int64Var(&eventIndex, "event-index", 0, "")
   358  	flags.StringVar(&eventTopic, "event-topic", "none", "")
   359  	flags.StringVar(&interval, "interval", "30s", "")
   360  	flags.StringVar(&c.logLevel, "log-level", "DEBUG", "")
   361  	flags.IntVar(&c.maxNodes, "max-nodes", 10, "")
   362  	flags.StringVar(&c.nodeClass, "node-class", "", "")
   363  	flags.StringVar(&nodeIDs, "node-id", "all", "")
   364  	flags.StringVar(&serverIDs, "server-id", "all", "")
   365  	flags.BoolVar(&allowStale, "stale", false, "")
   366  	flags.StringVar(&output, "output", "", "")
   367  	flags.StringVar(&pprofDuration, "pprof-duration", "1s", "")
   368  	flags.StringVar(&pprofInterval, "pprof-interval", "250ms", "")
   369  	flags.BoolVar(&c.verbose, "verbose", false, "")
   370  
   371  	c.consul = &external{tls: &api.TLSConfig{}}
   372  	flags.StringVar(&c.consul.addrVal, "consul-http-addr", os.Getenv("CONSUL_HTTP_ADDR"), "")
   373  	ssl := os.Getenv("CONSUL_HTTP_SSL")
   374  	c.consul.ssl, _ = strconv.ParseBool(ssl)
   375  	flags.StringVar(&c.consul.auth, "consul-auth", os.Getenv("CONSUL_HTTP_AUTH"), "")
   376  	flags.StringVar(&c.consul.tokenVal, "consul-token", os.Getenv("CONSUL_HTTP_TOKEN"), "")
   377  	flags.StringVar(&c.consul.tokenFile, "consul-token-file", os.Getenv("CONSUL_HTTP_TOKEN_FILE"), "")
   378  	flags.StringVar(&c.consul.tls.ClientCert, "consul-client-cert", os.Getenv("CONSUL_CLIENT_CERT"), "")
   379  	flags.StringVar(&c.consul.tls.ClientKey, "consul-client-key", os.Getenv("CONSUL_CLIENT_KEY"), "")
   380  	flags.StringVar(&c.consul.tls.CACert, "consul-ca-cert", os.Getenv("CONSUL_CACERT"), "")
   381  	flags.StringVar(&c.consul.tls.CAPath, "consul-ca-path", os.Getenv("CONSUL_CAPATH"), "")
   382  
   383  	c.vault = &external{tls: &api.TLSConfig{}}
   384  	flags.StringVar(&c.vault.addrVal, "vault-address", os.Getenv("VAULT_ADDR"), "")
   385  	flags.StringVar(&c.vault.tokenVal, "vault-token", os.Getenv("VAULT_TOKEN"), "")
   386  	flags.StringVar(&c.vault.tls.CACert, "vault-ca-cert", os.Getenv("VAULT_CACERT"), "")
   387  	flags.StringVar(&c.vault.tls.CAPath, "vault-ca-path", os.Getenv("VAULT_CAPATH"), "")
   388  	flags.StringVar(&c.vault.tls.ClientCert, "vault-client-cert", os.Getenv("VAULT_CLIENT_CERT"), "")
   389  	flags.StringVar(&c.vault.tls.ClientKey, "vault-client-key", os.Getenv("VAULT_CLIENT_KEY"), "")
   390  
   391  	if err := flags.Parse(args); err != nil {
   392  		c.Ui.Error(fmt.Sprintf("Error parsing arguments: %q", err))
   393  		return 1
   394  	}
   395  
   396  	// Parse the capture duration
   397  	d, err := time.ParseDuration(duration)
   398  	if err != nil {
   399  		c.Ui.Error(fmt.Sprintf("Error parsing duration: %s: %s", duration, err.Error()))
   400  		return 1
   401  	}
   402  	c.duration = d
   403  
   404  	// Parse the capture interval
   405  	i, err := time.ParseDuration(interval)
   406  	if err != nil {
   407  		c.Ui.Error(fmt.Sprintf("Error parsing interval: %s: %s", interval, err.Error()))
   408  		return 1
   409  	}
   410  	c.interval = i
   411  
   412  	// Validate interval
   413  	if i.Seconds() > d.Seconds() {
   414  		c.Ui.Error(fmt.Sprintf("Error parsing interval: %s is greater than duration %s", interval, duration))
   415  		return 1
   416  	}
   417  
   418  	// Parse and clamp the pprof capture duration
   419  	pd, err := time.ParseDuration(pprofDuration)
   420  	if err != nil {
   421  		c.Ui.Error(fmt.Sprintf("Error parsing pprof duration: %s: %s", pprofDuration, err.Error()))
   422  		return 1
   423  	}
   424  	if pd.Seconds() > d.Seconds() {
   425  		pd = d
   426  	}
   427  	c.pprofDuration = pd
   428  
   429  	// Parse and clamp the pprof capture interval
   430  	pi, err := time.ParseDuration(pprofInterval)
   431  	if err != nil {
   432  		c.Ui.Error(fmt.Sprintf("Error parsing pprof-interval: %s: %s", pprofInterval, err.Error()))
   433  		return 1
   434  	}
   435  	if pi.Seconds() > pd.Seconds() {
   436  		pi = pd
   437  	}
   438  	c.pprofInterval = pi
   439  
   440  	// Parse event stream topic filter
   441  	t, err := topicsFromString(eventTopic)
   442  	if err != nil {
   443  		c.Ui.Error(fmt.Sprintf("Error parsing event topics: %v", err))
   444  		return 1
   445  	}
   446  	c.topics = t
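        	// As illustrated in the help text above, topicsFromString (defined
        	// elsewhere in this package) is expected to map e.g. "none" to no
        	// topics, "all" or "*:*" to every event, and "Evaluation" to all
        	// evaluation events.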
   447  
   448  	// Validate and set initial event stream index
   449  	if eventIndex < 0 {
   450  		c.Ui.Error("Event stream index must be greater than or equal to zero")
   451  		return 1
   452  	}
   453  	c.index = uint64(eventIndex)
   454  
   455  	// Verify there are no extra arguments
   456  	args = flags.Args()
   457  	if l := len(args); l != 0 {
   458  		c.Ui.Error("This command takes no arguments")
   459  		c.Ui.Error(commandErrorText(c))
   460  		return 1
   461  	}
   462  
   463  	// Initialize capture variables and structs
   464  	c.manifest = make([]string, 0)
   465  	ctx, cancel := context.WithCancel(context.Background())
   466  	c.ctx = ctx
   467  	c.cancel = cancel
   468  	c.trap()
   469  
   470  	// Generate timestamped file name
   471  	format := "2006-01-02-150405Z"
   472  	c.timestamp = time.Now().UTC().Format(format)
   473  	stamped := "nomad-debug-" + c.timestamp
   474  
   475  	// Create the output directory
   476  	var tmp string
   477  	if output != "" {
   478  		// User specified output directory
   479  		tmp = filepath.Join(output, stamped)
   480  		_, err := os.Stat(tmp)
   481  		if !os.IsNotExist(err) {
   482  			c.Ui.Error("Output directory already exists")
   483  			return 2
   484  		}
   485  	} else {
   486  		// Generate temp directory
   487  		tmp, err = os.MkdirTemp(os.TempDir(), stamped)
   488  		if err != nil {
   489  			c.Ui.Error(fmt.Sprintf("Error creating tmp directory: %s", err.Error()))
   490  			return 2
   491  		}
   492  		defer os.RemoveAll(tmp)
   493  	}
   494  
   495  	c.collectDir = tmp
   496  
   497  	// Write CLI flags to JSON file
   498  	c.writeFlags(flags)
   499  
   500  	// Create an instance of the API client
   501  	client, err := c.Meta.Client()
   502  	if err != nil {
   503  		c.Ui.Error(fmt.Sprintf("Error initializing client: %s", err.Error()))
   504  		return 1
   505  	}
   506  
   507  	c.opts = &api.QueryOptions{
   508  		Region:     c.Meta.region,
   509  		AllowStale: allowStale,
   510  		AuthToken:  c.Meta.token,
   511  	}
   512  
   513  	// Get complete list of client nodes
   514  	c.nodes, _, err = client.Nodes().List(c.queryOpts())
   515  	if err != nil {
   516  		c.Ui.Error(fmt.Sprintf("Error querying node info: %v", err))
   517  		return 1
   518  	}
   519  
   520  	// Write nodes to file
   521  	c.reportErr(writeResponseToFile(c.nodes, c.newFile(clusterDir, "nodes.json")))
   522  
   523  	// Search all nodes if a node class is specified without a list of node ID prefixes
   524  	if c.nodeClass != "" && nodeIDs == "" {
   525  		nodeIDs = "all"
   526  	}
   527  
   528  	// Resolve client node id prefixes
   529  	nodesFound := 0
   530  	nodeLookupFailCount := 0
   531  	nodeCaptureCount := 0
   532  
   533  	for _, id := range stringToSlice(nodeIDs) {
   534  		if id == "all" {
   535  			// Capture from all nodes using empty prefix filter
   536  			id = ""
   537  		} else {
   538  			// Capture from nodes starting with prefix id
   539  			id = sanitizeUUIDPrefix(id)
   540  		}
   541  		nodes, _, err := client.Nodes().PrefixListOpts(id, c.queryOpts())
   542  		if err != nil {
   543  			c.Ui.Error(fmt.Sprintf("Error querying node info: %s", err))
   544  			return 1
   545  		}
   546  
   547  		// Increment fail count if no nodes are found
   548  		if len(nodes) == 0 {
   549  			c.Ui.Error(fmt.Sprintf("No node(s) with prefix %q found", id))
   550  			nodeLookupFailCount++
   551  			continue
   552  		}
   553  
   554  		nodesFound += len(nodes)
   555  
   556  		// Apply constraints to nodes found
   557  		for _, n := range nodes {
   558  			// Ignore nodes that do not match specified class
   559  			if c.nodeClass != "" && n.NodeClass != c.nodeClass {
   560  				continue
   561  			}
   562  
   563  			// Add node to capture list
   564  			c.nodeIDs = append(c.nodeIDs, n.ID)
   565  			nodeCaptureCount++
   566  
   567  			// Stop looping when we reach the max
   568  			if c.maxNodes != 0 && nodeCaptureCount >= c.maxNodes {
   569  				break
   570  			}
   571  		}
   572  	}
   573  
   574  	// Return error if nodes were specified but none were found
   575  	if len(nodeIDs) > 0 && nodeCaptureCount == 0 {
   576  		if nodeIDs == "all" {
   577  			// It's okay to have zero clients for default "all"
   578  			c.Ui.Info("Note: \"-node-id=all\" specified but no clients found")
   579  		} else {
   580  			c.Ui.Error(fmt.Sprintf("Failed to retrieve clients, 0 nodes found in list: %s", nodeIDs))
   581  			return 1
   582  		}
   583  	}
   584  
   585  	// Resolve servers
   586  	c.members, err = client.Agent().MembersOpts(c.queryOpts())
   587  	if err != nil {
   588  		c.Ui.Error(fmt.Sprintf("Failed to retrieve server list; err: %v", err))
   589  		return 1
   590  	}
   591  
   592  	// Write complete list of server members to file
   593  	c.reportErr(writeResponseToFile(c.members, c.newFile(clusterDir, "members.json")))
   594  
   595  	// Get leader and write to file; there's no option for AllowStale
   596  	// on this API and a stale result wouldn't even be meaningful, so
   597  	// only warn if we fail so that we don't stop the rest of the
   598  	// debugging
   599  	leader, err := client.Status().Leader()
   600  	if err != nil {
   601  		c.Ui.Warn(fmt.Sprintf("Failed to retrieve leader; err: %v", err))
   602  	}
   603  	if len(leader) > 0 {
   604  		c.reportErr(writeResponseToFile(leader, c.newFile(clusterDir, "leader.json")))
   605  	}
   606  
   607  	// Filter for servers matching criteria
   608  	c.serverIDs, err = filterServerMembers(c.members, serverIDs, c.region)
   609  	if err != nil {
   610  		c.Ui.Error(fmt.Sprintf("Failed to parse server list; err: %v", err))
   611  		return 1
   612  	}
   613  
   614  	serversFound := 0
   615  	serverCaptureCount := 0
   616  
   617  	if c.members != nil {
   618  		serversFound = len(c.members.Members)
   619  	}
   620  	if c.serverIDs != nil {
   621  		serverCaptureCount = len(c.serverIDs)
   622  	}
   623  
   624  	// Return error if servers were specified but not found
   625  	if len(serverIDs) > 0 && serverCaptureCount == 0 {
   626  		c.Ui.Error(fmt.Sprintf("Failed to retrieve servers, 0 members found in list: %s", serverIDs))
   627  		return 1
   628  	}
   629  
   630  	// Display general info about the capture
   631  	c.Ui.Output("Starting debugger...")
   632  	c.Ui.Output("")
   633  	c.Ui.Output(fmt.Sprintf("Nomad CLI Version: %s", version.GetVersion().FullVersionNumber(true)))
   634  	c.Ui.Output(fmt.Sprintf("           Region: %s", c.region))
   635  	c.Ui.Output(fmt.Sprintf("        Namespace: %s", c.namespace))
   636  	c.Ui.Output(fmt.Sprintf("          Servers: (%d/%d) %v", serverCaptureCount, serversFound, c.serverIDs))
   637  	c.Ui.Output(fmt.Sprintf("          Clients: (%d/%d) %v", nodeCaptureCount, nodesFound, c.nodeIDs))
   638  	if nodeCaptureCount > 0 && nodeCaptureCount == c.maxNodes {
   639  		c.Ui.Output(fmt.Sprintf("                   Max node count reached (%d)", c.maxNodes))
   640  	}
   641  	if nodeLookupFailCount > 0 {
   642  		c.Ui.Output(fmt.Sprintf("Client fail count: %v", nodeLookupFailCount))
   643  	}
   644  	if c.nodeClass != "" {
   645  		c.Ui.Output(fmt.Sprintf("       Node Class: %s", c.nodeClass))
   646  	}
   647  	c.Ui.Output(fmt.Sprintf("         Interval: %s", interval))
   648  	c.Ui.Output(fmt.Sprintf("         Duration: %s", duration))
   649  	c.Ui.Output(fmt.Sprintf("   pprof Interval: %s", pprofInterval))
   650  	if c.pprofDuration.Seconds() != 1 {
   651  		c.Ui.Output(fmt.Sprintf("   pprof Duration: %s", c.pprofDuration))
   652  	}
   653  	if c.topics != nil {
   654  		c.Ui.Output(fmt.Sprintf("     Event topics: %+v", c.topics))
   655  	}
   656  	c.Ui.Output("")
   657  	c.Ui.Output("Capturing cluster data...")
   658  
   659  	// Start collecting data
   660  	err = c.collect(client)
   661  	if err != nil {
   662  		c.Ui.Error(fmt.Sprintf("Error collecting data: %s", err.Error()))
   663  		return 2
   664  	}
   665  
   666  	// Write index json/html manifest files
   667  	c.writeManifest()
   668  
   669  	// Exit before archive if output directory was specified
   670  	if output != "" {
   671  		c.Ui.Output(fmt.Sprintf("Created debug directory: %s", c.collectDir))
   672  		return 0
   673  	}
   674  
   675  	// Create archive tarball
   676  	archiveFile := stamped + ".tar.gz"
   677  	err = TarCZF(archiveFile, tmp, stamped)
   678  	if err != nil {
   679  		c.Ui.Error(fmt.Sprintf("Error creating archive: %s", err.Error()))
   680  		return 2
   681  	}
   682  
   683  	// Final output with name of tarball
   684  	c.Ui.Output(fmt.Sprintf("Created debug archive: %s", archiveFile))
   685  	return 0
   686  }
   687  
   688  // collect collects data from our endpoints and writes the archive bundle
   689  func (c *OperatorDebugCommand) collect(client *api.Client) error {
   690  	// Start background captures
   691  	c.startMonitors(client)
   692  	c.startEventStream(client)
   693  
   694  	// Collect cluster data
   695  	self, err := client.Agent().Self()
   696  	c.reportErr(writeResponseOrErrorToFile(
   697  		self, err, c.newFile(clusterDir, "agent-self.json")))
   698  
   699  	namespaces, _, err := client.Namespaces().List(c.queryOpts())
   700  	c.reportErr(writeResponseOrErrorToFile(
   701  		namespaces, err, c.newFile(clusterDir, "namespaces.json")))
   702  
   703  	regions, err := client.Regions().List()
   704  	c.reportErr(writeResponseOrErrorToFile(
   705  		regions, err, c.newFile(clusterDir, "regions.json")))
   706  
   707  	// Collect data from Consul
   708  	if c.consul.addrVal == "" {
   709  		c.getConsulAddrFromSelf(self)
   710  	}
   711  	c.collectConsul(clusterDir)
   712  
   713  	// Collect data from Vault
   714  	vaultAddr := c.vault.addrVal
   715  	if vaultAddr == "" {
   716  		vaultAddr = c.getVaultAddrFromSelf(self)
   717  	}
   718  	c.collectVault(clusterDir, vaultAddr)
   719  
   720  	c.collectAgentHosts(client)
   721  	c.collectPeriodicPprofs(client)
   722  
   723  	c.collectPeriodic(client)
   724  
   725  	return nil
   726  }
   727  
   728  // path returns platform-specific paths in the tmp root directory
   729  func (c *OperatorDebugCommand) path(paths ...string) string {
   730  	ps := []string{c.collectDir}
   731  	ps = append(ps, paths...)
   732  	return filepath.Join(ps...)
   733  }
   734  
   735  // mkdir creates directories in the tmp root directory
   736  func (c *OperatorDebugCommand) mkdir(paths ...string) error {
   737  	joinedPath := c.path(paths...)
   738  
   739  	// Ensure path doesn't escape the sandbox of the capture directory
   740  	escapes := escapingfs.PathEscapesSandbox(c.collectDir, joinedPath)
   741  	if escapes {
   742  		return fmt.Errorf("file path escapes capture directory")
   743  	}
   744  
   745  	return escapingfs.EnsurePath(joinedPath, true)
   746  }
   747  
   748  // startMonitors starts a monitor goroutine for each selected client node and server
   749  func (c *OperatorDebugCommand) startMonitors(client *api.Client) {
   750  	for _, id := range c.nodeIDs {
   751  		go c.startMonitor(clientDir, "node_id", id, client)
   752  	}
   753  
   754  	for _, id := range c.serverIDs {
   755  		go c.startMonitor(serverDir, "server_id", id, client)
   756  	}
   757  }
   758  
   759  // startMonitor starts one monitor API request, writing to a file. It blocks and
   760  // should be called in a goroutine. Errors are ignored: we want to build the
   761  // archive even if a node is unavailable.
   762  func (c *OperatorDebugCommand) startMonitor(path, idKey, nodeID string, client *api.Client) {
   763  	c.mkdir(path, nodeID)
   764  	fh, err := os.Create(c.path(path, nodeID, "monitor.log"))
   765  	if err != nil {
   766  		return
   767  	}
   768  	defer fh.Close()
   769  
   770  	qo := api.QueryOptions{
   771  		Params: map[string]string{
   772  			idKey:       nodeID,
   773  			"log_level": c.logLevel,
   774  		},
   775  		AllowStale: c.queryOpts().AllowStale,
   776  	}
   777  
   778  	outCh, errCh := client.Agent().Monitor(c.ctx.Done(), &qo)
   779  	for {
   780  		select {
   781  		case out := <-outCh:
   782  			if out == nil {
   783  				continue
   784  			}
   785  			fh.Write(out.Data)
   786  
   787  		case err := <-errCh:
   788  			fh.WriteString(fmt.Sprintf("monitor: %s\n", err.Error()))
   789  			return
   790  
   791  		case <-c.ctx.Done():
   792  			return
   793  		}
   794  	}
   795  }
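        // client.Agent().Monitor streams log frames until the done channel
        // closes; the loop above drains both the output and error channels so
        // a slow or failed agent never blocks the other collectors.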
   796  
   797  // startEventStream starts a goroutine that captures the event stream and reports any errors.
   798  func (c *OperatorDebugCommand) startEventStream(client *api.Client) {
   799  	c.verboseOut("Launching eventstream goroutine...")
   800  
   801  	go func() {
   802  		if err := c.captureEventStream(client); err != nil {
   803  			var es string
   804  			if mErr, ok := err.(*multierror.Error); ok {
   805  				es = multierror.ListFormatFunc(mErr.Errors)
   806  			} else {
   807  				es = err.Error()
   808  			}
   809  
   810  			c.Ui.Error(fmt.Sprintf("Error capturing event stream: %s", es))
   811  		}
   812  	}()
   813  }
   814  
   815  func (c *OperatorDebugCommand) captureEventStream(client *api.Client) error {
   816  	// Ensure output directory is present
   817  	path := clusterDir
   818  	if err := c.mkdir(path); err != nil { // mkdir already joins paths against the capture root
   819  		return err
   820  	}
   821  
   822  	// Create the output file
   823  	fh, err := os.Create(c.path(path, "eventstream.json"))
   824  	if err != nil {
   825  		return err
   826  	}
   827  	defer fh.Close()
   828  
   829  	// Get handle to events endpoint
   830  	events := client.EventStream()
   831  
   832  	// Start streaming events
   833  	eventCh, err := events.Stream(c.ctx, c.topics, c.index, c.queryOpts())
   834  	if err != nil {
   835  		if errors.Is(err, context.Canceled) {
   836  			c.verboseOut("Event stream canceled: No events captured")
   837  			return nil
   838  		}
   839  		return fmt.Errorf("failed to stream events: %w", err)
   840  	}
   841  
   842  	eventCount := 0
   843  	errCount := 0
   844  	heartbeatCount := 0
   845  	channelEventCount := 0
   846  
   847  	var mErrs *multierror.Error
   848  
   849  	for {
   850  		select {
   851  		case event := <-eventCh:
   852  			channelEventCount++
   853  			if event.Err != nil {
   854  				errCount++
   855  				c.verboseOutf("error from event stream: index: %d, err: %v", event.Index, event.Err)
   856  				mErrs = multierror.Append(mErrs, fmt.Errorf("error at index: %d, Err: %w", event.Index, event.Err))
   857  				break
   858  			}
   859  
   860  			if event.IsHeartbeat() {
   861  				heartbeatCount++
   862  				continue
   863  			}
   864  
   865  			for _, e := range event.Events {
   866  				eventCount++
   867  				c.verboseOutf("Event: %4d, Index: %d, Topic: %-10s, Type: %s, FilterKeys: %s", eventCount, e.Index, e.Topic, e.Type, e.FilterKeys)
   868  
   869  				bytes, err := json.Marshal(e)
   870  				if err != nil {
   871  					errCount++
   872  					mErrs = multierror.Append(mErrs, fmt.Errorf("failed to marshal json from Topic: %s, Type: %s, Err: %w", e.Topic, e.Type, err))
   873  				}
   874  
   875  				n, err := fh.Write(bytes)
   876  				if err != nil {
   877  					errCount++
   878  					mErrs = multierror.Append(mErrs, fmt.Errorf("failed to write bytes to eventstream.json; bytes written: %d, Err: %w", n, err))
   879  					break
   880  				}
   881  				n, err = fh.WriteString("\n")
   882  				if err != nil {
   883  					errCount++
   884  					mErrs = multierror.Append(mErrs, fmt.Errorf("failed to write string to eventstream.json; chars written: %d, Err: %w", n, err))
   885  				}
   886  			}
   887  		case <-c.ctx.Done():
   888  			c.verboseOutf("Event stream captured %d events, %d frames, %d heartbeats, %d errors", eventCount, channelEventCount, heartbeatCount, errCount)
   889  			return mErrs.ErrorOrNil()
   890  		}
   891  	}
   892  }
   893  
   894  // collectAgentHosts calls collectAgentHost for each selected client node and server
   895  func (c *OperatorDebugCommand) collectAgentHosts(client *api.Client) {
   896  	for _, n := range c.nodeIDs {
   897  		c.collectAgentHost(clientDir, n, client)
   898  	}
   899  
   900  	for _, n := range c.serverIDs {
   901  		c.collectAgentHost(serverDir, n, client)
   902  	}
   903  }
   904  
   905  // collectAgentHost gets the agent host data
   906  func (c *OperatorDebugCommand) collectAgentHost(path, id string, client *api.Client) {
   907  	var host *api.HostDataResponse
   908  	var err error
   909  	if path == serverDir {
   910  		host, err = client.Agent().Host(id, "", c.queryOpts())
   911  	} else {
   912  		host, err = client.Agent().Host("", id, c.queryOpts())
   913  	}
   914  
   915  	if isRedirectError(err) {
   916  		c.Ui.Warn(fmt.Sprintf("%s/%s: /v1/agent/host unavailable on this agent", path, id))
   917  		return
   918  	}
   919  
   920  	if err != nil {
   921  		c.Ui.Error(fmt.Sprintf("%s/%s: Failed to retrieve agent host data, err: %v", path, id, err))
   922  
   923  		if strings.Contains(err.Error(), api.PermissionDeniedErrorContent) {
   924  			// Drop a hint to help the operator resolve the error
   925  			c.Ui.Warn("Agent host retrieval requires agent:read ACL or enable_debug=true.  See https://www.nomadproject.io/api-docs/agent#host for more information.")
   926  		}
   927  		return // exit on any error
   928  	}
   929  
   930  	path = filepath.Join(path, id)
   931  	c.reportErr(writeResponseToFile(host, c.newFile(path, "agent-host.json")))
   932  }
   933  
   934  func (c *OperatorDebugCommand) collectPeriodicPprofs(client *api.Client) {
   935  
   936  	pprofNodeIDs := []string{}
   937  	pprofServerIDs := []string{}
   938  
   939  	// threadcreate pprof causes a panic on Nomad 0.11.0 to 0.11.2 -- skip those versions
   940  	for _, serverID := range c.serverIDs {
   941  		version := c.getNomadVersion(serverID, "")
   942  		err := checkVersion(version, minimumVersionPprofConstraint)
   943  		if err != nil {
   944  			c.Ui.Warn(fmt.Sprintf("Skipping pprof: %v", err))
        			continue // actually skip agents in the broken version range, as the warning says
   945  		}
   946  		pprofServerIDs = append(pprofServerIDs, serverID)
   947  	}
   948  
   949  	for _, nodeID := range c.nodeIDs {
   950  		version := c.getNomadVersion("", nodeID)
   951  		err := checkVersion(version, minimumVersionPprofConstraint)
   952  		if err != nil {
   953  			c.Ui.Warn(fmt.Sprintf("Skipping pprof: %v", err))
        			continue // actually skip agents in the broken version range, as the warning says
   954  		}
   955  		pprofNodeIDs = append(pprofNodeIDs, nodeID)
   956  	}
   957  
   958  	// Take the first set of pprofs synchronously...
   959  	c.Ui.Output("    Capture pprofInterval 0000")
   960  	c.collectPprofs(client, pprofServerIDs, pprofNodeIDs, 0)
   961  	if c.pprofInterval == c.pprofDuration {
   962  		return
   963  	}
   964  
   965  	// ... and then move the rest off into a goroutine
   966  	go func() {
   967  		ctx, cancel := context.WithTimeout(c.ctx, c.duration)
   968  		defer cancel()
   969  		timer, stop := helper.NewSafeTimer(c.pprofInterval)
   970  		defer stop()
   971  
   972  		pprofIntervalCount := 1
   973  		for {
   974  			select {
   975  			case <-ctx.Done():
   976  				return
   977  			case <-timer.C:
   978  				c.Ui.Output(fmt.Sprintf("    Capture pprofInterval %04d", pprofIntervalCount))
   979  				c.collectPprofs(client, pprofServerIDs, pprofNodeIDs, pprofIntervalCount)
   980  				timer.Reset(c.pprofInterval)
   981  				pprofIntervalCount++
   982  			}
   983  		}
   984  	}()
   985  }
   986  
   987  // collectPprofs captures the /agent/pprof for each listed node
   988  func (c *OperatorDebugCommand) collectPprofs(client *api.Client, serverIDs, nodeIDs []string, interval int) {
   989  	for _, n := range nodeIDs {
   990  		c.collectPprof(clientDir, n, client, interval)
   991  	}
   992  
   993  	for _, n := range serverIDs {
   994  		c.collectPprof(serverDir, n, client, interval)
   995  	}
   996  }
   997  
   998  // collectPprof captures pprof data for the node
   999  func (c *OperatorDebugCommand) collectPprof(path, id string, client *api.Client, interval int) {
  1000  	pprofDurationSeconds := int(c.pprofDuration.Seconds())
  1001  	opts := api.PprofOptions{Seconds: pprofDurationSeconds}
  1002  	if path == serverDir {
  1003  		opts.ServerID = id
  1004  	} else {
  1005  		opts.NodeID = id
  1006  	}
  1007  
  1008  	path = filepath.Join(path, id)
  1009  	filename := fmt.Sprintf("profile_%04d.prof", interval)
  1010  
  1011  	bs, err := client.Agent().CPUProfile(opts, c.queryOpts())
  1012  	if err != nil {
  1013  		c.Ui.Error(fmt.Sprintf("%s: Failed to retrieve pprof %s, err: %v", path, filename, err))
  1014  		if strings.Contains(err.Error(), api.PermissionDeniedErrorContent) {
  1015  			// All profiles require the same permissions, so we only need to see
  1016  			// one permission failure before we bail.
  1017  			// But let's first drop a hint to help the operator resolve the error.
  1018  
  1019  			c.Ui.Warn("Pprof retrieval requires agent:write ACL or enable_debug=true.  See https://www.nomadproject.io/api-docs/agent#agent-runtime-profiles for more information.")
  1020  			return // only exit on 403
  1021  		}
  1022  	} else {
  1023  		err := c.writeBytes(path, filename, bs)
  1024  		if err != nil {
  1025  			c.Ui.Error(err.Error())
  1026  		}
  1027  	}
  1028  
  1029  	// goroutine debug type 1 = legacy text format for human readable output
  1030  	opts.Debug = 1
  1031  	c.savePprofProfile(path, "goroutine", opts, client)
  1032  
  1033  	// goroutine debug type 2 = goroutine stacks in panic format
  1034  	opts.Debug = 2
  1035  	c.savePprofProfile(path, "goroutine", opts, client)
  1036  
  1037  	// Reset to pprof binary format
  1038  	opts.Debug = 0
  1039  
  1040  	c.savePprofProfile(path, "goroutine", opts, client)    // Stack traces of all current goroutines
  1041  	c.savePprofProfile(path, "trace", opts, client)        // A trace of execution of the current program
  1042  	c.savePprofProfile(path, "heap", opts, client)         // A sampling of memory allocations of live objects. You can specify the gc GET parameter to run GC before taking the heap sample.
  1043  	c.savePprofProfile(path, "allocs", opts, client)       // A sampling of all past memory allocations
  1044  	c.savePprofProfile(path, "threadcreate", opts, client) // Stack traces that led to the creation of new OS threads
  1045  }
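        // Derived from the calls above, each capture leaves files per agent
        // under client/<id> or server/<name>: profile_<interval>.prof (CPU),
        // goroutine-debug1.txt, goroutine-debug2.txt, and the binary profiles
        // goroutine.prof, trace.prof, heap.prof, allocs.prof, threadcreate.prof.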
  1046  
  1047  // savePprofProfile retrieves a pprof profile and writes to disk
  1048  func (c *OperatorDebugCommand) savePprofProfile(path string, profile string, opts api.PprofOptions, client *api.Client) {
  1049  	fileName := fmt.Sprintf("%s.prof", profile)
  1050  	if opts.Debug > 0 {
  1051  		fileName = fmt.Sprintf("%s-debug%d.txt", profile, opts.Debug)
  1052  	}
  1053  
  1054  	bs, err := retrievePprofProfile(profile, opts, client, c.queryOpts())
  1055  	if err != nil {
  1056  		c.Ui.Error(fmt.Sprintf("%s: Failed to retrieve pprof %s, err: %s", path, fileName, err.Error()))
  1057  	}
  1058  
  1059  	err = c.writeBytes(path, fileName, bs)
  1060  	if err != nil {
  1061  		c.Ui.Error(fmt.Sprintf("%s: Failed to write file %s, err: %s", path, fileName, err.Error()))
  1062  	}
  1063  }
  1064  
  1065  // retrievePprofProfile gets a pprof profile from the node specified
  1066  // in opts using the API client
  1067  func retrievePprofProfile(profile string, opts api.PprofOptions, client *api.Client, qopts *api.QueryOptions) (bs []byte, err error) {
  1068  	switch profile {
  1069  	case "cpuprofile":
  1070  		bs, err = client.Agent().CPUProfile(opts, qopts)
  1071  	case "trace":
  1072  		bs, err = client.Agent().Trace(opts, qopts)
  1073  	default:
  1074  		bs, err = client.Agent().Lookup(profile, opts, qopts)
  1075  	}
  1076  
  1077  	return bs, err
  1078  }
  1079  
  1080  // collectPeriodic runs for duration, capturing the cluster state
  1081  // every interval. It flushes and stops the monitor requests
  1082  func (c *OperatorDebugCommand) collectPeriodic(client *api.Client) {
  1083  	duration := time.After(c.duration)
  1084  	// Set interval to 0 so that the first capture executes immediately; we wait the interval on later passes
  1085  	interval := time.After(0 * time.Second)
  1086  	var intervalCount int
  1087  	var name, dir string
  1088  
  1089  	for {
  1090  		select {
  1091  		case <-duration:
  1092  			c.cancel()
  1093  			return
  1094  
  1095  		case <-interval:
  1096  			name = fmt.Sprintf("%04d", intervalCount)
  1097  			dir = filepath.Join(intervalDir, name)
  1098  			c.Ui.Output(fmt.Sprintf("    Capture interval %s", name))
  1099  			c.collectNomad(dir, client)
  1100  			c.collectOperator(dir, client)
  1101  			interval = time.After(c.interval)
  1102  			intervalCount++
  1103  
  1104  		case <-c.ctx.Done():
  1105  			return
  1106  		}
  1107  	}
  1108  }
  1109  
  1110  // collectOperator captures some cluster meta information
  1111  func (c *OperatorDebugCommand) collectOperator(dir string, client *api.Client) {
  1112  	rc, err := client.Operator().RaftGetConfiguration(c.queryOpts())
  1113  	c.reportErr(writeResponseOrErrorToFile(rc, err, c.newFile(dir, "operator-raft.json")))
  1114  
  1115  	sc, _, err := client.Operator().SchedulerGetConfiguration(c.queryOpts())
  1116  	c.reportErr(writeResponseOrErrorToFile(sc, err, c.newFile(dir, "operator-scheduler.json")))
  1117  
  1118  	ah, _, err := client.Operator().AutopilotServerHealth(c.queryOpts())
  1119  	c.reportErr(writeResponseOrErrorToFile(
  1120  		ah, err, c.newFile(dir, "operator-autopilot-health.json")))
  1121  
  1122  	lic, _, err := client.Operator().LicenseGet(c.queryOpts())
  1123  	c.reportErr(writeResponseOrErrorToFile(lic, err, c.newFile(dir, "license.json")))
  1124  }
  1125  
  1126  // collectNomad captures the Nomad cluster state
  1127  func (c *OperatorDebugCommand) collectNomad(dir string, client *api.Client) error {
  1128  
  1129  	js, _, err := client.Jobs().List(c.queryOpts())
  1130  	c.reportErr(writeResponseStreamOrErrorToFile(js, err, c.newFile(dir, "jobs.json")))
  1131  
  1132  	ds, _, err := client.Deployments().List(c.queryOpts())
  1133  	c.reportErr(writeResponseStreamOrErrorToFile(ds, err, c.newFile(dir, "deployments.json")))
  1134  
  1135  	es, _, err := client.Evaluations().List(c.queryOpts())
  1136  	c.reportErr(writeResponseStreamOrErrorToFile(es, err, c.newFile(dir, "evaluations.json")))
  1137  
  1138  	as, _, err := client.Allocations().List(c.queryOpts())
  1139  	c.reportErr(writeResponseStreamOrErrorToFile(as, err, c.newFile(dir, "allocations.json")))
  1140  
  1141  	ns, _, err := client.Nodes().List(c.queryOpts())
  1142  	c.reportErr(writeResponseStreamOrErrorToFile(ns, err, c.newFile(dir, "nodes.json")))
  1143  
  1144  	// CSI Plugins - /v1/plugins?type=csi
  1145  	ps, _, err := client.CSIPlugins().List(c.queryOpts())
  1146  	c.reportErr(writeResponseStreamOrErrorToFile(ps, err, c.newFile(dir, "csi-plugins.json")))
  1147  
  1148  	// CSI Plugin details - /v1/plugin/csi/:plugin_id
  1149  	for _, p := range ps {
  1150  		csiPlugin, _, err := client.CSIPlugins().Info(p.ID, c.queryOpts())
  1151  		csiPluginFileName := fmt.Sprintf("csi-plugin-id-%s.json", p.ID)
  1152  		c.reportErr(writeResponseOrErrorToFile(csiPlugin, err, c.newFile(dir, csiPluginFileName)))
  1153  	}
  1154  
  1155  	// CSI Volumes - /v1/volumes?type=csi
  1156  	csiVolumes, _, err := client.CSIVolumes().List(c.queryOpts())
  1157  	c.reportErr(writeResponseStreamOrErrorToFile(
  1158  		csiVolumes, err, c.newFile(dir, "csi-volumes.json")))
  1159  
  1160  	// CSI Volume details - /v1/volumes/csi/:volume-id
  1161  	for _, v := range csiVolumes {
  1162  		csiVolume, _, err := client.CSIVolumes().Info(v.ID, c.queryOpts())
  1163  		csiFileName := fmt.Sprintf("csi-volume-id-%s.json", v.ID)
  1164  		c.reportErr(writeResponseOrErrorToFile(csiVolume, err, c.newFile(dir, csiFileName)))
  1165  	}
  1166  
  1167  	metrics, _, err := client.Operator().MetricsSummary(c.queryOpts())
  1168  	c.reportErr(writeResponseOrErrorToFile(metrics, err, c.newFile(dir, "metrics.json")))
  1169  
  1170  	return nil
  1171  }
  1172  
  1173  // collectConsul calls the Consul API to collect data
  1174  func (c *OperatorDebugCommand) collectConsul(dir string) {
  1175  	if c.consul.addrVal == "" {
  1176  		c.Ui.Output("Consul - Skipping, no API address found")
  1177  		return
  1178  	}
  1179  
  1180  	c.Ui.Info(fmt.Sprintf("Consul - Collecting Consul API data from: %s", c.consul.addrVal))
  1181  
  1182  	client, err := c.consulAPIClient()
  1183  	if err != nil {
  1184  		c.Ui.Error(fmt.Sprintf("failed to create Consul API client: %s", err))
  1185  		return
  1186  	}
  1187  
  1188  	// Exit if we are unable to retrieve the leader
  1189  	err = c.collectConsulAPIRequest(client, "/v1/status/leader", dir, "consul-leader.json")
  1190  	if err != nil {
  1191  		c.Ui.Output(fmt.Sprintf("Unable to contact Consul leader, skipping: %s", err))
  1192  		return
  1193  	}
  1194  
  1195  	c.collectConsulAPI(client, "/v1/agent/host", dir, "consul-agent-host.json")
  1196  	c.collectConsulAPI(client, "/v1/agent/members", dir, "consul-agent-members.json")
  1197  	c.collectConsulAPI(client, "/v1/agent/metrics", dir, "consul-agent-metrics.json")
  1198  	c.collectConsulAPI(client, "/v1/agent/self", dir, "consul-agent-self.json")
  1199  }
  1200  
  1201  func (c *OperatorDebugCommand) consulAPIClient() (*http.Client, error) {
  1202  	httpClient := defaultHttpClient()
  1203  
  1204  	err := api.ConfigureTLS(httpClient, c.consul.tls)
  1205  	if err != nil {
  1206  		return nil, fmt.Errorf("failed to configure TLS: %w", err)
  1207  	}
  1208  
  1209  	return httpClient, nil
  1210  }
  1211  
  1212  func (c *OperatorDebugCommand) collectConsulAPI(client *http.Client, urlPath string, dir string, file string) {
  1213  	err := c.collectConsulAPIRequest(client, urlPath, dir, file)
  1214  	if err != nil {
  1215  		c.Ui.Error(fmt.Sprintf("Error collecting from Consul API: %s", err.Error()))
  1216  	}
  1217  }
  1218  
  1219  func (c *OperatorDebugCommand) collectConsulAPIRequest(client *http.Client, urlPath string, dir string, file string) error {
  1220  	url := c.consul.addrVal + urlPath
  1221  
  1222  	req, err := http.NewRequest("GET", url, nil)
  1223  	if err != nil {
  1224  		return fmt.Errorf("failed to create HTTP request for Consul API URL=%q: %w", url, err)
  1225  	}
  1226  
  1227  	req.Header.Add("X-Consul-Token", c.consul.token())
  1228  	req.Header.Add("User-Agent", userAgent)
  1229  
  1230  	resp, err := client.Do(req)
  1231  	if err != nil {
  1232  		return err
  1233  	}
  1234  
  1235  	c.writeBody(dir, file, resp, err)
  1236  
  1237  	return nil
  1238  }
  1239  
  1240  // collectVault calls the Vault API directly to collect data
  1241  func (c *OperatorDebugCommand) collectVault(dir, vault string) error {
  1242  	vaultAddr := c.vault.addr(vault)
  1243  	if vaultAddr == "" {
  1244  		return nil
  1245  	}
  1246  
  1247  	c.Ui.Info(fmt.Sprintf("Vault - Collecting Vault API data from: %s", vaultAddr))
  1248  	client := defaultHttpClient()
  1249  	if c.vault.ssl {
  1250  		err := api.ConfigureTLS(client, c.vault.tls)
  1251  		if err != nil {
  1252  			return fmt.Errorf("failed to configure TLS: %w", err)
  1253  		}
  1254  	}
  1255  
  1256  	req, err := http.NewRequest("GET", vaultAddr+"/v1/sys/health", nil)
  1257  	if err != nil {
  1258  		return fmt.Errorf("failed to create HTTP request for Vault API URL=%q: %w", vaultAddr, err)
  1259  	}
  1260  
  1261  	req.Header.Add("X-Vault-Token", c.vault.token())
  1262  	req.Header.Add("User-Agent", userAgent)
  1263  	resp, err := client.Do(req)
  1264  	c.writeBody(dir, "vault-sys-health.json", resp, err)
  1265  
  1266  	return nil
  1267  }
  1268  
  1269  // writeBytes writes a file to the archive, recording it in the manifest
  1270  func (c *OperatorDebugCommand) writeBytes(dir, file string, data []byte) error {
  1271  	// Replace invalid characters in filename
  1272  	filename := helper.CleanFilename(file, "_")
  1273  
  1274  	relativePath := filepath.Join(dir, filename)
  1275  	c.manifest = append(c.manifest, relativePath)
  1276  	dirPath := filepath.Join(c.collectDir, dir)
  1277  	filePath := filepath.Join(dirPath, filename)
  1278  
  1279  	// Ensure parent directories exist
  1280  	err := escapingfs.EnsurePath(dirPath, true)
  1281  	if err != nil {
  1282  		return fmt.Errorf("failed to create parent directories of %q: %w", dirPath, err)
  1283  	}
  1284  
  1285  	// Ensure filename doesn't escape the sandbox of the capture directory
  1286  	escapes := escapingfs.PathEscapesSandbox(c.collectDir, filePath)
  1287  	if escapes {
  1288  		return fmt.Errorf("file path %q escapes capture directory %q", filePath, c.collectDir)
  1289  	}
  1290  
  1291  	// Create the file
  1292  	fh, err := os.Create(filePath)
  1293  	if err != nil {
  1294  		return fmt.Errorf("failed to create file %q, err: %w", filePath, err)
  1295  	}
  1296  	defer fh.Close()
  1297  
  1298  	_, err = fh.Write(data)
  1299  	if err != nil {
  1300  		return fmt.Errorf("failed to write data to file %q, err: %w", filePath, err)
  1301  	}
  1302  	return nil
  1303  }
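        // Sketch of the sandbox guard above: a crafted directory such as
        // writeBytes("../../evil", "x.json", data) (hypothetical call) would
        // resolve outside c.collectDir and is rejected by PathEscapesSandbox
        // instead of being written.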
  1304  
  1305  // newFilePath returns a validated file path rooted in the provided directory
  1306  // and file name. The path is checked against the sandbox of the capture
  1307  // directory and is recorded in the manifest.
  1308  func (c *OperatorDebugCommand) newFilePath(dir, file string) (string, error) {
  1309  
  1310  	// Replace invalid characters in filename
  1311  	filename := helper.CleanFilename(file, "_")
  1312  
  1313  	relativePath := filepath.Join(dir, filename)
  1314  	c.manifest = append(c.manifest, relativePath)
  1315  	dirPath := filepath.Join(c.collectDir, dir)
  1316  	filePath := filepath.Join(dirPath, filename)
  1317  
  1318  	// Ensure parent directories exist
  1319  	err := escapingfs.EnsurePath(dirPath, true)
  1320  	if err != nil {
  1321  		return "", fmt.Errorf("failed to create parent directories of %q: %w", dirPath, err)
  1322  	}
  1323  
  1324  	// Ensure filename doesn't escape the sandbox of the capture directory
  1325  	escapes := escapingfs.PathEscapesSandbox(c.collectDir, filePath)
  1326  	if escapes {
  1327  		return "", fmt.Errorf("file path %q escapes capture directory %q", filePath, c.collectDir)
  1328  	}
  1329  
  1330  	return filePath, nil
  1331  }
  1332  
  1333  type writerGetter func() (io.WriteCloser, error)
  1334  
  1335  // newFile returns a func that creates a new file for writing and returns it as
  1336  // an io.WriteCloser interface. The caller is responsible for closing the
  1337  // io.WriteCloser when it's done.
  1338  //
  1339  // Note: methods cannot be generic in Go, so this function returns a function
  1340  // that closes over our command so that we can still reference the command
  1341  // object's fields to validate the file. In future iterations it might be nice
  1342  // if we could move most of the command into standalone functions.
  1343  func (c *OperatorDebugCommand) newFile(dir, file string) writerGetter {
  1344  	return func() (io.WriteCloser, error) {
  1345  		filePath, err := c.newFilePath(dir, file)
  1346  		if err != nil {
  1347  			return nil, err
  1348  		}
  1349  
  1350  		writer, err := os.Create(filePath)
  1351  		if err != nil {
  1352  			return nil, fmt.Errorf("failed to create file %q: %w", filePath, err)
  1353  		}
  1354  		return writer, nil
  1355  	}
  1356  }
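        // Typical pairing, as used throughout this file: the writerGetter is
        // handed to one of the write helpers below, which opens the file only
        // once the path validates, e.g.:
        //
        //	c.reportErr(writeResponseToFile(c.nodes, c.newFile(clusterDir, "nodes.json")))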
  1357  
  1358  // writeResponseToFile writes a response object to a file. It returns an error
  1359  // that the caller should report to the UI.
  1360  func writeResponseToFile(obj any, getWriterFn writerGetter) error {
  1361  
  1362  	writer, err := getWriterFn()
  1363  	if err != nil {
  1364  		return err
  1365  	}
  1366  	defer writer.Close()
  1367  
  1368  	err = writeJSON(obj, writer)
  1369  	if err != nil {
  1370  		return err
  1371  	}
  1372  	return nil
  1373  }
  1374  
  1375  // writeResponseOrErrorToFile writes a response object to a file, or the error
  1376  // for that response if one was received. It returns an error that the caller
  1377  // should report to the UI.
  1378  func writeResponseOrErrorToFile(obj any, apiErr error, getWriterFn writerGetter) error {
  1379  
  1380  	writer, err := getWriterFn()
  1381  	if err != nil {
  1382  		return err
  1383  	}
  1384  	defer writer.Close()
  1385  
  1386  	if apiErr != nil {
  1387  		obj = errorWrapper{Error: apiErr.Error()}
  1388  	}
  1389  
  1390  	err = writeJSON(obj, writer)
  1391  	if err != nil {
  1392  		return err
  1393  	}
  1394  	return nil
  1395  }
  1396  
  1397  // writeResponseStreamOrErrorToFile writes a stream of response objects to a
  1398  // file in newline-delimited JSON format, or the error for that response if one
  1399  // was received. It returns an error that the caller should report to the UI.
  1400  func writeResponseStreamOrErrorToFile[T any](obj []T, apiErr error, getWriterFn writerGetter) error {
  1401  
  1402  	writer, err := getWriterFn()
  1403  	if err != nil {
  1404  		return err
  1405  	}
  1406  	defer writer.Close()
  1407  
  1408  	if apiErr != nil {
  1409  		wrapped := errorWrapper{Error: apiErr.Error()}
  1410  		return writeJSON(wrapped, writer)
  1411  	}
  1412  
  1413  	err = writeNDJSON(obj, writer)
  1414  	if err != nil {
  1415  		return err
  1416  	}
  1417  	return nil
  1418  }
  1419  
  1420  // writeJSON writes a single Nomad API object (or response error) to the
  1421  // archive file as a JSON object.
  1422  func writeJSON(obj any, writer io.Writer) error {
  1423  	buf, err := json.Marshal(obj)
  1424  	if err != nil {
  1425  		buf, err = json.Marshal(errorWrapper{Error: err.Error()})
  1426  		if err != nil {
  1427  			return fmt.Errorf("could not serialize our own error: %v", err)
  1428  		}
  1429  	}
  1430  	n, err := writer.Write(buf)
  1431  	if err != nil {
  1432  		return fmt.Errorf("write error, wrote %d bytes of %d: %v", n, len(buf), err)
  1433  	}
  1434  	return nil
  1435  }
  1436  
  1437  // writeNDJSON writes a slice of Nomad API objects to the archive file as
  1438  // newline-delimited JSON objects.
  1439  func writeNDJSON[T any](data []T, writer io.Writer) error {
  1440  	for _, obj := range data {
  1441  		err := writeJSON(obj, writer)
  1442  		if err != nil {
  1443  			return fmt.Errorf("failed to write to file: %w", err)
  1444  		}
  1445  		_, err = writer.Write([]byte{'\n'})
  1446  		if err != nil {
  1447  			return fmt.Errorf("failed to write to file: %w", err)
  1448  		}
  1449  	}
  1450  
  1451  	return nil
  1452  }
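
// For illustration (not in the original source), two objects written by
// writeNDJSON land in the archive as one JSON document per line:
//
//	{"ID":"node-1"}
//	{"ID":"node-2"}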
  1453  
  1454  // writeError writes a JSON error object to capture errors in the debug
  1455  // bundle without reporting them to the UI.
  1456  func (c *OperatorDebugCommand) writeError(dir, file string, err error) error {
  1457  	bytes, err := json.Marshal(errorWrapper{Error: err.Error()})
  1458  	if err != nil {
  1459  		return err
  1460  	}
  1461  	return c.writeBytes(dir, file, bytes)
  1462  }
  1463  
// errorWrapper wraps an error string so it serializes to a stable JSON shape,
// e.g. {"Error":"..."}.
  1464  type errorWrapper struct {
  1465  	Error string
  1466  }
  1467  
  1468  // writeBody is a helper that writes the body of an http.Response to the archive
  1469  func (c *OperatorDebugCommand) writeBody(dir, file string, resp *http.Response, err error) {
  1470  	if err != nil {
  1471  		c.writeError(dir, file, err)
  1472  		return
  1473  	}
  1474  
  1475  	defer resp.Body.Close() // close the body even on the early return below
  1476  
  1477  	if resp.ContentLength == 0 {
  1478  		return
  1479  	}
  1480  
  1481  	body, err := io.ReadAll(resp.Body)
  1482  	if err != nil {
  1483  		c.writeError(dir, file, err)
  1484  		return
  1485  	}
  1486  
  1487  	if err := c.writeBytes(dir, file, body); err != nil {
  1488  		c.Ui.Error(err.Error())
  1489  	}
  1490  }
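
// Example (editor's sketch, not in the original source; the URL and addr are
// hypothetical): capture a raw pprof profile into the bundle.
//
//	resp, err := defaultHttpClient().Get(addr + "/debug/pprof/heap")
//	c.writeBody(serverDir, "heap.prof", resp, err)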
  1491  
  1492  type flagExport struct {
  1493  	Name      string
  1494  	Parsed    bool
  1495  	Actual    map[string]*flag.Flag
  1496  	Formal    map[string]*flag.Flag
  1497  	Effective map[string]*flag.Flag // All flags with non-empty value
  1498  	Args      []string              // arguments after flags
  1499  	OsArgs    []string
  1500  }
  1501  
  1502  // writeFlags exports the CLI flags to JSON file
  1503  func (c *OperatorDebugCommand) writeFlags(flags *flag.FlagSet) {
  1504  
  1505  	var f flagExport
  1506  	f.Name = flags.Name()
  1507  	f.Parsed = flags.Parsed()
  1508  	f.Formal = make(map[string]*flag.Flag)
  1509  	f.Actual = make(map[string]*flag.Flag)
  1510  	f.Effective = make(map[string]*flag.Flag)
  1511  	f.Args = flags.Args()
  1512  	f.OsArgs = os.Args
  1513  
  1514  	// Formal flags (all flags)
  1515  	flags.VisitAll(func(flagA *flag.Flag) {
  1516  		f.Formal[flagA.Name] = flagA
  1517  
  1518  		// Determine which of these are "effective" flags by comparing to the empty string
  1519  		if flagA.Value.String() != "" {
  1520  			f.Effective[flagA.Name] = flagA
  1521  		}
  1522  	})
  1523  	// Actual flags (everything passed on cmdline)
  1524  	flags.Visit(func(flag *flag.Flag) {
  1525  		f.Actual[flag.Name] = flag
  1526  	})
  1527  
  1528  	c.reportErr(writeResponseToFile(f, c.newFile(clusterDir, "cli-flags.json")))
  1529  }
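
// For illustration (not in the original source): given a command line of
// "nomad operator debug -duration=2m", Actual contains only "duration",
// Formal contains every defined flag, and Effective contains every flag whose
// current value is non-empty, defaults included.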
  1530  
  1531  func (c *OperatorDebugCommand) reportErr(err error) {
  1532  	if err != nil {
  1533  		c.Ui.Error(err.Error())
  1534  	}
  1535  }
  1536  
  1537  // writeManifest creates the index files
  1538  func (c *OperatorDebugCommand) writeManifest() error {
  1539  	// Write the JSON
  1540  	path := filepath.Join(c.collectDir, "index.json")
  1541  	jsonFh, err := os.Create(path)
  1542  	if err != nil {
  1543  		return err
  1544  	}
  1545  	defer jsonFh.Close()
  1546  
  1547  	if err := json.NewEncoder(jsonFh).Encode(c.manifest); err != nil {
		return err
	}
  1548  
  1549  	// Write the HTML
  1550  	path = filepath.Join(c.collectDir, "index.html")
  1551  	htmlFh, err := os.Create(path)
  1552  	if err != nil {
  1553  		return err
  1554  	}
  1555  	defer htmlFh.Close()
  1556  
  1557  	head, err := template.New("head").Parse("<html><head><title>{{.}}</title></head>\n<body><h1>{{.}}</h1>\n<ul>")
	if err != nil {
		return err
	}
  1558  	line, err := template.New("line").Parse("<li><a href=\"{{.}}\">{{.}}</a></li>\n")
	if err != nil {
		return err
	}
  1562  	tail := "</ul></body></html>\n"
  1563  
  1564  	head.Execute(htmlFh, c.timestamp)
  1565  	for _, f := range c.manifest {
  1566  		line.Execute(htmlFh, f)
  1567  	}
  1568  	htmlFh.WriteString(tail)
  1569  
  1570  	return nil
  1571  }
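
// For illustration (not in the original source), with a hypothetical
// timestamp and a one-entry manifest the generated index.html is roughly:
//
//	<html><head><title>2006-01-02T15:04:05Z</title></head>
//	<body><h1>2006-01-02T15:04:05Z</h1>
//	<ul><li><a href="cluster/members.json">cluster/members.json</a></li>
//	</ul></body></html>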
  1572  
  1573  // trap captures signals, and closes stopCh
  1574  func (c *OperatorDebugCommand) trap() {
  1575  	sigCh := make(chan os.Signal, 1)
  1576  	signal.Notify(sigCh,
  1577  		syscall.SIGHUP,
  1578  		syscall.SIGINT,
  1579  		syscall.SIGTERM,
  1580  		syscall.SIGQUIT)
  1581  
  1582  	go func() {
  1583  		<-sigCh
  1584  		c.cancel()
  1585  	}()
  1586  }
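
// Example (editor's sketch, not in the original source): trap is installed
// once the cancellable context exists, so an interrupt cancels in-flight
// collection through c.ctx.
//
//	c.ctx, c.cancel = context.WithCancel(context.Background())
//	c.trap()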
  1587  
  1588  func (c *OperatorDebugCommand) verboseOut(out string) {
  1589  	if c.verbose {
  1590  		c.Ui.Output(out)
  1591  	}
  1592  }
  1593  
  1594  func (c *OperatorDebugCommand) verboseOutf(format string, a ...interface{}) {
  1595  	c.verboseOut(fmt.Sprintf(format, a...))
  1596  }
  1597  
  1598  // TarCZF, like the tar command, recursively builds a gzip compressed tar
  1599  // archive from a directory. If target is not empty, all files in the bundle
  1600  // are prefixed with the target path.
  1601  func TarCZF(archive string, src, target string) error {
  1602  	// ensure the src actually exists before trying to tar it
  1603  	if _, err := os.Stat(src); err != nil {
  1604  		return fmt.Errorf("unable to tar files: %w", err)
  1605  	}
  1606  
  1607  	// create the archive
  1608  	fh, err := os.Create(archive)
  1609  	if err != nil {
  1610  		return err
  1611  	}
  1612  	defer fh.Close()
  1613  
  1614  	zz := gzip.NewWriter(fh)
  1615  	defer zz.Close()
  1616  
  1617  	tw := tar.NewWriter(zz)
  1618  	defer tw.Close()
  1619  
  1620  	// tar
  1621  	return filepath.Walk(src, func(file string, fi os.FileInfo, err error) error {
  1622  
  1623  		// return on any error
  1624  		if err != nil {
  1625  			return err
  1626  		}
  1627  
  1628  		if !fi.Mode().IsRegular() {
  1629  			return nil
  1630  		}
  1631  
  1632  		header, err := tar.FileInfoHeader(fi, fi.Name())
  1633  		if err != nil {
  1634  			return err
  1635  		}
  1636  
  1637  		// remove leading path to the src, so files are relative to the archive;
  1638  		// TrimPrefix only strips the leading occurrence, unlike ReplaceAll
		path := strings.TrimPrefix(file, src)
  1639  		if target != "" {
  1640  			path = filepath.Join([]string{target, path}...)
  1641  		}
  1642  		path = strings.TrimPrefix(path, string(filepath.Separator))
  1643  
  1644  		header.Name = path
  1645  
  1646  		if err := tw.WriteHeader(header); err != nil {
  1647  			return err
  1648  		}
  1649  
  1650  		// copy the file contents
  1651  		f, err := os.Open(file)
  1652  		if err != nil {
  1653  			return err
  1654  		}
  1655  
  1656  		if _, err := io.Copy(tw, f); err != nil {
			f.Close() // don't leak the file handle on the error path
  1657  			return err
  1658  		}
  1659  
  1660  		f.Close()
  1661  
  1662  		return nil
  1663  	})
  1664  }
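
// Example (editor's sketch, not in the original source; the archive name and
// prefix are hypothetical): pack the collection directory into the bundle.
//
//	if err := TarCZF("nomad-debug.tar.gz", c.collectDir, "nomad-debug"); err != nil {
//		return err
//	}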
  1665  
  1666  // filterServerMembers returns a slice of server member names matching the search criteria
  1667  func filterServerMembers(serverMembers *api.ServerMembers, serverIDs string, region string) (membersFound []string, err error) {
  1668  	if serverMembers.Members == nil {
  1669  		return nil, fmt.Errorf("failed to parse server members: members==nil")
  1670  	}
  1671  
  1672  	prefixes := stringToSlice(serverIDs)
  1673  
  1674  	// "leader" is a special case which Nomad handles in the API.  If "leader"
  1675  	// appears in serverIDs, add it to membersFound and remove it from the list
  1676  	// so that it isn't processed by the range loop
  1677  	if slices.Contains(prefixes, "leader") {
  1678  		membersFound = append(membersFound, "leader")
  1679  		helper.RemoveEqualFold(&prefixes, "leader")
  1680  	}
  1681  
  1682  	for _, member := range serverMembers.Members {
  1683  		// If region is provided it must match exactly
  1684  		if region != "" && member.Tags["region"] != region {
  1685  			continue
  1686  		}
  1687  
  1688  		// Include every member when serverIDs is "all"
  1689  		if serverIDs == "all" {
  1690  			membersFound = append(membersFound, member.Name)
  1691  			continue
  1692  		}
  1693  
  1694  		// Include member if name matches any prefix from serverIDs
  1695  		if helper.StringHasPrefixInSlice(member.Name, prefixes) {
  1696  			membersFound = append(membersFound, member.Name)
  1697  		}
  1698  	}
  1699  
  1700  	return membersFound, nil
  1701  }
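
// For illustration (not in the original source): with serverIDs of
// "leader,server-a" and members named "server-a.global" and "server-b.global",
// filterServerMembers returns ["leader", "server-a.global"]; a serverIDs of
// "all" returns every member name.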
  1702  
  1703  // stringToSlice splits comma-separated input string into slice, trims
  1704  // whitespace, and prunes empty values
  1705  func stringToSlice(input string) []string {
  1706  	ns := strings.Split(input, ",")
  1707  	var out []string
  1708  	for _, n := range ns {
  1709  		s := strings.TrimSpace(n)
  1710  		if s == "" {
  1711  			continue
  1712  		}
  1713  		out = append(out, s)
  1714  	}
  1715  	return out
  1716  }
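
// For illustration (not in the original source):
//
//	stringToSlice(" a, b,,c ") // returns []string{"a", "b", "c"}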
  1717  
  1718  func parseEventTopics(topicList []string) (map[api.Topic][]string, error) {
  1719  	topics := make(map[api.Topic][]string)
  1720  
  1721  	var mErrs *multierror.Error
  1722  
  1723  	for _, topic := range topicList {
  1724  		k, v, err := parseTopic(topic)
  1725  		if err != nil {
  1726  			mErrs = multierror.Append(mErrs, err)
			continue // don't record a topic for unparseable input
  1727  		}
  1728  
  1729  		topics[api.Topic(k)] = append(topics[api.Topic(k)], v)
  1730  	}
  1731  
  1732  	return topics, mErrs.ErrorOrNil()
  1733  }
  1734  
  1735  func parseTopic(input string) (string, string, error) {
  1736  	var topic, filter string
  1737  
  1738  	parts := strings.Split(input, ":")
  1739  	switch len(parts) {
  1740  	case 1:
  1741  		// infer wildcard if only given a topic
  1742  		topic = input
  1743  		filter = "*"
  1744  	case 2:
  1745  		topic = parts[0]
  1746  		filter = parts[1]
  1747  	default:
  1748  		return "", "", fmt.Errorf("invalid key value pair for topic: %s", input)
  1749  	}
  1750  
  1751  	return strings.Title(topic), filter, nil // Title-case the topic to match api.Topic names
  1752  }
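
// For illustration (not in the original source):
//
//	parseTopic("Deployment")  // returns ("Deployment", "*", nil)
//	parseTopic("node:node-1") // returns ("Node", "node-1", nil)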
  1753  
  1754  func allTopics() map[api.Topic][]string {
  1755  	return map[api.Topic][]string{"*": {"*"}}
  1756  }
  1757  
  1758  // topicsFromString parses a comma separated list into a topicMap
  1759  func topicsFromString(topicList string) (map[api.Topic][]string, error) {
  1760  	if topicList == "none" {
  1761  		return nil, nil
  1762  	}
  1763  	if topicList == "all" {
  1764  		return allTopics(), nil
  1765  	}
  1766  
  1767  	topics := stringToSlice(topicList)
  1768  	topicMap, err := parseEventTopics(topics)
  1769  	if err != nil {
  1770  		return nil, err
  1771  	}
  1772  	return topicMap, nil
  1773  }
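
// For illustration (not in the original source):
//
//	topicsFromString("Job:redis,Node")
//	// returns map[api.Topic][]string{"Job": {"redis"}, "Node": {"*"}}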
  1774  
  1775  // external holds address configuration for Consul and Vault APIs
  1776  type external struct {
  1777  	tls       *api.TLSConfig
  1778  	addrVal   string
  1779  	auth      string
  1780  	ssl       bool
  1781  	tokenVal  string
  1782  	tokenFile string
  1783  }
  1784  
  1785  func (e *external) addr(defaultAddr string) string {
  1786  	if e.addrVal == "" {
  1787  		return defaultAddr
  1788  	}
  1789  
  1790  	// Return address as-is if it contains a protocol
  1791  	if strings.Contains(e.addrVal, "://") {
  1792  		return e.addrVal
  1793  	}
  1794  
  1795  	if e.ssl {
  1796  		return "https://" + e.addrVal
  1797  	}
  1798  
  1799  	return "http://" + e.addrVal
  1800  }
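
// For illustration (not in the original source):
//
//	(&external{ssl: true, addrVal: "127.0.0.1:8500"}).addr("") // "https://127.0.0.1:8500"
//	(&external{}).addr("http://127.0.0.1:8500")                // returns the default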
  1801  
  1802  func (e *external) setAddr(addr string) {
  1803  	// Handle no protocol scenario first
  1804  	if !strings.Contains(addr, "://") {
  1805  		e.addrVal = "http://" + addr
  1806  		if e.ssl {
  1807  			e.addrVal = "https://" + addr
  1808  		}
  1809  		return
  1810  	}
  1811  
  1812  	// Set the SSL boolean based on the protocol prefix
  1813  	e.ssl = strings.HasPrefix(addr, "https://")
  1817  	e.addrVal = addr
  1818  }
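
// For illustration (not in the original source; the addresses are
// hypothetical):
//
//	e.setAddr("https://vault.example.com:8200") // e.ssl becomes true
//	e.setAddr("vault.example.com:8200")         // scheme derived from current e.ssl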
  1819  
  1820  func (e *external) token() string {
  1821  	if e.tokenVal != "" {
  1822  		return e.tokenVal
  1823  	}
  1824  
  1825  	if e.tokenFile != "" {
  1826  		bs, err := os.ReadFile(e.tokenFile)
  1827  		if err == nil {
  1828  			return strings.TrimSpace(string(bs))
  1829  		}
  1830  	}
  1831  
  1832  	return ""
  1833  }
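
// For illustration (not in the original source; the values are hypothetical):
// an explicit token wins over a token file.
//
//	e := &external{tokenVal: "s.abc123", tokenFile: "/etc/vault/token"}
//	e.token() // returns "s.abc123"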
  1834  
  1835  func (c *OperatorDebugCommand) getConsulAddrFromSelf(self *api.AgentSelf) string {
  1836  	if self == nil {
  1837  		return ""
  1838  	}
  1839  
  1840  	var consulAddr string
  1841  	r, ok := self.Config["Consul"]
  1842  	if ok {
  1843  		m, ok := r.(map[string]interface{})
  1844  		if ok {
  1845  			raw := m["EnableSSL"]
  1846  			c.consul.ssl, _ = raw.(bool)
  1847  			raw = m["Addr"]
  1848  			if addr, ok := raw.(string); ok { // comma-ok avoids a panic on a missing Addr
				c.consul.setAddr(addr)
			}
  1849  			raw = m["Auth"]
  1850  			c.consul.auth, _ = raw.(string)
  1851  			raw = m["Token"]
  1852  			c.consul.tokenVal, _ = raw.(string) // comma-ok avoids a panic on a missing Token
  1853  
  1854  			consulAddr = c.consul.addr("")
  1855  		}
  1856  	}
  1857  	return consulAddr
  1858  }
  1859  
  1860  func (c *OperatorDebugCommand) getVaultAddrFromSelf(self *api.AgentSelf) string {
  1861  	if self == nil {
  1862  		return ""
  1863  	}
  1864  
  1865  	var vaultAddr string
  1866  	r, ok := self.Config["Vault"]
  1867  	if ok {
  1868  		m, ok := r.(map[string]interface{})
  1869  		if ok {
  1870  			raw := m["EnableSSL"]
  1871  			c.vault.ssl, _ = raw.(bool)
  1872  			raw = m["Addr"]
  1873  			if addr, ok := raw.(string); ok { // comma-ok avoids a panic on a missing Addr
				c.vault.setAddr(addr)
			}
  1874  			raw = m["Auth"]
  1875  			c.vault.auth, _ = raw.(string)
  1876  			raw = m["Token"]
  1877  			c.vault.tokenVal, _ = raw.(string) // comma-ok avoids a panic on a missing Token
  1878  
  1879  			vaultAddr = c.vault.addr("")
  1880  		}
  1881  	}
  1882  	return vaultAddr
  1883  }
  1884  
  1885  // defaultHttpClient configures a basic httpClient
  1886  func defaultHttpClient() *http.Client {
  1887  	httpClient := cleanhttp.DefaultClient()
  1888  	transport := httpClient.Transport.(*http.Transport)
  1889  	transport.TLSHandshakeTimeout = 10 * time.Second
  1890  	transport.TLSClientConfig = &tls.Config{
  1891  		MinVersion: tls.VersionTLS12,
  1892  	}
  1893  
  1894  	return httpClient
  1895  }
  1896  
  1897  // isRedirectError returns true if an error is a redirect error.
  1898  func isRedirectError(err error) bool {
  1899  	if err == nil {
  1900  		return false
  1901  	}
  1902  
  1903  	const redirectErr string = `invalid character '<' looking for beginning of value`
  1904  	return strings.Contains(err.Error(), redirectErr)
  1905  }
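
// For illustration (not in the original source): the API client surfaces this
// error when it JSON-decodes an HTML redirect body, e.g.
//
//	var out map[string]any
//	err := json.Unmarshal([]byte("<html>redirect</html>"), &out)
//	// err.Error(): invalid character '<' looking for beginning of value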
  1906  
  1907  // getNomadVersion fetches the version of Nomad running on a given server/client node ID
  1908  func (c *OperatorDebugCommand) getNomadVersion(serverID string, nodeID string) string {
  1909  	if serverID == "" && nodeID == "" {
  1910  		return ""
  1911  	}
  1912  
  1913  	version := ""
  1914  	if serverID != "" {
  1915  		for _, server := range c.members.Members {
  1916  			// Raft v2 server
  1917  			if server.Name == serverID {
  1918  				version = server.Tags["build"]
  1919  			}
  1920  
  1921  			// Raft v3 server
  1922  			if server.Tags["id"] == serverID {
  1923  				version = server.Tags["version"]
  1924  			}
  1925  		}
  1926  	}
  1927  
  1928  	if nodeID != "" {
  1929  		for _, node := range c.nodes {
  1930  			if node.ID == nodeID {
  1931  				version = node.Version
  1932  			}
  1933  		}
  1934  	}
  1935  
  1936  	return version
  1937  }
  1938  
  1939  // checkVersion returns an error if version matches the constraint; the
// constraint is expected to describe versions that are NOT supported (see
// minimumVersionPprofConstraint).
  1940  func checkVersion(version string, versionConstraint string) error {
  1941  	v, err := goversion.NewVersion(version)
  1942  	if err != nil {
  1943  		return fmt.Errorf("error: %v", err)
  1944  	}
  1945  
  1946  	c, err := goversion.NewConstraint(versionConstraint)
  1947  	if err != nil {
  1948  		return fmt.Errorf("error: %v", err)
  1949  	}
  1950  
  1951  	if !c.Check(v) {
  1952  		return nil
  1953  	}
  1954  	return fmt.Errorf("unsupported version=%s matches version filter %s", version, versionConstraint)
  1955  }
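
// For illustration (not in the original source):
//
//	checkVersion("0.11.1", minimumVersionPprofConstraint) // error: matches the excluded range
//	checkVersion("1.6.1", minimumVersionPprofConstraint)  // nil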