github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/consul_grpc_sock_hook.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"io"
     8  	"net"
     9  	"os"
    10  	"path/filepath"
    11  	"strings"
    12  	"sync"
    13  	"time"
    14  
    15  	"github.com/hashicorp/go-hclog"
    16  	"github.com/hashicorp/nomad/client/allocdir"
    17  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    18  	"github.com/hashicorp/nomad/nomad/structs"
    19  	"github.com/hashicorp/nomad/nomad/structs/config"
    20  )
    21  
    22  const (
    23  	consulGRPCSockHookName = "consul_grpc_socket"
    24  
    25  	// socketProxyStopWaitTime is the amount of time to wait for a socket proxy
    26  	// to stop before assuming something went awry and return a timeout error.
    27  	socketProxyStopWaitTime = 3 * time.Second
    28  
    29  	// consulGRPCFallbackPort is the last resort fallback port to use in
    30  	// combination with the Consul HTTP config address when creating the
    31  	// socket.
    32  	consulGRPCFallbackPort = "8502"
    33  )
    34  
    35  var (
    36  	errSocketProxyTimeout = errors.New("timed out waiting for socket proxy to exit")
    37  )
    38  
// consulGRPCSocketHook creates Unix sockets to allow communication from inside a
// netns to Consul gRPC endpoint.
//
// Noop for allocations without a group Connect stanza using bridge networking.
//
// The hook owns a single grpcSocketProxy: Prerun and Update start it when the
// allocation requires one, and Postrun stops it.
type consulGRPCSocketHook struct {
	// logger is used to report failures to stop the proxy.
	logger hclog.Logger

	// mu synchronizes proxy and alloc which may be mutated and read concurrently
	// via Prerun, Update, Postrun. alloc is replaced by every Update; proxy is
	// the socket proxy that Prerun/Update run and Postrun stops.
	mu    sync.Mutex
	alloc *structs.Allocation
	proxy *grpcSocketProxy
}
    52  
    53  func newConsulGRPCSocketHook(
    54  	logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir,
    55  	config *config.ConsulConfig, nodeAttrs map[string]string) *consulGRPCSocketHook {
    56  
    57  	// Attempt to find the gRPC port via the node attributes, otherwise use the
    58  	// default fallback.
    59  	consulGRPCPort, ok := nodeAttrs["consul.grpc"]
    60  	if !ok {
    61  		consulGRPCPort = consulGRPCFallbackPort
    62  	}
    63  
    64  	return &consulGRPCSocketHook{
    65  		alloc:  alloc,
    66  		proxy:  newGRPCSocketProxy(logger, allocDir, config, consulGRPCPort),
    67  		logger: logger.Named(consulGRPCSockHookName),
    68  	}
    69  }
    70  
    71  func (*consulGRPCSocketHook) Name() string {
    72  	return consulGRPCSockHookName
    73  }
    74  
    75  // shouldRun returns true if the Unix socket should be created and proxied.
    76  // Requires the mutex to be held.
    77  func (h *consulGRPCSocketHook) shouldRun() bool {
    78  	tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
    79  
    80  	// we must be in bridge networking and at least one connect sidecar task
    81  	if !tgFirstNetworkIsBridge(tg) {
    82  		return false
    83  	}
    84  
    85  	for _, s := range tg.Services {
    86  		if s.Connect.HasSidecar() || s.Connect.IsGateway() {
    87  			return true
    88  		}
    89  	}
    90  
    91  	return false
    92  }
    93  
    94  func (h *consulGRPCSocketHook) Prerun() error {
    95  	h.mu.Lock()
    96  	defer h.mu.Unlock()
    97  
    98  	if !h.shouldRun() {
    99  		return nil
   100  	}
   101  
   102  	return h.proxy.run(h.alloc)
   103  }
   104  
   105  // Update creates a gRPC socket file and proxy if there are any Connect
   106  // services.
   107  func (h *consulGRPCSocketHook) Update(req *interfaces.RunnerUpdateRequest) error {
   108  	h.mu.Lock()
   109  	defer h.mu.Unlock()
   110  
   111  	h.alloc = req.Alloc
   112  
   113  	if !h.shouldRun() {
   114  		return nil
   115  	}
   116  
   117  	return h.proxy.run(h.alloc)
   118  }
   119  
   120  func (h *consulGRPCSocketHook) Postrun() error {
   121  	h.mu.Lock()
   122  	defer h.mu.Unlock()
   123  
   124  	if err := h.proxy.stop(); err != nil {
   125  		// Only log failures to stop proxies. Worst case scenario is a
   126  		// small goroutine leak.
   127  		h.logger.Debug("error stopping Consul proxy", "error", err)
   128  	}
   129  	return nil
   130  }
   131  
// grpcSocketProxy listens on a Unix socket under the allocation directory and
// proxies each accepted connection to the Consul gRPC address.
//
// A proxy runs at most once (see run) and is never restarted after it has
// been stopped or has shut down.
type grpcSocketProxy struct {
	logger   hclog.Logger
	allocDir *allocdir.AllocDir
	config   *config.ConsulConfig

	// consulGRPCFallbackPort is the port to use if the operator did not
	// specify a gRPC config address.
	consulGRPCFallbackPort string

	// ctx is cancelled through cancel to shut the proxy down. doneCh is
	// closed by the proxy goroutine once it has exited. runOnce records that
	// run successfully started the proxy goroutine.
	ctx     context.Context
	cancel  func()
	doneCh  chan struct{}
	runOnce bool
}
   146  
   147  func newGRPCSocketProxy(
   148  	logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig,
   149  	consulGRPCFallbackPort string) *grpcSocketProxy {
   150  
   151  	ctx, cancel := context.WithCancel(context.Background())
   152  	return &grpcSocketProxy{
   153  		allocDir:               allocDir,
   154  		config:                 config,
   155  		consulGRPCFallbackPort: consulGRPCFallbackPort,
   156  		ctx:                    ctx,
   157  		cancel:                 cancel,
   158  		doneCh:                 make(chan struct{}),
   159  		logger:                 logger,
   160  	}
   161  }
   162  
   163  // run socket proxy if allocation requires it, it isn't already running, and it
   164  // hasn't been told to stop.
   165  //
   166  // NOT safe for concurrent use.
   167  func (p *grpcSocketProxy) run(alloc *structs.Allocation) error {
   168  	// Only run once.
   169  	if p.runOnce {
   170  		return nil
   171  	}
   172  
   173  	// Only run once. Never restart.
   174  	select {
   175  	case <-p.doneCh:
   176  		p.logger.Trace("socket proxy already shutdown; exiting")
   177  		return nil
   178  	case <-p.ctx.Done():
   179  		p.logger.Trace("socket proxy already done; exiting")
   180  		return nil
   181  	default:
   182  	}
   183  
   184  	// make sure either grpc or http consul address has been configured
   185  	if p.config.GRPCAddr == "" && p.config.Addr == "" {
   186  		return errors.New("consul address must be set on nomad client")
   187  	}
   188  
   189  	destAddr := p.config.GRPCAddr
   190  	if destAddr == "" {
   191  		// No GRPCAddr defined. Use Addr but replace port with the gRPC
   192  		// default of 8502.
   193  		host, _, err := net.SplitHostPort(p.config.Addr)
   194  		if err != nil {
   195  			return fmt.Errorf("error parsing Consul address %q: %v",
   196  				p.config.Addr, err)
   197  		}
   198  
   199  		destAddr = net.JoinHostPort(host, p.consulGRPCFallbackPort)
   200  	}
   201  
   202  	hostGRPCSocketPath := filepath.Join(p.allocDir.AllocDir, allocdir.AllocGRPCSocket)
   203  
   204  	// if the socket already exists we'll try to remove it, but if not then any
   205  	// other errors will bubble up to the caller here or when we try to listen
   206  	_, err := os.Stat(hostGRPCSocketPath)
   207  	if err == nil {
   208  		err := os.Remove(hostGRPCSocketPath)
   209  		if err != nil {
   210  			return fmt.Errorf(
   211  				"unable to remove existing unix socket for Consul gRPC endpoint: %v", err)
   212  		}
   213  	}
   214  
   215  	listener, err := net.Listen("unix", hostGRPCSocketPath)
   216  	if err != nil {
   217  		return fmt.Errorf("unable to create unix socket for Consul gRPC endpoint: %v", err)
   218  	}
   219  
   220  	// The gRPC socket should be usable by all users in case a task is
   221  	// running as an unprivileged user.  Unix does not allow setting domain
   222  	// socket permissions when creating the file, so we must manually call
   223  	// chmod afterwards.
   224  	// https://github.com/golang/go/issues/11822
   225  	if err := os.Chmod(hostGRPCSocketPath, os.ModePerm); err != nil {
   226  		return fmt.Errorf("unable to set permissions on unix socket for Consul gRPC endpoint: %v", err)
   227  	}
   228  
   229  	go func() {
   230  		proxy(p.ctx, p.logger, destAddr, listener)
   231  		p.cancel()
   232  		close(p.doneCh)
   233  	}()
   234  
   235  	p.runOnce = true
   236  	return nil
   237  }
   238  
   239  // stop the proxy and blocks until the proxy has stopped. Returns an error if
   240  // the proxy does not exit in a timely fashion.
   241  func (p *grpcSocketProxy) stop() error {
   242  	p.cancel()
   243  
   244  	// If proxy was never run, don't wait for anything to shutdown.
   245  	if !p.runOnce {
   246  		return nil
   247  	}
   248  
   249  	select {
   250  	case <-p.doneCh:
   251  		return nil
   252  	case <-time.After(socketProxyStopWaitTime):
   253  		return errSocketProxyTimeout
   254  	}
   255  }
   256  
   257  // Proxy between a listener and destination.
   258  func proxy(ctx context.Context, logger hclog.Logger, destAddr string, l net.Listener) {
   259  	// Wait for all connections to be done before exiting to prevent
   260  	// goroutine leaks.
   261  	wg := sync.WaitGroup{}
   262  	ctx, cancel := context.WithCancel(ctx)
   263  	defer func() {
   264  		// Must cancel context and close listener before waiting
   265  		cancel()
   266  		_ = l.Close()
   267  		wg.Wait()
   268  	}()
   269  
   270  	// Close Accept() when context is cancelled
   271  	go func() {
   272  		<-ctx.Done()
   273  		_ = l.Close()
   274  	}()
   275  
   276  	for ctx.Err() == nil {
   277  		conn, err := l.Accept()
   278  		if err != nil {
   279  			if ctx.Err() != nil {
   280  				// Accept errors during shutdown are to be expected
   281  				return
   282  			}
   283  			logger.Error("error in socket proxy; shutting down proxy", "error", err, "dest", destAddr)
   284  			return
   285  		}
   286  
   287  		wg.Add(1)
   288  		go func() {
   289  			defer wg.Done()
   290  			proxyConn(ctx, logger, destAddr, conn)
   291  		}()
   292  	}
   293  }
   294  
   295  // proxyConn proxies between an existing net.Conn and a destination address. If
   296  // the destAddr starts with "unix://" it is treated as a path to a unix socket.
   297  // Otherwise it is treated as a host for a TCP connection.
   298  //
   299  // When the context is cancelled proxyConn blocks until all goroutines shutdown
   300  // to prevent leaks.
   301  func proxyConn(ctx context.Context, logger hclog.Logger, destAddr string, conn net.Conn) {
   302  	// Close the connection when we're done with it.
   303  	defer conn.Close()
   304  
   305  	ctx, cancel := context.WithCancel(ctx)
   306  	defer cancel()
   307  
   308  	// Detect unix sockets
   309  	network := "tcp"
   310  	const unixPrefix = "unix://"
   311  	if strings.HasPrefix(destAddr, unixPrefix) {
   312  		network = "unix"
   313  		destAddr = destAddr[len(unixPrefix):]
   314  	}
   315  
   316  	dialer := &net.Dialer{}
   317  	dest, err := dialer.DialContext(ctx, network, destAddr)
   318  	if err == context.Canceled || err == context.DeadlineExceeded {
   319  		logger.Trace("proxy exiting gracefully", "error", err, "dest", destAddr,
   320  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr())
   321  		return
   322  	}
   323  	if err != nil {
   324  		logger.Error("error connecting to grpc", "error", err, "dest", destAddr)
   325  		return
   326  	}
   327  
   328  	// Wait for goroutines to exit before exiting to prevent leaking.
   329  	wg := sync.WaitGroup{}
   330  	defer wg.Wait()
   331  
   332  	// socket -> consul
   333  	wg.Add(1)
   334  	go func() {
   335  		defer wg.Done()
   336  		defer cancel()
   337  		n, err := io.Copy(dest, conn)
   338  		if ctx.Err() == nil && err != nil {
   339  			// expect disconnects when proxying http
   340  			logger.Trace("error message received proxying to Consul",
   341  				"msg", err, "dest", destAddr, "src_local", conn.LocalAddr(),
   342  				"src_remote", conn.RemoteAddr(), "bytes", n)
   343  			return
   344  		}
   345  		logger.Trace("proxy to Consul complete",
   346  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   347  			"bytes", n,
   348  		)
   349  	}()
   350  
   351  	// consul -> socket
   352  	wg.Add(1)
   353  	go func() {
   354  		defer wg.Done()
   355  		defer cancel()
   356  		n, err := io.Copy(conn, dest)
   357  		if ctx.Err() == nil && err != nil {
   358  			logger.Trace("error message received proxying from Consul",
   359  				"msg", err, "dest", destAddr, "src_local", conn.LocalAddr(),
   360  				"src_remote", conn.RemoteAddr(), "bytes", n)
   361  			return
   362  		}
   363  		logger.Trace("proxy from Consul complete",
   364  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   365  			"bytes", n,
   366  		)
   367  	}()
   368  
   369  	// When cancelled close connections to break out of copies goroutines.
   370  	<-ctx.Done()
   371  	_ = conn.Close()
   372  	_ = dest.Close()
   373  }