github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allocrunner/consul_grpc_sock_hook.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	hclog "github.com/hashicorp/go-hclog"
    15  	"github.com/hashicorp/nomad/client/allocdir"
    16  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  	"github.com/hashicorp/nomad/nomad/structs/config"
    19  	"github.com/pkg/errors"
    20  )
    21  
    22  const (
    23  	consulGRPCSockHookName = "consul_grpc_socket"
    24  
    25  	// socketProxyStopWaitTime is the amount of time to wait for a socket proxy
    26  	// to stop before assuming something went awry and return a timeout error.
    27  	socketProxyStopWaitTime = 3 * time.Second
    28  )
    29  
    30  var (
    31  	errSocketProxyTimeout = errors.New("timed out waiting for socket proxy to exit")
    32  )
    33  
    34  // consulGRPCSocketHook creates Unix sockets to allow communication from inside a
    35  // netns to Consul gRPC endpoint.
    36  //
    37  // Noop for allocations without a group Connect stanza using bridge networking.
    38  type consulGRPCSocketHook struct {
    39  	logger hclog.Logger
    40  
    41  	// mu synchronizes proxy and alloc which may be mutated and read concurrently
    42  	// via Prerun, Update, Postrun.
    43  	mu    sync.Mutex
    44  	alloc *structs.Allocation
    45  	proxy *grpcSocketProxy
    46  }
    47  
    48  func newConsulGRPCSocketHook(logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *consulGRPCSocketHook {
    49  	return &consulGRPCSocketHook{
    50  		alloc:  alloc,
    51  		proxy:  newGRPCSocketProxy(logger, allocDir, config),
    52  		logger: logger.Named(consulGRPCSockHookName),
    53  	}
    54  }
    55  
    56  func (*consulGRPCSocketHook) Name() string {
    57  	return consulGRPCSockHookName
    58  }
    59  
    60  // shouldRun returns true if the Unix socket should be created and proxied.
    61  // Requires the mutex to be held.
    62  func (h *consulGRPCSocketHook) shouldRun() bool {
    63  	tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
    64  
    65  	// we must be in bridge networking and at least one connect sidecar task
    66  	if !tgFirstNetworkIsBridge(tg) {
    67  		return false
    68  	}
    69  
    70  	for _, s := range tg.Services {
    71  		if s.Connect.HasSidecar() || s.Connect.IsGateway() {
    72  			return true
    73  		}
    74  	}
    75  
    76  	return false
    77  }
    78  
    79  func (h *consulGRPCSocketHook) Prerun() error {
    80  	h.mu.Lock()
    81  	defer h.mu.Unlock()
    82  
    83  	if !h.shouldRun() {
    84  		return nil
    85  	}
    86  
    87  	return h.proxy.run(h.alloc)
    88  }
    89  
    90  // Update creates a gRPC socket file and proxy if there are any Connect
    91  // services.
    92  func (h *consulGRPCSocketHook) Update(req *interfaces.RunnerUpdateRequest) error {
    93  	h.mu.Lock()
    94  	defer h.mu.Unlock()
    95  
    96  	h.alloc = req.Alloc
    97  
    98  	if !h.shouldRun() {
    99  		return nil
   100  	}
   101  
   102  	return h.proxy.run(h.alloc)
   103  }
   104  
   105  func (h *consulGRPCSocketHook) Postrun() error {
   106  	h.mu.Lock()
   107  	defer h.mu.Unlock()
   108  
   109  	if err := h.proxy.stop(); err != nil {
   110  		// Only log failures to stop proxies. Worst case scenario is a
   111  		// small goroutine leak.
   112  		h.logger.Debug("error stopping Consul proxy", "error", err)
   113  	}
   114  	return nil
   115  }
   116  
   117  type grpcSocketProxy struct {
   118  	logger   hclog.Logger
   119  	allocDir *allocdir.AllocDir
   120  	config   *config.ConsulConfig
   121  
   122  	ctx     context.Context
   123  	cancel  func()
   124  	doneCh  chan struct{}
   125  	runOnce bool
   126  }
   127  
   128  func newGRPCSocketProxy(logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *grpcSocketProxy {
   129  	ctx, cancel := context.WithCancel(context.Background())
   130  	return &grpcSocketProxy{
   131  		allocDir: allocDir,
   132  		config:   config,
   133  		ctx:      ctx,
   134  		cancel:   cancel,
   135  		doneCh:   make(chan struct{}),
   136  		logger:   logger,
   137  	}
   138  }
   139  
   140  // run socket proxy if allocation requires it, it isn't already running, and it
   141  // hasn't been told to stop.
   142  //
   143  // NOT safe for concurrent use.
   144  func (p *grpcSocketProxy) run(alloc *structs.Allocation) error {
   145  	// Only run once.
   146  	if p.runOnce {
   147  		return nil
   148  	}
   149  
   150  	// Only run once. Never restart.
   151  	select {
   152  	case <-p.doneCh:
   153  		p.logger.Trace("socket proxy already shutdown; exiting")
   154  		return nil
   155  	case <-p.ctx.Done():
   156  		p.logger.Trace("socket proxy already done; exiting")
   157  		return nil
   158  	default:
   159  	}
   160  
   161  	// make sure either grpc or http consul address has been configured
   162  	if p.config.GRPCAddr == "" && p.config.Addr == "" {
   163  		return errors.New("consul address must be set on nomad client")
   164  	}
   165  
   166  	destAddr := p.config.GRPCAddr
   167  	if destAddr == "" {
   168  		// No GRPCAddr defined. Use Addr but replace port with the gRPC
   169  		// default of 8502.
   170  		host, _, err := net.SplitHostPort(p.config.Addr)
   171  		if err != nil {
   172  			return fmt.Errorf("error parsing Consul address %q: %v",
   173  				p.config.Addr, err)
   174  		}
   175  
   176  		destAddr = net.JoinHostPort(host, "8502")
   177  	}
   178  
   179  	hostGRPCSocketPath := filepath.Join(p.allocDir.AllocDir, allocdir.AllocGRPCSocket)
   180  
   181  	// if the socket already exists we'll try to remove it, but if not then any
   182  	// other errors will bubble up to the caller here or when we try to listen
   183  	_, err := os.Stat(hostGRPCSocketPath)
   184  	if err == nil {
   185  		err := os.Remove(hostGRPCSocketPath)
   186  		if err != nil {
   187  			return fmt.Errorf(
   188  				"unable to remove existing unix socket for Consul gRPC endpoint: %v", err)
   189  		}
   190  	}
   191  
   192  	listener, err := net.Listen("unix", hostGRPCSocketPath)
   193  	if err != nil {
   194  		return fmt.Errorf("unable to create unix socket for Consul gRPC endpoint: %v", err)
   195  	}
   196  
   197  	// The gRPC socket should be usable by all users in case a task is
   198  	// running as an unprivileged user.  Unix does not allow setting domain
   199  	// socket permissions when creating the file, so we must manually call
   200  	// chmod afterwards.
   201  	// https://github.com/golang/go/issues/11822
   202  	if err := os.Chmod(hostGRPCSocketPath, os.ModePerm); err != nil {
   203  		return fmt.Errorf("unable to set permissions on unix socket for Consul gRPC endpoint: %v", err)
   204  	}
   205  
   206  	go func() {
   207  		proxy(p.ctx, p.logger, destAddr, listener)
   208  		p.cancel()
   209  		close(p.doneCh)
   210  	}()
   211  
   212  	p.runOnce = true
   213  	return nil
   214  }
   215  
   216  // stop the proxy and blocks until the proxy has stopped. Returns an error if
   217  // the proxy does not exit in a timely fashion.
   218  func (p *grpcSocketProxy) stop() error {
   219  	p.cancel()
   220  
   221  	// If proxy was never run, don't wait for anything to shutdown.
   222  	if !p.runOnce {
   223  		return nil
   224  	}
   225  
   226  	select {
   227  	case <-p.doneCh:
   228  		return nil
   229  	case <-time.After(socketProxyStopWaitTime):
   230  		return errSocketProxyTimeout
   231  	}
   232  }
   233  
   234  // Proxy between a listener and destination.
   235  func proxy(ctx context.Context, logger hclog.Logger, destAddr string, l net.Listener) {
   236  	// Wait for all connections to be done before exiting to prevent
   237  	// goroutine leaks.
   238  	wg := sync.WaitGroup{}
   239  	ctx, cancel := context.WithCancel(ctx)
   240  	defer func() {
   241  		// Must cancel context and close listener before waiting
   242  		cancel()
   243  		_ = l.Close()
   244  		wg.Wait()
   245  	}()
   246  
   247  	// Close Accept() when context is cancelled
   248  	go func() {
   249  		<-ctx.Done()
   250  		_ = l.Close()
   251  	}()
   252  
   253  	for ctx.Err() == nil {
   254  		conn, err := l.Accept()
   255  		if err != nil {
   256  			if ctx.Err() != nil {
   257  				// Accept errors during shutdown are to be expected
   258  				return
   259  			}
   260  			logger.Error("error in socket proxy; shutting down proxy", "error", err, "dest", destAddr)
   261  			return
   262  		}
   263  
   264  		wg.Add(1)
   265  		go func() {
   266  			defer wg.Done()
   267  			proxyConn(ctx, logger, destAddr, conn)
   268  		}()
   269  	}
   270  }
   271  
   272  // proxyConn proxies between an existing net.Conn and a destination address. If
   273  // the destAddr starts with "unix://" it is treated as a path to a unix socket.
   274  // Otherwise it is treated as a host for a TCP connection.
   275  //
   276  // When the context is cancelled proxyConn blocks until all goroutines shutdown
   277  // to prevent leaks.
   278  func proxyConn(ctx context.Context, logger hclog.Logger, destAddr string, conn net.Conn) {
   279  	// Close the connection when we're done with it.
   280  	defer conn.Close()
   281  
   282  	ctx, cancel := context.WithCancel(ctx)
   283  	defer cancel()
   284  
   285  	// Detect unix sockets
   286  	network := "tcp"
   287  	const unixPrefix = "unix://"
   288  	if strings.HasPrefix(destAddr, unixPrefix) {
   289  		network = "unix"
   290  		destAddr = destAddr[len(unixPrefix):]
   291  	}
   292  
   293  	dialer := &net.Dialer{}
   294  	dest, err := dialer.DialContext(ctx, network, destAddr)
   295  	if err == context.Canceled || err == context.DeadlineExceeded {
   296  		logger.Trace("proxy exiting gracefully", "error", err, "dest", destAddr,
   297  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr())
   298  		return
   299  	}
   300  	if err != nil {
   301  		logger.Error("error connecting to grpc", "error", err, "dest", destAddr)
   302  		return
   303  	}
   304  
   305  	// Wait for goroutines to exit before exiting to prevent leaking.
   306  	wg := sync.WaitGroup{}
   307  	defer wg.Wait()
   308  
   309  	// socket -> consul
   310  	wg.Add(1)
   311  	go func() {
   312  		defer wg.Done()
   313  		defer cancel()
   314  		n, err := io.Copy(dest, conn)
   315  		if ctx.Err() == nil && err != nil {
   316  			logger.Warn("error proxying to Consul", "error", err, "dest", destAddr,
   317  				"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   318  				"bytes", n,
   319  			)
   320  			return
   321  		}
   322  		logger.Trace("proxy to Consul complete",
   323  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   324  			"bytes", n,
   325  		)
   326  	}()
   327  
   328  	// consul -> socket
   329  	wg.Add(1)
   330  	go func() {
   331  		defer wg.Done()
   332  		defer cancel()
   333  		n, err := io.Copy(conn, dest)
   334  		if ctx.Err() == nil && err != nil {
   335  			logger.Warn("error proxying from Consul", "error", err, "dest", destAddr,
   336  				"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   337  				"bytes", n,
   338  			)
   339  			return
   340  		}
   341  		logger.Trace("proxy from Consul complete",
   342  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   343  			"bytes", n,
   344  		)
   345  	}()
   346  
   347  	// When cancelled close connections to break out of copies goroutines.
   348  	<-ctx.Done()
   349  	_ = conn.Close()
   350  	_ = dest.Close()
   351  }