github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/consulsock_hook.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"io"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"strings"
    11  	"sync"
    12  	"time"
    13  
    14  	hclog "github.com/hashicorp/go-hclog"
    15  	"github.com/hashicorp/nomad/client/allocdir"
    16  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    17  	"github.com/hashicorp/nomad/nomad/structs"
    18  	"github.com/hashicorp/nomad/nomad/structs/config"
    19  )
    20  
// consulSockHook creates Unix sockets to allow communication from inside a
// netns to Consul.
//
// Noop for allocations without a group Connect stanza.
type consulSockHook struct {
	// alloc is the allocation the hook operates on. Update replaces it with
	// the allocation carried by the update request.
	alloc *structs.Allocation

	// proxy is the socket proxy that Prerun/Update start (when the
	// allocation needs it) and Postrun stops.
	proxy *sockProxy

	// mu synchronizes access to alloc & proxy as they may be mutated and
	// accessed concurrently via Prerun, Update, Postrun.
	mu sync.Mutex

	// logger is the hook's logger, named after the hook (see Name).
	logger hclog.Logger
}
    36  
    37  func newConsulSockHook(logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *consulSockHook {
    38  	h := &consulSockHook{
    39  		alloc: alloc,
    40  		proxy: newSockProxy(logger, allocDir, config),
    41  	}
    42  	h.logger = logger.Named(h.Name())
    43  	return h
    44  }
    45  
    46  func (*consulSockHook) Name() string {
    47  	return "consul_socket"
    48  }
    49  
    50  // shouldRun returns true if the Unix socket should be created and proxied.
    51  // Requires the mutex to be held.
    52  func (h *consulSockHook) shouldRun() bool {
    53  	tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
    54  	for _, s := range tg.Services {
    55  		if s.Connect != nil {
    56  			return true
    57  		}
    58  	}
    59  
    60  	return false
    61  }
    62  
    63  func (h *consulSockHook) Prerun() error {
    64  	h.mu.Lock()
    65  	defer h.mu.Unlock()
    66  
    67  	if !h.shouldRun() {
    68  		return nil
    69  	}
    70  
    71  	return h.proxy.run(h.alloc)
    72  }
    73  
    74  // Update creates a gRPC socket file and proxy if there are any Connect
    75  // services.
    76  func (h *consulSockHook) Update(req *interfaces.RunnerUpdateRequest) error {
    77  	h.mu.Lock()
    78  	defer h.mu.Unlock()
    79  
    80  	h.alloc = req.Alloc
    81  
    82  	if !h.shouldRun() {
    83  		return nil
    84  	}
    85  
    86  	return h.proxy.run(h.alloc)
    87  }
    88  
    89  func (h *consulSockHook) Postrun() error {
    90  	h.mu.Lock()
    91  	defer h.mu.Unlock()
    92  
    93  	if err := h.proxy.stop(); err != nil {
    94  		// Only log failures to stop proxies. Worst case scenario is a
    95  		// small goroutine leak.
    96  		h.logger.Debug("error stopping Consul proxy", "error", err)
    97  	}
    98  	return nil
    99  }
   100  
// sockProxy listens on a Unix socket inside the allocation directory and
// proxies every accepted connection to Consul's gRPC endpoint.
type sockProxy struct {
	// allocDir locates the allocation directory the socket is created in.
	allocDir *allocdir.AllocDir
	// config supplies the Consul address to proxy to (GRPCAddr, or Addr with
	// the port replaced when GRPCAddr is empty).
	config *config.ConsulConfig

	// ctx is the proxy's lifetime context; cancel cancels it. cancel is
	// called by stop and by the proxy goroutine when it exits.
	ctx    context.Context
	cancel func()
	// doneCh is closed once the proxy goroutine has exited.
	doneCh chan struct{}
	// runOnce is set after run has successfully started the proxy; the proxy
	// is never started a second time.
	runOnce bool

	logger hclog.Logger
}
   112  
   113  func newSockProxy(logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *sockProxy {
   114  	ctx, cancel := context.WithCancel(context.Background())
   115  	return &sockProxy{
   116  		allocDir: allocDir,
   117  		config:   config,
   118  		ctx:      ctx,
   119  		cancel:   cancel,
   120  		doneCh:   make(chan struct{}),
   121  		logger:   logger,
   122  	}
   123  }
   124  
   125  // run socket proxy if allocation requires it, it isn't already running, and it
   126  // hasn't been told to stop.
   127  //
   128  // NOT safe for concurrent use.
   129  func (s *sockProxy) run(alloc *structs.Allocation) error {
   130  	// Only run once.
   131  	if s.runOnce {
   132  		return nil
   133  	}
   134  
   135  	// Only run once. Never restart.
   136  	select {
   137  	case <-s.doneCh:
   138  		s.logger.Trace("socket proxy already shutdown; exiting")
   139  		return nil
   140  	case <-s.ctx.Done():
   141  		s.logger.Trace("socket proxy already done; exiting")
   142  		return nil
   143  	default:
   144  	}
   145  
   146  	destAddr := s.config.GRPCAddr
   147  	if destAddr == "" {
   148  		// No GRPCAddr defined. Use Addr but replace port with the gRPC
   149  		// default of 8502.
   150  		host, _, err := net.SplitHostPort(s.config.Addr)
   151  		if err != nil {
   152  			return fmt.Errorf("error parsing Consul address %q: %v",
   153  				s.config.Addr, err)
   154  		}
   155  
   156  		destAddr = net.JoinHostPort(host, "8502")
   157  	}
   158  
   159  	hostGRPCSockPath := filepath.Join(s.allocDir.AllocDir, allocdir.AllocGRPCSocket)
   160  
   161  	// if the socket already exists we'll try to remove it, but if not then any
   162  	// other errors will bubble up to the caller here or when we try to listen
   163  	_, err := os.Stat(hostGRPCSockPath)
   164  	if err == nil {
   165  		err := os.Remove(hostGRPCSockPath)
   166  		if err != nil {
   167  			return fmt.Errorf(
   168  				"unable to remove existing unix socket for Consul gRPC endpoint: %v", err)
   169  		}
   170  	}
   171  
   172  	listener, err := net.Listen("unix", hostGRPCSockPath)
   173  	if err != nil {
   174  		return fmt.Errorf("unable to create unix socket for Consul gRPC endpoint: %v", err)
   175  	}
   176  
   177  	// The gRPC socket should be usable by all users in case a task is
   178  	// running as an unprivileged user.  Unix does not allow setting domain
   179  	// socket permissions when creating the file, so we must manually call
   180  	// chmod afterwards.
   181  	// https://github.com/golang/go/issues/11822
   182  	if err := os.Chmod(hostGRPCSockPath, os.ModePerm); err != nil {
   183  		return fmt.Errorf("unable to set permissions on unix socket for Consul gRPC endpoint: %v", err)
   184  	}
   185  
   186  	go func() {
   187  		proxy(s.ctx, s.logger, destAddr, listener)
   188  		s.cancel()
   189  		close(s.doneCh)
   190  	}()
   191  
   192  	s.runOnce = true
   193  	return nil
   194  }
   195  
   196  // stop the proxy and blocks until the proxy has stopped. Returns an error if
   197  // the proxy does not exit in a timely fashion.
   198  func (s *sockProxy) stop() error {
   199  	s.cancel()
   200  
   201  	// If proxy was never run, don't wait for anything to shutdown.
   202  	if !s.runOnce {
   203  		return nil
   204  	}
   205  
   206  	select {
   207  	case <-s.doneCh:
   208  		return nil
   209  	case <-time.After(3 * time.Second):
   210  		return fmt.Errorf("timed out waiting for proxy to exit")
   211  	}
   212  }
   213  
   214  // Proxy between a listener and dest
   215  func proxy(ctx context.Context, logger hclog.Logger, dest string, l net.Listener) {
   216  	// Wait for all connections to be done before exiting to prevent
   217  	// goroutine leaks.
   218  	wg := sync.WaitGroup{}
   219  	ctx, cancel := context.WithCancel(ctx)
   220  	defer func() {
   221  		// Must cancel context and close listener before waiting
   222  		cancel()
   223  		l.Close()
   224  		wg.Wait()
   225  	}()
   226  
   227  	// Close Accept() when context is cancelled
   228  	go func() {
   229  		<-ctx.Done()
   230  		l.Close()
   231  	}()
   232  
   233  	for ctx.Err() == nil {
   234  		conn, err := l.Accept()
   235  		if err != nil {
   236  			if ctx.Err() != nil {
   237  				// Accept errors during shutdown are to be expected
   238  				return
   239  			}
   240  			logger.Error("error in grpc proxy; shutting down proxy", "error", err, "dest", dest)
   241  			return
   242  		}
   243  
   244  		wg.Add(1)
   245  		go func() {
   246  			defer wg.Done()
   247  			proxyConn(ctx, logger, dest, conn)
   248  		}()
   249  	}
   250  }
   251  
   252  // proxyConn proxies between an existing net.Conn and a destination address. If
   253  // the destAddr starts with "unix://" it is treated as a path to a unix socket.
   254  // Otherwise it is treated as a host for a TCP connection.
   255  //
   256  // When the context is cancelled proxyConn blocks until all goroutines shutdown
   257  // to prevent leaks.
   258  func proxyConn(ctx context.Context, logger hclog.Logger, destAddr string, conn net.Conn) {
   259  	// Close the connection when we're done with it.
   260  	defer conn.Close()
   261  
   262  	ctx, cancel := context.WithCancel(ctx)
   263  	defer cancel()
   264  
   265  	// Detect unix sockets
   266  	network := "tcp"
   267  	const unixPrefix = "unix://"
   268  	if strings.HasPrefix(destAddr, unixPrefix) {
   269  		network = "unix"
   270  		destAddr = destAddr[len(unixPrefix):]
   271  	}
   272  
   273  	dialer := &net.Dialer{}
   274  	dest, err := dialer.DialContext(ctx, network, destAddr)
   275  	if err == context.Canceled || err == context.DeadlineExceeded {
   276  		logger.Trace("proxy exiting gracefully", "error", err, "dest", destAddr,
   277  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr())
   278  		return
   279  	}
   280  	if err != nil {
   281  		logger.Error("error connecting to grpc", "error", err, "dest", destAddr)
   282  		return
   283  	}
   284  
   285  	// Wait for goroutines to exit before exiting to prevent leaking.
   286  	wg := sync.WaitGroup{}
   287  	defer wg.Wait()
   288  
   289  	// socket -> gRPC
   290  	wg.Add(1)
   291  	go func() {
   292  		defer wg.Done()
   293  		defer cancel()
   294  		n, err := io.Copy(dest, conn)
   295  		if ctx.Err() == nil && err != nil {
   296  			logger.Warn("error proxying to Consul", "error", err, "dest", destAddr,
   297  				"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   298  				"bytes", n,
   299  			)
   300  			return
   301  		}
   302  		logger.Trace("proxy to Consul complete",
   303  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   304  			"bytes", n,
   305  		)
   306  	}()
   307  
   308  	// gRPC -> socket
   309  	wg.Add(1)
   310  	go func() {
   311  		defer wg.Done()
   312  		defer cancel()
   313  		n, err := io.Copy(conn, dest)
   314  		if ctx.Err() == nil && err != nil {
   315  			logger.Warn("error proxying from Consul", "error", err, "dest", destAddr,
   316  				"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   317  				"bytes", n,
   318  			)
   319  			return
   320  		}
   321  		logger.Trace("proxy from Consul complete",
   322  			"src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(),
   323  			"bytes", n,
   324  		)
   325  	}()
   326  
   327  	// When cancelled close connections to break out of copies goroutines.
   328  	<-ctx.Done()
   329  	conn.Close()
   330  	dest.Close()
   331  }