github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/consul_http_sock_hook.go (about)

     1  package allocrunner
     2  
     3  import (
     4  	"context"
     5  	"errors"
     6  	"fmt"
     7  	"net"
     8  	"os"
     9  	"path/filepath"
    10  	"sync"
    11  	"time"
    12  
    13  	"github.com/hashicorp/go-hclog"
    14  	"github.com/hashicorp/nomad/client/allocdir"
    15  	"github.com/hashicorp/nomad/client/allocrunner/interfaces"
    16  	"github.com/hashicorp/nomad/nomad/structs"
    17  	"github.com/hashicorp/nomad/nomad/structs/config"
    18  )
    19  
    20  func tgFirstNetworkIsBridge(tg *structs.TaskGroup) bool {
    21  	if len(tg.Networks) < 1 || tg.Networks[0].Mode != "bridge" {
    22  		return false
    23  	}
    24  	return true
    25  }
    26  
    27  const (
    28  	consulHTTPSocketHookName = "consul_http_socket"
    29  )
    30  
    31  type consulHTTPSockHook struct {
    32  	logger hclog.Logger
    33  
    34  	// lock synchronizes proxy and alloc which may be mutated and read concurrently
    35  	// via Prerun, Update, and Postrun.
    36  	lock  sync.Mutex
    37  	alloc *structs.Allocation
    38  	proxy *httpSocketProxy
    39  }
    40  
    41  func newConsulHTTPSocketHook(logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *consulHTTPSockHook {
    42  	return &consulHTTPSockHook{
    43  		alloc:  alloc,
    44  		proxy:  newHTTPSocketProxy(logger, allocDir, config),
    45  		logger: logger.Named(consulHTTPSocketHookName),
    46  	}
    47  }
    48  
    49  func (*consulHTTPSockHook) Name() string {
    50  	return consulHTTPSocketHookName
    51  }
    52  
    53  // shouldRun returns true if the alloc contains at least one connect native
    54  // task and has a network configured in bridge mode
    55  //
    56  // todo(shoenig): what about CNI networks?
    57  func (h *consulHTTPSockHook) shouldRun() bool {
    58  	tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup)
    59  
    60  	// we must be in bridge networking and at least one connect native task
    61  	if !tgFirstNetworkIsBridge(tg) {
    62  		return false
    63  	}
    64  
    65  	for _, service := range tg.Services {
    66  		if service.Connect.IsNative() {
    67  			return true
    68  		}
    69  	}
    70  	return false
    71  }
    72  
    73  func (h *consulHTTPSockHook) Prerun() error {
    74  	h.lock.Lock()
    75  	defer h.lock.Unlock()
    76  
    77  	if !h.shouldRun() {
    78  		return nil
    79  	}
    80  
    81  	return h.proxy.run(h.alloc)
    82  }
    83  
    84  func (h *consulHTTPSockHook) Update(req *interfaces.RunnerUpdateRequest) error {
    85  	h.lock.Lock()
    86  	defer h.lock.Unlock()
    87  
    88  	h.alloc = req.Alloc
    89  
    90  	if !h.shouldRun() {
    91  		return nil
    92  	}
    93  
    94  	return h.proxy.run(h.alloc)
    95  }
    96  
    97  func (h *consulHTTPSockHook) Postrun() error {
    98  	h.lock.Lock()
    99  	defer h.lock.Unlock()
   100  
   101  	if err := h.proxy.stop(); err != nil {
   102  		// Only log a failure to stop, worst case is the proxy leaks a goroutine.
   103  		h.logger.Warn("error stopping Consul HTTP proxy", "error", err)
   104  	}
   105  
   106  	return nil
   107  }
   108  
   109  type httpSocketProxy struct {
   110  	logger   hclog.Logger
   111  	allocDir *allocdir.AllocDir
   112  	config   *config.ConsulConfig
   113  
   114  	ctx     context.Context
   115  	cancel  func()
   116  	doneCh  chan struct{}
   117  	runOnce bool
   118  }
   119  
   120  func newHTTPSocketProxy(logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *httpSocketProxy {
   121  	ctx, cancel := context.WithCancel(context.Background())
   122  	return &httpSocketProxy{
   123  		logger:   logger,
   124  		allocDir: allocDir,
   125  		config:   config,
   126  		ctx:      ctx,
   127  		cancel:   cancel,
   128  		doneCh:   make(chan struct{}),
   129  	}
   130  }
   131  
   132  // run the httpSocketProxy for the given allocation.
   133  //
   134  // Assumes locking done by the calling alloc runner.
   135  func (p *httpSocketProxy) run(alloc *structs.Allocation) error {
   136  	// Only run once.
   137  	if p.runOnce {
   138  		return nil
   139  	}
   140  
   141  	// Never restart.
   142  	select {
   143  	case <-p.doneCh:
   144  		p.logger.Trace("consul http socket proxy already shutdown; exiting")
   145  		return nil
   146  	case <-p.ctx.Done():
   147  		p.logger.Trace("consul http socket proxy already done; exiting")
   148  		return nil
   149  	default:
   150  	}
   151  
   152  	// consul http dest addr
   153  	destAddr := p.config.Addr
   154  	if destAddr == "" {
   155  		return errors.New("consul address must be set on nomad client")
   156  	}
   157  
   158  	hostHTTPSockPath := filepath.Join(p.allocDir.AllocDir, allocdir.AllocHTTPSocket)
   159  	if err := maybeRemoveOldSocket(hostHTTPSockPath); err != nil {
   160  		return err
   161  	}
   162  
   163  	listener, err := net.Listen("unix", hostHTTPSockPath)
   164  	if err != nil {
   165  		return fmt.Errorf("unable to create unix socket for Consul HTTP endpoint: %w", err)
   166  	}
   167  
   168  	// The Consul HTTP socket should be usable by all users in case a task is
   169  	// running as a non-privileged user. Unix does not allow setting domain
   170  	// socket permissions when creating the file, so we must manually call
   171  	// chmod afterwards.
   172  	if err := os.Chmod(hostHTTPSockPath, os.ModePerm); err != nil {
   173  		return fmt.Errorf("unable to set permissions on unix socket: %w", err)
   174  	}
   175  
   176  	go func() {
   177  		proxy(p.ctx, p.logger, destAddr, listener)
   178  		p.cancel()
   179  		close(p.doneCh)
   180  	}()
   181  
   182  	p.runOnce = true
   183  	return nil
   184  }
   185  
   186  func (p *httpSocketProxy) stop() error {
   187  	p.cancel()
   188  
   189  	// if proxy was never run, no need to wait before shutdown
   190  	if !p.runOnce {
   191  		return nil
   192  	}
   193  
   194  	select {
   195  	case <-p.doneCh:
   196  	case <-time.After(socketProxyStopWaitTime):
   197  		return errSocketProxyTimeout
   198  	}
   199  
   200  	return nil
   201  }
   202  
   203  func maybeRemoveOldSocket(socketPath string) error {
   204  	_, err := os.Stat(socketPath)
   205  	if err == nil {
   206  		if err = os.Remove(socketPath); err != nil {
   207  			return fmt.Errorf("unable to remove existing unix socket: %w", err)
   208  		}
   209  	}
   210  	return nil
   211  }