github.com/anth0d/nomad@v0.0.0-20221214183521-ae3a0a2cad06/client/allocrunner/consul_grpc_sock_hook.go (about) 1 package allocrunner 2 3 import ( 4 "context" 5 "errors" 6 "fmt" 7 "io" 8 "net" 9 "os" 10 "path/filepath" 11 "strings" 12 "sync" 13 "time" 14 15 "github.com/hashicorp/go-hclog" 16 "github.com/hashicorp/nomad/client/allocdir" 17 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 18 "github.com/hashicorp/nomad/nomad/structs" 19 "github.com/hashicorp/nomad/nomad/structs/config" 20 ) 21 22 const ( 23 consulGRPCSockHookName = "consul_grpc_socket" 24 25 // socketProxyStopWaitTime is the amount of time to wait for a socket proxy 26 // to stop before assuming something went awry and return a timeout error. 27 socketProxyStopWaitTime = 3 * time.Second 28 29 // consulGRPCFallbackPort is the last resort fallback port to use in 30 // combination with the Consul HTTP config address when creating the 31 // socket. 32 consulGRPCFallbackPort = "8502" 33 ) 34 35 var ( 36 errSocketProxyTimeout = errors.New("timed out waiting for socket proxy to exit") 37 ) 38 39 // consulGRPCSocketHook creates Unix sockets to allow communication from inside a 40 // netns to Consul gRPC endpoint. 41 // 42 // Noop for allocations without a group Connect stanza using bridge networking. 43 type consulGRPCSocketHook struct { 44 logger hclog.Logger 45 46 // mu synchronizes proxy and alloc which may be mutated and read concurrently 47 // via Prerun, Update, Postrun. 48 mu sync.Mutex 49 alloc *structs.Allocation 50 proxy *grpcSocketProxy 51 } 52 53 func newConsulGRPCSocketHook( 54 logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, 55 config *config.ConsulConfig, nodeAttrs map[string]string) *consulGRPCSocketHook { 56 57 // Attempt to find the gRPC port via the node attributes, otherwise use the 58 // default fallback. 59 consulGRPCPort, ok := nodeAttrs["consul.grpc"] 60 if !ok { 61 consulGRPCPort = consulGRPCFallbackPort 62 } 63 64 return &consulGRPCSocketHook{ 65 alloc: alloc, 66 proxy: newGRPCSocketProxy(logger, allocDir, config, consulGRPCPort), 67 logger: logger.Named(consulGRPCSockHookName), 68 } 69 } 70 71 func (*consulGRPCSocketHook) Name() string { 72 return consulGRPCSockHookName 73 } 74 75 // shouldRun returns true if the Unix socket should be created and proxied. 76 // Requires the mutex to be held. 77 func (h *consulGRPCSocketHook) shouldRun() bool { 78 tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) 79 80 // we must be in bridge networking and at least one connect sidecar task 81 if !tgFirstNetworkIsBridge(tg) { 82 return false 83 } 84 85 for _, s := range tg.Services { 86 if s.Connect.HasSidecar() || s.Connect.IsGateway() { 87 return true 88 } 89 } 90 91 return false 92 } 93 94 func (h *consulGRPCSocketHook) Prerun() error { 95 h.mu.Lock() 96 defer h.mu.Unlock() 97 98 if !h.shouldRun() { 99 return nil 100 } 101 102 return h.proxy.run(h.alloc) 103 } 104 105 // Update creates a gRPC socket file and proxy if there are any Connect 106 // services. 107 func (h *consulGRPCSocketHook) Update(req *interfaces.RunnerUpdateRequest) error { 108 h.mu.Lock() 109 defer h.mu.Unlock() 110 111 h.alloc = req.Alloc 112 113 if !h.shouldRun() { 114 return nil 115 } 116 117 return h.proxy.run(h.alloc) 118 } 119 120 func (h *consulGRPCSocketHook) Postrun() error { 121 h.mu.Lock() 122 defer h.mu.Unlock() 123 124 if err := h.proxy.stop(); err != nil { 125 // Only log failures to stop proxies. Worst case scenario is a 126 // small goroutine leak. 127 h.logger.Debug("error stopping Consul proxy", "error", err) 128 } 129 return nil 130 } 131 132 type grpcSocketProxy struct { 133 logger hclog.Logger 134 allocDir *allocdir.AllocDir 135 config *config.ConsulConfig 136 137 // consulGRPCFallbackPort is the port to use if the operator did not 138 // specify a gRPC config address. 139 consulGRPCFallbackPort string 140 141 ctx context.Context 142 cancel func() 143 doneCh chan struct{} 144 runOnce bool 145 } 146 147 func newGRPCSocketProxy( 148 logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig, 149 consulGRPCFallbackPort string) *grpcSocketProxy { 150 151 ctx, cancel := context.WithCancel(context.Background()) 152 return &grpcSocketProxy{ 153 allocDir: allocDir, 154 config: config, 155 consulGRPCFallbackPort: consulGRPCFallbackPort, 156 ctx: ctx, 157 cancel: cancel, 158 doneCh: make(chan struct{}), 159 logger: logger, 160 } 161 } 162 163 // run socket proxy if allocation requires it, it isn't already running, and it 164 // hasn't been told to stop. 165 // 166 // NOT safe for concurrent use. 167 func (p *grpcSocketProxy) run(alloc *structs.Allocation) error { 168 // Only run once. 169 if p.runOnce { 170 return nil 171 } 172 173 // Only run once. Never restart. 174 select { 175 case <-p.doneCh: 176 p.logger.Trace("socket proxy already shutdown; exiting") 177 return nil 178 case <-p.ctx.Done(): 179 p.logger.Trace("socket proxy already done; exiting") 180 return nil 181 default: 182 } 183 184 // make sure either grpc or http consul address has been configured 185 if p.config.GRPCAddr == "" && p.config.Addr == "" { 186 return errors.New("consul address must be set on nomad client") 187 } 188 189 destAddr := p.config.GRPCAddr 190 if destAddr == "" { 191 // No GRPCAddr defined. Use Addr but replace port with the gRPC 192 // default of 8502. 193 host, _, err := net.SplitHostPort(p.config.Addr) 194 if err != nil { 195 return fmt.Errorf("error parsing Consul address %q: %v", 196 p.config.Addr, err) 197 } 198 199 destAddr = net.JoinHostPort(host, p.consulGRPCFallbackPort) 200 } 201 202 hostGRPCSocketPath := filepath.Join(p.allocDir.AllocDir, allocdir.AllocGRPCSocket) 203 204 // if the socket already exists we'll try to remove it, but if not then any 205 // other errors will bubble up to the caller here or when we try to listen 206 _, err := os.Stat(hostGRPCSocketPath) 207 if err == nil { 208 err := os.Remove(hostGRPCSocketPath) 209 if err != nil { 210 return fmt.Errorf( 211 "unable to remove existing unix socket for Consul gRPC endpoint: %v", err) 212 } 213 } 214 215 listener, err := net.Listen("unix", hostGRPCSocketPath) 216 if err != nil { 217 return fmt.Errorf("unable to create unix socket for Consul gRPC endpoint: %v", err) 218 } 219 220 // The gRPC socket should be usable by all users in case a task is 221 // running as an unprivileged user. Unix does not allow setting domain 222 // socket permissions when creating the file, so we must manually call 223 // chmod afterwards. 224 // https://github.com/golang/go/issues/11822 225 if err := os.Chmod(hostGRPCSocketPath, os.ModePerm); err != nil { 226 return fmt.Errorf("unable to set permissions on unix socket for Consul gRPC endpoint: %v", err) 227 } 228 229 go func() { 230 proxy(p.ctx, p.logger, destAddr, listener) 231 p.cancel() 232 close(p.doneCh) 233 }() 234 235 p.runOnce = true 236 return nil 237 } 238 239 // stop the proxy and blocks until the proxy has stopped. Returns an error if 240 // the proxy does not exit in a timely fashion. 241 func (p *grpcSocketProxy) stop() error { 242 p.cancel() 243 244 // If proxy was never run, don't wait for anything to shutdown. 245 if !p.runOnce { 246 return nil 247 } 248 249 select { 250 case <-p.doneCh: 251 return nil 252 case <-time.After(socketProxyStopWaitTime): 253 return errSocketProxyTimeout 254 } 255 } 256 257 // Proxy between a listener and destination. 258 func proxy(ctx context.Context, logger hclog.Logger, destAddr string, l net.Listener) { 259 // Wait for all connections to be done before exiting to prevent 260 // goroutine leaks. 261 wg := sync.WaitGroup{} 262 ctx, cancel := context.WithCancel(ctx) 263 defer func() { 264 // Must cancel context and close listener before waiting 265 cancel() 266 _ = l.Close() 267 wg.Wait() 268 }() 269 270 // Close Accept() when context is cancelled 271 go func() { 272 <-ctx.Done() 273 _ = l.Close() 274 }() 275 276 for ctx.Err() == nil { 277 conn, err := l.Accept() 278 if err != nil { 279 if ctx.Err() != nil { 280 // Accept errors during shutdown are to be expected 281 return 282 } 283 logger.Error("error in socket proxy; shutting down proxy", "error", err, "dest", destAddr) 284 return 285 } 286 287 wg.Add(1) 288 go func() { 289 defer wg.Done() 290 proxyConn(ctx, logger, destAddr, conn) 291 }() 292 } 293 } 294 295 // proxyConn proxies between an existing net.Conn and a destination address. If 296 // the destAddr starts with "unix://" it is treated as a path to a unix socket. 297 // Otherwise it is treated as a host for a TCP connection. 298 // 299 // When the context is cancelled proxyConn blocks until all goroutines shutdown 300 // to prevent leaks. 301 func proxyConn(ctx context.Context, logger hclog.Logger, destAddr string, conn net.Conn) { 302 // Close the connection when we're done with it. 303 defer conn.Close() 304 305 ctx, cancel := context.WithCancel(ctx) 306 defer cancel() 307 308 // Detect unix sockets 309 network := "tcp" 310 const unixPrefix = "unix://" 311 if strings.HasPrefix(destAddr, unixPrefix) { 312 network = "unix" 313 destAddr = destAddr[len(unixPrefix):] 314 } 315 316 dialer := &net.Dialer{} 317 dest, err := dialer.DialContext(ctx, network, destAddr) 318 if err == context.Canceled || err == context.DeadlineExceeded { 319 logger.Trace("proxy exiting gracefully", "error", err, "dest", destAddr, 320 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr()) 321 return 322 } 323 if err != nil { 324 logger.Error("error connecting to grpc", "error", err, "dest", destAddr) 325 return 326 } 327 328 // Wait for goroutines to exit before exiting to prevent leaking. 329 wg := sync.WaitGroup{} 330 defer wg.Wait() 331 332 // socket -> consul 333 wg.Add(1) 334 go func() { 335 defer wg.Done() 336 defer cancel() 337 n, err := io.Copy(dest, conn) 338 if ctx.Err() == nil && err != nil { 339 // expect disconnects when proxying http 340 logger.Trace("error message received proxying to Consul", 341 "msg", err, "dest", destAddr, "src_local", conn.LocalAddr(), 342 "src_remote", conn.RemoteAddr(), "bytes", n) 343 return 344 } 345 logger.Trace("proxy to Consul complete", 346 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 347 "bytes", n, 348 ) 349 }() 350 351 // consul -> socket 352 wg.Add(1) 353 go func() { 354 defer wg.Done() 355 defer cancel() 356 n, err := io.Copy(conn, dest) 357 if ctx.Err() == nil && err != nil { 358 logger.Trace("error message received proxying from Consul", 359 "msg", err, "dest", destAddr, "src_local", conn.LocalAddr(), 360 "src_remote", conn.RemoteAddr(), "bytes", n) 361 return 362 } 363 logger.Trace("proxy from Consul complete", 364 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 365 "bytes", n, 366 ) 367 }() 368 369 // When cancelled close connections to break out of copies goroutines. 370 <-ctx.Done() 371 _ = conn.Close() 372 _ = dest.Close() 373 }