github.com/Ilhicas/nomad@v1.0.4-0.20210304152020-e86851182bc3/client/allocrunner/consul_grpc_sock_hook.go (about) 1 package allocrunner 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "net" 8 "os" 9 "path/filepath" 10 "strings" 11 "sync" 12 "time" 13 14 hclog "github.com/hashicorp/go-hclog" 15 "github.com/hashicorp/nomad/client/allocdir" 16 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 17 "github.com/hashicorp/nomad/nomad/structs" 18 "github.com/hashicorp/nomad/nomad/structs/config" 19 "github.com/pkg/errors" 20 ) 21 22 const ( 23 consulGRPCSockHookName = "consul_grpc_socket" 24 25 // socketProxyStopWaitTime is the amount of time to wait for a socket proxy 26 // to stop before assuming something went awry and return a timeout error. 27 socketProxyStopWaitTime = 3 * time.Second 28 ) 29 30 var ( 31 errSocketProxyTimeout = errors.New("timed out waiting for socket proxy to exit") 32 ) 33 34 // consulGRPCSocketHook creates Unix sockets to allow communication from inside a 35 // netns to Consul gRPC endpoint. 36 // 37 // Noop for allocations without a group Connect stanza using bridge networking. 38 type consulGRPCSocketHook struct { 39 logger hclog.Logger 40 41 // mu synchronizes proxy and alloc which may be mutated and read concurrently 42 // via Prerun, Update, Postrun. 43 mu sync.Mutex 44 alloc *structs.Allocation 45 proxy *grpcSocketProxy 46 } 47 48 func newConsulGRPCSocketHook(logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *consulGRPCSocketHook { 49 return &consulGRPCSocketHook{ 50 alloc: alloc, 51 proxy: newGRPCSocketProxy(logger, allocDir, config), 52 logger: logger.Named(consulGRPCSockHookName), 53 } 54 } 55 56 func (*consulGRPCSocketHook) Name() string { 57 return consulGRPCSockHookName 58 } 59 60 // shouldRun returns true if the Unix socket should be created and proxied. 61 // Requires the mutex to be held. 62 func (h *consulGRPCSocketHook) shouldRun() bool { 63 tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) 64 65 // we must be in bridge networking and at least one connect sidecar task 66 if !tgFirstNetworkIsBridge(tg) { 67 return false 68 } 69 70 for _, s := range tg.Services { 71 if s.Connect.HasSidecar() || s.Connect.IsGateway() { 72 return true 73 } 74 } 75 76 return false 77 } 78 79 func (h *consulGRPCSocketHook) Prerun() error { 80 h.mu.Lock() 81 defer h.mu.Unlock() 82 83 if !h.shouldRun() { 84 return nil 85 } 86 87 return h.proxy.run(h.alloc) 88 } 89 90 // Update creates a gRPC socket file and proxy if there are any Connect 91 // services. 92 func (h *consulGRPCSocketHook) Update(req *interfaces.RunnerUpdateRequest) error { 93 h.mu.Lock() 94 defer h.mu.Unlock() 95 96 h.alloc = req.Alloc 97 98 if !h.shouldRun() { 99 return nil 100 } 101 102 return h.proxy.run(h.alloc) 103 } 104 105 func (h *consulGRPCSocketHook) Postrun() error { 106 h.mu.Lock() 107 defer h.mu.Unlock() 108 109 if err := h.proxy.stop(); err != nil { 110 // Only log failures to stop proxies. Worst case scenario is a 111 // small goroutine leak. 112 h.logger.Debug("error stopping Consul proxy", "error", err) 113 } 114 return nil 115 } 116 117 type grpcSocketProxy struct { 118 logger hclog.Logger 119 allocDir *allocdir.AllocDir 120 config *config.ConsulConfig 121 122 ctx context.Context 123 cancel func() 124 doneCh chan struct{} 125 runOnce bool 126 } 127 128 func newGRPCSocketProxy(logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *grpcSocketProxy { 129 ctx, cancel := context.WithCancel(context.Background()) 130 return &grpcSocketProxy{ 131 allocDir: allocDir, 132 config: config, 133 ctx: ctx, 134 cancel: cancel, 135 doneCh: make(chan struct{}), 136 logger: logger, 137 } 138 } 139 140 // run socket proxy if allocation requires it, it isn't already running, and it 141 // hasn't been told to stop. 142 // 143 // NOT safe for concurrent use. 144 func (p *grpcSocketProxy) run(alloc *structs.Allocation) error { 145 // Only run once. 146 if p.runOnce { 147 return nil 148 } 149 150 // Only run once. Never restart. 151 select { 152 case <-p.doneCh: 153 p.logger.Trace("socket proxy already shutdown; exiting") 154 return nil 155 case <-p.ctx.Done(): 156 p.logger.Trace("socket proxy already done; exiting") 157 return nil 158 default: 159 } 160 161 // make sure either grpc or http consul address has been configured 162 if p.config.GRPCAddr == "" && p.config.Addr == "" { 163 return errors.New("consul address must be set on nomad client") 164 } 165 166 destAddr := p.config.GRPCAddr 167 if destAddr == "" { 168 // No GRPCAddr defined. Use Addr but replace port with the gRPC 169 // default of 8502. 170 host, _, err := net.SplitHostPort(p.config.Addr) 171 if err != nil { 172 return fmt.Errorf("error parsing Consul address %q: %v", 173 p.config.Addr, err) 174 } 175 176 destAddr = net.JoinHostPort(host, "8502") 177 } 178 179 hostGRPCSocketPath := filepath.Join(p.allocDir.AllocDir, allocdir.AllocGRPCSocket) 180 181 // if the socket already exists we'll try to remove it, but if not then any 182 // other errors will bubble up to the caller here or when we try to listen 183 _, err := os.Stat(hostGRPCSocketPath) 184 if err == nil { 185 err := os.Remove(hostGRPCSocketPath) 186 if err != nil { 187 return fmt.Errorf( 188 "unable to remove existing unix socket for Consul gRPC endpoint: %v", err) 189 } 190 } 191 192 listener, err := net.Listen("unix", hostGRPCSocketPath) 193 if err != nil { 194 return fmt.Errorf("unable to create unix socket for Consul gRPC endpoint: %v", err) 195 } 196 197 // The gRPC socket should be usable by all users in case a task is 198 // running as an unprivileged user. Unix does not allow setting domain 199 // socket permissions when creating the file, so we must manually call 200 // chmod afterwards. 201 // https://github.com/golang/go/issues/11822 202 if err := os.Chmod(hostGRPCSocketPath, os.ModePerm); err != nil { 203 return fmt.Errorf("unable to set permissions on unix socket for Consul gRPC endpoint: %v", err) 204 } 205 206 go func() { 207 proxy(p.ctx, p.logger, destAddr, listener) 208 p.cancel() 209 close(p.doneCh) 210 }() 211 212 p.runOnce = true 213 return nil 214 } 215 216 // stop the proxy and blocks until the proxy has stopped. Returns an error if 217 // the proxy does not exit in a timely fashion. 218 func (p *grpcSocketProxy) stop() error { 219 p.cancel() 220 221 // If proxy was never run, don't wait for anything to shutdown. 222 if !p.runOnce { 223 return nil 224 } 225 226 select { 227 case <-p.doneCh: 228 return nil 229 case <-time.After(socketProxyStopWaitTime): 230 return errSocketProxyTimeout 231 } 232 } 233 234 // Proxy between a listener and destination. 235 func proxy(ctx context.Context, logger hclog.Logger, destAddr string, l net.Listener) { 236 // Wait for all connections to be done before exiting to prevent 237 // goroutine leaks. 238 wg := sync.WaitGroup{} 239 ctx, cancel := context.WithCancel(ctx) 240 defer func() { 241 // Must cancel context and close listener before waiting 242 cancel() 243 _ = l.Close() 244 wg.Wait() 245 }() 246 247 // Close Accept() when context is cancelled 248 go func() { 249 <-ctx.Done() 250 _ = l.Close() 251 }() 252 253 for ctx.Err() == nil { 254 conn, err := l.Accept() 255 if err != nil { 256 if ctx.Err() != nil { 257 // Accept errors during shutdown are to be expected 258 return 259 } 260 logger.Error("error in socket proxy; shutting down proxy", "error", err, "dest", destAddr) 261 return 262 } 263 264 wg.Add(1) 265 go func() { 266 defer wg.Done() 267 proxyConn(ctx, logger, destAddr, conn) 268 }() 269 } 270 } 271 272 // proxyConn proxies between an existing net.Conn and a destination address. If 273 // the destAddr starts with "unix://" it is treated as a path to a unix socket. 274 // Otherwise it is treated as a host for a TCP connection. 275 // 276 // When the context is cancelled proxyConn blocks until all goroutines shutdown 277 // to prevent leaks. 278 func proxyConn(ctx context.Context, logger hclog.Logger, destAddr string, conn net.Conn) { 279 // Close the connection when we're done with it. 280 defer conn.Close() 281 282 ctx, cancel := context.WithCancel(ctx) 283 defer cancel() 284 285 // Detect unix sockets 286 network := "tcp" 287 const unixPrefix = "unix://" 288 if strings.HasPrefix(destAddr, unixPrefix) { 289 network = "unix" 290 destAddr = destAddr[len(unixPrefix):] 291 } 292 293 dialer := &net.Dialer{} 294 dest, err := dialer.DialContext(ctx, network, destAddr) 295 if err == context.Canceled || err == context.DeadlineExceeded { 296 logger.Trace("proxy exiting gracefully", "error", err, "dest", destAddr, 297 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr()) 298 return 299 } 300 if err != nil { 301 logger.Error("error connecting to grpc", "error", err, "dest", destAddr) 302 return 303 } 304 305 // Wait for goroutines to exit before exiting to prevent leaking. 306 wg := sync.WaitGroup{} 307 defer wg.Wait() 308 309 // socket -> consul 310 wg.Add(1) 311 go func() { 312 defer wg.Done() 313 defer cancel() 314 n, err := io.Copy(dest, conn) 315 if ctx.Err() == nil && err != nil { 316 logger.Warn("error proxying to Consul", "error", err, "dest", destAddr, 317 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 318 "bytes", n, 319 ) 320 return 321 } 322 logger.Trace("proxy to Consul complete", 323 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 324 "bytes", n, 325 ) 326 }() 327 328 // consul -> socket 329 wg.Add(1) 330 go func() { 331 defer wg.Done() 332 defer cancel() 333 n, err := io.Copy(conn, dest) 334 if ctx.Err() == nil && err != nil { 335 logger.Warn("error proxying from Consul", "error", err, "dest", destAddr, 336 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 337 "bytes", n, 338 ) 339 return 340 } 341 logger.Trace("proxy from Consul complete", 342 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 343 "bytes", n, 344 ) 345 }() 346 347 // When cancelled close connections to break out of copies goroutines. 348 <-ctx.Done() 349 _ = conn.Close() 350 _ = dest.Close() 351 }