github.com/iqoqo/nomad@v0.11.3-0.20200911112621-d7021c74d101/client/allocrunner/consulsock_hook.go (about) 1 package allocrunner 2 3 import ( 4 "context" 5 "fmt" 6 "io" 7 "net" 8 "os" 9 "path/filepath" 10 "strings" 11 "sync" 12 "time" 13 14 hclog "github.com/hashicorp/go-hclog" 15 "github.com/hashicorp/nomad/client/allocdir" 16 "github.com/hashicorp/nomad/client/allocrunner/interfaces" 17 "github.com/hashicorp/nomad/nomad/structs" 18 "github.com/hashicorp/nomad/nomad/structs/config" 19 ) 20 21 // consulSockHook creates Unix sockets to allow communication from inside a 22 // netns to Consul. 23 // 24 // Noop for allocations without a group Connect stanza. 25 type consulSockHook struct { 26 alloc *structs.Allocation 27 28 proxy *sockProxy 29 30 // mu synchronizes group & cancel as they may be mutated and accessed 31 // concurrently via Prerun, Update, Postrun. 32 mu sync.Mutex 33 34 logger hclog.Logger 35 } 36 37 func newConsulSockHook(logger hclog.Logger, alloc *structs.Allocation, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *consulSockHook { 38 h := &consulSockHook{ 39 alloc: alloc, 40 proxy: newSockProxy(logger, allocDir, config), 41 } 42 h.logger = logger.Named(h.Name()) 43 return h 44 } 45 46 func (*consulSockHook) Name() string { 47 return "consul_socket" 48 } 49 50 // shouldRun returns true if the Unix socket should be created and proxied. 51 // Requires the mutex to be held. 52 func (h *consulSockHook) shouldRun() bool { 53 tg := h.alloc.Job.LookupTaskGroup(h.alloc.TaskGroup) 54 for _, s := range tg.Services { 55 if s.Connect != nil { 56 return true 57 } 58 } 59 60 return false 61 } 62 63 func (h *consulSockHook) Prerun() error { 64 h.mu.Lock() 65 defer h.mu.Unlock() 66 67 if !h.shouldRun() { 68 return nil 69 } 70 71 return h.proxy.run(h.alloc) 72 } 73 74 // Update creates a gRPC socket file and proxy if there are any Connect 75 // services. 76 func (h *consulSockHook) Update(req *interfaces.RunnerUpdateRequest) error { 77 h.mu.Lock() 78 defer h.mu.Unlock() 79 80 h.alloc = req.Alloc 81 82 if !h.shouldRun() { 83 return nil 84 } 85 86 return h.proxy.run(h.alloc) 87 } 88 89 func (h *consulSockHook) Postrun() error { 90 h.mu.Lock() 91 defer h.mu.Unlock() 92 93 if err := h.proxy.stop(); err != nil { 94 // Only log failures to stop proxies. Worst case scenario is a 95 // small goroutine leak. 96 h.logger.Debug("error stopping Consul proxy", "error", err) 97 } 98 return nil 99 } 100 101 type sockProxy struct { 102 allocDir *allocdir.AllocDir 103 config *config.ConsulConfig 104 105 ctx context.Context 106 cancel func() 107 doneCh chan struct{} 108 runOnce bool 109 110 logger hclog.Logger 111 } 112 113 func newSockProxy(logger hclog.Logger, allocDir *allocdir.AllocDir, config *config.ConsulConfig) *sockProxy { 114 ctx, cancel := context.WithCancel(context.Background()) 115 return &sockProxy{ 116 allocDir: allocDir, 117 config: config, 118 ctx: ctx, 119 cancel: cancel, 120 doneCh: make(chan struct{}), 121 logger: logger, 122 } 123 } 124 125 // run socket proxy if allocation requires it, it isn't already running, and it 126 // hasn't been told to stop. 127 // 128 // NOT safe for concurrent use. 129 func (s *sockProxy) run(alloc *structs.Allocation) error { 130 // Only run once. 131 if s.runOnce { 132 return nil 133 } 134 135 // Only run once. Never restart. 136 select { 137 case <-s.doneCh: 138 s.logger.Trace("socket proxy already shutdown; exiting") 139 return nil 140 case <-s.ctx.Done(): 141 s.logger.Trace("socket proxy already done; exiting") 142 return nil 143 default: 144 } 145 146 destAddr := s.config.GRPCAddr 147 if destAddr == "" { 148 // No GRPCAddr defined. Use Addr but replace port with the gRPC 149 // default of 8502. 150 host, _, err := net.SplitHostPort(s.config.Addr) 151 if err != nil { 152 return fmt.Errorf("error parsing Consul address %q: %v", 153 s.config.Addr, err) 154 } 155 156 destAddr = net.JoinHostPort(host, "8502") 157 } 158 159 hostGRPCSockPath := filepath.Join(s.allocDir.AllocDir, allocdir.AllocGRPCSocket) 160 161 // if the socket already exists we'll try to remove it, but if not then any 162 // other errors will bubble up to the caller here or when we try to listen 163 _, err := os.Stat(hostGRPCSockPath) 164 if err == nil { 165 err := os.Remove(hostGRPCSockPath) 166 if err != nil { 167 return fmt.Errorf( 168 "unable to remove existing unix socket for Consul gRPC endpoint: %v", err) 169 } 170 } 171 172 listener, err := net.Listen("unix", hostGRPCSockPath) 173 if err != nil { 174 return fmt.Errorf("unable to create unix socket for Consul gRPC endpoint: %v", err) 175 } 176 177 // The gRPC socket should be usable by all users in case a task is 178 // running as an unprivileged user. Unix does not allow setting domain 179 // socket permissions when creating the file, so we must manually call 180 // chmod afterwards. 181 // https://github.com/golang/go/issues/11822 182 if err := os.Chmod(hostGRPCSockPath, os.ModePerm); err != nil { 183 return fmt.Errorf("unable to set permissions on unix socket for Consul gRPC endpoint: %v", err) 184 } 185 186 go func() { 187 proxy(s.ctx, s.logger, destAddr, listener) 188 s.cancel() 189 close(s.doneCh) 190 }() 191 192 s.runOnce = true 193 return nil 194 } 195 196 // stop the proxy and blocks until the proxy has stopped. Returns an error if 197 // the proxy does not exit in a timely fashion. 198 func (s *sockProxy) stop() error { 199 s.cancel() 200 201 // If proxy was never run, don't wait for anything to shutdown. 202 if !s.runOnce { 203 return nil 204 } 205 206 select { 207 case <-s.doneCh: 208 return nil 209 case <-time.After(3 * time.Second): 210 return fmt.Errorf("timed out waiting for proxy to exit") 211 } 212 } 213 214 // Proxy between a listener and dest 215 func proxy(ctx context.Context, logger hclog.Logger, dest string, l net.Listener) { 216 // Wait for all connections to be done before exiting to prevent 217 // goroutine leaks. 218 wg := sync.WaitGroup{} 219 ctx, cancel := context.WithCancel(ctx) 220 defer func() { 221 // Must cancel context and close listener before waiting 222 cancel() 223 l.Close() 224 wg.Wait() 225 }() 226 227 // Close Accept() when context is cancelled 228 go func() { 229 <-ctx.Done() 230 l.Close() 231 }() 232 233 for ctx.Err() == nil { 234 conn, err := l.Accept() 235 if err != nil { 236 if ctx.Err() != nil { 237 // Accept errors during shutdown are to be expected 238 return 239 } 240 logger.Error("error in grpc proxy; shutting down proxy", "error", err, "dest", dest) 241 return 242 } 243 244 wg.Add(1) 245 go func() { 246 defer wg.Done() 247 proxyConn(ctx, logger, dest, conn) 248 }() 249 } 250 } 251 252 // proxyConn proxies between an existing net.Conn and a destination address. If 253 // the destAddr starts with "unix://" it is treated as a path to a unix socket. 254 // Otherwise it is treated as a host for a TCP connection. 255 // 256 // When the context is cancelled proxyConn blocks until all goroutines shutdown 257 // to prevent leaks. 258 func proxyConn(ctx context.Context, logger hclog.Logger, destAddr string, conn net.Conn) { 259 // Close the connection when we're done with it. 260 defer conn.Close() 261 262 ctx, cancel := context.WithCancel(ctx) 263 defer cancel() 264 265 // Detect unix sockets 266 network := "tcp" 267 const unixPrefix = "unix://" 268 if strings.HasPrefix(destAddr, unixPrefix) { 269 network = "unix" 270 destAddr = destAddr[len(unixPrefix):] 271 } 272 273 dialer := &net.Dialer{} 274 dest, err := dialer.DialContext(ctx, network, destAddr) 275 if err == context.Canceled || err == context.DeadlineExceeded { 276 logger.Trace("proxy exiting gracefully", "error", err, "dest", destAddr, 277 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr()) 278 return 279 } 280 if err != nil { 281 logger.Error("error connecting to grpc", "error", err, "dest", destAddr) 282 return 283 } 284 285 // Wait for goroutines to exit before exiting to prevent leaking. 286 wg := sync.WaitGroup{} 287 defer wg.Wait() 288 289 // socket -> gRPC 290 wg.Add(1) 291 go func() { 292 defer wg.Done() 293 defer cancel() 294 n, err := io.Copy(dest, conn) 295 if ctx.Err() == nil && err != nil { 296 logger.Warn("error proxying to Consul", "error", err, "dest", destAddr, 297 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 298 "bytes", n, 299 ) 300 return 301 } 302 logger.Trace("proxy to Consul complete", 303 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 304 "bytes", n, 305 ) 306 }() 307 308 // gRPC -> socket 309 wg.Add(1) 310 go func() { 311 defer wg.Done() 312 defer cancel() 313 n, err := io.Copy(conn, dest) 314 if ctx.Err() == nil && err != nil { 315 logger.Warn("error proxying from Consul", "error", err, "dest", destAddr, 316 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 317 "bytes", n, 318 ) 319 return 320 } 321 logger.Trace("proxy from Consul complete", 322 "src_local", conn.LocalAddr(), "src_remote", conn.RemoteAddr(), 323 "bytes", n, 324 ) 325 }() 326 327 // When cancelled close connections to break out of copies goroutines. 328 <-ctx.Done() 329 conn.Close() 330 dest.Close() 331 }