github.imxd.top/hashicorp/consul@v1.4.5/agent/proxycfg/manager.go

package proxycfg

import (
	"errors"
	"log"
	"sync"

	"github.com/hashicorp/consul/agent/cache"
	"github.com/hashicorp/consul/agent/local"
	"github.com/hashicorp/consul/agent/structs"
)

var (
	// ErrStopped is returned from Run if the manager instance has already been
	// stopped.
	ErrStopped = errors.New("manager stopped")

	// ErrStarted is returned from Run if the manager instance has already run.
	ErrStarted = errors.New("manager was already run")
)

// CancelFunc is a type for a returned function that can be called to cancel a
// watch.
type CancelFunc func()

// Manager is a component that integrates into the agent and manages Connect
// proxy configuration state. This should not be confused with the deprecated
// "managed proxy" concept where the agent supervises the actual proxy process.
// proxycfg.Manager is oblivious to the distinction and manages state for any
// service registered with Kind == connect-proxy.
//
// The Manager ensures that any Connect proxy registered on the agent has all
// the state it needs cached locally via the agent cache. State includes
// certificates, intentions, and service discovery results for any declared
// upstreams. See package docs for more detail.
type Manager struct {
	ManagerConfig

	// stateCh is notified for any service changes in local state. We only use
	// this to trigger on _new_ service addition since it has no data and we
	// don't want to maintain a full copy of the state in order to diff and
	// figure out what changed. Luckily each service has its own WatchCh so we
	// can figure out changes and removals with those efficiently.
	stateCh chan struct{}

	mu       sync.Mutex
	started  bool
	proxies  map[string]*state
	watchers map[string]map[uint64]chan *ConfigSnapshot
}

// ManagerConfig holds the required external dependencies for a Manager
// instance. All fields must be set to something valid or the manager will
// panic. The ManagerConfig is passed by value to NewManager so the caller's
// copy can be mutated safely afterwards.
type ManagerConfig struct {
	// Cache is the agent's cache instance that can be used to retrieve, store
	// and monitor state for the proxies.
	Cache *cache.Cache
	// State is the agent's local state to be watched for new proxy
	// registrations.
	State *local.State
	// Source describes the current agent's identity. It is used directly for
	// prepared query discovery, but also indirectly as a way to pass the
	// current Datacenter name into other request types that need it. This is
	// sufficient for now and cleaner than passing the entire RuntimeConfig.
	Source *structs.QuerySource
	// Logger is the agent's logger to be used for logging.
	Logger *log.Logger
}

// NewManager constructs a Manager from the provided ManagerConfig.
func NewManager(cfg ManagerConfig) (*Manager, error) {
	if cfg.Cache == nil || cfg.State == nil || cfg.Source == nil ||
		cfg.Logger == nil {
		return nil, errors.New("all ManagerConfig fields must be provided")
	}
	m := &Manager{
		ManagerConfig: cfg,
		// A single-item buffer is enough since there is no data transferred;
		// this is "level triggering" so we can't miss actual data.
		stateCh:  make(chan struct{}, 1),
		proxies:  make(map[string]*state),
		watchers: make(map[string]map[uint64]chan *ConfigSnapshot),
	}
	return m, nil
}
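
// Illustrative sketch (not part of the original file): the caller, typically
// the agent, wires its existing dependencies into ManagerConfig. All fields
// are required. agentCache, localState, querySource and logger are
// hypothetical placeholders for values the caller already holds.
//
//	m, err := NewManager(ManagerConfig{
//		Cache:  agentCache,  // *cache.Cache
//		State:  localState,  // *local.State
//		Source: querySource, // *structs.QuerySource
//		Logger: logger,      // *log.Logger
//	})
//	if err != nil {
//		logger.Fatalf("[ERR] proxycfg: %s", err)
//	}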

// Run is the long-running method that handles state syncing. It should be run
// in its own goroutine and will continue until a fatal error is hit or Close
// is called. Run will return an error if it is called more than once, or
// called after Close.
func (m *Manager) Run() error {
	m.mu.Lock()
	alreadyStarted := m.started
	m.started = true
	stateCh := m.stateCh
	m.mu.Unlock()

	// Protect against multiple Run calls.
	if alreadyStarted {
		return ErrStarted
	}

	// Protect against being run after Close.
	if stateCh == nil {
		return ErrStopped
	}

	// Register for notifications about state changes
	m.State.Notify(stateCh)
	defer m.State.StopNotify(stateCh)

	for {
		m.syncState()

		// Wait for a state change
		_, ok := <-stateCh
		if !ok {
			// Stopped
			return nil
		}
	}
}

// syncState is called whenever the local state notifies a change. It holds the
// lock while finding any new or updated proxies and removing deleted ones.
func (m *Manager) syncState() {
	m.mu.Lock()
	defer m.mu.Unlock()

	// Traverse the local state and ensure all proxy services are registered
	services := m.State.Services()
	for svcID, svc := range services {
		if svc.Kind != structs.ServiceKindConnectProxy {
			continue
		}
		// TODO(banks): need to work out when to default some stuff. For
		// example Proxy.LocalServicePort is practically necessary for any
		// sidecar and can default to the port of the sidecar service, but only
		// if it's already registered. Once we get past here we don't have
		// enough context to know that, so we'd need to set it here if not
		// during registration of the proxy service. Sidecar Service and
		// managed proxies can do that in the interim, but we should validate
		// more generally that this is always true.
		err := m.ensureProxyServiceLocked(svc, m.State.ServiceToken(svcID))
		if err != nil {
			m.Logger.Printf("[ERR] failed to watch proxy service %s: %s", svc.ID,
				err)
		}
	}

	// Now see if any proxies were removed
	for proxyID := range m.proxies {
		if _, ok := services[proxyID]; !ok {
			// Remove them
			m.removeProxyServiceLocked(proxyID)
		}
	}
}

// ensureProxyServiceLocked adds the proxy to our state, or updates it if it
// has changed.
func (m *Manager) ensureProxyServiceLocked(ns *structs.NodeService, token string) error {
	state, ok := m.proxies[ns.ID]

	if ok {
		if !state.Changed(ns, token) {
			// No change
			return nil
		}

		// We are updating the proxy, close its old state
		state.Close()
	}

	var err error
	state, err = newState(ns, token)
	if err != nil {
		return err
	}

	// Set the necessary dependencies
	state.logger = m.Logger
	state.cache = m.Cache
	state.source = m.Source

	ch, err := state.Watch()
	if err != nil {
		return err
	}
	m.proxies[ns.ID] = state

	// Start a goroutine that will wait for changes and broadcast them to
	// watchers.
	go func(ch <-chan ConfigSnapshot) {
		// Run until ch is closed
		for snap := range ch {
			m.notify(&snap)
		}
	}(ch)

	return nil
}
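
// Illustrative sketch (not part of the original file): syncState only watches
// services registered with Kind == connect-proxy. A sidecar registration that
// this loop would pick up might look roughly like the following; the concrete
// values are hypothetical and the Proxy fields shown are assumed from the
// structs package of this Consul version.
//
//	sidecar := &structs.NodeService{
//		Kind:    structs.ServiceKindConnectProxy,
//		ID:      "web-sidecar-proxy",
//		Service: "web-sidecar-proxy",
//		Port:    21000,
//		Proxy: structs.ConnectProxyConfig{
//			DestinationServiceName: "web",
//			DestinationServiceID:   "web",
//			LocalServicePort:       8080,
//		},
//	}
//
// Once such a service appears in local state, syncState calls
// ensureProxyServiceLocked for it, which builds a state watcher and starts
// broadcasting ConfigSnapshots to any registered watchers.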

// removeProxyServiceLocked is called when a service deregisters and frees all
// resources for that service.
func (m *Manager) removeProxyServiceLocked(proxyID string) {
	state, ok := m.proxies[proxyID]
	if !ok {
		return
	}

	// Closing state will let the goroutine we started in
	// ensureProxyServiceLocked finish since the watch chan is closed.
	state.Close()
	delete(m.proxies, proxyID)

	// We intentionally leave potential watchers hanging here - there is no new
	// config for them and closing their channels might be indistinguishable
	// from an error that they should retry. We rely on them eventually giving
	// up (because the proxy is in fact not running any more) so the watches
	// are cleaned up naturally.
}

func (m *Manager) notify(snap *ConfigSnapshot) {
	m.mu.Lock()
	defer m.mu.Unlock()

	watchers, ok := m.watchers[snap.ProxyID]
	if !ok {
		return
	}

	for _, ch := range watchers {
		m.deliverLatest(snap, ch)
	}
}

// deliverLatest delivers the snapshot to a watch chan. If the delivery blocks,
// it will drain the chan and then re-attempt delivery so that a slow consumer
// still gets the latest config rather than a stale one. This MUST be called
// from a method where m.mu is held to be safe since it assumes we are the only
// goroutine sending on ch.
func (m *Manager) deliverLatest(snap *ConfigSnapshot, ch chan *ConfigSnapshot) {
	// Send if chan is empty
	select {
	case ch <- snap:
		return
	default:
	}

	// Not empty, drain the chan of older snapshots and redeliver. For now we
	// only use 1-buffered chans but this will still work if we change that
	// later.
OUTER:
	for {
		select {
		case <-ch:
			continue
		default:
			break OUTER
		}
	}

	// Now send again
	select {
	case ch <- snap:
		return
	default:
		// This should not be possible since we should be the only sender,
		// enforced by m.mu, but log an error and drop the update rather than
		// panicking.
		m.Logger.Printf("[ERR] proxycfg: failed to deliver ConfigSnapshot to %q",
			snap.ProxyID)
	}
}

// Watch registers a watch on a proxy. The proxy might not exist yet, in which
// case this will not fail, but no updates will be delivered until the proxy is
// registered. If there is already a valid snapshot in memory, it will be
// delivered immediately.
func (m *Manager) Watch(proxyID string) (<-chan *ConfigSnapshot, CancelFunc) {
	m.mu.Lock()
	defer m.mu.Unlock()

	// This buffering is crucial otherwise we'd block immediately trying to
	// deliver the current snapshot below if we already have one.
	ch := make(chan *ConfigSnapshot, 1)
	watchers, ok := m.watchers[proxyID]
	if !ok {
		watchers = make(map[uint64]chan *ConfigSnapshot)
	}
	idx := uint64(len(watchers))
	watchers[idx] = ch
	m.watchers[proxyID] = watchers

	// Deliver the current snapshot immediately if there is one ready
	if state, ok := m.proxies[proxyID]; ok {
		if snap := state.CurrentSnapshot(); snap != nil {
			// We rely on ch being buffered above and on the fact that it
			// hasn't been passed anywhere yet, so we must be the only writer;
			// this send will never block or deadlock.
			ch <- snap
		}
	}

	return ch, func() {
		m.mu.Lock()
		defer m.mu.Unlock()
		m.closeWatchLocked(proxyID, idx)
	}
}
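
// Illustrative sketch (not part of the original file): a consumer such as the
// xDS server registers a watch for a proxy service ID, ranges over the
// returned channel, and cancels when done. proxyID and applyConfig are
// hypothetical placeholders.
//
//	snapCh, cancel := m.Watch(proxyID)
//	defer cancel()
//	for snap := range snapCh {
//		// Each value is the most recent *ConfigSnapshot; deliverLatest drains
//		// stale snapshots for slow consumers, so intermediate states may be
//		// skipped.
//		applyConfig(snap)
//	}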

// closeWatchLocked cleans up state related to a single watcher. It assumes the
// lock is held.
func (m *Manager) closeWatchLocked(proxyID string, watchIdx uint64) {
	if watchers, ok := m.watchers[proxyID]; ok {
		if ch, ok := watchers[watchIdx]; ok {
			delete(watchers, watchIdx)
			close(ch)
			if len(watchers) == 0 {
				delete(m.watchers, proxyID)
			}
		}
	}
}

// Close removes all state and stops all running goroutines.
func (m *Manager) Close() error {
	m.mu.Lock()
	defer m.mu.Unlock()

	if m.stateCh != nil {
		close(m.stateCh)
		m.stateCh = nil
	}

	// Close all current watchers first
	for proxyID, watchers := range m.watchers {
		for idx := range watchers {
			m.closeWatchLocked(proxyID, idx)
		}
	}

	// Then close all states
	for proxyID, state := range m.proxies {
		state.Close()
		delete(m.proxies, proxyID)
	}
	return nil
}
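
// Illustrative sketch (not part of the original file): shutdown ordering.
// Close stops the local-state notifications, which causes Run to return nil,
// closes every registered watch channel, and tears down per-proxy state. m and
// logger are the hypothetical placeholders used in the earlier sketches.
//
//	errCh := make(chan error, 1)
//	go func() { errCh <- m.Run() }()
//	// ... later, during agent shutdown ...
//	if err := m.Close(); err != nil {
//		logger.Printf("[ERR] proxycfg: close failed: %s", err)
//	}
//	<-errCh // Run has now exited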