// github.imxd.top/hashicorp/consul@v1.4.5/agent/proxycfg/state.go

package proxycfg

import (
	"context"
	"errors"
	"fmt"
	"log"
	"reflect"
	"strings"
	"time"

	"github.com/hashicorp/consul/agent/cache"
	cachetype "github.com/hashicorp/consul/agent/cache-types"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/mitchellh/copystructure"
)

const (
	coalesceTimeout                  = 200 * time.Millisecond
	rootsWatchID                     = "roots"
	leafWatchID                      = "leaf"
	intentionsWatchID                = "intentions"
	serviceIDPrefix                  = string(structs.UpstreamDestTypeService) + ":"
	preparedQueryIDPrefix            = string(structs.UpstreamDestTypePreparedQuery) + ":"
	defaultPreparedQueryPollInterval = 30 * time.Second
)

// state holds all the state needed to maintain the config for a registered
// connect-proxy service. When a proxy registration is changed, the entire
// state is discarded and a new one created.
type state struct {
	// logger, source and cache are required to be set before calling Watch.
	logger *log.Logger
	source *structs.QuerySource
	cache  *cache.Cache

	// ctx and cancel store the context created during the initWatches call.
	ctx    context.Context
	cancel func()

	proxyID  string
	address  string
	port     int
	proxyCfg structs.ConnectProxyConfig
	token    string

	ch     chan cache.UpdateEvent
	snapCh chan ConfigSnapshot
	reqCh  chan chan *ConfigSnapshot
}

// newState populates the state struct by copying relevant fields from the
// NodeService and token. We copy so that we can use them in a separate
// goroutine later without reasoning about races with the NodeService passed
// in (especially for embedded fields like maps and slices).
//
// The returned state needs its required dependencies to be set before Watch
// can be called.
func newState(ns *structs.NodeService, token string) (*state, error) {
	if ns.Kind != structs.ServiceKindConnectProxy {
		return nil, errors.New("not a connect-proxy")
	}

	// Copy the config map
	proxyCfgRaw, err := copystructure.Copy(ns.Proxy)
	if err != nil {
		return nil, err
	}
	proxyCfg, ok := proxyCfgRaw.(structs.ConnectProxyConfig)
	if !ok {
		return nil, errors.New("failed to copy proxy config")
	}

	return &state{
		proxyID:  ns.ID,
		address:  ns.Address,
		port:     ns.Port,
		proxyCfg: proxyCfg,
		token:    token,
		// 10 is fairly arbitrary here but allows the 3 mandatory watches and a
		// reasonable number of upstream watches to all deliver their initial
		// messages in parallel without blocking the cache.Notify loops. It's
		// not a huge deal if we block for a short period, so we don't need to
		// be more conservative to handle larger numbers of upstreams
		// correctly, but this gives some head room for normal operation to be
		// non-blocking in most typical cases.
		ch:     make(chan cache.UpdateEvent, 10),
		snapCh: make(chan ConfigSnapshot, 1),
		reqCh:  make(chan chan *ConfigSnapshot, 1),
	}, nil
}
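// Illustrative only: a caller (in Consul this is the proxycfg Manager) would
// wire a state up roughly like the sketch below. The variable names here are
// hypothetical; the required dependencies mirror the struct comment above.
//
//	st, err := newState(ns, token)
//	if err != nil {
//		return err
//	}
//	st.logger = logger // *log.Logger
//	st.source = source // *structs.QuerySource, supplies the local datacenter
//	st.cache = c       // *cache.Cache shared with the rest of the agent
//	snapCh, err := st.Watch()
//	if err != nil {
//		return err
//	}
//	go func() {
//		for snap := range snapCh {
//			// deliver snap to whatever is configuring the proxy
//			_ = snap
//		}
//	}()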
// Watch initializes watches on all necessary cache data for the current proxy
// registration state and returns a chan to observe updates to the
// ConfigSnapshot that contains all necessary config state. The chan is closed
// when the state is Closed.
func (s *state) Watch() (<-chan ConfigSnapshot, error) {
	s.ctx, s.cancel = context.WithCancel(context.Background())

	err := s.initWatches()
	if err != nil {
		s.cancel()
		return nil, err
	}

	go s.run()

	return s.snapCh, nil
}

// Close discards the state and stops any long-running watches.
func (s *state) Close() error {
	if s.cancel != nil {
		s.cancel()
	}
	return nil
}

// initWatches sets up the watches needed based on current proxy registration
// state.
func (s *state) initWatches() error {
	// Watch for root changes
	err := s.cache.Notify(s.ctx, cachetype.ConnectCARootName, &structs.DCSpecificRequest{
		Datacenter:   s.source.Datacenter,
		QueryOptions: structs.QueryOptions{Token: s.token},
	}, rootsWatchID, s.ch)
	if err != nil {
		return err
	}

	// Watch the leaf cert
	err = s.cache.Notify(s.ctx, cachetype.ConnectCALeafName, &cachetype.ConnectCALeafRequest{
		Datacenter: s.source.Datacenter,
		Token:      s.token,
		Service:    s.proxyCfg.DestinationServiceName,
	}, leafWatchID, s.ch)
	if err != nil {
		return err
	}

	// Watch for intention updates
	err = s.cache.Notify(s.ctx, cachetype.IntentionMatchName, &structs.IntentionQueryRequest{
		Datacenter:   s.source.Datacenter,
		QueryOptions: structs.QueryOptions{Token: s.token},
		Match: &structs.IntentionQueryMatch{
			Type: structs.IntentionMatchDestination,
			Entries: []structs.IntentionMatchEntry{
				{
					Namespace: structs.IntentionDefaultNamespace,
					Name:      s.proxyCfg.DestinationServiceName,
				},
			},
		},
	}, intentionsWatchID, s.ch)
	if err != nil {
		return err
	}

	// Watch for updates to service endpoints for all upstreams
	for _, u := range s.proxyCfg.Upstreams {
		dc := s.source.Datacenter
		if u.Datacenter != "" {
			dc = u.Datacenter
		}

		switch u.DestinationType {
		case structs.UpstreamDestTypePreparedQuery:
			err = s.cache.Notify(s.ctx, cachetype.PreparedQueryName, &structs.PreparedQueryExecuteRequest{
				Datacenter:    dc,
				QueryOptions:  structs.QueryOptions{Token: s.token, MaxAge: defaultPreparedQueryPollInterval},
				QueryIDOrName: u.DestinationName,
				Connect:       true,
			}, u.Identifier(), s.ch)
		case structs.UpstreamDestTypeService:
			fallthrough
		case "": // Treat unset as the default Service type
			err = s.cache.Notify(s.ctx, cachetype.HealthServicesName, &structs.ServiceSpecificRequest{
				Datacenter:   dc,
				QueryOptions: structs.QueryOptions{Token: s.token},
				ServiceName:  u.DestinationName,
				Connect:      true,
			}, u.Identifier(), s.ch)
		default:
			return fmt.Errorf("unknown upstream type: %q", u.DestinationType)
		}
		// Check the result of whichever Notify call ran in the switch above so
		// a failed prepared query watch is not silently dropped.
		if err != nil {
			return err
		}
	}
	return nil
}
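// For reference, the per-upstream correlation IDs passed to cache.Notify above
// come from u.Identifier(). Its exact format lives in the structs package, but
// it is expected to start with the destination type so that handleUpdate below
// can dispatch on the serviceIDPrefix/preparedQueryIDPrefix constants, e.g.
// (illustrative values):
//
//	"service:db"            // matched via serviceIDPrefix
//	"prepared_query:geo-db" // matched via preparedQueryIDPrefix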
func (s *state) run() {
	// Close the channel we return from Watch when we stop so consumers can
	// stop watching and clean up their goroutines. It's important we do this
	// here and not in Close since this routine sends on this chan and so might
	// panic if it gets closed from another goroutine.
	defer close(s.snapCh)

	snap := ConfigSnapshot{
		ProxyID:           s.proxyID,
		Address:           s.address,
		Port:              s.port,
		Proxy:             s.proxyCfg,
		UpstreamEndpoints: make(map[string]structs.CheckServiceNodes),
	}
	// This turns out to be really fiddly/painful using time.Timer.C directly
	// in the code below since you can't detect when a timer is stopped vs.
	// waiting in order to know whether to reset it. So just use a chan to send
	// ourselves messages.
	sendCh := make(chan struct{})
	var coalesceTimer *time.Timer

	for {
		select {
		case <-s.ctx.Done():
			return
		case u := <-s.ch:
			if err := s.handleUpdate(u, &snap); err != nil {
				s.logger.Printf("[ERR] %s watch error: %s", u.CorrelationID, err)
				continue
			}

		case <-sendCh:
			// Make a deep copy of snap so we don't mutate any of the embedded
			// structs etc. on future updates.
			snapCopy, err := snap.Clone()
			if err != nil {
				s.logger.Printf("[ERR] Failed to copy config snapshot for proxy %s: %s",
					s.proxyID, err)
				continue
			}
			s.snapCh <- *snapCopy
			// Allow the next change to trigger a send
			coalesceTimer = nil

			// Skip the rest of the loop - we just delivered a snapshot, so
			// there is nothing new to coalesce on this iteration.
			continue

		case replyCh := <-s.reqCh:
			if !snap.Valid() {
				// Not valid yet, just respond with nil and move on to the next
				// task.
				replyCh <- nil
				continue
			}
			// Make a deep copy of snap so we don't mutate any of the embedded
			// structs etc. on future updates.
			snapCopy, err := snap.Clone()
			if err != nil {
				s.logger.Printf("[ERR] Failed to copy config snapshot for proxy %s: %s",
					s.proxyID, err)
				continue
			}
			replyCh <- snapCopy

			// Skip the rest of the loop - nothing changed on this iteration,
			// so there is nothing new to send.
			continue
		}

		// Check if snap is complete enough to be a valid config to deliver to
		// a proxy yet.
		if snap.Valid() {
			// Don't send it right away; set a short timer that will wait for
			// updates from any of the other cache values and deliver them all
			// together.
			if coalesceTimer == nil {
				coalesceTimer = time.AfterFunc(coalesceTimeout, func() {
					// This runs in another goroutine so we can't just do the
					// send directly here as access to snap is racy. Instead,
					// signal the main loop above.
					sendCh <- struct{}{}
				})
			}
		}
	}
}
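// The coalescing in run above means a burst of cache updates produces a single
// snapshot rather than one send per event. An illustrative timeline with the
// 200ms coalesceTimeout:
//
//	t=0ms    roots update arrives     -> snap becomes valid, timer armed
//	t=40ms   leaf update arrives      -> timer already armed, nothing to do
//	t=120ms  upstream update arrives  -> timer still armed, nothing to do
//	t=200ms  timer fires, sendCh      -> one combined ConfigSnapshot delivered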
func (s *state) handleUpdate(u cache.UpdateEvent, snap *ConfigSnapshot) error {
	switch u.CorrelationID {
	case rootsWatchID:
		roots, ok := u.Result.(*structs.IndexedCARoots)
		if !ok {
			return fmt.Errorf("invalid type for roots response: %T", u.Result)
		}
		snap.Roots = roots
	case leafWatchID:
		leaf, ok := u.Result.(*structs.IssuedCert)
		if !ok {
			return fmt.Errorf("invalid type for leaf response: %T", u.Result)
		}
		snap.Leaf = leaf
	case intentionsWatchID:
		// Not in snapshot currently, no-op
	default:
		// Service discovery result, figure out which type
		switch {
		case strings.HasPrefix(u.CorrelationID, serviceIDPrefix):
			resp, ok := u.Result.(*structs.IndexedCheckServiceNodes)
			if !ok {
				return fmt.Errorf("invalid type for service response: %T", u.Result)
			}
			snap.UpstreamEndpoints[u.CorrelationID] = resp.Nodes

		case strings.HasPrefix(u.CorrelationID, preparedQueryIDPrefix):
			resp, ok := u.Result.(*structs.PreparedQueryExecuteResponse)
			if !ok {
				return fmt.Errorf("invalid type for prepared query response: %T", u.Result)
			}
			snap.UpstreamEndpoints[u.CorrelationID] = resp.Nodes

		default:
			return errors.New("unknown correlation ID")
		}
	}
	return nil
}

// CurrentSnapshot synchronously returns the current ConfigSnapshot if there is
// one ready. If we don't have one yet because not all the necessary parts have
// been returned (i.e. both roots and leaf cert), nil is returned.
func (s *state) CurrentSnapshot() *ConfigSnapshot {
	// Make a chan for the response to be sent on
	ch := make(chan *ConfigSnapshot, 1)
	s.reqCh <- ch
	// Wait for the response
	return <-ch
}

// Changed returns whether the passed NodeService has changed in any of the
// fields we care about for config state watching, or whether it carries a
// different token.
func (s *state) Changed(ns *structs.NodeService, token string) bool {
	if ns == nil {
		return true
	}
	return ns.Kind != structs.ServiceKindConnectProxy ||
		s.proxyID != ns.ID ||
		s.address != ns.Address ||
		s.port != ns.Port ||
		!reflect.DeepEqual(s.proxyCfg, ns.Proxy) ||
		s.token != token
}
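// Illustrative only: a hypothetical owner of a state might combine Changed,
// Close and newState to rebuild watches when a proxy registration is updated,
// roughly as follows (the real logic lives in the proxycfg Manager):
//
//	if st.Changed(ns, token) {
//		st.Close()
//		newSt, err := newState(ns, token)
//		if err != nil {
//			return err
//		}
//		// ... set logger/source/cache, then call newSt.Watch() ...
//	}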