github.imxd.top/hashicorp/consul@v1.4.5/agent/proxycfg/state.go

package proxycfg

import (
	"context"
	"errors"
	"fmt"
	"log"
	"reflect"
	"strings"
	"time"

	"github.com/hashicorp/consul/agent/cache"
	cachetype "github.com/hashicorp/consul/agent/cache-types"
	"github.com/hashicorp/consul/agent/structs"
	"github.com/mitchellh/copystructure"
)

const (
	coalesceTimeout                  = 200 * time.Millisecond
	rootsWatchID                     = "roots"
	leafWatchID                      = "leaf"
	intentionsWatchID                = "intentions"
	serviceIDPrefix                  = string(structs.UpstreamDestTypeService) + ":"
	preparedQueryIDPrefix            = string(structs.UpstreamDestTypePreparedQuery) + ":"
	defaultPreparedQueryPollInterval = 30 * time.Second
)

// state holds all the state needed to maintain the config for a registered
// connect-proxy service. When a proxy registration is changed, the entire state
// is discarded and a new one created.
type state struct {
	// logger, source and cache are required to be set before calling Watch.
	logger *log.Logger
	source *structs.QuerySource
	cache  *cache.Cache

	// ctx and cancel store the context and cancel func created in Watch; cancel
	// stops all the watches and the run goroutine when Close is called.
	ctx    context.Context
	cancel func()

	proxyID  string
	address  string
	port     int
	proxyCfg structs.ConnectProxyConfig
	token    string

	ch     chan cache.UpdateEvent
	snapCh chan ConfigSnapshot
	reqCh  chan chan *ConfigSnapshot
}

// newState populates the state struct by copying relevant fields from the
// NodeService and Token. We copy so that we can use them in a separate
// goroutine later without worrying about races with the NodeService passed in
// (especially for embedded fields like maps and slices).
//
// The returned state needs its required dependencies to be set before Watch
// can be called.
func newState(ns *structs.NodeService, token string) (*state, error) {
	if ns.Kind != structs.ServiceKindConnectProxy {
		return nil, errors.New("not a connect-proxy")
	}

	// Copy the proxy config so we own the embedded maps and slices.
	proxyCfgRaw, err := copystructure.Copy(ns.Proxy)
	if err != nil {
		return nil, err
	}
	proxyCfg, ok := proxyCfgRaw.(structs.ConnectProxyConfig)
	if !ok {
		return nil, errors.New("failed to copy proxy config")
	}

	return &state{
		proxyID:  ns.ID,
		address:  ns.Address,
		port:     ns.Port,
		proxyCfg: proxyCfg,
		token:    token,
		// 10 is fairly arbitrary here but allows the 3 mandatory watches and a
		// reasonable number of upstream watches to all deliver their initial
		// messages in parallel without blocking the cache.Notify loops. It's not
		// a huge deal if we do block for a short period, so we don't need to be
		// more conservative to handle larger numbers of upstreams correctly, but
		// this gives some headroom so that normal operation is non-blocking in
		// most typical cases.
		ch:     make(chan cache.UpdateEvent, 10),
		snapCh: make(chan ConfigSnapshot, 1),
		reqCh:  make(chan chan *ConfigSnapshot, 1),
	}, nil
}

// Watch initializes watches on all necessary cache data for the current proxy
// registration state and returns a chan to observe updates to the
// ConfigSnapshot that contains all necessary config state. The chan is closed
// when the state is Closed.
func (s *state) Watch() (<-chan ConfigSnapshot, error) {
	s.ctx, s.cancel = context.WithCancel(context.Background())

	err := s.initWatches()
	if err != nil {
		s.cancel()
		return nil, err
	}

	go s.run()

	return s.snapCh, nil
}

// Close discards the state and stops any long-running watches.
func (s *state) Close() error {
	if s.cancel != nil {
		s.cancel()
	}
	return nil
}
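
// exampleWatchLoop is a hypothetical usage sketch (not in the original Consul
// source) showing how a caller might construct a state, inject its required
// dependencies and consume coalesced snapshots. The logger, source and cache
// arguments are assumed to be configured by the surrounding agent.
func exampleWatchLoop(ctx context.Context, ns *structs.NodeService, token string,
	logger *log.Logger, source *structs.QuerySource, c *cache.Cache) error {

	s, err := newState(ns, token)
	if err != nil {
		return err
	}
	// Required dependencies must be set before Watch is called.
	s.logger = logger
	s.source = source
	s.cache = c

	snapCh, err := s.Watch()
	if err != nil {
		return err
	}

	for {
		select {
		case <-ctx.Done():
			// Closing the state cancels the watches and closes snapCh.
			return s.Close()
		case snap, ok := <-snapCh:
			if !ok {
				return nil
			}
			logger.Printf("[DEBUG] new config snapshot for proxy %s", snap.ProxyID)
		}
	}
}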

// initWatches sets up the watches needed based on current proxy registration
// state.
func (s *state) initWatches() error {
	// Watch for root changes
	err := s.cache.Notify(s.ctx, cachetype.ConnectCARootName, &structs.DCSpecificRequest{
		Datacenter:   s.source.Datacenter,
		QueryOptions: structs.QueryOptions{Token: s.token},
	}, rootsWatchID, s.ch)
	if err != nil {
		return err
	}

	// Watch the leaf cert
	err = s.cache.Notify(s.ctx, cachetype.ConnectCALeafName, &cachetype.ConnectCALeafRequest{
		Datacenter: s.source.Datacenter,
		Token:      s.token,
		Service:    s.proxyCfg.DestinationServiceName,
	}, leafWatchID, s.ch)
	if err != nil {
		return err
	}

	// Watch for intention updates
	err = s.cache.Notify(s.ctx, cachetype.IntentionMatchName, &structs.IntentionQueryRequest{
		Datacenter:   s.source.Datacenter,
		QueryOptions: structs.QueryOptions{Token: s.token},
		Match: &structs.IntentionQueryMatch{
			Type: structs.IntentionMatchDestination,
			Entries: []structs.IntentionMatchEntry{
				{
					Namespace: structs.IntentionDefaultNamespace,
					Name:      s.proxyCfg.DestinationServiceName,
				},
			},
		},
	}, intentionsWatchID, s.ch)
	if err != nil {
		return err
	}

	// Watch for updates to service endpoints for all upstreams
	for _, u := range s.proxyCfg.Upstreams {
		dc := s.source.Datacenter
		if u.Datacenter != "" {
			dc = u.Datacenter
		}

		switch u.DestinationType {
		case structs.UpstreamDestTypePreparedQuery:
			err = s.cache.Notify(s.ctx, cachetype.PreparedQueryName, &structs.PreparedQueryExecuteRequest{
				Datacenter:    dc,
				QueryOptions:  structs.QueryOptions{Token: s.token, MaxAge: defaultPreparedQueryPollInterval},
				QueryIDOrName: u.DestinationName,
				Connect:       true,
			}, u.Identifier(), s.ch)
		case structs.UpstreamDestTypeService:
			fallthrough
		case "": // Treat unset as the default Service type
			err = s.cache.Notify(s.ctx, cachetype.HealthServicesName, &structs.ServiceSpecificRequest{
				Datacenter:   dc,
				QueryOptions: structs.QueryOptions{Token: s.token},
				ServiceName:  u.DestinationName,
				Connect:      true,
			}, u.Identifier(), s.ch)
		default:
			return fmt.Errorf("unknown upstream type: %q", u.DestinationType)
		}

		// Return any error from registering the upstream watch above.
		if err != nil {
			return err
		}
	}
	return nil
}

func (s *state) run() {
	// Close the channel we return from Watch when we stop so consumers can stop
	// watching and clean up their goroutines. It's important we do this here and
	// not in Close since this routine sends on this chan and so might panic if it
	// gets closed from another goroutine.
	defer close(s.snapCh)

	snap := ConfigSnapshot{
		ProxyID:           s.proxyID,
		Address:           s.address,
		Port:              s.port,
		Proxy:             s.proxyCfg,
		UpstreamEndpoints: make(map[string]structs.CheckServiceNodes),
	}
	// Coalescing sends turns out to be really fiddly/painful using time.Timer.C
	// directly in the code below, since you can't detect whether a timer is
	// stopped or waiting in order to know when to reset it. So just use a chan
	// to send ourselves messages.
	sendCh := make(chan struct{})
	var coalesceTimer *time.Timer

	for {
		select {
		case <-s.ctx.Done():
			return
		case u := <-s.ch:
			if err := s.handleUpdate(u, &snap); err != nil {
				s.logger.Printf("[ERR] %s watch error: %s", u.CorrelationID, err)
				continue
			}

		case <-sendCh:
			// Make a deep copy of snap so we don't mutate any of the embedded structs
			// etc on future updates.
			snapCopy, err := snap.Clone()
			if err != nil {
				s.logger.Printf("[ERR] Failed to copy config snapshot for proxy %s: %s",
					s.proxyID, err)
				continue
			}
			s.snapCh <- *snapCopy
			// Allow the next change to trigger a send
			coalesceTimer = nil

			// Skip rest of loop - we just delivered a snapshot so there is nothing
			// new to coalesce on this iteration
			continue

		case replyCh := <-s.reqCh:
			if !snap.Valid() {
				// Not valid yet just respond with nil and move on to next task.
				replyCh <- nil
				continue
			}
			// Make a deep copy of snap so we don't mutate any of the embedded structs
			// etc on future updates.
			snapCopy, err := snap.Clone()
			if err != nil {
				s.logger.Printf("[ERR] Failed to copy config snapshot for proxy %s: %s",
					s.proxyID, err)
				continue
			}
			replyCh <- snapCopy

			// Skip rest of loop - there is nothing to send since nothing changed on
			// this iteration
			continue
		}

		// Check if snap is complete enough to be a valid config to deliver to a
		// proxy yet.
		if snap.Valid() {
			// Don't send it right away, set a short timer that will wait for updates
			// from any of the other cache values and deliver them all together.
			if coalesceTimer == nil {
				coalesceTimer = time.AfterFunc(coalesceTimeout, func() {
					// This runs in another goroutine so we can't just do the send
					// directly here as access to snap is racy. Instead, signal the main
					// loop above.
					sendCh <- struct{}{}
				})
			}
		}
	}
}

func (s *state) handleUpdate(u cache.UpdateEvent, snap *ConfigSnapshot) error {
	switch u.CorrelationID {
	case rootsWatchID:
		roots, ok := u.Result.(*structs.IndexedCARoots)
		if !ok {
			return fmt.Errorf("invalid type for roots response: %T", u.Result)
		}
		snap.Roots = roots
	case leafWatchID:
		leaf, ok := u.Result.(*structs.IssuedCert)
		if !ok {
			return fmt.Errorf("invalid type for leaf response: %T", u.Result)
		}
		snap.Leaf = leaf
	case intentionsWatchID:
		// Not in snapshot currently, no op
	default:
		// Service discovery result, figure out which type
		switch {
		case strings.HasPrefix(u.CorrelationID, serviceIDPrefix):
			resp, ok := u.Result.(*structs.IndexedCheckServiceNodes)
			if !ok {
				return fmt.Errorf("invalid type for service response: %T", u.Result)
			}
			snap.UpstreamEndpoints[u.CorrelationID] = resp.Nodes

		case strings.HasPrefix(u.CorrelationID, preparedQueryIDPrefix):
			resp, ok := u.Result.(*structs.PreparedQueryExecuteResponse)
			if !ok {
				return fmt.Errorf("invalid type for prepared query response: %T", u.Result)
			}
			snap.UpstreamEndpoints[u.CorrelationID] = resp.Nodes

		default:
			return errors.New("unknown correlation ID")
		}
	}
	return nil
}

// CurrentSnapshot synchronously returns the current ConfigSnapshot if there is
// one ready. If we don't have one yet because not all necessary parts have been
// returned (e.g. both the roots and leaf cert), nil is returned.
func (s *state) CurrentSnapshot() *ConfigSnapshot {
	// Make a chan for the response to be sent on
	ch := make(chan *ConfigSnapshot, 1)
	s.reqCh <- ch
	// Wait for the response
	return <-ch
}
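
// exampleCurrentSnapshot is a hypothetical usage sketch (not in the original
// Consul source) showing how a caller might poll for the latest snapshot, for
// example to serve a one-off request, while handling the not-ready case.
func exampleCurrentSnapshot(s *state) (*ConfigSnapshot, error) {
	snap := s.CurrentSnapshot()
	if snap == nil {
		// Not all of the mandatory watches (roots, leaf cert, ...) have returned
		// results yet, so there is no valid config to hand out.
		return nil, errors.New("config snapshot not ready yet")
	}
	// snap is a deep copy, so the caller may read it freely while run()
	// continues to apply updates to its own copy.
	return snap, nil
}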

// Changed returns true if the passed NodeService differs from the one this
// state was built from in any of the fields that matter for config state
// watching, or if the token has changed.
func (s *state) Changed(ns *structs.NodeService, token string) bool {
	if ns == nil {
		return true
	}
	return ns.Kind != structs.ServiceKindConnectProxy ||
		s.proxyID != ns.ID ||
		s.address != ns.Address ||
		s.port != ns.Port ||
		!reflect.DeepEqual(s.proxyCfg, ns.Proxy) ||
		s.token != token
}
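
// exampleEnsureState is a hypothetical sketch (not in the original Consul
// source) of how a registration-change handler might use Changed to decide
// whether an existing state can be reused or must be discarded and rebuilt.
// The old state may be nil on first registration.
func exampleEnsureState(old *state, ns *structs.NodeService, token string) (*state, error) {
	if old != nil && !old.Changed(ns, token) {
		// Nothing we watch on has changed; keep the existing watches and snapshot.
		return old, nil
	}
	if old != nil {
		// The registration changed, so the entire state is discarded along with
		// its watches, and a new one is created below.
		old.Close()
	}
	return newState(ns, token)
}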