github.com/clly/consul@v1.4.5/agent/consul/acl_replication_legacy.go (about)

     1  package consul
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"sort"
     7  	"time"
     8  
     9  	"github.com/armon/go-metrics"
    10  	"github.com/hashicorp/consul/agent/structs"
    11  )
    12  
// aclIterator simplifies the reconciliation algorithm below by providing a
// basic cursor that moves through a list of ACLs and yields nil once the list
// is exhausted. It also implements sort.Interface, ordering entries by ID, so
// the list can be pre-sorted before iteration. The input should already be
// sorted, but since ordering is crucial for correctness and the data comes
// from other servers, we sort to make sure.
type aclIterator struct {
	// acls is the list being iterated. Sorting through this type reorders
	// the list in place.
	acls structs.ACLs

	// index is the current position of the iterator.
	index int
}
    24  
    25  // newACLIterator returns a new ACL iterator.
    26  func newACLIterator(acls structs.ACLs) *aclIterator {
    27  	return &aclIterator{acls: acls}
    28  }
    29  
    30  // See sort.Interface.
    31  func (a *aclIterator) Len() int {
    32  	return len(a.acls)
    33  }
    34  
    35  // See sort.Interface.
    36  func (a *aclIterator) Swap(i, j int) {
    37  	a.acls[i], a.acls[j] = a.acls[j], a.acls[i]
    38  }
    39  
    40  // See sort.Interface.
    41  func (a *aclIterator) Less(i, j int) bool {
    42  	return a.acls[i].ID < a.acls[j].ID
    43  }
    44  
    45  // Front returns the item at index position, or nil if the list is exhausted.
    46  func (a *aclIterator) Front() *structs.ACL {
    47  	if a.index < len(a.acls) {
    48  		return a.acls[a.index]
    49  	}
    50  	return nil
    51  }
    52  
    53  // Next advances the iterator to the next index.
    54  func (a *aclIterator) Next() {
    55  	a.index++
    56  }
    57  
    58  // reconcileACLs takes the local and remote ACL state, and produces a list of
    59  // changes required in order to bring the local ACLs into sync with the remote
    60  // ACLs. You can supply lastRemoteIndex as a hint that replication has succeeded
    61  // up to that remote index and it will make this process more efficient by only
    62  // comparing ACL entries modified after that index. Setting this to 0 will force
    63  // a full compare of all existing ACLs.
    64  func reconcileLegacyACLs(local, remote structs.ACLs, lastRemoteIndex uint64) structs.ACLRequests {
    65  	// Since sorting the lists is crucial for correctness, we are depending
    66  	// on data coming from other servers potentially running a different,
    67  	// version of Consul, and sorted-ness is kind of a subtle property of
    68  	// the state store indexing, it's prudent to make sure things are sorted
    69  	// before we begin.
    70  	localIter, remoteIter := newACLIterator(local), newACLIterator(remote)
    71  	sort.Sort(localIter)
    72  	sort.Sort(remoteIter)
    73  
    74  	// Run through both lists and reconcile them.
    75  	var changes structs.ACLRequests
    76  	for localIter.Front() != nil || remoteIter.Front() != nil {
    77  		// If the local list is exhausted, then process this as a remote
    78  		// add. We know from the loop condition that there's something
    79  		// in the remote list.
    80  		if localIter.Front() == nil {
    81  			changes = append(changes, &structs.ACLRequest{
    82  				Op:  structs.ACLSet,
    83  				ACL: *(remoteIter.Front()),
    84  			})
    85  			remoteIter.Next()
    86  			continue
    87  		}
    88  
    89  		// If the remote list is exhausted, then process this as a local
    90  		// delete. We know from the loop condition that there's something
    91  		// in the local list.
    92  		if remoteIter.Front() == nil {
    93  			changes = append(changes, &structs.ACLRequest{
    94  				Op:  structs.ACLDelete,
    95  				ACL: *(localIter.Front()),
    96  			})
    97  			localIter.Next()
    98  			continue
    99  		}
   100  
   101  		// At this point we know there's something at the front of each
   102  		// list we need to resolve.
   103  
   104  		// If the remote list has something local doesn't, we add it.
   105  		if localIter.Front().ID > remoteIter.Front().ID {
   106  			changes = append(changes, &structs.ACLRequest{
   107  				Op:  structs.ACLSet,
   108  				ACL: *(remoteIter.Front()),
   109  			})
   110  			remoteIter.Next()
   111  			continue
   112  		}
   113  
   114  		// If local has something remote doesn't, we delete it.
   115  		if localIter.Front().ID < remoteIter.Front().ID {
   116  			changes = append(changes, &structs.ACLRequest{
   117  				Op:  structs.ACLDelete,
   118  				ACL: *(localIter.Front()),
   119  			})
   120  			localIter.Next()
   121  			continue
   122  		}
   123  
   124  		// Local and remote have an ACL with the same ID, so we might
   125  		// need to compare them.
   126  		l, r := localIter.Front(), remoteIter.Front()
   127  		if r.RaftIndex.ModifyIndex > lastRemoteIndex && !r.IsSame(l) {
   128  			changes = append(changes, &structs.ACLRequest{
   129  				Op:  structs.ACLSet,
   130  				ACL: *r,
   131  			})
   132  		}
   133  		localIter.Next()
   134  		remoteIter.Next()
   135  	}
   136  	return changes
   137  }
   138  
   139  // FetchLocalACLs returns the ACLs in the local state store.
   140  func (s *Server) fetchLocalLegacyACLs() (structs.ACLs, error) {
   141  	_, local, err := s.fsm.State().ACLTokenList(nil, false, true, "")
   142  	if err != nil {
   143  		return nil, err
   144  	}
   145  
   146  	var acls structs.ACLs
   147  	for _, token := range local {
   148  		if acl, err := token.Convert(); err == nil && acl != nil {
   149  			acls = append(acls, acl)
   150  		}
   151  	}
   152  
   153  	return acls, nil
   154  }
   155  
   156  // FetchRemoteACLs is used to get the remote set of ACLs from the ACL
   157  // datacenter. The lastIndex parameter is a hint about which remote index we
   158  // have replicated to, so this is expected to block until something changes.
   159  func (s *Server) fetchRemoteLegacyACLs(lastRemoteIndex uint64) (*structs.IndexedACLs, error) {
   160  	defer metrics.MeasureSince([]string{"leader", "fetchRemoteACLs"}, time.Now())
   161  
   162  	args := structs.DCSpecificRequest{
   163  		Datacenter: s.config.ACLDatacenter,
   164  		QueryOptions: structs.QueryOptions{
   165  			Token:         s.tokens.ReplicationToken(),
   166  			MinQueryIndex: lastRemoteIndex,
   167  			AllowStale:    true,
   168  		},
   169  	}
   170  	var remote structs.IndexedACLs
   171  	if err := s.RPC("ACL.List", &args, &remote); err != nil {
   172  		return nil, err
   173  	}
   174  	return &remote, nil
   175  }
   176  
   177  // UpdateLocalACLs is given a list of changes to apply in order to bring the
   178  // local ACLs in-line with the remote ACLs from the ACL datacenter.
   179  func (s *Server) updateLocalLegacyACLs(changes structs.ACLRequests, ctx context.Context) (bool, error) {
   180  	defer metrics.MeasureSince([]string{"leader", "updateLocalACLs"}, time.Now())
   181  
   182  	minTimePerOp := time.Second / time.Duration(s.config.ACLReplicationApplyLimit)
   183  	for _, change := range changes {
   184  		// Note that we are using the single ACL interface here and not
   185  		// performing all this inside a single transaction. This is OK
   186  		// for two reasons. First, there's nothing else other than this
   187  		// replication routine that alters the local ACLs, so there's
   188  		// nothing to contend with locally. Second, if an apply fails
   189  		// in the middle (most likely due to losing leadership), the
   190  		// next replication pass will clean up and check everything
   191  		// again.
   192  		var reply string
   193  		start := time.Now()
   194  		if err := aclApplyInternal(s, change, &reply); err != nil {
   195  			return false, err
   196  		}
   197  
   198  		// Do a smooth rate limit to wait out the min time allowed for
   199  		// each op. If this op took longer than the min, then the sleep
   200  		// time will be negative and we will just move on.
   201  		elapsed := time.Since(start)
   202  		select {
   203  		case <-ctx.Done():
   204  			return true, nil
   205  		case <-time.After(minTimePerOp - elapsed):
   206  			// do nothing
   207  		}
   208  	}
   209  	return false, nil
   210  }
   211  
   212  // replicateACLs is a runs one pass of the algorithm for replicating ACLs from
   213  // a remote ACL datacenter to local state. If there's any error, this will return
   214  // 0 for the lastRemoteIndex, which will cause us to immediately do a full sync
   215  // next time.
   216  func (s *Server) replicateLegacyACLs(lastRemoteIndex uint64, ctx context.Context) (uint64, bool, error) {
   217  	remote, err := s.fetchRemoteLegacyACLs(lastRemoteIndex)
   218  	if err != nil {
   219  		return 0, false, fmt.Errorf("failed to retrieve remote ACLs: %v", err)
   220  	}
   221  
   222  	// Need to check if we should be stopping. This will be common as the fetching process is a blocking
   223  	// RPC which could have been hanging around for a long time and during that time leadership could
   224  	// have been lost.
   225  	select {
   226  	case <-ctx.Done():
   227  		return 0, true, nil
   228  	default:
   229  		// do nothing
   230  	}
   231  
   232  	// Measure everything after the remote query, which can block for long
   233  	// periods of time. This metric is a good measure of how expensive the
   234  	// replication process is.
   235  	defer metrics.MeasureSince([]string{"leader", "replicateACLs"}, time.Now())
   236  
   237  	local, err := s.fetchLocalLegacyACLs()
   238  	if err != nil {
   239  		return 0, false, fmt.Errorf("failed to retrieve local ACLs: %v", err)
   240  	}
   241  
   242  	// If the remote index ever goes backwards, it's a good indication that
   243  	// the remote side was rebuilt and we should do a full sync since we
   244  	// can't make any assumptions about what's going on.
   245  	if remote.QueryMeta.Index < lastRemoteIndex {
   246  		s.logger.Printf("[WARN] consul: Legacy ACL replication remote index moved backwards (%d to %d), forcing a full ACL sync", lastRemoteIndex, remote.QueryMeta.Index)
   247  		lastRemoteIndex = 0
   248  	}
   249  
   250  	// Calculate the changes required to bring the state into sync and then
   251  	// apply them.
   252  	changes := reconcileLegacyACLs(local, remote.ACLs, lastRemoteIndex)
   253  	exit, err := s.updateLocalLegacyACLs(changes, ctx)
   254  	if exit {
   255  		return 0, true, nil
   256  	}
   257  
   258  	if err != nil {
   259  		return 0, false, fmt.Errorf("failed to sync ACL changes: %v", err)
   260  	}
   261  
   262  	// Return the index we got back from the remote side, since we've synced
   263  	// up with the remote state as of that index.
   264  	return remote.QueryMeta.Index, false, nil
   265  }