github.imxd.top/hashicorp/consul@v1.4.5/agent/consul/acl_replication_legacy.go

package consul

import (
	"context"
	"fmt"
	"sort"
	"time"

	"github.com/armon/go-metrics"
	"github.com/hashicorp/consul/agent/structs"
)

// aclIterator simplifies the algorithm below by providing a basic iterator
// that moves through a list of ACLs and returns nil when it's exhausted. It
// also implements sort.Interface so the ACLs being iterated over can be
// pre-sorted by ID. They should already be sorted, but since this is crucial
// for correctness and we are taking input from other servers, we sort to
// make sure.
type aclIterator struct {
	acls structs.ACLs

	// index is the current position of the iterator.
	index int
}

// newACLIterator returns a new ACL iterator.
func newACLIterator(acls structs.ACLs) *aclIterator {
	return &aclIterator{acls: acls}
}

// See sort.Interface.
func (a *aclIterator) Len() int {
	return len(a.acls)
}

// See sort.Interface.
func (a *aclIterator) Swap(i, j int) {
	a.acls[i], a.acls[j] = a.acls[j], a.acls[i]
}

// See sort.Interface.
func (a *aclIterator) Less(i, j int) bool {
	return a.acls[i].ID < a.acls[j].ID
}

// Front returns the item at the current index position, or nil if the list
// is exhausted.
func (a *aclIterator) Front() *structs.ACL {
	if a.index < len(a.acls) {
		return a.acls[a.index]
	}
	return nil
}

// Next advances the iterator to the next index.
func (a *aclIterator) Next() {
	a.index++
}

// reconcileLegacyACLs takes the local and remote ACL state, and produces a
// list of changes required in order to bring the local ACLs into sync with
// the remote ACLs. You can supply lastRemoteIndex as a hint that replication
// has succeeded up to that remote index, which makes this process more
// efficient by only comparing ACL entries modified after that index. Setting
// this to 0 will force a full compare of all existing ACLs.
func reconcileLegacyACLs(local, remote structs.ACLs, lastRemoteIndex uint64) structs.ACLRequests {
	// Since sorted order is crucial for correctness, the data comes from
	// other servers potentially running a different version of Consul, and
	// sorted-ness is a subtle property of the state store indexing, it's
	// prudent to make sure things are sorted before we begin.
	localIter, remoteIter := newACLIterator(local), newACLIterator(remote)
	sort.Sort(localIter)
	sort.Sort(remoteIter)

	// Run through both lists and reconcile them.
	var changes structs.ACLRequests
	for localIter.Front() != nil || remoteIter.Front() != nil {
		// If the local list is exhausted, then process this as a remote
		// add. We know from the loop condition that there's something
		// in the remote list.
		if localIter.Front() == nil {
			changes = append(changes, &structs.ACLRequest{
				Op:  structs.ACLSet,
				ACL: *(remoteIter.Front()),
			})
			remoteIter.Next()
			continue
		}

		// If the remote list is exhausted, then process this as a local
		// delete. We know from the loop condition that there's something
		// in the local list.
		if remoteIter.Front() == nil {
			changes = append(changes, &structs.ACLRequest{
				Op:  structs.ACLDelete,
				ACL: *(localIter.Front()),
			})
			localIter.Next()
			continue
		}

		// At this point we know there's something at the front of each
		// list we need to resolve.
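		// Because both lists are sorted by ID, comparing just the two
		// front entries is enough to classify this step: a strictly
		// smaller ID can only exist on one side, and equal IDs refer to
		// the same ACL.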

		// If the remote list has something local doesn't, we add it.
		if localIter.Front().ID > remoteIter.Front().ID {
			changes = append(changes, &structs.ACLRequest{
				Op:  structs.ACLSet,
				ACL: *(remoteIter.Front()),
			})
			remoteIter.Next()
			continue
		}

		// If local has something remote doesn't, we delete it.
		if localIter.Front().ID < remoteIter.Front().ID {
			changes = append(changes, &structs.ACLRequest{
				Op:  structs.ACLDelete,
				ACL: *(localIter.Front()),
			})
			localIter.Next()
			continue
		}

		// Local and remote have an ACL with the same ID, so we might
		// need to compare them.
		l, r := localIter.Front(), remoteIter.Front()
		if r.RaftIndex.ModifyIndex > lastRemoteIndex && !r.IsSame(l) {
			changes = append(changes, &structs.ACLRequest{
				Op:  structs.ACLSet,
				ACL: *r,
			})
		}
		localIter.Next()
		remoteIter.Next()
	}
	return changes
}

// fetchLocalLegacyACLs returns the ACLs in the local state store.
func (s *Server) fetchLocalLegacyACLs() (structs.ACLs, error) {
	_, local, err := s.fsm.State().ACLTokenList(nil, false, true, "")
	if err != nil {
		return nil, err
	}

	var acls structs.ACLs
	for _, token := range local {
		if acl, err := token.Convert(); err == nil && acl != nil {
			acls = append(acls, acl)
		}
	}

	return acls, nil
}

// fetchRemoteLegacyACLs is used to get the remote set of ACLs from the ACL
// datacenter. The lastRemoteIndex parameter is a hint about which remote
// index we have replicated to, so this is expected to block until something
// changes.
func (s *Server) fetchRemoteLegacyACLs(lastRemoteIndex uint64) (*structs.IndexedACLs, error) {
	defer metrics.MeasureSince([]string{"leader", "fetchRemoteACLs"}, time.Now())

	args := structs.DCSpecificRequest{
		Datacenter: s.config.ACLDatacenter,
		QueryOptions: structs.QueryOptions{
			Token:         s.tokens.ReplicationToken(),
			MinQueryIndex: lastRemoteIndex,
			AllowStale:    true,
		},
	}
	var remote structs.IndexedACLs
	if err := s.RPC("ACL.List", &args, &remote); err != nil {
		return nil, err
	}
	return &remote, nil
}

// updateLocalLegacyACLs is given a list of changes to apply in order to
// bring the local ACLs in line with the remote ACLs from the ACL datacenter.
func (s *Server) updateLocalLegacyACLs(changes structs.ACLRequests, ctx context.Context) (bool, error) {
	defer metrics.MeasureSince([]string{"leader", "updateLocalACLs"}, time.Now())

	minTimePerOp := time.Second / time.Duration(s.config.ACLReplicationApplyLimit)
	for _, change := range changes {
		// Note that we are using the single ACL interface here and not
		// performing all this inside a single transaction. This is OK
		// for two reasons. First, there's nothing other than this
		// replication routine that alters the local ACLs, so there's
		// nothing to contend with locally. Second, if an apply fails
		// in the middle (most likely due to losing leadership), the
		// next replication pass will clean up and check everything
		// again.
		var reply string
		start := time.Now()
		if err := aclApplyInternal(s, change, &reply); err != nil {
			return false, err
		}

		// Do a smooth rate limit to wait out the min time allowed for
		// each op. If this op took longer than the min, then the sleep
		// time will be negative and we will just move on.
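		// For example (illustrative numbers, not the defaults): with
		// ACLReplicationApplyLimit set to 100 ops/sec, minTimePerOp is
		// 10ms, so an apply that took 4ms is followed by a ~6ms wait,
		// while one that took 12ms yields a negative duration and the
		// time.After below fires immediately.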
		elapsed := time.Since(start)
		select {
		case <-ctx.Done():
			return true, nil
		case <-time.After(minTimePerOp - elapsed):
			// do nothing
		}
	}
	return false, nil
}

// replicateLegacyACLs runs one pass of the algorithm for replicating ACLs
// from a remote ACL datacenter to local state. If there's any error, this
// will return 0 for the lastRemoteIndex, which will cause us to immediately
// do a full sync next time.
func (s *Server) replicateLegacyACLs(lastRemoteIndex uint64, ctx context.Context) (uint64, bool, error) {
	remote, err := s.fetchRemoteLegacyACLs(lastRemoteIndex)
	if err != nil {
		return 0, false, fmt.Errorf("failed to retrieve remote ACLs: %v", err)
	}

	// Need to check if we should be stopping. This will be common, since
	// the fetch above is a blocking RPC that could have been waiting for a
	// long time, and leadership could have been lost during that time.
	select {
	case <-ctx.Done():
		return 0, true, nil
	default:
		// do nothing
	}

	// Measure everything after the remote query, which can block for long
	// periods of time. This metric is a good measure of how expensive the
	// replication process is.
	defer metrics.MeasureSince([]string{"leader", "replicateACLs"}, time.Now())

	local, err := s.fetchLocalLegacyACLs()
	if err != nil {
		return 0, false, fmt.Errorf("failed to retrieve local ACLs: %v", err)
	}

	// If the remote index ever goes backwards, it's a good indication that
	// the remote side was rebuilt and we should do a full sync since we
	// can't make any assumptions about what's going on.
	if remote.QueryMeta.Index < lastRemoteIndex {
		s.logger.Printf("[WARN] consul: Legacy ACL replication remote index moved backwards (%d to %d), forcing a full ACL sync", lastRemoteIndex, remote.QueryMeta.Index)
		lastRemoteIndex = 0
	}

	// Calculate the changes required to bring the state into sync and then
	// apply them.
	changes := reconcileLegacyACLs(local, remote.ACLs, lastRemoteIndex)
	exit, err := s.updateLocalLegacyACLs(changes, ctx)
	if exit {
		return 0, true, nil
	}

	if err != nil {
		return 0, false, fmt.Errorf("failed to sync ACL changes: %v", err)
	}

	// Return the index we got back from the remote side, since we've synced
	// up with the remote state as of that index.
	return remote.QueryMeta.Index, false, nil
}
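
// The reconcile step above is a standard sorted-merge diff, and since
// reconcileLegacyACLs is a pure function it is easy to exercise directly.
// A minimal sketch of its behavior (the IDs here are illustrative, not
// taken from the original file):
//
//	local := structs.ACLs{{ID: "a"}, {ID: "b"}}
//	remote := structs.ACLs{{ID: "b"}, {ID: "c"}}
//	changes := reconcileLegacyACLs(local, remote, 0)
//	// Produces an ACLDelete for "a" (present locally only) and an
//	// ACLSet for "c" (present remotely only). "b" exists on both sides,
//	// so it is re-set only if the remote copy was modified after the
//	// supplied index and differs from the local copy per IsSame.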