github.com/kaisenlinux/docker.io@v0.0.0-20230510090727-ea55db55fac7/swarmkit/ca/reconciler.go (about) 1 package ca 2 3 import ( 4 "bytes" 5 "context" 6 "fmt" 7 "reflect" 8 "sync" 9 "time" 10 11 "github.com/cloudflare/cfssl/helpers" 12 "github.com/docker/swarmkit/api" 13 "github.com/docker/swarmkit/api/equality" 14 "github.com/docker/swarmkit/log" 15 "github.com/docker/swarmkit/manager/state/store" 16 "github.com/pkg/errors" 17 ) 18 19 // IssuanceStateRotateMaxBatchSize is the maximum number of nodes we'll tell to rotate their certificates in any given update 20 const IssuanceStateRotateMaxBatchSize = 30 21 22 func hasIssuer(n *api.Node, info *IssuerInfo) bool { 23 if n.Description == nil || n.Description.TLSInfo == nil { 24 return false 25 } 26 return bytes.Equal(info.Subject, n.Description.TLSInfo.CertIssuerSubject) && bytes.Equal(info.PublicKey, n.Description.TLSInfo.CertIssuerPublicKey) 27 } 28 29 var errRootRotationChanged = errors.New("target root rotation has changed") 30 31 // rootRotationReconciler keeps track of all the nodes in the store so that we can determine which ones need reconciliation when nodes are updated 32 // or the root CA is updated. This is meant to be used with watches on nodes and the cluster, and provides functions to be called when the 33 // cluster's RootCA has changed and when a node is added, updated, or removed. 34 type rootRotationReconciler struct { 35 mu sync.Mutex 36 clusterID string 37 batchUpdateInterval time.Duration 38 ctx context.Context 39 store *store.MemoryStore 40 41 currentRootCA *api.RootCA 42 currentIssuer IssuerInfo 43 unconvergedNodes map[string]*api.Node 44 45 wg sync.WaitGroup 46 cancel func() 47 } 48 49 // IssuerFromAPIRootCA returns the desired issuer given an API root CA object 50 func IssuerFromAPIRootCA(rootCA *api.RootCA) (*IssuerInfo, error) { 51 wantedIssuer := rootCA.CACert 52 if rootCA.RootRotation != nil { 53 wantedIssuer = rootCA.RootRotation.CACert 54 } 55 issuerCerts, err := helpers.ParseCertificatesPEM(wantedIssuer) 56 if err != nil { 57 return nil, errors.Wrap(err, "invalid certificate in cluster root CA object") 58 } 59 if len(issuerCerts) == 0 { 60 return nil, errors.New("invalid certificate in cluster root CA object") 61 } 62 return &IssuerInfo{ 63 Subject: issuerCerts[0].RawSubject, 64 PublicKey: issuerCerts[0].RawSubjectPublicKeyInfo, 65 }, nil 66 } 67 68 // assumption: UpdateRootCA will never be called with a `nil` root CA because the caller will be acting in response to 69 // a store update event 70 func (r *rootRotationReconciler) UpdateRootCA(newRootCA *api.RootCA) { 71 issuerInfo, err := IssuerFromAPIRootCA(newRootCA) 72 if err != nil { 73 log.G(r.ctx).WithError(err).Error("unable to update process the current root CA") 74 return 75 } 76 77 var ( 78 shouldStartNewLoop, waitForPrevLoop bool 79 loopCtx context.Context 80 ) 81 r.mu.Lock() 82 defer func() { 83 r.mu.Unlock() 84 if shouldStartNewLoop { 85 if waitForPrevLoop { 86 r.wg.Wait() 87 } 88 r.wg.Add(1) 89 go r.runReconcilerLoop(loopCtx, newRootCA) 90 } 91 }() 92 93 // check if the issuer has changed, first 94 if reflect.DeepEqual(&r.currentIssuer, issuerInfo) { 95 r.currentRootCA = newRootCA 96 return 97 } 98 // If the issuer has changed, iterate through all the nodes to figure out which ones need rotation 99 if newRootCA.RootRotation != nil { 100 var nodes []*api.Node 101 r.store.View(func(tx store.ReadTx) { 102 nodes, err = store.FindNodes(tx, store.All) 103 }) 104 if err != nil { 105 log.G(r.ctx).WithError(err).Error("unable to list nodes, so unable to process the current root CA") 106 return 107 } 108 109 // from here on out, there will be no more errors that cause us to have to abandon updating the Root CA, 110 // so we can start making changes to r's fields 111 r.unconvergedNodes = make(map[string]*api.Node) 112 for _, n := range nodes { 113 if !hasIssuer(n, issuerInfo) { 114 r.unconvergedNodes[n.ID] = n 115 } 116 } 117 shouldStartNewLoop = true 118 if r.cancel != nil { // there's already a loop going, so cancel it 119 r.cancel() 120 waitForPrevLoop = true 121 } 122 loopCtx, r.cancel = context.WithCancel(r.ctx) 123 } else { 124 r.unconvergedNodes = nil 125 } 126 r.currentRootCA = newRootCA 127 r.currentIssuer = *issuerInfo 128 } 129 130 // assumption: UpdateNode will never be called with a `nil` node because the caller will be acting in response to 131 // a store update event 132 func (r *rootRotationReconciler) UpdateNode(node *api.Node) { 133 r.mu.Lock() 134 defer r.mu.Unlock() 135 // if we're not in the middle of a root rotation ignore the update 136 if r.currentRootCA == nil || r.currentRootCA.RootRotation == nil { 137 return 138 } 139 if hasIssuer(node, &r.currentIssuer) { 140 delete(r.unconvergedNodes, node.ID) 141 } else { 142 r.unconvergedNodes[node.ID] = node 143 } 144 } 145 146 // assumption: DeleteNode will never be called with a `nil` node because the caller will be acting in response to 147 // a store update event 148 func (r *rootRotationReconciler) DeleteNode(node *api.Node) { 149 r.mu.Lock() 150 delete(r.unconvergedNodes, node.ID) 151 r.mu.Unlock() 152 } 153 154 func (r *rootRotationReconciler) runReconcilerLoop(ctx context.Context, loopRootCA *api.RootCA) { 155 defer r.wg.Done() 156 for { 157 r.mu.Lock() 158 if len(r.unconvergedNodes) == 0 { 159 r.mu.Unlock() 160 161 err := r.store.Update(func(tx store.Tx) error { 162 return r.finishRootRotation(tx, loopRootCA) 163 }) 164 if err == nil { 165 log.G(r.ctx).Info("completed root rotation") 166 return 167 } 168 log.G(r.ctx).WithError(err).Error("could not complete root rotation") 169 if err == errRootRotationChanged { 170 // if the root rotation has changed, this loop will be cancelled anyway, so may as well abort early 171 return 172 } 173 } else { 174 var toUpdate []*api.Node 175 for _, n := range r.unconvergedNodes { 176 iState := n.Certificate.Status.State 177 if iState != api.IssuanceStateRenew && iState != api.IssuanceStatePending && iState != api.IssuanceStateRotate { 178 n = n.Copy() 179 n.Certificate.Status.State = api.IssuanceStateRotate 180 toUpdate = append(toUpdate, n) 181 if len(toUpdate) >= IssuanceStateRotateMaxBatchSize { 182 break 183 } 184 } 185 } 186 r.mu.Unlock() 187 188 if err := r.batchUpdateNodes(toUpdate); err != nil { 189 log.G(r.ctx).WithError(err).Errorf("store error when trying to batch update %d nodes to request certificate rotation", len(toUpdate)) 190 } 191 } 192 193 select { 194 case <-ctx.Done(): 195 return 196 case <-time.After(r.batchUpdateInterval): 197 } 198 } 199 } 200 201 // This function assumes that the expected root CA has root rotation. This is intended to be used by 202 // `reconcileNodeRootsAndCerts`, which uses the root CA from the `lastSeenClusterRootCA`, and checks 203 // that it has a root rotation before calling this function. 204 func (r *rootRotationReconciler) finishRootRotation(tx store.Tx, expectedRootCA *api.RootCA) error { 205 cluster := store.GetCluster(tx, r.clusterID) 206 if cluster == nil { 207 return fmt.Errorf("unable to get cluster %s", r.clusterID) 208 } 209 210 // If the RootCA object has changed (because another root rotation was started or because some other node 211 // had finished the root rotation), we cannot finish the root rotation that we were working on. 212 if !equality.RootCAEqualStable(expectedRootCA, &cluster.RootCA) { 213 return errRootRotationChanged 214 } 215 216 var signerCert []byte 217 if len(cluster.RootCA.RootRotation.CAKey) > 0 { 218 signerCert = cluster.RootCA.RootRotation.CACert 219 } 220 // we don't actually have to parse out the default node expiration from the cluster - we are just using 221 // the ca.RootCA object to generate new tokens and the digest 222 updatedRootCA, err := NewRootCA(cluster.RootCA.RootRotation.CACert, signerCert, cluster.RootCA.RootRotation.CAKey, 223 DefaultNodeCertExpiration, nil) 224 if err != nil { 225 return errors.Wrap(err, "invalid cluster root rotation object") 226 } 227 cluster.RootCA = api.RootCA{ 228 CACert: cluster.RootCA.RootRotation.CACert, 229 CAKey: cluster.RootCA.RootRotation.CAKey, 230 CACertHash: updatedRootCA.Digest.String(), 231 JoinTokens: api.JoinTokens{ 232 Worker: GenerateJoinToken(&updatedRootCA, cluster.FIPS), 233 Manager: GenerateJoinToken(&updatedRootCA, cluster.FIPS), 234 }, 235 LastForcedRotation: cluster.RootCA.LastForcedRotation, 236 } 237 return store.UpdateCluster(tx, cluster) 238 } 239 240 func (r *rootRotationReconciler) batchUpdateNodes(toUpdate []*api.Node) error { 241 if len(toUpdate) == 0 { 242 return nil 243 } 244 err := r.store.Batch(func(batch *store.Batch) error { 245 // Directly update the nodes rather than get + update, and ignore version errors. Since 246 // `rootRotationReconciler` should be hooked up to all node update/delete/create events, we should have 247 // close to the latest versions of all the nodes. If not, the node will updated later and the 248 // next batch of updates should catch it. 249 for _, n := range toUpdate { 250 if err := batch.Update(func(tx store.Tx) error { 251 return store.UpdateNode(tx, n) 252 }); err != nil && err != store.ErrSequenceConflict { 253 log.G(r.ctx).WithError(err).Errorf("unable to update node %s to request a certificate rotation", n.ID) 254 } 255 } 256 return nil 257 }) 258 return err 259 }