github.com/IBM-Blockchain/fabric-operator@v1.0.4/pkg/restart/restart.go (about) 1 /* 2 * Copyright contributors to the Hyperledger Fabric Operator project 3 * 4 * SPDX-License-Identifier: Apache-2.0 5 * 6 * Licensed under the Apache License, Version 2.0 (the "License"); 7 * you may not use this file except in compliance with the License. 8 * You may obtain a copy of the License at: 9 * 10 * http://www.apache.org/licenses/LICENSE-2.0 11 * 12 * Unless required by applicable law or agreed to in writing, software 13 * distributed under the License is distributed on an "AS IS" BASIS, 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 15 * See the License for the specific language governing permissions and 16 * limitations under the License. 17 */ 18 19 package restart 20 21 import ( 22 "fmt" 23 "strings" 24 "time" 25 26 "github.com/IBM-Blockchain/fabric-operator/pkg/initializer/common" 27 k8sclient "github.com/IBM-Blockchain/fabric-operator/pkg/k8s/controllerclient" 28 "github.com/IBM-Blockchain/fabric-operator/pkg/restart/configmap" 29 "github.com/IBM-Blockchain/fabric-operator/pkg/restart/staggerrestarts" 30 "github.com/pkg/errors" 31 v1 "k8s.io/apimachinery/pkg/apis/meta/v1" 32 33 logf "sigs.k8s.io/controller-runtime/pkg/log" 34 ) 35 36 var log = logf.Log.WithName("restart_manager") 37 38 type RestartManager struct { 39 Client k8sclient.Client 40 Timers map[string]*time.Timer 41 WaitTime time.Duration 42 ConfigMapManager *configmap.Manager 43 StaggerRestartsService *staggerrestarts.StaggerRestartsService 44 } 45 46 func New(client k8sclient.Client, waitTime, timeout time.Duration) *RestartManager { 47 r := &RestartManager{ 48 Client: client, 49 Timers: map[string]*time.Timer{}, 50 WaitTime: waitTime, 51 ConfigMapManager: configmap.NewManager(client), 52 StaggerRestartsService: staggerrestarts.New(client, timeout), 53 } 54 55 return r 56 } 57 58 func (r *RestartManager) ForAdminCertUpdate(instance v1.Object) error { 59 return r.updateConfigFor(instance, ADMINCERT) 60 } 61 62 func (r *RestartManager) ForCertUpdate(certType common.SecretType, instance v1.Object) error { 63 var err error 64 switch certType { 65 case common.TLS: 66 err = r.ForTLSReenroll(instance) 67 case common.ECERT: 68 err = r.ForEcertReenroll(instance) 69 } 70 71 if err != nil { 72 return err 73 } 74 75 return nil 76 } 77 78 func (r *RestartManager) ForEcertReenroll(instance v1.Object) error { 79 return r.updateConfigFor(instance, ECERTUPDATE) 80 } 81 82 func (r *RestartManager) ForTLSReenroll(instance v1.Object) error { 83 return r.updateConfigFor(instance, TLSUPDATE) 84 } 85 86 func (r *RestartManager) ForConfigOverride(instance v1.Object) error { 87 return r.updateConfigFor(instance, CONFIGOVERRIDE) 88 } 89 90 func (r *RestartManager) ForMigration(instance v1.Object) error { 91 return r.updateConfigFor(instance, MIGRATION) 92 } 93 94 func (r *RestartManager) ForNodeOU(instance v1.Object) error { 95 return r.updateConfigFor(instance, NODEOU) 96 } 97 98 func (r *RestartManager) ForConfigMapUpdate(instance v1.Object) error { 99 return r.updateConfigFor(instance, CONFIGMAPUPDATE) 100 } 101 102 func (r *RestartManager) ForRestartAction(instance v1.Object) error { 103 return r.updateConfigFor(instance, RESTARTACTION) 104 } 105 106 // Updates the operator-config for the given reason by setting the request 107 // status to 'pending' and request timestamp to the current time: 108 // 109 // instances[instance_name].Requests[reason].Status = "pending" 110 func (r *RestartManager) updateConfigFor(instance v1.Object, reason Reason) error { 111 cfg, err := r.GetConfig(instance) 112 if err != nil { 113 return err 114 } 115 116 if cfg.Instances == nil { 117 cfg.Instances = map[string]*Restart{} 118 } 119 _, ok := cfg.Instances[instance.GetName()] 120 if !ok { 121 cfg.Instances[instance.GetName()] = &Restart{} 122 } 123 124 restart := cfg.Instances[instance.GetName()] 125 updateRestartRequest(restart, reason) 126 127 log.Info(fmt.Sprintf("Updating operator-config map, %s restart requested due to %s", instance.GetName(), reason)) 128 err = r.UpdateConfigMap(cfg, instance) 129 if err != nil { 130 return err 131 } 132 133 return nil 134 } 135 136 func updateRestartRequest(restart *Restart, reason Reason) { 137 if restart.Requests == nil { 138 restart.Requests = map[Reason]*Request{} 139 } 140 141 if restart.Requests[reason] == nil { 142 restart.Requests[reason] = &Request{} 143 } 144 145 // Set request time 146 req := restart.Requests[reason] 147 if req.Status != Pending { 148 req.Status = Pending 149 req.RequestTimestamp = time.Now().UTC().Format(time.RFC3339) 150 } 151 } 152 153 type Instance interface { 154 v1.Object 155 GetMSPID() string 156 } 157 158 // TriggerIfNeeded checks operator-config for any pending restarts, sets a timer to restart 159 // the deployment if required, and restarts the deployment. 160 func (r *RestartManager) TriggerIfNeeded(instance Instance) error { 161 var trigger bool 162 163 cfg, err := r.GetConfig(instance) 164 if err != nil { 165 return err 166 } 167 168 restart := cfg.Instances[instance.GetName()] 169 if restart == nil || restart.Requests == nil { 170 // Do nothing if restart doesn't have any pending requests 171 return nil 172 } 173 174 reasonList := []string{} 175 for reason, req := range restart.Requests { 176 if req != nil { 177 if req.Status == Pending { 178 reasonList = append(reasonList, string(reason)) 179 if r.triggerRestart(req) { 180 trigger = true 181 } 182 } 183 184 } 185 } 186 reasonString := strings.Join(reasonList, ",") 187 188 if trigger { 189 err = r.RestartDeployment(instance, reasonString) 190 if err != nil { 191 return err 192 } 193 } else if r.pendingRequests(restart) { 194 err = r.SetTimer(instance, reasonString) 195 if err != nil { 196 return errors.Wrap(err, "failed to set timer to restart deployment") 197 } 198 } 199 200 return nil 201 } 202 203 func (r *RestartManager) triggerRestart(req *Request) bool { 204 if req != nil { 205 if req.Status == Pending { 206 if req.LastActionTimestamp == "" { // no previous restart has occurred 207 return true 208 } 209 210 lastRestart, err := time.Parse(time.RFC3339, req.LastActionTimestamp) 211 if err != nil { 212 return true 213 } 214 215 requestedRestart, err := time.Parse(time.RFC3339, req.RequestTimestamp) 216 if err != nil { 217 return true 218 } 219 220 if requestedRestart.Sub(lastRestart) >= r.WaitTime { 221 return true 222 } 223 } 224 } 225 226 return false 227 } 228 229 func (r *RestartManager) pendingRequests(restart *Restart) bool { 230 for _, req := range restart.Requests { 231 if req.Status == Pending { 232 return true 233 } 234 } 235 return false 236 } 237 238 func (r *RestartManager) SetTimer(instance Instance, reason string) error { 239 cfg, err := r.GetConfig(instance) 240 if err != nil { 241 return err 242 } 243 244 restart := cfg.Instances[instance.GetName()] 245 246 oldestRequestTime := time.Now().UTC() 247 lastActionTime := "" 248 // Want to set timer duration based on oldest pending request 249 for _, req := range restart.Requests { 250 if req != nil { 251 requestTime, err := time.Parse(time.RFC3339, req.RequestTimestamp) 252 if err == nil { 253 if requestTime.Before(oldestRequestTime) { 254 oldestRequestTime = requestTime 255 lastActionTime = req.LastActionTimestamp 256 } 257 } 258 } 259 } 260 261 // Set timer if not already running 262 if r.Timers[instance.GetName()] == nil { 263 dur := r.getTimerDuration(lastActionTime, oldestRequestTime) 264 log.Info(fmt.Sprintf("Setting timer to restart %s in %f minutes", instance.GetName(), dur.Minutes())) 265 266 r.Timers[instance.GetName()] = time.AfterFunc(dur, func() { 267 err := r.RestartDeployment(instance, reason) 268 if err != nil { 269 log.Error(err, fmt.Sprintf("failed to restart deployment for %s", instance.GetName())) 270 } 271 }) 272 } else { 273 log.Info(fmt.Sprintf("Timer already set to restart %s shortly", instance.GetName())) 274 } 275 276 return nil 277 } 278 279 // If lastRestartTime was less than 10 min (or value of WaitTime) ago, calculate how much 280 // time remains before WaitTime has passed to trigger next restart 281 func (r *RestartManager) getTimerDuration(actionTime string, requestTime time.Time) time.Duration { 282 lastRestartTime, err := time.Parse(time.RFC3339, actionTime) 283 if err != nil { 284 // Default to WaitTime 285 return r.WaitTime 286 } 287 timePassed := requestTime.Sub(lastRestartTime) 288 return r.WaitTime - timePassed 289 } 290 291 // RestartDeployment adds the instance to the queue to stagger restarts 292 func (r *RestartManager) RestartDeployment(instance Instance, reason string) error { 293 log.Info(fmt.Sprintf("Queuing instance %s for restart", instance.GetName())) 294 295 err := r.ClearRestartConfigForInstance(instance) 296 if err != nil { 297 return errors.Wrap(err, "failed to clear restart config") 298 } 299 300 err = r.StaggerRestartsService.Restart(instance, reason) 301 if err != nil { 302 return errors.Wrap(err, "failed to add restart request to queue") 303 } 304 305 return nil 306 } 307 308 func (r *RestartManager) ClearRestartConfigForInstance(instance v1.Object) error { 309 cfg, err := r.GetConfig(instance) 310 if err != nil { 311 return err 312 } 313 314 if cfg.Instances == nil || cfg.Instances[instance.GetName()] == nil { 315 return nil 316 } 317 318 for _, req := range cfg.Instances[instance.GetName()].Requests { 319 if req != nil && req.Status == Pending { 320 clearRestart(req) 321 } 322 } 323 324 // Stop timer if previously set 325 if r.Timers[instance.GetName()] != nil { 326 r.Timers[instance.GetName()].Stop() 327 r.Timers[instance.GetName()] = nil 328 } 329 330 err = r.UpdateConfigMap(cfg, instance) 331 if err != nil { 332 return err 333 } 334 335 return nil 336 } 337 338 func clearRestart(req *Request) { 339 req.LastActionTimestamp = time.Now().UTC().Format(time.RFC3339) 340 req.RequestTimestamp = "" 341 req.Status = Complete 342 } 343 344 func (r *RestartManager) GetConfig(instance v1.Object) (*Config, error) { 345 cmName := "operator-config" 346 347 cfg := &Config{} 348 err := r.ConfigMapManager.GetRestartConfigFrom(cmName, instance.GetNamespace(), cfg) 349 if err != nil { 350 return nil, err 351 } 352 353 return cfg, nil 354 } 355 356 func (r *RestartManager) UpdateConfigMap(cfg *Config, instance v1.Object) error { 357 cmName := "operator-config" 358 359 return r.ConfigMapManager.UpdateConfig(cmName, instance.GetNamespace(), cfg) 360 }