github.com/IBM-Blockchain/fabric-operator@v1.0.4/pkg/restart/restart.go (about)

     1  /*
     2   * Copyright contributors to the Hyperledger Fabric Operator project
     3   *
     4   * SPDX-License-Identifier: Apache-2.0
     5   *
     6   * Licensed under the Apache License, Version 2.0 (the "License");
     7   * you may not use this file except in compliance with the License.
     8   * You may obtain a copy of the License at:
     9   *
    10   * 	  http://www.apache.org/licenses/LICENSE-2.0
    11   *
    12   * Unless required by applicable law or agreed to in writing, software
    13   * distributed under the License is distributed on an "AS IS" BASIS,
    14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    15   * See the License for the specific language governing permissions and
    16   * limitations under the License.
    17   */
    18  
    19  package restart
    20  
    21  import (
    22  	"fmt"
    23  	"strings"
    24  	"time"
    25  
    26  	"github.com/IBM-Blockchain/fabric-operator/pkg/initializer/common"
    27  	k8sclient "github.com/IBM-Blockchain/fabric-operator/pkg/k8s/controllerclient"
    28  	"github.com/IBM-Blockchain/fabric-operator/pkg/restart/configmap"
    29  	"github.com/IBM-Blockchain/fabric-operator/pkg/restart/staggerrestarts"
    30  	"github.com/pkg/errors"
    31  	v1 "k8s.io/apimachinery/pkg/apis/meta/v1"
    32  
    33  	logf "sigs.k8s.io/controller-runtime/pkg/log"
    34  )
    35  
// log is the package-level logger; its entries are emitted under the name
// "restart_manager".
var log = logf.Log.WithName("restart_manager")
    37  
// RestartManager records restart requests in the operator-config config map
// and decides whether a pending request is acted on immediately or deferred
// behind a timer.
//
//   - Client is the Kubernetes client supplied to New.
//   - Timers holds the deferred-restart timer per instance, keyed by instance
//     name; a missing or nil entry means no timer is registered.
//   - WaitTime is the gap that must separate an instance's last restart action
//     from a new request for that request to trigger a restart right away; it
//     is also the default delay used for a deferred restart.
//   - ConfigMapManager reads and writes the restart config stored in the
//     config map.
//   - StaggerRestartsService receives the instances that are to be restarted.
type RestartManager struct {
	Client                 k8sclient.Client
	Timers                 map[string]*time.Timer
	WaitTime               time.Duration
	ConfigMapManager       *configmap.Manager
	StaggerRestartsService *staggerrestarts.StaggerRestartsService
}
    45  
    46  func New(client k8sclient.Client, waitTime, timeout time.Duration) *RestartManager {
    47  	r := &RestartManager{
    48  		Client:                 client,
    49  		Timers:                 map[string]*time.Timer{},
    50  		WaitTime:               waitTime,
    51  		ConfigMapManager:       configmap.NewManager(client),
    52  		StaggerRestartsService: staggerrestarts.New(client, timeout),
    53  	}
    54  
    55  	return r
    56  }
    57  
// ForAdminCertUpdate records a restart request for instance in the
// operator-config config map with reason ADMINCERT.
func (r *RestartManager) ForAdminCertUpdate(instance v1.Object) error {
	return r.updateConfigFor(instance, ADMINCERT)
}
    61  
    62  func (r *RestartManager) ForCertUpdate(certType common.SecretType, instance v1.Object) error {
    63  	var err error
    64  	switch certType {
    65  	case common.TLS:
    66  		err = r.ForTLSReenroll(instance)
    67  	case common.ECERT:
    68  		err = r.ForEcertReenroll(instance)
    69  	}
    70  
    71  	if err != nil {
    72  		return err
    73  	}
    74  
    75  	return nil
    76  }
    77  
// ForEcertReenroll records a restart request for instance in the
// operator-config config map with reason ECERTUPDATE.
func (r *RestartManager) ForEcertReenroll(instance v1.Object) error {
	return r.updateConfigFor(instance, ECERTUPDATE)
}
    81  
// ForTLSReenroll records a restart request for instance in the
// operator-config config map with reason TLSUPDATE.
func (r *RestartManager) ForTLSReenroll(instance v1.Object) error {
	return r.updateConfigFor(instance, TLSUPDATE)
}
    85  
// ForConfigOverride records a restart request for instance in the
// operator-config config map with reason CONFIGOVERRIDE.
func (r *RestartManager) ForConfigOverride(instance v1.Object) error {
	return r.updateConfigFor(instance, CONFIGOVERRIDE)
}
    89  
// ForMigration records a restart request for instance in the operator-config
// config map with reason MIGRATION.
func (r *RestartManager) ForMigration(instance v1.Object) error {
	return r.updateConfigFor(instance, MIGRATION)
}
    93  
// ForNodeOU records a restart request for instance in the operator-config
// config map with reason NODEOU.
func (r *RestartManager) ForNodeOU(instance v1.Object) error {
	return r.updateConfigFor(instance, NODEOU)
}
    97  
// ForConfigMapUpdate records a restart request for instance in the
// operator-config config map with reason CONFIGMAPUPDATE.
func (r *RestartManager) ForConfigMapUpdate(instance v1.Object) error {
	return r.updateConfigFor(instance, CONFIGMAPUPDATE)
}
   101  
// ForRestartAction records a restart request for instance in the
// operator-config config map with reason RESTARTACTION.
func (r *RestartManager) ForRestartAction(instance v1.Object) error {
	return r.updateConfigFor(instance, RESTARTACTION)
}
   105  
   106  // Updates the operator-config for the given reason by setting the request
   107  // status to 'pending' and request timestamp to the current time:
   108  //
   109  // instances[instance_name].Requests[reason].Status = "pending"
   110  func (r *RestartManager) updateConfigFor(instance v1.Object, reason Reason) error {
   111  	cfg, err := r.GetConfig(instance)
   112  	if err != nil {
   113  		return err
   114  	}
   115  
   116  	if cfg.Instances == nil {
   117  		cfg.Instances = map[string]*Restart{}
   118  	}
   119  	_, ok := cfg.Instances[instance.GetName()]
   120  	if !ok {
   121  		cfg.Instances[instance.GetName()] = &Restart{}
   122  	}
   123  
   124  	restart := cfg.Instances[instance.GetName()]
   125  	updateRestartRequest(restart, reason)
   126  
   127  	log.Info(fmt.Sprintf("Updating operator-config map, %s restart requested due to %s", instance.GetName(), reason))
   128  	err = r.UpdateConfigMap(cfg, instance)
   129  	if err != nil {
   130  		return err
   131  	}
   132  
   133  	return nil
   134  }
   135  
   136  func updateRestartRequest(restart *Restart, reason Reason) {
   137  	if restart.Requests == nil {
   138  		restart.Requests = map[Reason]*Request{}
   139  	}
   140  
   141  	if restart.Requests[reason] == nil {
   142  		restart.Requests[reason] = &Request{}
   143  	}
   144  
   145  	// Set request time
   146  	req := restart.Requests[reason]
   147  	if req.Status != Pending {
   148  		req.Status = Pending
   149  		req.RequestTimestamp = time.Now().UTC().Format(time.RFC3339)
   150  	}
   151  }
   152  
// Instance is the view of a custom resource that the restart manager works
// with: the standard object metadata accessors plus the MSP ID.
type Instance interface {
	v1.Object
	GetMSPID() string
}
   157  
   158  // TriggerIfNeeded checks operator-config for any pending restarts, sets a timer to restart
   159  // the deployment if required, and restarts the deployment.
   160  func (r *RestartManager) TriggerIfNeeded(instance Instance) error {
   161  	var trigger bool
   162  
   163  	cfg, err := r.GetConfig(instance)
   164  	if err != nil {
   165  		return err
   166  	}
   167  
   168  	restart := cfg.Instances[instance.GetName()]
   169  	if restart == nil || restart.Requests == nil {
   170  		// Do nothing if restart doesn't have any pending requests
   171  		return nil
   172  	}
   173  
   174  	reasonList := []string{}
   175  	for reason, req := range restart.Requests {
   176  		if req != nil {
   177  			if req.Status == Pending {
   178  				reasonList = append(reasonList, string(reason))
   179  				if r.triggerRestart(req) {
   180  					trigger = true
   181  				}
   182  			}
   183  
   184  		}
   185  	}
   186  	reasonString := strings.Join(reasonList, ",")
   187  
   188  	if trigger {
   189  		err = r.RestartDeployment(instance, reasonString)
   190  		if err != nil {
   191  			return err
   192  		}
   193  	} else if r.pendingRequests(restart) {
   194  		err = r.SetTimer(instance, reasonString)
   195  		if err != nil {
   196  			return errors.Wrap(err, "failed to set timer to restart deployment")
   197  		}
   198  	}
   199  
   200  	return nil
   201  }
   202  
   203  func (r *RestartManager) triggerRestart(req *Request) bool {
   204  	if req != nil {
   205  		if req.Status == Pending {
   206  			if req.LastActionTimestamp == "" { // no previous restart has occurred
   207  				return true
   208  			}
   209  
   210  			lastRestart, err := time.Parse(time.RFC3339, req.LastActionTimestamp)
   211  			if err != nil {
   212  				return true
   213  			}
   214  
   215  			requestedRestart, err := time.Parse(time.RFC3339, req.RequestTimestamp)
   216  			if err != nil {
   217  				return true
   218  			}
   219  
   220  			if requestedRestart.Sub(lastRestart) >= r.WaitTime {
   221  				return true
   222  			}
   223  		}
   224  	}
   225  
   226  	return false
   227  }
   228  
   229  func (r *RestartManager) pendingRequests(restart *Restart) bool {
   230  	for _, req := range restart.Requests {
   231  		if req.Status == Pending {
   232  			return true
   233  		}
   234  	}
   235  	return false
   236  }
   237  
   238  func (r *RestartManager) SetTimer(instance Instance, reason string) error {
   239  	cfg, err := r.GetConfig(instance)
   240  	if err != nil {
   241  		return err
   242  	}
   243  
   244  	restart := cfg.Instances[instance.GetName()]
   245  
   246  	oldestRequestTime := time.Now().UTC()
   247  	lastActionTime := ""
   248  	// Want to set timer duration based on oldest pending request
   249  	for _, req := range restart.Requests {
   250  		if req != nil {
   251  			requestTime, err := time.Parse(time.RFC3339, req.RequestTimestamp)
   252  			if err == nil {
   253  				if requestTime.Before(oldestRequestTime) {
   254  					oldestRequestTime = requestTime
   255  					lastActionTime = req.LastActionTimestamp
   256  				}
   257  			}
   258  		}
   259  	}
   260  
   261  	// Set timer if not already running
   262  	if r.Timers[instance.GetName()] == nil {
   263  		dur := r.getTimerDuration(lastActionTime, oldestRequestTime)
   264  		log.Info(fmt.Sprintf("Setting timer to restart %s in %f minutes", instance.GetName(), dur.Minutes()))
   265  
   266  		r.Timers[instance.GetName()] = time.AfterFunc(dur, func() {
   267  			err := r.RestartDeployment(instance, reason)
   268  			if err != nil {
   269  				log.Error(err, fmt.Sprintf("failed to restart deployment for %s", instance.GetName()))
   270  			}
   271  		})
   272  	} else {
   273  		log.Info(fmt.Sprintf("Timer already set to restart %s shortly", instance.GetName()))
   274  	}
   275  
   276  	return nil
   277  }
   278  
   279  // If lastRestartTime was less than 10 min (or value of WaitTime) ago, calculate how much
   280  // time remains before WaitTime has passed to trigger next restart
   281  func (r *RestartManager) getTimerDuration(actionTime string, requestTime time.Time) time.Duration {
   282  	lastRestartTime, err := time.Parse(time.RFC3339, actionTime)
   283  	if err != nil {
   284  		// Default to WaitTime
   285  		return r.WaitTime
   286  	}
   287  	timePassed := requestTime.Sub(lastRestartTime)
   288  	return r.WaitTime - timePassed
   289  }
   290  
   291  // RestartDeployment adds the instance to the queue to stagger restarts
   292  func (r *RestartManager) RestartDeployment(instance Instance, reason string) error {
   293  	log.Info(fmt.Sprintf("Queuing instance %s for restart", instance.GetName()))
   294  
   295  	err := r.ClearRestartConfigForInstance(instance)
   296  	if err != nil {
   297  		return errors.Wrap(err, "failed to clear restart config")
   298  	}
   299  
   300  	err = r.StaggerRestartsService.Restart(instance, reason)
   301  	if err != nil {
   302  		return errors.Wrap(err, "failed to add restart request to queue")
   303  	}
   304  
   305  	return nil
   306  }
   307  
   308  func (r *RestartManager) ClearRestartConfigForInstance(instance v1.Object) error {
   309  	cfg, err := r.GetConfig(instance)
   310  	if err != nil {
   311  		return err
   312  	}
   313  
   314  	if cfg.Instances == nil || cfg.Instances[instance.GetName()] == nil {
   315  		return nil
   316  	}
   317  
   318  	for _, req := range cfg.Instances[instance.GetName()].Requests {
   319  		if req != nil && req.Status == Pending {
   320  			clearRestart(req)
   321  		}
   322  	}
   323  
   324  	// Stop timer if previously set
   325  	if r.Timers[instance.GetName()] != nil {
   326  		r.Timers[instance.GetName()].Stop()
   327  		r.Timers[instance.GetName()] = nil
   328  	}
   329  
   330  	err = r.UpdateConfigMap(cfg, instance)
   331  	if err != nil {
   332  		return err
   333  	}
   334  
   335  	return nil
   336  }
   337  
// clearRestart marks req as handled: the current UTC time is recorded as its
// last action timestamp, the request timestamp is emptied, and the status is
// set to Complete.
func clearRestart(req *Request) {
	req.LastActionTimestamp = time.Now().UTC().Format(time.RFC3339)
	req.RequestTimestamp = ""
	req.Status = Complete
}
   343  
   344  func (r *RestartManager) GetConfig(instance v1.Object) (*Config, error) {
   345  	cmName := "operator-config"
   346  
   347  	cfg := &Config{}
   348  	err := r.ConfigMapManager.GetRestartConfigFrom(cmName, instance.GetNamespace(), cfg)
   349  	if err != nil {
   350  		return nil, err
   351  	}
   352  
   353  	return cfg, nil
   354  }
   355  
   356  func (r *RestartManager) UpdateConfigMap(cfg *Config, instance v1.Object) error {
   357  	cmName := "operator-config"
   358  
   359  	return r.ConfigMapManager.UpdateConfig(cmName, instance.GetNamespace(), cfg)
   360  }