github.com/verrazzano/verrazzano@v1.7.0/tools/psr/backend/workers/weblogic/scale/scale.go (about)

     1  // Copyright (c) 2022, Oracle and/or its affiliates.
     2  // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl.
     3  
     4  package scale
     5  
     6  import (
     7  	"fmt"
     8  	"github.com/verrazzano/verrazzano/tools/psr/backend/pkg/k8sclient"
     9  	"strconv"
    10  	"sync/atomic"
    11  	"time"
    12  
    13  	"github.com/prometheus/client_golang/prometheus"
    14  	"github.com/verrazzano/verrazzano/pkg/log/vzlog"
    15  	"github.com/verrazzano/verrazzano/tools/psr/backend/config"
    16  	"github.com/verrazzano/verrazzano/tools/psr/backend/metrics"
    17  	"github.com/verrazzano/verrazzano/tools/psr/backend/osenv"
    18  	"github.com/verrazzano/verrazzano/tools/psr/backend/pkg/weblogic"
    19  	"github.com/verrazzano/verrazzano/tools/psr/backend/spi"
    20  	"k8s.io/client-go/dynamic"
    21  )
    22  
const (
	// DomainUID is the name of the environment variable that identifies the domain to scale.
	// GetEnvDescList declares it as required with an empty default value.
	DomainUID = "DOMAIN_UID"

	// DomainNamespace is the name of the environment variable that holds the namespace of the
	// domain to scale. GetEnvDescList declares it as required with an empty default value.
	DomainNamespace = "DOMAIN_NAMESPACE"

	// MinReplicaCount is the name of the environment variable that holds the replica count the
	// domain is scaled in to. GetEnvDescList declares a default value of 2.
	MinReplicaCount = "MIN_REPLICA_COUNT"

	// MaxReplicaCount is the name of the environment variable that holds the replica count the
	// domain is scaled out to. GetEnvDescList declares a default value of 4.
	MaxReplicaCount = "MAX_REPLICA_COUNT"

	// metricsPrefix is the value reported as MetricsPrefix in the worker description and used
	// when building the metric descriptors.
	metricsPrefix = "weblogic_scaling"
)
    42  
// funcNewPsrClient is the constructor NewScaleWorker calls to create the PSR client.
// NOTE(review): it is a package-level variable, presumably so tests can substitute a fake client — confirm against the tests.
var funcNewPsrClient = k8sclient.NewPsrClient
    44  
    45  //var funcNewDynClient = k8sclient.NewDynamicClient
    46  
// worker is the WebLogic domain scale worker.
// Its methods use value receivers, so the metrics and state are embedded as pointers:
// every copy of a worker then shares and updates the same underlying values.
type worker struct {
	// metricDescList holds the Prometheus descriptors returned by GetMetricDescList
	metricDescList []prometheus.Desc
	*workerMetrics
	// psrClient provides the dynamic client that DoWork uses to read and patch the domain
	psrClient k8sclient.PsrClient
	//dynClient k8sclient.DynamicClient
	*state
	// log is the logger used for progress messages while waiting for ready replicas
	log vzlog.VerrazzanoLogger
}
    55  
// state holds bookkeeping about the most recent scale operation started by DoWork.
//   - startScaleTime is the wall-clock time, in Unix nanoseconds, at which the operation started.
//   - directionOut is true when the operation targets the maximum replica count and false
//     when it targets the minimum replica count.
type state struct {
	startScaleTime int64
	directionOut   bool
}
    60  
// Compile-time check that worker satisfies the spi.Worker interface.
var _ spi.Worker = worker{}
    62  
// workerMetrics holds the metrics produced by the worker. Metrics must be thread safe.
// DoWork updates the Val of each item with atomic operations:
//   - scaleUpDomainCountTotal and scaleDownDomainCountTotal are counters incremented after a
//     completed scale out or scale in, respectively.
//   - scaleUpSeconds and scaleDownSeconds are gauges holding the elapsed time of the most
//     recent scale out or scale in, respectively.
type workerMetrics struct {
	scaleUpDomainCountTotal   metrics.MetricItem
	scaleDownDomainCountTotal metrics.MetricItem
	scaleUpSeconds            metrics.MetricItem
	scaleDownSeconds          metrics.MetricItem
}
    70  
    71  func NewScaleWorker() (spi.Worker, error) {
    72  	c, err := funcNewPsrClient()
    73  	if err != nil {
    74  		return nil, err
    75  	}
    76  	//d, err := funcNewDynClient()
    77  	if err != nil {
    78  		return nil, err
    79  	}
    80  	w := worker{
    81  		psrClient: c,
    82  		log:       vzlog.DefaultLogger(),
    83  		state:     &state{},
    84  		workerMetrics: &workerMetrics{
    85  			scaleUpDomainCountTotal: metrics.MetricItem{
    86  				Name: "scale_up_domain_count_total",
    87  				Help: "The total number of successful scale up domain requests",
    88  				Type: prometheus.CounterValue,
    89  			},
    90  			scaleDownDomainCountTotal: metrics.MetricItem{
    91  				Name: "scale_down_domain_count_total",
    92  				Help: "The total number of failed scale down domain requests",
    93  				Type: prometheus.CounterValue,
    94  			},
    95  			scaleUpSeconds: metrics.MetricItem{
    96  				Name: "scale_up_seconds",
    97  				Help: "The total number of seconds elapsed to scale up the domain",
    98  				Type: prometheus.GaugeValue,
    99  			},
   100  			scaleDownSeconds: metrics.MetricItem{
   101  				Name: "scale_down_seconds",
   102  				Help: "The total number of seconds elapsed to scale down the domain",
   103  				Type: prometheus.GaugeValue,
   104  			},
   105  		}}
   106  
   107  	w.metricDescList = []prometheus.Desc{
   108  		*w.scaleUpDomainCountTotal.BuildMetricDesc(w.GetWorkerDesc().MetricsPrefix),
   109  		*w.scaleDownDomainCountTotal.BuildMetricDesc(w.GetWorkerDesc().MetricsPrefix),
   110  		*w.scaleUpSeconds.BuildMetricDesc(w.GetWorkerDesc().MetricsPrefix),
   111  		*w.scaleDownSeconds.BuildMetricDesc(w.GetWorkerDesc().MetricsPrefix),
   112  	}
   113  
   114  	if err := config.PsrEnv.LoadFromEnv(w.GetEnvDescList()); err != nil {
   115  		return w, err
   116  	}
   117  
   118  	metricsLabels := map[string]string{
   119  		config.PsrWorkerTypeMetricsName: config.PsrEnv.GetEnv(config.PsrWorkerType),
   120  	}
   121  
   122  	w.metricDescList = metrics.BuildMetricDescList([]*metrics.MetricItem{
   123  		&w.scaleUpDomainCountTotal,
   124  		&w.scaleDownDomainCountTotal,
   125  		&w.scaleUpSeconds,
   126  		&w.scaleDownSeconds,
   127  	}, metricsLabels, w.GetWorkerDesc().MetricsPrefix)
   128  
   129  	return w, nil
   130  }
   131  
   132  // GetWorkerDesc returns the WorkerDesc for the worker
   133  func (w worker) GetWorkerDesc() spi.WorkerDesc {
   134  	return spi.WorkerDesc{
   135  		WorkerType:    config.WorkerTypeWlsScale,
   136  		Description:   "The scale domain worker scales up and scales down the domain",
   137  		MetricsPrefix: metricsPrefix,
   138  	}
   139  }
   140  
   141  func (w worker) GetEnvDescList() []osenv.EnvVarDesc {
   142  	return []osenv.EnvVarDesc{
   143  		{Key: DomainUID, DefaultVal: "", Required: true},
   144  		{Key: DomainNamespace, DefaultVal: "", Required: true},
   145  		{Key: MinReplicaCount, DefaultVal: "2", Required: true},
   146  		{Key: MaxReplicaCount, DefaultVal: "4", Required: true},
   147  	}
   148  }
   149  
   150  func (w worker) GetMetricDescList() []prometheus.Desc {
   151  	return w.metricDescList
   152  }
   153  
   154  func (w worker) GetMetricList() []prometheus.Metric {
   155  	return []prometheus.Metric{
   156  		w.scaleUpDomainCountTotal.BuildMetric(),
   157  		w.scaleDownDomainCountTotal.BuildMetric(),
   158  		w.scaleUpSeconds.BuildMetric(),
   159  		w.scaleDownSeconds.BuildMetric(),
   160  	}
   161  }
   162  
   163  func (w worker) WantLoopInfoLogged() bool {
   164  	return false
   165  }
   166  
   167  func (w worker) PreconditionsMet() (bool, error) {
   168  	return true, nil
   169  }
   170  
   171  func (w worker) DoWork(conf config.CommonConfig, log vzlog.VerrazzanoLogger) error {
   172  	var replicas int64
   173  	max, err := strconv.ParseInt(config.PsrEnv.GetEnv(MaxReplicaCount), 10, 64)
   174  	if err != nil {
   175  		return fmt.Errorf("MaxReplicaCount can not be parsed to an integer: %v", err)
   176  	}
   177  	min, err := strconv.ParseInt(config.PsrEnv.GetEnv(MinReplicaCount), 10, 64)
   178  	if err != nil {
   179  		return fmt.Errorf("MinReplicaCount can not be parsed to an integer: %v", err)
   180  	}
   181  	domainNamespace := config.PsrEnv.GetEnv(DomainNamespace)
   182  	domainUID := config.PsrEnv.GetEnv(DomainUID)
   183  	client := w.psrClient.DynClient
   184  
   185  	// get current replicas at /spec/replicas
   186  	currentReplicas, err := weblogic.GetCurrentReplicas(client, domainNamespace, domainUID)
   187  	if err != nil {
   188  		return fmt.Errorf("failed to get current replicas: %v", err)
   189  	}
   190  
   191  	// set replicas to scale based on current replicas
   192  	if currentReplicas > min {
   193  		replicas = min
   194  		w.state.directionOut = false
   195  	} else {
   196  		replicas = max
   197  		w.state.directionOut = true
   198  	}
   199  	w.state.startScaleTime = time.Now().UnixNano()
   200  	err = weblogic.PatchReplicas(client, domainNamespace, domainUID, replicas)
   201  	if err != nil {
   202  		return fmt.Errorf("failed to patch the replicas: %v", err)
   203  	}
   204  	err = w.waitForReadyReplicas(client, domainNamespace, domainUID, replicas)
   205  	if err != nil {
   206  		return fmt.Errorf("failed to get the ready replicas: %v", err)
   207  	}
   208  	elapsedSecs := time.Now().UnixNano() - w.state.startScaleTime
   209  	if w.state.directionOut {
   210  		atomic.StoreInt64(&w.workerMetrics.scaleUpSeconds.Val, elapsedSecs)
   211  		atomic.AddInt64(&w.workerMetrics.scaleUpDomainCountTotal.Val, 1)
   212  	} else {
   213  		atomic.StoreInt64(&w.workerMetrics.scaleDownSeconds.Val, elapsedSecs)
   214  		atomic.AddInt64(&w.workerMetrics.scaleDownDomainCountTotal.Val, 1)
   215  	}
   216  
   217  	return nil
   218  }
   219  
   220  func (w worker) waitForReadyReplicas(client dynamic.Interface, namespace string, name string, readyReplicas int64) error {
   221  	for {
   222  		rr, err := weblogic.GetReadyReplicas(client, namespace, name)
   223  		if err != nil {
   224  			return err
   225  		}
   226  		if rr == readyReplicas {
   227  			break
   228  		}
   229  		w.log.Progressf("Waiting for ReadyReplicas to be %v", readyReplicas)
   230  		time.Sleep(1 * time.Second)
   231  	}
   232  	return nil
   233  }