github.com/verrazzano/verrazzano@v1.7.0/tools/psr/backend/workers/weblogic/scale/scale.go (about) 1 // Copyright (c) 2022, Oracle and/or its affiliates. 2 // Licensed under the Universal Permissive License v 1.0 as shown at https://oss.oracle.com/licenses/upl. 3 4 package scale 5 6 import ( 7 "fmt" 8 "github.com/verrazzano/verrazzano/tools/psr/backend/pkg/k8sclient" 9 "strconv" 10 "sync/atomic" 11 "time" 12 13 "github.com/prometheus/client_golang/prometheus" 14 "github.com/verrazzano/verrazzano/pkg/log/vzlog" 15 "github.com/verrazzano/verrazzano/tools/psr/backend/config" 16 "github.com/verrazzano/verrazzano/tools/psr/backend/metrics" 17 "github.com/verrazzano/verrazzano/tools/psr/backend/osenv" 18 "github.com/verrazzano/verrazzano/tools/psr/backend/pkg/weblogic" 19 "github.com/verrazzano/verrazzano/tools/psr/backend/spi" 20 "k8s.io/client-go/dynamic" 21 ) 22 23 const ( 24 // DomainUID specifies the name of the domain in the local cluster 25 // By default, the DomainUID is not specified 26 DomainUID = "DOMAIN_UID" 27 28 // DomainNamespace specifies the namespace of the service in the local cluster 29 // By default, the DomainNamespace is not specified 30 DomainNamespace = "DOMAIN_NAMESPACE" 31 32 // MinReplicaCount specifies the minimum replicas to scale on the domain 33 // By default, MinReplicaCount is set 2 34 MinReplicaCount = "MIN_REPLICA_COUNT" 35 36 // MaxReplicaCount specifies the maximum replicas to scale on the domain 37 // By default, MaxReplicaCount is set to 4 38 MaxReplicaCount = "MAX_REPLICA_COUNT" 39 40 metricsPrefix = "weblogic_scaling" 41 ) 42 43 var funcNewPsrClient = k8sclient.NewPsrClient 44 45 //var funcNewDynClient = k8sclient.NewDynamicClient 46 47 type worker struct { 48 metricDescList []prometheus.Desc 49 *workerMetrics 50 psrClient k8sclient.PsrClient 51 //dynClient k8sclient.DynamicClient 52 *state 53 log vzlog.VerrazzanoLogger 54 } 55 56 type state struct { 57 startScaleTime int64 58 directionOut bool 59 } 60 61 var _ spi.Worker = worker{} 62 63 // workerMetrics holds the metrics produced by the worker. Metrics must be thread safe. 64 type workerMetrics struct { 65 scaleUpDomainCountTotal metrics.MetricItem 66 scaleDownDomainCountTotal metrics.MetricItem 67 scaleUpSeconds metrics.MetricItem 68 scaleDownSeconds metrics.MetricItem 69 } 70 71 func NewScaleWorker() (spi.Worker, error) { 72 c, err := funcNewPsrClient() 73 if err != nil { 74 return nil, err 75 } 76 //d, err := funcNewDynClient() 77 if err != nil { 78 return nil, err 79 } 80 w := worker{ 81 psrClient: c, 82 log: vzlog.DefaultLogger(), 83 state: &state{}, 84 workerMetrics: &workerMetrics{ 85 scaleUpDomainCountTotal: metrics.MetricItem{ 86 Name: "scale_up_domain_count_total", 87 Help: "The total number of successful scale up domain requests", 88 Type: prometheus.CounterValue, 89 }, 90 scaleDownDomainCountTotal: metrics.MetricItem{ 91 Name: "scale_down_domain_count_total", 92 Help: "The total number of failed scale down domain requests", 93 Type: prometheus.CounterValue, 94 }, 95 scaleUpSeconds: metrics.MetricItem{ 96 Name: "scale_up_seconds", 97 Help: "The total number of seconds elapsed to scale up the domain", 98 Type: prometheus.GaugeValue, 99 }, 100 scaleDownSeconds: metrics.MetricItem{ 101 Name: "scale_down_seconds", 102 Help: "The total number of seconds elapsed to scale down the domain", 103 Type: prometheus.GaugeValue, 104 }, 105 }} 106 107 w.metricDescList = []prometheus.Desc{ 108 *w.scaleUpDomainCountTotal.BuildMetricDesc(w.GetWorkerDesc().MetricsPrefix), 109 *w.scaleDownDomainCountTotal.BuildMetricDesc(w.GetWorkerDesc().MetricsPrefix), 110 *w.scaleUpSeconds.BuildMetricDesc(w.GetWorkerDesc().MetricsPrefix), 111 *w.scaleDownSeconds.BuildMetricDesc(w.GetWorkerDesc().MetricsPrefix), 112 } 113 114 if err := config.PsrEnv.LoadFromEnv(w.GetEnvDescList()); err != nil { 115 return w, err 116 } 117 118 metricsLabels := map[string]string{ 119 config.PsrWorkerTypeMetricsName: config.PsrEnv.GetEnv(config.PsrWorkerType), 120 } 121 122 w.metricDescList = metrics.BuildMetricDescList([]*metrics.MetricItem{ 123 &w.scaleUpDomainCountTotal, 124 &w.scaleDownDomainCountTotal, 125 &w.scaleUpSeconds, 126 &w.scaleDownSeconds, 127 }, metricsLabels, w.GetWorkerDesc().MetricsPrefix) 128 129 return w, nil 130 } 131 132 // GetWorkerDesc returns the WorkerDesc for the worker 133 func (w worker) GetWorkerDesc() spi.WorkerDesc { 134 return spi.WorkerDesc{ 135 WorkerType: config.WorkerTypeWlsScale, 136 Description: "The scale domain worker scales up and scales down the domain", 137 MetricsPrefix: metricsPrefix, 138 } 139 } 140 141 func (w worker) GetEnvDescList() []osenv.EnvVarDesc { 142 return []osenv.EnvVarDesc{ 143 {Key: DomainUID, DefaultVal: "", Required: true}, 144 {Key: DomainNamespace, DefaultVal: "", Required: true}, 145 {Key: MinReplicaCount, DefaultVal: "2", Required: true}, 146 {Key: MaxReplicaCount, DefaultVal: "4", Required: true}, 147 } 148 } 149 150 func (w worker) GetMetricDescList() []prometheus.Desc { 151 return w.metricDescList 152 } 153 154 func (w worker) GetMetricList() []prometheus.Metric { 155 return []prometheus.Metric{ 156 w.scaleUpDomainCountTotal.BuildMetric(), 157 w.scaleDownDomainCountTotal.BuildMetric(), 158 w.scaleUpSeconds.BuildMetric(), 159 w.scaleDownSeconds.BuildMetric(), 160 } 161 } 162 163 func (w worker) WantLoopInfoLogged() bool { 164 return false 165 } 166 167 func (w worker) PreconditionsMet() (bool, error) { 168 return true, nil 169 } 170 171 func (w worker) DoWork(conf config.CommonConfig, log vzlog.VerrazzanoLogger) error { 172 var replicas int64 173 max, err := strconv.ParseInt(config.PsrEnv.GetEnv(MaxReplicaCount), 10, 64) 174 if err != nil { 175 return fmt.Errorf("MaxReplicaCount can not be parsed to an integer: %v", err) 176 } 177 min, err := strconv.ParseInt(config.PsrEnv.GetEnv(MinReplicaCount), 10, 64) 178 if err != nil { 179 return fmt.Errorf("MinReplicaCount can not be parsed to an integer: %v", err) 180 } 181 domainNamespace := config.PsrEnv.GetEnv(DomainNamespace) 182 domainUID := config.PsrEnv.GetEnv(DomainUID) 183 client := w.psrClient.DynClient 184 185 // get current replicas at /spec/replicas 186 currentReplicas, err := weblogic.GetCurrentReplicas(client, domainNamespace, domainUID) 187 if err != nil { 188 return fmt.Errorf("failed to get current replicas: %v", err) 189 } 190 191 // set replicas to scale based on current replicas 192 if currentReplicas > min { 193 replicas = min 194 w.state.directionOut = false 195 } else { 196 replicas = max 197 w.state.directionOut = true 198 } 199 w.state.startScaleTime = time.Now().UnixNano() 200 err = weblogic.PatchReplicas(client, domainNamespace, domainUID, replicas) 201 if err != nil { 202 return fmt.Errorf("failed to patch the replicas: %v", err) 203 } 204 err = w.waitForReadyReplicas(client, domainNamespace, domainUID, replicas) 205 if err != nil { 206 return fmt.Errorf("failed to get the ready replicas: %v", err) 207 } 208 elapsedSecs := time.Now().UnixNano() - w.state.startScaleTime 209 if w.state.directionOut { 210 atomic.StoreInt64(&w.workerMetrics.scaleUpSeconds.Val, elapsedSecs) 211 atomic.AddInt64(&w.workerMetrics.scaleUpDomainCountTotal.Val, 1) 212 } else { 213 atomic.StoreInt64(&w.workerMetrics.scaleDownSeconds.Val, elapsedSecs) 214 atomic.AddInt64(&w.workerMetrics.scaleDownDomainCountTotal.Val, 1) 215 } 216 217 return nil 218 } 219 220 func (w worker) waitForReadyReplicas(client dynamic.Interface, namespace string, name string, readyReplicas int64) error { 221 for { 222 rr, err := weblogic.GetReadyReplicas(client, namespace, name) 223 if err != nil { 224 return err 225 } 226 if rr == readyReplicas { 227 break 228 } 229 w.log.Progressf("Waiting for ReadyReplicas to be %v", readyReplicas) 230 time.Sleep(1 * time.Second) 231 } 232 return nil 233 }