istio.io/istio@v0.0.0-20240520182934-d79c90f27776/pilot/pkg/status/distribution/state.go (about) 1 // Copyright Istio Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package distribution 16 17 import ( 18 "fmt" 19 "strings" 20 "sync" 21 "time" 22 23 "google.golang.org/protobuf/types/known/timestamppb" 24 v1 "k8s.io/api/core/v1" 25 metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" 26 "k8s.io/apimachinery/pkg/labels" 27 "k8s.io/client-go/dynamic" 28 "k8s.io/client-go/informers" 29 "k8s.io/client-go/kubernetes" 30 "k8s.io/client-go/rest" 31 "k8s.io/client-go/tools/cache" 32 "k8s.io/utils/clock" 33 34 "istio.io/api/meta/v1alpha1" 35 "istio.io/istio/pilot/pkg/features" 36 "istio.io/istio/pilot/pkg/model" 37 "istio.io/istio/pilot/pkg/status" 38 "istio.io/istio/pkg/config" 39 "istio.io/istio/pkg/log" 40 ) 41 42 var scope = log.RegisterScope("status", 43 "CRD distribution status debugging") 44 45 type Progress struct { 46 AckedInstances int 47 TotalInstances int 48 } 49 50 func (p *Progress) PlusEquals(p2 Progress) { 51 p.TotalInstances += p2.TotalInstances 52 p.AckedInstances += p2.AckedInstances 53 } 54 55 type Controller struct { 56 configStore model.ConfigStore 57 mu sync.RWMutex 58 CurrentState map[status.Resource]map[string]Progress 59 ObservationTime map[string]time.Time 60 UpdateInterval time.Duration 61 dynamicClient dynamic.Interface 62 clock clock.Clock 63 workers *status.Controller 64 StaleInterval time.Duration 65 cmInformer cache.SharedIndexInformer 66 cmHandle cache.ResourceEventHandlerRegistration 67 } 68 69 func NewController(restConfig *rest.Config, namespace string, cs model.ConfigStore, m *status.Manager) *Controller { 70 c := &Controller{ 71 CurrentState: make(map[status.Resource]map[string]Progress), 72 ObservationTime: make(map[string]time.Time), 73 UpdateInterval: 200 * time.Millisecond, 74 StaleInterval: time.Minute, 75 clock: clock.RealClock{}, 76 configStore: cs, 77 workers: m.CreateIstioStatusController(func(status *v1alpha1.IstioStatus, context any) *v1alpha1.IstioStatus { 78 if status == nil { 79 return nil 80 } 81 distributionState := context.(Progress) 82 if needsReconcile, desiredStatus := ReconcileStatuses(status, distributionState); needsReconcile { 83 return desiredStatus 84 } 85 return status 86 }), 87 } 88 89 // client-go defaults to 5 QPS, with 10 Boost, which is insufficient for updating status on all the config 90 // in the mesh. These values can be configured using environment variables for tuning (see pilot/pkg/features) 91 restConfig.QPS = float32(features.StatusQPS) 92 restConfig.Burst = features.StatusBurst 93 var err error 94 if c.dynamicClient, err = dynamic.NewForConfig(restConfig); err != nil { 95 scope.Fatalf("Could not connect to kubernetes: %s", err) 96 } 97 98 // configmap informer 99 i := informers.NewSharedInformerFactoryWithOptions(kubernetes.NewForConfigOrDie(restConfig), 1*time.Minute, 100 informers.WithNamespace(namespace), 101 informers.WithTweakListOptions(func(listOptions *metav1.ListOptions) { 102 listOptions.LabelSelector = labels.Set(map[string]string{labelKey: "true"}).AsSelector().String() 103 })). 104 Core().V1().ConfigMaps() 105 c.cmInformer = i.Informer() 106 c.cmHandle, _ = c.cmInformer.AddEventHandler(&DistroReportHandler{dc: c}) 107 108 return c 109 } 110 111 func (c *Controller) Start(stop <-chan struct{}) { 112 scope.Info("Starting status leader controller") 113 114 // this will list all existing configmaps, as well as updates, right? 115 go c.cmInformer.Run(stop) 116 117 // create Status Writer 118 t := c.clock.Tick(c.UpdateInterval) 119 for { 120 select { 121 case <-stop: 122 _ = c.cmInformer.RemoveEventHandler(c.cmHandle) 123 return 124 case <-t: 125 staleReporters := c.writeAllStatus() 126 if len(staleReporters) > 0 { 127 c.removeStaleReporters(staleReporters) 128 } 129 } 130 } 131 } 132 133 func (c *Controller) handleReport(d Report) { 134 defer c.mu.Unlock() 135 c.mu.Lock() 136 for resstr := range d.InProgressResources { 137 res := *status.ResourceFromString(resstr) 138 if _, ok := c.CurrentState[res]; !ok { 139 c.CurrentState[res] = make(map[string]Progress) 140 } 141 c.CurrentState[res][d.Reporter] = Progress{d.InProgressResources[resstr], d.DataPlaneCount} 142 } 143 c.ObservationTime[d.Reporter] = c.clock.Now() 144 } 145 146 func (c *Controller) writeAllStatus() (staleReporters []string) { 147 defer c.mu.RUnlock() 148 c.mu.RLock() 149 for config, fractions := range c.CurrentState { 150 if !strings.HasSuffix(config.Group, "istio.io") { 151 // don't try to write status for non-istio types 152 continue 153 } 154 var distributionState Progress 155 for reporter, w := range fractions { 156 // check for stale data here 157 if c.clock.Since(c.ObservationTime[reporter]) > c.StaleInterval { 158 scope.Warnf("Status reporter %s has not been heard from since %v, deleting report.", 159 reporter, c.ObservationTime[reporter]) 160 staleReporters = append(staleReporters, reporter) 161 } else { 162 distributionState.PlusEquals(w) 163 } 164 } 165 if distributionState.TotalInstances > 0 { // this is necessary when all reports are stale. 166 c.queueWriteStatus(config, distributionState) 167 } 168 } 169 return 170 } 171 172 func (c *Controller) removeStaleReporters(staleReporters []string) { 173 defer c.mu.Unlock() 174 c.mu.Lock() 175 for key, fractions := range c.CurrentState { 176 for _, staleReporter := range staleReporters { 177 delete(fractions, staleReporter) 178 } 179 c.CurrentState[key] = fractions 180 } 181 } 182 183 func (c *Controller) queueWriteStatus(config status.Resource, state Progress) { 184 c.workers.EnqueueStatusUpdateResource(state, config) 185 } 186 187 func (c *Controller) configDeleted(res config.Config) { 188 r := status.ResourceFromModelConfig(res) 189 c.workers.Delete(r) 190 } 191 192 func boolToConditionStatus(b bool) string { 193 if b { 194 return "True" 195 } 196 return "False" 197 } 198 199 func ReconcileStatuses(current *v1alpha1.IstioStatus, desired Progress) (bool, *v1alpha1.IstioStatus) { 200 needsReconcile := false 201 desiredCondition := v1alpha1.IstioCondition{ 202 Type: "Reconciled", 203 Status: boolToConditionStatus(desired.AckedInstances == desired.TotalInstances), 204 LastProbeTime: timestamppb.Now(), 205 LastTransitionTime: timestamppb.Now(), 206 Message: fmt.Sprintf("%d/%d proxies up to date.", desired.AckedInstances, desired.TotalInstances), 207 } 208 current = current.DeepCopy() 209 var currentCondition *v1alpha1.IstioCondition 210 conditionIndex := -1 211 for i, c := range current.Conditions { 212 if c.Type == "Reconciled" { 213 currentCondition = current.Conditions[i] 214 conditionIndex = i 215 break 216 } 217 } 218 if currentCondition == nil || 219 currentCondition.Message != desiredCondition.Message || 220 currentCondition.Status != desiredCondition.Status { 221 needsReconcile = true 222 } 223 if conditionIndex > -1 { 224 current.Conditions[conditionIndex] = &desiredCondition 225 } else { 226 current.Conditions = append(current.Conditions, &desiredCondition) 227 } 228 return needsReconcile, current 229 } 230 231 type DistroReportHandler struct { 232 dc *Controller 233 } 234 235 func (drh *DistroReportHandler) OnAdd(obj any, _ bool) { 236 drh.HandleNew(obj) 237 } 238 239 func (drh *DistroReportHandler) OnUpdate(oldObj, newObj any) { 240 drh.HandleNew(newObj) 241 } 242 243 func (drh *DistroReportHandler) HandleNew(obj any) { 244 cm, ok := obj.(*v1.ConfigMap) 245 if !ok { 246 scope.Warnf("expected configmap, but received %v, discarding", obj) 247 return 248 } 249 rptStr := cm.Data[dataField] 250 scope.Debugf("using report: %s", rptStr) 251 dr, err := ReportFromYaml([]byte(cm.Data[dataField])) 252 if err != nil { 253 scope.Warnf("received malformed distributionReport %s, discarding: %v", cm.Name, err) 254 return 255 } 256 drh.dc.handleReport(dr) 257 } 258 259 func (drh *DistroReportHandler) OnDelete(obj any) { 260 // TODO: what do we do here? will these ever be deleted? 261 }