github.com/kiali/kiali@v1.84.0/business/health.go (about) 1 package business 2 3 import ( 4 "context" 5 "fmt" 6 "time" 7 8 "github.com/prometheus/common/model" 9 "k8s.io/apimachinery/pkg/api/errors" 10 11 "github.com/kiali/kiali/kubernetes" 12 "github.com/kiali/kiali/models" 13 "github.com/kiali/kiali/observability" 14 "github.com/kiali/kiali/prometheus" 15 ) 16 17 // HealthService deals with fetching health from various sources and convert to kiali model 18 type HealthService struct { 19 prom prometheus.ClientInterface 20 businessLayer *Layer 21 userClients map[string]kubernetes.ClientInterface 22 } 23 24 type NamespaceHealthCriteria struct { 25 IncludeMetrics bool 26 Namespace string 27 Cluster string 28 QueryTime time.Time 29 RateInterval string 30 } 31 32 // Annotation Filter for Health 33 var HealthAnnotation = []models.AnnotationKey{models.RateHealthAnnotation} 34 35 // GetServiceHealth returns a service health (service request error rate) 36 func (in *HealthService) GetServiceHealth(ctx context.Context, namespace, cluster, service, rateInterval string, queryTime time.Time, svc *models.Service) (models.ServiceHealth, error) { 37 var end observability.EndFunc 38 _, end = observability.StartSpan(ctx, "GetServiceHealth", 39 observability.Attribute("package", "business"), 40 observability.Attribute("namespace", namespace), 41 observability.Attribute("service", service), 42 observability.Attribute("rateInterval", rateInterval), 43 observability.Attribute("queryTime", queryTime), 44 ) 45 defer end() 46 47 rqHealth, err := in.getServiceRequestsHealth(namespace, cluster, service, rateInterval, queryTime, svc) 48 return models.ServiceHealth{Requests: rqHealth}, err 49 } 50 51 // GetAppHealth returns an app health from just Namespace and app name (thus, it fetches data from K8S and Prometheus) 52 func (in *HealthService) GetAppHealth(ctx context.Context, namespace, cluster, app, rateInterval string, queryTime time.Time, appD *appDetails) (models.AppHealth, error) { 53 var end observability.EndFunc 54 _, end = observability.StartSpan(ctx, "GetAppHealth", 55 observability.Attribute("package", "business"), 56 observability.Attribute("namespace", namespace), 57 observability.Attribute("cluster", cluster), 58 observability.Attribute("app", app), 59 observability.Attribute("rateInterval", rateInterval), 60 observability.Attribute("queryTime", queryTime), 61 ) 62 defer end() 63 64 return in.getAppHealth(namespace, cluster, app, rateInterval, queryTime, appD.Workloads) 65 } 66 67 func (in *HealthService) getAppHealth(namespace, cluster, app, rateInterval string, queryTime time.Time, ws models.Workloads) (models.AppHealth, error) { 68 health := models.EmptyAppHealth() 69 70 // Perf: do not bother fetching request rate if there are no workloads or no workload has sidecar 71 hasSidecar := false 72 for _, w := range ws { 73 if w.IstioSidecar || w.IsGateway() { 74 hasSidecar = true 75 break 76 } 77 } 78 79 // Fetch services requests rates 80 var errRate error 81 if hasSidecar { 82 rate, err := in.getAppRequestsHealth(namespace, cluster, app, rateInterval, queryTime) 83 health.Requests = rate 84 errRate = err 85 } 86 87 // Deployment status 88 health.WorkloadStatuses = ws.CastWorkloadStatuses() 89 90 return health, errRate 91 } 92 93 // GetWorkloadHealth returns a workload health from just Namespace and workload (thus, it fetches data from K8S and Prometheus) 94 func (in *HealthService) GetWorkloadHealth(ctx context.Context, namespace, cluster, workload, rateInterval string, queryTime time.Time, w *models.Workload) (models.WorkloadHealth, error) { 95 var end observability.EndFunc 96 _, end = observability.StartSpan(ctx, "GetWorkloadHealth", 97 observability.Attribute("package", "business"), 98 observability.Attribute("namespace", namespace), 99 observability.Attribute("workload", workload), 100 observability.Attribute("rateInterval", rateInterval), 101 observability.Attribute("queryTime", queryTime), 102 ) 103 defer end() 104 105 // Perf: do not bother fetching request rate if workload has no sidecar 106 if !w.IstioSidecar && !w.IsGateway() { 107 return models.WorkloadHealth{ 108 WorkloadStatus: w.CastWorkloadStatus(), 109 Requests: models.NewEmptyRequestHealth(), 110 }, nil 111 } 112 113 // Add Telemetry info 114 rate, err := in.getWorkloadRequestsHealth(namespace, cluster, workload, rateInterval, queryTime, w) 115 return models.WorkloadHealth{ 116 WorkloadStatus: w.CastWorkloadStatus(), 117 Requests: rate, 118 }, err 119 } 120 121 // GetNamespaceAppHealth returns a health for all apps in given Namespace (thus, it fetches data from K8S and Prometheus) 122 func (in *HealthService) GetNamespaceAppHealth(ctx context.Context, criteria NamespaceHealthCriteria) (models.NamespaceAppHealth, error) { 123 var end observability.EndFunc 124 ctx, end = observability.StartSpan(ctx, "GetNamespaceAppHealth", 125 observability.Attribute("package", "business"), 126 observability.Attribute("cluster", criteria.Cluster), 127 observability.Attribute("namespace", criteria.Namespace), 128 observability.Attribute("rateInterval", criteria.RateInterval), 129 observability.Attribute("queryTime", criteria.QueryTime), 130 ) 131 defer end() 132 133 cluster := criteria.Cluster 134 135 if _, ok := in.userClients[cluster]; !ok { 136 return nil, fmt.Errorf("Cluster [%s] is not found or is not accessible for Kiali", cluster) 137 } 138 139 appEntities, err := in.businessLayer.App.fetchNamespaceApps(ctx, criteria.Namespace, cluster, "") 140 if err != nil { 141 return nil, err 142 } 143 144 return in.getNamespaceAppHealth(appEntities, criteria) 145 } 146 147 func (in *HealthService) getNamespaceAppHealth(appEntities namespaceApps, criteria NamespaceHealthCriteria) (models.NamespaceAppHealth, error) { 148 namespace := criteria.Namespace 149 queryTime := criteria.QueryTime 150 rateInterval := criteria.RateInterval 151 cluster := criteria.Cluster 152 allHealth := make(models.NamespaceAppHealth) 153 154 // Perf: do not bother fetching request rate if no workloads or no workload has sidecar 155 sidecarPresent := false 156 var appSidecars = make(map[string]bool) 157 158 // Prepare all data 159 for app, entities := range appEntities { 160 if app != "" { 161 h := models.EmptyAppHealth() 162 allHealth[app] = &h 163 if entities != nil { 164 h.WorkloadStatuses = entities.Workloads.CastWorkloadStatuses() 165 for _, w := range entities.Workloads { 166 if w.IstioSidecar || w.IsGateway() { 167 sidecarPresent = true 168 appSidecars[app] = true 169 break 170 } 171 } 172 } 173 } 174 } 175 176 if sidecarPresent && criteria.IncludeMetrics { 177 // Fetch services requests rates 178 rates, err := in.prom.GetAllRequestRates(namespace, cluster, rateInterval, queryTime) 179 if err != nil { 180 return allHealth, errors.NewServiceUnavailable(err.Error()) 181 } 182 // Fill with collected request rates 183 fillAppRequestRates(allHealth, rates, appSidecars) 184 } 185 186 return allHealth, nil 187 } 188 189 // GetNamespaceServiceHealth returns a health for all services in given Namespace (thus, it fetches data from K8S and Prometheus) 190 func (in *HealthService) GetNamespaceServiceHealth(ctx context.Context, criteria NamespaceHealthCriteria) (models.NamespaceServiceHealth, error) { 191 var end observability.EndFunc 192 ctx, end = observability.StartSpan(ctx, "GetNamespaceServiceHealth", 193 observability.Attribute("package", "business"), 194 observability.Attribute("namespace", criteria.Namespace), 195 observability.Attribute("cluster", criteria.Cluster), 196 observability.Attribute("rateInterval", criteria.RateInterval), 197 observability.Attribute("queryTime", criteria.QueryTime), 198 ) 199 defer end() 200 201 namespace := criteria.Namespace 202 cluster := criteria.Cluster 203 204 if _, ok := in.userClients[cluster]; !ok { 205 return nil, fmt.Errorf("Cluster [%s] is not found or is not accessible for Kiali", cluster) 206 } 207 208 if _, err := in.businessLayer.Namespace.GetClusterNamespace(ctx, namespace, cluster); err != nil { 209 return nil, err 210 } 211 212 var services *models.ServiceList 213 var err error 214 215 svcCriteria := ServiceCriteria{ 216 Cluster: cluster, 217 Namespace: namespace, 218 IncludeHealth: false, 219 IncludeIstioResources: false, 220 IncludeOnlyDefinitions: true, 221 } 222 services, err = in.businessLayer.Svc.GetServiceList(ctx, svcCriteria) 223 if err != nil { 224 return nil, err 225 } 226 return in.getNamespaceServiceHealth(services, criteria), nil 227 } 228 229 func (in *HealthService) getNamespaceServiceHealth(services *models.ServiceList, criteria NamespaceHealthCriteria) models.NamespaceServiceHealth { 230 namespace := criteria.Namespace 231 queryTime := criteria.QueryTime 232 rateInterval := criteria.RateInterval 233 cluster := criteria.Cluster 234 235 allHealth := make(models.NamespaceServiceHealth) 236 237 // Prepare all data (note that it's important to provide data for all services, even those which may not have any health, for overview cards) 238 if services != nil { 239 for _, service := range services.Services { 240 h := models.EmptyServiceHealth() 241 h.Requests.HealthAnnotations = service.HealthAnnotations 242 allHealth[service.Name] = &h 243 } 244 } 245 246 if criteria.IncludeMetrics { 247 // Fetch services requests rates 248 rates, _ := in.prom.GetNamespaceServicesRequestRates(namespace, cluster, rateInterval, queryTime) 249 // Fill with collected request rates 250 lblDestSvc := model.LabelName("destination_service_name") 251 for _, sample := range rates { 252 service := string(sample.Metric[lblDestSvc]) 253 if health, ok := allHealth[service]; ok { 254 health.Requests.AggregateInbound(sample) 255 } 256 } 257 for _, health := range allHealth { 258 health.Requests.CombineReporters() 259 } 260 } 261 return allHealth 262 } 263 264 // GetNamespaceWorkloadHealth returns a health for all workloads in given Namespace (thus, it fetches data from K8S and Prometheus) 265 func (in *HealthService) GetNamespaceWorkloadHealth(ctx context.Context, criteria NamespaceHealthCriteria) (models.NamespaceWorkloadHealth, error) { 266 namespace := criteria.Namespace 267 rateInterval := criteria.RateInterval 268 queryTime := criteria.QueryTime 269 cluster := criteria.Cluster 270 var end observability.EndFunc 271 ctx, end = observability.StartSpan(ctx, "GetNamespaceWorkloadHealth", 272 observability.Attribute("package", "business"), 273 observability.Attribute("namespace", namespace), 274 observability.Attribute("cluster", cluster), 275 observability.Attribute("rateInterval", rateInterval), 276 observability.Attribute("queryTime", queryTime), 277 ) 278 defer end() 279 280 if _, ok := in.userClients[cluster]; !ok { 281 return nil, fmt.Errorf("Cluster [%s] is not found or is not accessible for Kiali", cluster) 282 } 283 284 if _, err := in.businessLayer.Namespace.GetClusterNamespace(ctx, namespace, cluster); err != nil { 285 return nil, err 286 } 287 288 wl, err := in.businessLayer.Workload.fetchWorkloadsFromCluster(ctx, cluster, namespace, "") 289 if err != nil { 290 return nil, err 291 } 292 293 return in.getNamespaceWorkloadHealth(wl, criteria) 294 } 295 296 func (in *HealthService) getNamespaceWorkloadHealth(ws models.Workloads, criteria NamespaceHealthCriteria) (models.NamespaceWorkloadHealth, error) { 297 // Perf: do not bother fetching request rate if no workloads or no workload has sidecar 298 hasSidecar := false 299 namespace := criteria.Namespace 300 rateInterval := criteria.RateInterval 301 queryTime := criteria.QueryTime 302 cluster := criteria.Cluster 303 var wlSidecars = make(map[string]bool) 304 305 allHealth := make(models.NamespaceWorkloadHealth) 306 for _, w := range ws { 307 allHealth[w.Name] = models.EmptyWorkloadHealth() 308 allHealth[w.Name].Requests.HealthAnnotations = models.GetHealthAnnotation(w.HealthAnnotations, HealthAnnotation) 309 allHealth[w.Name].WorkloadStatus = w.CastWorkloadStatus() 310 if w.IstioSidecar || w.IsGateway() { 311 hasSidecar = true 312 wlSidecars[w.Name] = true 313 } 314 } 315 316 if hasSidecar && criteria.IncludeMetrics { 317 // Fetch services requests rates 318 rates, err := in.prom.GetAllRequestRates(namespace, cluster, rateInterval, queryTime) 319 if err != nil { 320 return allHealth, errors.NewServiceUnavailable(err.Error()) 321 } 322 // Fill with collected request rates 323 fillWorkloadRequestRates(allHealth, rates, wlSidecars) 324 } 325 326 return allHealth, nil 327 } 328 329 // fillAppRequestRates aggregates requests rates from metrics fetched from Prometheus, and stores the result in the health map. 330 func fillAppRequestRates(allHealth models.NamespaceAppHealth, rates model.Vector, appSidecars map[string]bool) { 331 lblDest := model.LabelName("destination_canonical_service") 332 lblSrc := model.LabelName("source_canonical_service") 333 334 for _, sample := range rates { 335 name := string(sample.Metric[lblDest]) 336 // include requests only to apps which have a sidecar 337 if _, ok := appSidecars[name]; ok { 338 if health, ok := allHealth[name]; ok { 339 health.Requests.AggregateInbound(sample) 340 } 341 name = string(sample.Metric[lblSrc]) 342 if health, ok := allHealth[name]; ok { 343 health.Requests.AggregateOutbound(sample) 344 } 345 } 346 } 347 for _, health := range allHealth { 348 health.Requests.CombineReporters() 349 } 350 } 351 352 // fillWorkloadRequestRates aggregates requests rates from metrics fetched from Prometheus, and stores the result in the health map. 353 func fillWorkloadRequestRates(allHealth models.NamespaceWorkloadHealth, rates model.Vector, wlSidecars map[string]bool) { 354 lblDest := model.LabelName("destination_workload") 355 lblSrc := model.LabelName("source_workload") 356 for _, sample := range rates { 357 name := string(sample.Metric[lblDest]) 358 // include requests only to workloads which have a sidecar 359 if _, ok := wlSidecars[name]; ok { 360 if health, ok := allHealth[name]; ok { 361 health.Requests.AggregateInbound(sample) 362 } 363 name = string(sample.Metric[lblSrc]) 364 if health, ok := allHealth[name]; ok { 365 health.Requests.AggregateOutbound(sample) 366 } 367 } 368 } 369 for _, health := range allHealth { 370 health.Requests.CombineReporters() 371 } 372 } 373 374 func (in *HealthService) getServiceRequestsHealth(namespace, cluster, service, rateInterval string, queryTime time.Time, svc *models.Service) (models.RequestHealth, error) { 375 rqHealth := models.NewEmptyRequestHealth() 376 if svc.Type == "External" { 377 // ServiceEntry from Istio Registry 378 // Telemetry doesn't collect a namespace 379 namespace = "unknown" 380 } 381 inbound, err := in.prom.GetServiceRequestRates(namespace, cluster, service, rateInterval, queryTime) 382 if err != nil { 383 return rqHealth, errors.NewServiceUnavailable(err.Error()) 384 } 385 for _, sample := range inbound { 386 rqHealth.AggregateInbound(sample) 387 } 388 rqHealth.HealthAnnotations = svc.HealthAnnotations 389 rqHealth.CombineReporters() 390 return rqHealth, nil 391 } 392 393 func (in *HealthService) getAppRequestsHealth(namespace, cluster, app, rateInterval string, queryTime time.Time) (models.RequestHealth, error) { 394 rqHealth := models.NewEmptyRequestHealth() 395 396 inbound, outbound, err := in.prom.GetAppRequestRates(namespace, cluster, app, rateInterval, queryTime) 397 if err != nil { 398 return rqHealth, errors.NewServiceUnavailable(err.Error()) 399 } 400 for _, sample := range inbound { 401 rqHealth.AggregateInbound(sample) 402 } 403 for _, sample := range outbound { 404 rqHealth.AggregateOutbound(sample) 405 } 406 rqHealth.CombineReporters() 407 return rqHealth, nil 408 } 409 410 func (in *HealthService) getWorkloadRequestsHealth(namespace, cluster, workload, rateInterval string, queryTime time.Time, w *models.Workload) (models.RequestHealth, error) { 411 rqHealth := models.NewEmptyRequestHealth() 412 // @TODO include w.Cluster into query 413 inbound, outbound, err := in.prom.GetWorkloadRequestRates(namespace, cluster, workload, rateInterval, queryTime) 414 if err != nil { 415 return rqHealth, err 416 } 417 for _, sample := range inbound { 418 rqHealth.AggregateInbound(sample) 419 } 420 for _, sample := range outbound { 421 rqHealth.AggregateOutbound(sample) 422 } 423 if len(w.Pods) > 0 { 424 rqHealth.HealthAnnotations = models.GetHealthAnnotation(w.HealthAnnotations, HealthAnnotation) 425 } 426 rqHealth.CombineReporters() 427 return rqHealth, err 428 }