github.com/kiali/kiali@v1.84.0/graph/telemetry/istio/appender/aggregate_node.go (about) 1 package appender 2 3 import ( 4 "fmt" 5 "time" 6 7 "github.com/prometheus/common/model" 8 9 "github.com/kiali/kiali/graph" 10 "github.com/kiali/kiali/graph/telemetry/istio/util" 11 "github.com/kiali/kiali/log" 12 "github.com/kiali/kiali/prometheus" 13 ) 14 15 const ( 16 AggregateNodeAppenderName = "aggregateNode" 17 ) 18 19 // AggregateNodeAppender is responsible for injecting aggregate nodes into the graph to gain 20 // visibility into traffic aggregations for a user-specfied metric attribute. 21 // Note: Aggregate Nodes are supported only on Requests traffic (not TCP or gRPC-message traffic) 22 type AggregateNodeAppender struct { 23 Aggregate string 24 AggregateValue string 25 GraphType string 26 InjectServiceNodes bool 27 Namespaces map[string]graph.NamespaceInfo 28 QueryTime int64 // unix time in seconds 29 Rates graph.RequestedRates 30 Service string 31 } 32 33 // Name implements Appender 34 func (a AggregateNodeAppender) Name() string { 35 return AggregateNodeAppenderName 36 } 37 38 // IsFinalizer implements Appender 39 func (a AggregateNodeAppender) IsFinalizer() bool { 40 return false 41 } 42 43 // AppendGraph implements Appender 44 func (a AggregateNodeAppender) AppendGraph(trafficMap graph.TrafficMap, globalInfo *graph.AppenderGlobalInfo, namespaceInfo *graph.AppenderNamespaceInfo) { 45 if len(trafficMap) == 0 { 46 return 47 } 48 49 // Aggregate Nodes are not applicable to Service Graphs 50 if a.GraphType == graph.GraphTypeService { 51 return 52 } 53 54 // Aggregate Nodes are currently supported only on Requests traffic (not TCP or gRPC-message traffic) 55 if a.Rates.Grpc != graph.RateRequests && a.Rates.Http != graph.RateRequests { 56 return 57 } 58 59 if globalInfo.PromClient == nil { 60 var err error 61 globalInfo.PromClient, err = prometheus.NewClient() 62 graph.CheckError(err) 63 } 64 65 if a.AggregateValue == "" { 66 a.appendGraph(trafficMap, namespaceInfo.Namespace, globalInfo.PromClient) 67 } else { 68 a.appendNodeGraph(trafficMap, namespaceInfo.Namespace, globalInfo.PromClient) 69 } 70 } 71 72 func (a AggregateNodeAppender) appendGraph(trafficMap graph.TrafficMap, namespace string, client *prometheus.Client) { 73 log.Tracef("Resolving request aggregates for namespace=[%s], aggregate=[%s]", namespace, a.Aggregate) 74 duration := a.Namespaces[namespace].Duration 75 76 // query prometheus for aggregate info in two queries (assume aggregation is typically request classification, so use dest telemetry): 77 // note1: we want to only match the aggregate when it is set and not "unknown". But in Prometheus a negative test on an unset label 78 // matches everything, so using %s!=unknown means we still have to filter out unset time-series below... 79 // note2: for now we will filter out aggregates with no traffic on the assumption that users probably don't want to 80 // see them and it will just increase the graph density. To change that behavior remove the "> 0" conditions. 81 // 1) query for requests originating from a workload outside the namespace. 82 groupBy := fmt.Sprintf("source_cluster,source_workload_namespace,source_workload,source_canonical_service,source_canonical_revision,destination_cluster,destination_service_namespace,destination_service,destination_service_name,destination_workload_namespace,destination_workload,destination_canonical_service,destination_canonical_revision,request_protocol,response_code,grpc_response_status,response_flags,%s", a.Aggregate) 83 httpQuery := fmt.Sprintf(`sum(rate(%s{reporter="destination",source_workload_namespace!="%s",destination_service_namespace="%v",%s!="unknown"}[%vs])) by (%s) > 0`, 84 "istio_requests_total", 85 namespace, 86 namespace, 87 a.Aggregate, 88 int(duration.Seconds()), // range duration for the query 89 groupBy) 90 query := httpQuery 91 vector := promQuery(query, time.Unix(a.QueryTime, 0), client.GetContext(), client.API(), a) 92 a.injectAggregates(trafficMap, &vector) 93 94 // 2) query for requests originating from a workload inside of the namespace 95 httpQuery = fmt.Sprintf(`sum(rate(%s{reporter="destination",source_workload_namespace="%s",%s!="unknown"}[%vs])) by (%s) > 0`, 96 "istio_requests_total", 97 namespace, 98 a.Aggregate, 99 int(duration.Seconds()), // range duration for the query 100 groupBy) 101 query = httpQuery 102 vector = promQuery(query, time.Unix(a.QueryTime, 0), client.GetContext(), client.API(), a) 103 a.injectAggregates(trafficMap, &vector) 104 } 105 106 func (a AggregateNodeAppender) appendNodeGraph(trafficMap graph.TrafficMap, namespace string, client *prometheus.Client) { 107 log.Tracef("Resolving node request aggregates for namespace=[%s], aggregate=[%s=%s]", namespace, a.Aggregate, a.AggregateValue) 108 duration := a.Namespaces[namespace].Duration 109 110 // query prometheus for aggregate info in a single query (assume aggregation is typically request classification, so use dest telemetry): 111 // note1: for now we will filter out aggregates with no traffic on the assumption that users probably don't want to 112 // see them and it will just increase the graph density. To change that behavior remove the "> 0" conditions. 113 serviceFragment := "" 114 if a.Service != "" { 115 serviceFragment = fmt.Sprintf(`,destination_service_name="%s"`, a.Service) 116 } 117 groupBy := fmt.Sprintf("source_cluster,source_workload_namespace,source_workload,source_canonical_service,source_canonical_revision,destination_cluster,destination_service_namespace,destination_service,destination_service_name,destination_workload_namespace,destination_workload,destination_canonical_service,destination_canonical_revision,request_protocol,response_code,grpc_response_status,response_flags,%s", a.Aggregate) 118 httpQuery := fmt.Sprintf(`sum(rate(%s{reporter="destination",destination_service_namespace="%s",%s="%s"%s}[%vs])) by (%s) > 0`, 119 "istio_requests_total", 120 namespace, 121 a.Aggregate, 122 a.AggregateValue, 123 serviceFragment, 124 int(duration.Seconds()), // range duration for the query 125 groupBy) 126 query := httpQuery 127 vector := promQuery(query, time.Unix(a.QueryTime, 0), client.GetContext(), client.API(), a) 128 a.injectAggregates(trafficMap, &vector) 129 } 130 131 func (a AggregateNodeAppender) injectAggregates(trafficMap graph.TrafficMap, vector *model.Vector) { 132 skipRequestsGrpc := a.Rates.Grpc != graph.RateRequests 133 skipRequestsHttp := a.Rates.Http != graph.RateRequests 134 135 for _, s := range *vector { 136 m := s.Metric 137 lSourceCluster, sourceClusterOk := m["source_cluster"] 138 lSourceWlNs, sourceWlNsOk := m["source_workload_namespace"] 139 lSourceWl, sourceWlOk := m["source_workload"] 140 lSourceApp, sourceAppOk := m["source_canonical_service"] 141 lSourceVer, sourceVerOk := m["source_canonical_revision"] 142 lDestCluster, destClusterOk := m["destination_cluster"] 143 lDestSvcNs, destSvcNsOk := m["destination_service_namespace"] 144 lDestSvc, destSvcOk := m["destination_service"] 145 lDestSvcName, destSvcNameOk := m["destination_service_name"] 146 lDestWlNs, destWlNsOk := m["destination_workload_namespace"] 147 lDestWl, destWlOk := m["destination_workload"] 148 lDestApp, destAppOk := m["destination_canonical_service"] 149 lDestVer, destVerOk := m["destination_canonical_revision"] 150 lCode := m["response_code"] 151 lGrpc, grpcOk := m["grpc_response_status"] // will be missing for non-GRPC 152 lFlags, flagsOk := m["response_flags"] 153 lProtocol, protocolOk := m["request_protocol"] // because currently we only support requests traffic the protocol should be set 154 lAggregate, aggregateOk := m[model.LabelName(a.Aggregate)] // may be unset, see note above 155 156 if !aggregateOk { 157 continue 158 } 159 160 if !sourceWlNsOk || !sourceWlOk || !sourceAppOk || !sourceVerOk || !destSvcNsOk || !destSvcOk || !destSvcNameOk || !destWlNsOk || !destWlOk || !destAppOk || !destVerOk || !flagsOk || !protocolOk { 161 log.Warningf("Skipping %v, missing expected labels", m.String()) 162 continue 163 } 164 165 sourceWlNs := string(lSourceWlNs) 166 sourceWl := string(lSourceWl) 167 sourceApp := string(lSourceApp) 168 sourceVer := string(lSourceVer) 169 destSvc := string(lDestSvc) 170 code := string(lCode) 171 protocol := string(lProtocol) 172 flags := string(lFlags) 173 aggregate := string(lAggregate) 174 175 if (skipRequestsHttp && protocol == graph.HTTP.Name) || (skipRequestsGrpc && protocol == graph.GRPC.Name) { 176 continue 177 } 178 179 // handle clusters 180 sourceCluster, destCluster := util.HandleClusters(lSourceCluster, sourceClusterOk, lDestCluster, destClusterOk) 181 182 if util.IsBadSourceTelemetry(sourceCluster, sourceClusterOk, sourceWlNs, sourceWl, sourceApp) { 183 continue 184 } 185 186 if protocolOk { 187 // set response code in a backward compatible way 188 code = util.HandleResponseCode(protocol, code, grpcOk, string(lGrpc)) 189 } else { 190 // because currently we only support requests traffic the protocol should be set 191 log.Warningf("Skipping %v, missing expected protocol label", m.String()) 192 continue 193 // protocol = "tcp" 194 } 195 196 // handle unusual destinations 197 destCluster, destSvcNs, destSvcName, destWlNs, destWl, destApp, destVer, _ := util.HandleDestination(sourceCluster, sourceWlNs, sourceWl, destCluster, string(lDestSvcNs), string(lDestSvc), string(lDestSvcName), string(lDestWlNs), string(lDestWl), string(lDestApp), string(lDestVer)) 198 199 if util.IsBadDestTelemetry(destCluster, destClusterOk, destSvcNs, destSvc, destSvcName, destWl) { 200 continue 201 } 202 203 // make code more readable by setting "host" because "destSvc" holds destination.service.host | request.host | "unknown" 204 host := destSvc 205 206 val := float64(s.Value) 207 208 // inject aggregate node between source and destination 209 sourceID, _, _ := graph.Id(sourceCluster, sourceWlNs, "", sourceWlNs, sourceWl, sourceApp, sourceVer, a.GraphType) 210 sourceNode, sourceFound := trafficMap[sourceID] 211 if !sourceFound { 212 log.Debugf("Expected source [%s] node not found in traffic map. Skipping aggregate injection [%s]", sourceID, aggregate) 213 continue 214 } 215 216 // if service nodes are injected show the service-related aggregation: 217 // - use the service node as the dest 218 // - associate aggregate node with the destSvcName and, if set, destApp 219 // else show the independent aggregation by using the workload/app node as the dest 220 destID := "" 221 if a.InjectServiceNodes { 222 destID, _, _ = graph.Id(destCluster, destSvcNs, destSvcName, "", "", "", "", a.GraphType) // service 223 } else { 224 destID, _, _ = graph.Id(destCluster, destSvcNs, destSvcName, destWlNs, destWl, destApp, destVer, a.GraphType) // wl/app 225 } 226 destNode, destFound := trafficMap[destID] 227 if !destFound { 228 log.Debugf("Expected dest [%s] node not found in traffic map. Skipping aggregate injection [%s]", destID, aggregate) 229 continue 230 } 231 232 var aggrNode *graph.Node 233 if a.InjectServiceNodes { 234 aggrNode, _ = addNode(trafficMap, destCluster, destSvcNs, a.Aggregate, aggregate, destSvcName, destApp) 235 } else { 236 aggrNode, _ = addNode(trafficMap, destCluster, destWlNs, a.Aggregate, aggregate, "", "") 237 } 238 239 // replace the non-classified edge (from source to dest) with the classified edges 240 // - note that if not every request has a classification match the traffic may be lower than actual, I 241 // think this this OK, and if the user cares they should define a "catch-all" classification match 242 safeEdges := []*graph.Edge{} 243 for _, e := range sourceNode.Edges { 244 if e.Dest.ID != destID { 245 safeEdges = append(safeEdges, e) 246 } 247 } 248 sourceNode.Edges = safeEdges 249 250 addTraffic(val, protocol, code, flags, host, sourceNode, aggrNode) 251 addTraffic(val, protocol, code, flags, host, aggrNode, destNode) 252 } 253 } 254 255 func addTraffic(val float64, protocol, code, flags, host string, source, dest *graph.Node) { 256 var edge *graph.Edge 257 for _, e := range source.Edges { 258 if dest.ID == e.Dest.ID && e.Metadata[graph.ProtocolKey] == protocol { 259 edge = e 260 break 261 } 262 } 263 if nil == edge { 264 edge = source.AddEdge(dest) 265 edge.Metadata[graph.ProtocolKey] = protocol 266 } 267 268 // Only update traffic on the aggregate node and associated edges. Remember that this is an appender and the 269 // in/out traffic is already set for the non-aggregate nodes. 270 var sourceMetadata graph.Metadata 271 var destMetadata graph.Metadata 272 if source.NodeType == graph.NodeTypeAggregate { 273 sourceMetadata = source.Metadata 274 } else { 275 destMetadata = dest.Metadata 276 } 277 graph.AddToMetadata(protocol, val, code, flags, host, sourceMetadata, destMetadata, edge.Metadata) 278 } 279 280 func addNode(trafficMap graph.TrafficMap, cluster, namespace, aggregate, aggregateVal, svcName, app string) (*graph.Node, bool) { 281 id := graph.AggregateID(cluster, namespace, aggregate, aggregateVal, svcName) 282 node, found := trafficMap[id] 283 if !found { 284 newNode := graph.NewAggregateNodeExplicit(id, cluster, namespace, aggregate, aggregateVal, svcName, app) 285 node = &newNode 286 trafficMap[id] = node 287 } 288 return node, found 289 }