go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/builder.go (about) 1 // Copyright (c) Mondoo, Inc. 2 // SPDX-License-Identifier: BUSL-1.1 3 4 package internal 5 6 import ( 7 "fmt" 8 "math" 9 "sort" 10 "time" 11 12 vrs "github.com/hashicorp/go-version" 13 "github.com/rs/zerolog/log" 14 "go.mondoo.com/cnquery" 15 "go.mondoo.com/cnquery/llx" 16 ) 17 18 type query struct { 19 codeBundle *llx.CodeBundle 20 requiredProps map[string]string 21 resolvedProperties map[string]*llx.Primitive 22 } 23 24 type GraphBuilder struct { 25 // queries is a map of QrID to query 26 queries []query 27 // datapointCollectors contains the collectors which will receive 28 // datapoints 29 datapointCollectors []DatapointCollector 30 // collectDatapointChecksums specifies additional datapoints outside 31 // the reporting job to collect 32 collectDatapointChecksums []string 33 // datapointType is a map of checksum to type for datapoint type 34 // conversion. This is sourced from the compiled query 35 datapointType map[string]string 36 // progressReporter is a configured interface to receive progress 37 // updates 38 progressReporter ProgressReporter 39 // mondooVersion is the version of mondoo. This is generally sourced 40 // from the binary, but is configurable to make testing easier 41 mondooVersion string 42 // queryTimeout is the amount of time to wait for the underlying lumi 43 // runtime to send all the expected datapoints. 44 queryTimeout time.Duration 45 46 featureBoolAssertions bool 47 } 48 49 func NewBuilder() *GraphBuilder { 50 return &GraphBuilder{ 51 queries: []query{}, 52 datapointCollectors: []DatapointCollector{}, 53 collectDatapointChecksums: []string{}, 54 datapointType: map[string]string{}, 55 progressReporter: NoopProgressReporter{}, 56 mondooVersion: cnquery.GetCoreVersion(), 57 queryTimeout: 5 * time.Minute, 58 } 59 } 60 61 // AddQuery adds the provided code to be executed to the graph 62 func (b *GraphBuilder) AddQuery(c *llx.CodeBundle, propertyChecksums map[string]string, resolvedProperties map[string]*llx.Primitive) { 63 b.queries = append(b.queries, query{ 64 codeBundle: c, 65 requiredProps: propertyChecksums, 66 resolvedProperties: resolvedProperties, 67 }) 68 } 69 70 func (b *GraphBuilder) AddDatapointType(datapointChecksum string, typ string) { 71 b.datapointType[datapointChecksum] = typ 72 } 73 74 // CollectDatapoint requests the provided checksum be collected and sent to 75 // the configured datapoint collectors 76 func (b *GraphBuilder) CollectDatapoint(datapointChecksum string) { 77 b.collectDatapointChecksums = append(b.collectDatapointChecksums, datapointChecksum) 78 } 79 80 // AddDatapointCollector adds a datapoint collector. Collected datapoints 81 // will be sent to all the provided datapoint collectors 82 func (b *GraphBuilder) AddDatapointCollector(c DatapointCollector) { 83 b.datapointCollectors = append(b.datapointCollectors, c) 84 } 85 86 // WithProgressReporter sets the interface which will receive progress updates 87 func (b *GraphBuilder) WithProgressReporter(r ProgressReporter) { 88 b.progressReporter = r 89 } 90 91 // WithMondooVersion sets the version of mondoo 92 func (b *GraphBuilder) WithMondooVersion(mondooVersion string) { 93 b.mondooVersion = mondooVersion 94 } 95 96 // WithMondooVersion sets the version of mondoo 97 func (b *GraphBuilder) WithQueryTimeout(timeout time.Duration) { 98 b.queryTimeout = timeout 99 } 100 101 func (b *GraphBuilder) WithFeatureBoolAssertions(featureBoolAssertions bool) { 102 b.featureBoolAssertions = featureBoolAssertions 103 } 104 105 func (b *GraphBuilder) Build(schema llx.Schema, runtime llx.Runtime, assetMrn string) (*GraphExecutor, error) { 106 resultChan := make(chan *llx.RawResult, 128) 107 108 queries := make(map[string]query, len(b.queries)) 109 for _, q := range b.queries { 110 queries[q.codeBundle.GetCodeV2().GetId()] = q 111 } 112 113 ge := &GraphExecutor{ 114 nodes: map[NodeID]*Node{}, 115 edges: map[NodeID][]NodeID{}, 116 priorityMap: map[NodeID]int{}, 117 queryTimeout: b.queryTimeout, 118 executionManager: newExecutionManager(schema, runtime, make(chan runQueueItem, len(queries)), 119 resultChan, b.queryTimeout), 120 resultChan: resultChan, 121 doneChan: make(chan struct{}), 122 } 123 124 ge.nodes[DatapointCollectorID] = &Node{ 125 id: DatapointCollectorID, 126 nodeType: DatapointCollectorNodeType, 127 data: &DatapointCollectorNodeData{ 128 unreported: map[string]*llx.RawResult{}, 129 collectors: b.datapointCollectors, 130 }, 131 } 132 133 unrunnableQueries := []query{} 134 135 var mondooVersion *vrs.Version 136 if b.mondooVersion != "" && b.mondooVersion != "unstable" { 137 var err error 138 mondooVersion, err = vrs.NewVersion(b.mondooVersion) 139 if err != nil { 140 log.Warn().Err(err).Str("version", b.mondooVersion).Msg("unable to parse mondoo version") 141 } 142 } 143 144 for queryID, q := range queries { 145 canRun := checkVersion(q.codeBundle, mondooVersion) 146 if canRun { 147 ge.addExecutionQueryNode(queryID, q, q.resolvedProperties, b.datapointType) 148 } else { 149 unrunnableQueries = append(unrunnableQueries, q) 150 } 151 } 152 153 datapointsToCollect := make([]string, len(b.collectDatapointChecksums)) 154 copy(datapointsToCollect, b.collectDatapointChecksums) 155 156 for _, datapointChecksum := range datapointsToCollect { 157 ge.addEdge(NodeID(datapointChecksum), DatapointCollectorID) 158 } 159 160 ge.handleUnrunnableQueries(unrunnableQueries) 161 162 ge.createFinisherNode(b.progressReporter) 163 164 for nodeID := range ge.nodes { 165 prioritizeNode(ge.nodes, ge.edges, ge.priorityMap, nodeID) 166 } 167 168 // The finisher is the lowest priority node. This makes it so that 169 // when a recalculation is triggered through a datapoint being reported, 170 // the finisher only gets notified after all other intermediate nodes are 171 // notified 172 ge.priorityMap[CollectionFinisherID] = math.MinInt 173 174 return ge, nil 175 } 176 177 // handleUnrunnableQueries takes the queries for which the running version does 178 // to meet the minimum version requirement and marks the datapoints as error. 179 // This is only done for datapoints which will not be reported by a runnable query 180 func (ge *GraphExecutor) handleUnrunnableQueries(unrunnableQueries []query) { 181 for _, q := range unrunnableQueries { 182 for _, checksum := range CodepointChecksums(q.codeBundle) { 183 if _, ok := ge.nodes[NodeID(checksum)]; ok { 184 // If the datapoint will be reported by another query, skip 185 // handling it 186 continue 187 } 188 189 ge.addDatapointNode( 190 checksum, 191 nil, 192 &llx.RawResult{ 193 CodeID: checksum, 194 Data: &llx.RawData{ 195 Error: fmt.Errorf("Unable to run query, cnquery version %s required", q.codeBundle.MinMondooVersion), 196 }, 197 }) 198 } 199 } 200 } 201 202 func (ge *GraphExecutor) addEdge(from NodeID, to NodeID) { 203 ge.edges[from] = insertSorted(ge.edges[from], to) 204 } 205 206 func (ge *GraphExecutor) createFinisherNode(r ProgressReporter) { 207 nodeID := CollectionFinisherID 208 nodeData := &CollectionFinisherNodeData{ 209 remainingDatapoints: make(map[string]struct{}, len(ge.nodes)), 210 doneChan: ge.doneChan, 211 progressReporter: r, 212 } 213 214 for datapointNodeID, n := range ge.nodes { 215 if n.nodeType == DatapointNodeType { 216 ge.addEdge(datapointNodeID, nodeID) 217 nodeData.remainingDatapoints[datapointNodeID] = struct{}{} 218 } 219 } 220 totalDatapoints := len(nodeData.remainingDatapoints) 221 nodeData.totalDatapoints = totalDatapoints 222 223 ge.nodes[nodeID] = &Node{ 224 id: nodeID, 225 nodeType: CollectionFinisherNodeType, 226 data: nodeData, 227 } 228 } 229 230 func (ge *GraphExecutor) addExecutionQueryNode(queryID string, q query, resolvedProperties map[string]*llx.Primitive, datapointTypeMap map[string]string) { 231 n, ok := ge.nodes[NodeID(queryID)] 232 if ok { 233 return 234 } 235 236 codeBundle := q.codeBundle 237 238 nodeData := &ExecutionQueryNodeData{ 239 queryID: queryID, 240 codeBundle: codeBundle, 241 requiredProperties: map[string]*executionQueryProperty{}, 242 runState: notReadyQueryNotReady, 243 runQueue: ge.executionManager.runQueue, 244 } 245 246 n = &Node{ 247 id: NodeID(string(ExecutionQueryNodeType) + "/" + queryID), 248 nodeType: ExecutionQueryNodeType, 249 data: nodeData, 250 } 251 252 // These don't report anything, but they make the graph connected 253 for _, checksum := range CodepointChecksums(codeBundle) { 254 var expectedType *string 255 if t, ok := datapointTypeMap[checksum]; ok { 256 expectedType = &t 257 } 258 ge.addDatapointNode(checksum, expectedType, nil) 259 ge.addEdge(n.id, NodeID(checksum)) 260 } 261 262 for name, checksum := range q.requiredProps { 263 nodeData.requiredProperties[name] = &executionQueryProperty{ 264 name: name, 265 checksum: checksum, 266 resolved: false, 267 value: nil, 268 } 269 ge.addEdge(NodeID(checksum), n.id) 270 } 271 272 for name, val := range resolvedProperties { 273 if rp, ok := nodeData.requiredProperties[name]; !ok { 274 nodeData.requiredProperties[name] = &executionQueryProperty{ 275 name: name, 276 checksum: "", 277 resolved: true, 278 value: &llx.Result{ 279 Data: val, 280 }, 281 } 282 } else { 283 rp.value = &llx.Result{ 284 Data: val, 285 } 286 rp.resolved = true 287 } 288 } 289 290 ge.nodes[n.id] = n 291 } 292 293 func (ge *GraphExecutor) addDatapointNode(datapointChecksum string, expectedType *string, res *llx.RawResult) { 294 n, ok := ge.nodes[NodeID(datapointChecksum)] 295 if ok { 296 return 297 } 298 299 nodeData := &DatapointNodeData{ 300 expectedType: expectedType, 301 isReported: res != nil, 302 res: res, 303 } 304 n = &Node{ 305 id: NodeID(datapointChecksum), 306 nodeType: DatapointNodeType, 307 data: nodeData, 308 } 309 310 ge.nodes[NodeID(datapointChecksum)] = n 311 } 312 313 // prioritizeNode assigns each node in the graph a priority. The priority makes graph traversal 314 // act like a breadth-first search, minimizing the number of recalculations needed for each node. 315 // For example, the reporting job with a query id of the asset will have a lower priority than 316 // reporting jobs which have a query id of a policy mrn. In a similar way, the reporting jobs 317 // that have a query id of policy mrns have a lower priority than reporting jobs for queries. 318 // This means that if a batch of data arrives, all query reporting jobs will be recalculated first. 319 // The policy reporting jobs will be calculated after that, and then the asset reporting job. 320 func prioritizeNode(nodes map[NodeID]*Node, edges map[NodeID][]NodeID, priorityMap map[NodeID]int, n NodeID) int { 321 if d, ok := priorityMap[n]; ok { 322 return d 323 } 324 childrenMaxDepth := 0 325 for _, v := range edges[n] { 326 childDepth := prioritizeNode(nodes, edges, priorityMap, v) 327 if childDepth > childrenMaxDepth { 328 childrenMaxDepth = childDepth 329 } 330 } 331 myDepth := childrenMaxDepth + 1 332 priorityMap[n] = myDepth 333 return myDepth 334 } 335 336 func checkVersion(codeBundle *llx.CodeBundle, curMin *vrs.Version) bool { 337 if curMin != nil && codeBundle.MinMondooVersion != "" { 338 requiredVer := codeBundle.MinMondooVersion 339 reqMin, err := vrs.NewVersion(requiredVer) 340 if err == nil && curMin.LessThan(reqMin) { 341 return false 342 } 343 } 344 return true 345 } 346 347 func insertSorted(ss []string, s string) []string { 348 i := sort.SearchStrings(ss, s) 349 if i < len(ss) && ss[i] == s { 350 return ss 351 } 352 ss = append(ss, "") 353 copy(ss[i+1:], ss[i:]) 354 ss[i] = s 355 return ss 356 } 357 358 func CodepointChecksums(codeBundle *llx.CodeBundle) []string { 359 return append(EntrypointChecksums(codeBundle), 360 DatapointChecksums(codeBundle)...) 361 } 362 363 func EntrypointChecksums(codeBundle *llx.CodeBundle) []string { 364 var checksums []string 365 366 checksums = make([]string, len(codeBundle.CodeV2.Blocks[0].Entrypoints)) 367 for i, ref := range codeBundle.CodeV2.Blocks[0].Entrypoints { 368 checksums[i] = codeBundle.CodeV2.Checksums[ref] 369 } 370 371 return checksums 372 } 373 374 func DatapointChecksums(codeBundle *llx.CodeBundle) []string { 375 var checksums []string 376 377 checksums = make([]string, len(codeBundle.CodeV2.Blocks[0].Datapoints)) 378 for i, ref := range codeBundle.CodeV2.Blocks[0].Datapoints { 379 checksums[i] = codeBundle.CodeV2.Checksums[ref] 380 } 381 382 return checksums 383 }