go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/nodes.go (about) 1 // Copyright (c) Mondoo, Inc. 2 // SPDX-License-Identifier: BUSL-1.1 3 4 package internal 5 6 import ( 7 "github.com/rs/zerolog/log" 8 "go.mondoo.com/cnquery/llx" 9 "go.mondoo.com/cnquery/types" 10 ) 11 12 const ( 13 // ExecutionQueryNodeType represents a node that will execute 14 // a query. It can be notified by datapoint nodes, representing 15 // its dependant properties 16 ExecutionQueryNodeType NodeType = "execution_query" 17 // DatapointNodeType represents a node that is a datapoint/entrypoint. 18 // These nodes are implicitly notified when results are received from 19 // the executor threads. They also have edges from execution query nodes, 20 // however these just connect the execution and reporting nodes in the graph. 21 // When triggered by an execution query, the result will be a noop. These nodes 22 // typically notify execution query nodes with properties, reporting query 23 // nodes to calculate a query score, and reporting job nodes the calculate 24 // data collection completion. 25 DatapointNodeType NodeType = "datapoint" 26 // ReportingJobNodeType represent scores that needed to be collected. This 27 // information is sourced from the resolved policy. Nodes of this type are 28 // notified by datapoints to indicate collection of data, reporting query 29 // nodes to be notified of query scores, and other reporting job nodes to 30 // be notified of scores of dependant reporting jobs 31 ReportingJobNodeType NodeType = "reporting_job" 32 // DatapointCollectorNodeType represents a sink for datapoints in the graph. 33 // There is only one of these nodes in the graph, and it can only be notified 34 // by datapoint nodes 35 DatapointCollectorNodeType NodeType = "datapoint_collector" 36 // CollectionFinisherNodeType represents a node that collects datapoints. It is 37 // used to notify of completion when all the expected datapoints have been received. 38 // It is different from the datapoint collector node in that it always has the lowest 39 // priority, so all other work is guaranteed to complete before it says things are done 40 CollectionFinisherNodeType NodeType = "collection_finisher" 41 42 DatapointCollectorID NodeID = "__datapoint_collector__" 43 CollectionFinisherID NodeID = "__collection_finisher__" 44 ) 45 46 type executionQueryProperty struct { 47 name string 48 checksum string 49 value *llx.Result 50 resolved bool 51 } 52 53 func (p *executionQueryProperty) Resolve(value *llx.Result) { 54 p.value = value 55 p.resolved = true 56 } 57 58 func (p *executionQueryProperty) IsResolved() bool { 59 return p.resolved 60 } 61 62 type DataResult struct { 63 checksum string 64 resolved bool 65 value *llx.RawResult 66 } 67 68 type queryRunState int 69 70 const ( 71 notReadyQueryNotReady queryRunState = iota 72 readyQueryRunState 73 executedQueryRunState 74 ) 75 76 // ExecutionQueryNodeData represents a node of type ExecutionQueryNodeType 77 type ExecutionQueryNodeData struct { 78 queryID string 79 codeBundle *llx.CodeBundle 80 81 invalidated bool 82 requiredProperties map[string]*executionQueryProperty 83 runState queryRunState 84 runQueue chan<- runQueueItem 85 } 86 87 func (nodeData *ExecutionQueryNodeData) initialize() { 88 nodeData.updateRunState() 89 if nodeData.runState == readyQueryRunState { 90 nodeData.invalidated = true 91 } 92 } 93 94 // consume saves any received data that matches any the required properties 95 func (nodeData *ExecutionQueryNodeData) consume(from NodeID, data *envelope) { 96 if nodeData.runState == executedQueryRunState { 97 // Nothing can change once the query has been marked as executed 98 return 99 } 100 101 if len(nodeData.requiredProperties) == 0 { 102 nodeData.invalidated = true 103 } 104 105 if data.res != nil { 106 for _, p := range nodeData.requiredProperties { 107 // Find the property with the matching checksum 108 if p.checksum == data.res.CodeID { 109 // Save the value of the property 110 p.Resolve(data.res.Result()) 111 // invalidate the node for recalculation 112 nodeData.invalidated = true 113 } 114 } 115 } 116 } 117 118 // recalculate checks if all required properties are satisfied. Once 119 // all have been received, the query is queued for execution 120 func (nodeData *ExecutionQueryNodeData) recalculate() *envelope { 121 if !nodeData.invalidated { 122 // Nothing can change once the query has been marked as executed 123 return nil 124 } 125 126 // Update the run state so we know if the state changed to 127 // runnable 128 nodeData.updateRunState() 129 nodeData.invalidated = false 130 131 if nodeData.runState == readyQueryRunState { 132 nodeData.run() 133 } 134 135 // An empty envelope notifies the parent. These nodes always point at 136 // Datapoint nodes. The datapoint nodes don't need this message, and 137 // it actually makes more work for the datapoint node. The reason to 138 // send it is to uphold the contract of if something changes, we push 139 // a message through the graph. And in this case, something did 140 // technically change 141 return &envelope{} 142 } 143 144 // run sends this query to be run to the executor queue 145 // this should only be called when the query is runnable ( 146 // all properties needed are available) 147 func (nodeData *ExecutionQueryNodeData) run() { 148 var props map[string]*llx.Result 149 150 if len(nodeData.requiredProperties) > 0 { 151 props = make(map[string]*llx.Result) 152 for _, p := range nodeData.requiredProperties { 153 props[p.name] = p.value 154 } 155 } 156 157 nodeData.runState = executedQueryRunState 158 159 nodeData.runQueue <- runQueueItem{ 160 codeBundle: nodeData.codeBundle, 161 props: props, 162 } 163 } 164 165 // updateRunState sets the query to runnable if all the 166 // required properties needed have been received 167 func (d *ExecutionQueryNodeData) updateRunState() { 168 if d.runState == readyQueryRunState { 169 return 170 } 171 172 runnable := true 173 174 for _, p := range d.requiredProperties { 175 runnable = runnable && p.IsResolved() 176 } 177 178 if runnable { 179 d.runState = readyQueryRunState 180 } else { 181 d.runState = notReadyQueryNotReady 182 } 183 } 184 185 // DatapointNodeData is the data for queries of type DatapointNodeType. 186 type DatapointNodeData struct { 187 expectedType *string 188 isReported bool 189 invalidated bool 190 res *llx.RawResult 191 } 192 193 func (nodeData *DatapointNodeData) initialize() { 194 if nodeData.res != nil { 195 nodeData.set(nodeData.res) 196 } 197 } 198 199 // consume saves the result of the datapoint. 200 func (nodeData *DatapointNodeData) consume(from NodeID, data *envelope) { 201 if nodeData.isReported { 202 // No change detection happens. If a datapoint is reported once, that is the value 203 // we will use. 204 return 205 } 206 if data == nil || data.res == nil { 207 // This can be triggered with no data by the execution query nodes. These 208 // messages are not the ones we care about 209 return 210 } 211 212 nodeData.set(data.res) 213 } 214 215 func (nodeData *DatapointNodeData) set(res *llx.RawResult) { 216 nodeData.invalidated = true 217 nodeData.isReported = true 218 219 if nodeData.expectedType == nil || types.Type(*nodeData.expectedType) == types.Unset || 220 res.Data.Type == types.Nil || res.Data.Type == types.Type(*nodeData.expectedType) || 221 res.Data.Error != nil { 222 nodeData.res = res 223 } else { 224 nodeData.res = res.CastResult(types.Type(*nodeData.expectedType)).RawResultV2() 225 } 226 } 227 228 // recalculate passes on the datapoint's result if its available 229 func (nodeData *DatapointNodeData) recalculate() *envelope { 230 if !nodeData.invalidated { 231 return nil 232 } 233 234 nodeData.invalidated = false 235 236 return &envelope{ 237 res: nodeData.res, 238 } 239 } 240 241 // ReportingQueryNodeData is the data for queries of type ReportingQueryNodeType. 242 type ReportingQueryNodeData struct { 243 featureBoolAssertions bool 244 queryID string 245 246 results map[string]*DataResult 247 invalidated bool 248 } 249 250 func (nodeData *ReportingQueryNodeData) initialize() { 251 invalidated := len(nodeData.results) == 0 252 for _, dr := range nodeData.results { 253 invalidated = invalidated || dr.resolved 254 } 255 nodeData.invalidated = invalidated 256 } 257 258 // consume stores datapoint results sent to it. These represent entrypoints which 259 // are needed to calculate the score 260 func (nodeData *ReportingQueryNodeData) consume(from NodeID, data *envelope) { 261 dr, ok := nodeData.results[from] 262 if !ok { 263 return 264 } 265 if dr.resolved { 266 return 267 } 268 269 dr.value = data.res 270 dr.resolved = true 271 nodeData.invalidated = true 272 } 273 274 type reportingJobDatapoint struct { 275 res *llx.RawResult 276 } 277 278 // ReportingJobNodeData is the data for nodes of type ReportingJobNodeType 279 type ReportingJobNodeData struct { 280 queryID string 281 isQuery bool 282 283 datapoints map[NodeID]*reportingJobDatapoint 284 completed bool 285 invalidated bool 286 } 287 288 func (nodeData *ReportingJobNodeData) initialize() { 289 nodeData.invalidated = true 290 } 291 292 // consume saves scores from dependent reporting queries and reporting jobs, and 293 // results from dependent datapoints 294 func (nodeData *ReportingJobNodeData) consume(from NodeID, data *envelope) { 295 if data.res != nil { 296 dp, ok := nodeData.datapoints[from] 297 if !ok { 298 panic("invalid datapoint report") 299 } 300 dp.res = data.res 301 nodeData.invalidated = true 302 } 303 } 304 305 // CollectionFinisherNodeData represents the node of type CollectionFinisherNodeType 306 // It keeps track of the datapoints that have yet to report back 307 type CollectionFinisherNodeData struct { 308 progressReporter ProgressReporter 309 totalDatapoints int 310 311 remainingDatapoints map[NodeID]struct{} 312 doneChan chan struct{} 313 invalidated bool 314 } 315 316 func (nodeData *CollectionFinisherNodeData) initialize() { 317 if len(nodeData.remainingDatapoints) == 0 { 318 nodeData.invalidated = true 319 } 320 } 321 322 // consume marks the received dataponts as finished 323 func (nodeData *CollectionFinisherNodeData) consume(from NodeID, data *envelope) { 324 if len(nodeData.remainingDatapoints) == 0 { 325 return 326 } 327 log.Debug().Msgf("%s finished", from) 328 delete(nodeData.remainingDatapoints, from) 329 nodeData.invalidated = true 330 } 331 332 // recalculate closes the completion channel if all the data has been received 333 func (nodeData *CollectionFinisherNodeData) recalculate() *envelope { 334 if !nodeData.invalidated { 335 return nil 336 } 337 nodeData.progressReporter.Progress(nodeData.totalDatapoints-len(nodeData.remainingDatapoints), nodeData.totalDatapoints) 338 nodeData.invalidated = false 339 if len(nodeData.remainingDatapoints) == 0 { 340 log.Debug().Msg("graph has received all datapoints") 341 close(nodeData.doneChan) 342 } 343 return nil 344 } 345 346 // DatapointCollectorNodeData is the data for nodes of type DatapointCollectorNodeType 347 type DatapointCollectorNodeData struct { 348 collectors []DatapointCollector 349 unreported map[string]*llx.RawResult 350 invalidated bool 351 } 352 353 func (nodeData *DatapointCollectorNodeData) initialize() { 354 if len(nodeData.unreported) > 0 { 355 nodeData.invalidated = true 356 } 357 } 358 359 // consume collects datapoints 360 func (nodeData *DatapointCollectorNodeData) consume(from NodeID, data *envelope) { 361 if data.res != nil { 362 nodeData.unreported[data.res.CodeID] = data.res 363 nodeData.invalidated = true 364 } 365 } 366 367 // recalculate passes the newly collected datapoints to the configured collectors 368 func (nodeData *DatapointCollectorNodeData) recalculate() *envelope { 369 if !nodeData.invalidated { 370 return nil 371 } 372 nodeData.invalidated = false 373 arr := make([]*llx.RawResult, len(nodeData.unreported)) 374 i := 0 375 for _, rr := range nodeData.unreported { 376 arr[i] = rr 377 i++ 378 } 379 for _, dc := range nodeData.collectors { 380 dc.SinkData(arr) 381 } 382 for k := range nodeData.unreported { 383 delete(nodeData.unreported, k) 384 } 385 return nil 386 }