go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/nodes.go (about)

     1  // Copyright (c) Mondoo, Inc.
     2  // SPDX-License-Identifier: BUSL-1.1
     3  
     4  package internal
     5  
     6  import (
     7  	"github.com/rs/zerolog/log"
     8  	"go.mondoo.com/cnquery/llx"
     9  	"go.mondoo.com/cnquery/types"
    10  )
    11  
    12  const (
    13  	// ExecutionQueryNodeType represents a node that will execute
    14  	// a query. It can be notified by datapoint nodes, representing
    15  	// its dependant properties
    16  	ExecutionQueryNodeType NodeType = "execution_query"
    17  	// DatapointNodeType represents a node that is a datapoint/entrypoint.
    18  	// These nodes are implicitly notified when results are received from
    19  	// the executor threads. They also have edges from execution query nodes,
    20  	// however these just connect the execution and reporting nodes in the graph.
    21  	// When triggered by an execution query, the result will be a noop. These nodes
    22  	// typically notify execution query nodes with properties, reporting query
    23  	// nodes to calculate a query score, and reporting job nodes the calculate
    24  	// data collection completion.
    25  	DatapointNodeType NodeType = "datapoint"
    26  	// ReportingJobNodeType represent scores that needed to be collected. This
    27  	// information is sourced from the resolved policy. Nodes of this type are
    28  	// notified by datapoints to indicate collection of data, reporting query
    29  	// nodes to be notified of query scores, and other reporting job nodes to
    30  	// be notified of scores of dependant reporting jobs
    31  	ReportingJobNodeType NodeType = "reporting_job"
    32  	// DatapointCollectorNodeType represents a sink for datapoints in the graph.
    33  	// There is only one of these nodes in the graph, and it can only be notified
    34  	// by datapoint nodes
    35  	DatapointCollectorNodeType NodeType = "datapoint_collector"
    36  	// CollectionFinisherNodeType represents a node that collects datapoints. It is
    37  	// used to notify of completion when all the expected datapoints have been received.
    38  	// It is different from the datapoint collector node in that it always has the lowest
    39  	// priority, so all other work is guaranteed to complete before it says things are done
    40  	CollectionFinisherNodeType NodeType = "collection_finisher"
    41  
    42  	DatapointCollectorID NodeID = "__datapoint_collector__"
    43  	CollectionFinisherID NodeID = "__collection_finisher__"
    44  )
    45  
    46  type executionQueryProperty struct {
    47  	name     string
    48  	checksum string
    49  	value    *llx.Result
    50  	resolved bool
    51  }
    52  
    53  func (p *executionQueryProperty) Resolve(value *llx.Result) {
    54  	p.value = value
    55  	p.resolved = true
    56  }
    57  
    58  func (p *executionQueryProperty) IsResolved() bool {
    59  	return p.resolved
    60  }
    61  
// DataResult pairs a datapoint result with a flag recording whether that
// result has been received. ReportingQueryNodeData keeps these in its results
// map, indexed by the ID of the sending node: its consume method stores the
// received result in value and sets resolved, and ignores further reports
// from that sender once resolved is set.
type DataResult struct {
	checksum string
	resolved bool
	value    *llx.RawResult
}
    67  
    68  type queryRunState int
    69  
    70  const (
    71  	notReadyQueryNotReady queryRunState = iota
    72  	readyQueryRunState
    73  	executedQueryRunState
    74  )
    75  
// ExecutionQueryNodeData represents a node of type ExecutionQueryNodeType.
//
// requiredProperties holds the properties the query waits for; consume
// resolves an entry when a result with a matching checksum arrives, and the
// query becomes runnable once every entry is resolved. runState records
// whether the query is not ready, ready, or already executed. invalidated is
// set when something changed that recalculate has to look at and is cleared
// by recalculate. runQueue is the send-only channel on which run places the
// codeBundle together with the resolved property values.
type ExecutionQueryNodeData struct {
	queryID    string
	codeBundle *llx.CodeBundle

	invalidated        bool
	requiredProperties map[string]*executionQueryProperty
	runState           queryRunState
	runQueue           chan<- runQueueItem
}
    86  
    87  func (nodeData *ExecutionQueryNodeData) initialize() {
    88  	nodeData.updateRunState()
    89  	if nodeData.runState == readyQueryRunState {
    90  		nodeData.invalidated = true
    91  	}
    92  }
    93  
    94  // consume saves any received data that matches any the required properties
    95  func (nodeData *ExecutionQueryNodeData) consume(from NodeID, data *envelope) {
    96  	if nodeData.runState == executedQueryRunState {
    97  		// Nothing can change once the query has been marked as executed
    98  		return
    99  	}
   100  
   101  	if len(nodeData.requiredProperties) == 0 {
   102  		nodeData.invalidated = true
   103  	}
   104  
   105  	if data.res != nil {
   106  		for _, p := range nodeData.requiredProperties {
   107  			// Find the property with the matching checksum
   108  			if p.checksum == data.res.CodeID {
   109  				// Save the value of the property
   110  				p.Resolve(data.res.Result())
   111  				// invalidate the node for recalculation
   112  				nodeData.invalidated = true
   113  			}
   114  		}
   115  	}
   116  }
   117  
   118  // recalculate checks if all required properties are satisfied. Once
   119  // all have been received, the query is queued for execution
   120  func (nodeData *ExecutionQueryNodeData) recalculate() *envelope {
   121  	if !nodeData.invalidated {
   122  		// Nothing can change once the query has been marked as executed
   123  		return nil
   124  	}
   125  
   126  	// Update the run state so we know if the state changed to
   127  	// runnable
   128  	nodeData.updateRunState()
   129  	nodeData.invalidated = false
   130  
   131  	if nodeData.runState == readyQueryRunState {
   132  		nodeData.run()
   133  	}
   134  
   135  	// An empty envelope notifies the parent. These nodes always point at
   136  	// Datapoint nodes. The datapoint nodes don't need this message, and
   137  	// it actually makes more work for the datapoint node. The reason to
   138  	// send it is to uphold the contract of if something changes, we push
   139  	// a message through the graph. And in this case, something did
   140  	// technically change
   141  	return &envelope{}
   142  }
   143  
   144  // run sends this query to be run to the executor queue
   145  // this should only be called when the query is runnable (
   146  // all properties needed are available)
   147  func (nodeData *ExecutionQueryNodeData) run() {
   148  	var props map[string]*llx.Result
   149  
   150  	if len(nodeData.requiredProperties) > 0 {
   151  		props = make(map[string]*llx.Result)
   152  		for _, p := range nodeData.requiredProperties {
   153  			props[p.name] = p.value
   154  		}
   155  	}
   156  
   157  	nodeData.runState = executedQueryRunState
   158  
   159  	nodeData.runQueue <- runQueueItem{
   160  		codeBundle: nodeData.codeBundle,
   161  		props:      props,
   162  	}
   163  }
   164  
   165  // updateRunState sets the query to runnable if all the
   166  // required properties needed have been received
   167  func (d *ExecutionQueryNodeData) updateRunState() {
   168  	if d.runState == readyQueryRunState {
   169  		return
   170  	}
   171  
   172  	runnable := true
   173  
   174  	for _, p := range d.requiredProperties {
   175  		runnable = runnable && p.IsResolved()
   176  	}
   177  
   178  	if runnable {
   179  		d.runState = readyQueryRunState
   180  	} else {
   181  		d.runState = notReadyQueryNotReady
   182  	}
   183  }
   184  
// DatapointNodeData is the data for nodes of type DatapointNodeType.
//
// res is the stored result. expectedType, when set, is the type the result
// is cast to by set if the received type differs. isReported is set once a
// result has been stored; consume ignores any later results. invalidated is
// set when a result has been stored and is cleared by recalculate.
type DatapointNodeData struct {
	expectedType *string
	isReported   bool
	invalidated  bool
	res          *llx.RawResult
}
   192  
   193  func (nodeData *DatapointNodeData) initialize() {
   194  	if nodeData.res != nil {
   195  		nodeData.set(nodeData.res)
   196  	}
   197  }
   198  
   199  // consume saves the result of the datapoint.
   200  func (nodeData *DatapointNodeData) consume(from NodeID, data *envelope) {
   201  	if nodeData.isReported {
   202  		// No change detection happens. If a datapoint is reported once, that is the value
   203  		// we will use.
   204  		return
   205  	}
   206  	if data == nil || data.res == nil {
   207  		// This can be triggered with no data by the execution query nodes. These
   208  		// messages are not the ones we care about
   209  		return
   210  	}
   211  
   212  	nodeData.set(data.res)
   213  }
   214  
   215  func (nodeData *DatapointNodeData) set(res *llx.RawResult) {
   216  	nodeData.invalidated = true
   217  	nodeData.isReported = true
   218  
   219  	if nodeData.expectedType == nil || types.Type(*nodeData.expectedType) == types.Unset ||
   220  		res.Data.Type == types.Nil || res.Data.Type == types.Type(*nodeData.expectedType) ||
   221  		res.Data.Error != nil {
   222  		nodeData.res = res
   223  	} else {
   224  		nodeData.res = res.CastResult(types.Type(*nodeData.expectedType)).RawResultV2()
   225  	}
   226  }
   227  
   228  // recalculate passes on the datapoint's result if its available
   229  func (nodeData *DatapointNodeData) recalculate() *envelope {
   230  	if !nodeData.invalidated {
   231  		return nil
   232  	}
   233  
   234  	nodeData.invalidated = false
   235  
   236  	return &envelope{
   237  		res: nodeData.res,
   238  	}
   239  }
   240  
// ReportingQueryNodeData is the data for nodes of type ReportingQueryNodeType.
// NOTE(review): ReportingQueryNodeType is not among the node types declared
// at the top of this file; confirm it still exists.
//
// results holds one DataResult per sender that consume accepts data from,
// indexed by the sending node's ID. invalidated is set by initialize and by
// consume whenever a result is newly resolved.
type ReportingQueryNodeData struct {
	featureBoolAssertions bool
	queryID               string

	results     map[string]*DataResult
	invalidated bool
}
   249  
   250  func (nodeData *ReportingQueryNodeData) initialize() {
   251  	invalidated := len(nodeData.results) == 0
   252  	for _, dr := range nodeData.results {
   253  		invalidated = invalidated || dr.resolved
   254  	}
   255  	nodeData.invalidated = invalidated
   256  }
   257  
   258  // consume stores datapoint results sent to it. These represent entrypoints which
   259  // are needed to calculate the score
   260  func (nodeData *ReportingQueryNodeData) consume(from NodeID, data *envelope) {
   261  	dr, ok := nodeData.results[from]
   262  	if !ok {
   263  		return
   264  	}
   265  	if dr.resolved {
   266  		return
   267  	}
   268  
   269  	dr.value = data.res
   270  	dr.resolved = true
   271  	nodeData.invalidated = true
   272  }
   273  
// reportingJobDatapoint holds the result a reporting job has received from
// one of its datapoints. ReportingJobNodeData.consume overwrites res each
// time that datapoint reports a result.
type reportingJobDatapoint struct {
	res *llx.RawResult
}
   277  
// ReportingJobNodeData is the data for nodes of type ReportingJobNodeType.
//
// datapoints holds one entry per datapoint node the job accepts results
// from, indexed by that node's ID; consume panics when a result arrives from
// any other node. invalidated is set by initialize and whenever consume
// stores a result.
type ReportingJobNodeData struct {
	queryID string
	isQuery bool

	datapoints  map[NodeID]*reportingJobDatapoint
	completed   bool
	invalidated bool
}
   287  
   288  func (nodeData *ReportingJobNodeData) initialize() {
   289  	nodeData.invalidated = true
   290  }
   291  
   292  // consume saves scores from dependent reporting queries and reporting jobs, and
   293  // results from dependent datapoints
   294  func (nodeData *ReportingJobNodeData) consume(from NodeID, data *envelope) {
   295  	if data.res != nil {
   296  		dp, ok := nodeData.datapoints[from]
   297  		if !ok {
   298  			panic("invalid datapoint report")
   299  		}
   300  		dp.res = data.res
   301  		nodeData.invalidated = true
   302  	}
   303  }
   304  
// CollectionFinisherNodeData represents the node of type CollectionFinisherNodeType
// It keeps track of the datapoints that have yet to report back
//
// remainingDatapoints is the set of node IDs that have not reported yet;
// consume removes the sender from it. recalculate reports progress to
// progressReporter as (totalDatapoints - remaining, totalDatapoints) and
// closes doneChan once the set is empty. invalidated is set when the set
// changed (or was empty at initialization) and is cleared by recalculate.
type CollectionFinisherNodeData struct {
	progressReporter ProgressReporter
	totalDatapoints  int

	remainingDatapoints map[NodeID]struct{}
	doneChan            chan struct{}
	invalidated         bool
}
   315  
   316  func (nodeData *CollectionFinisherNodeData) initialize() {
   317  	if len(nodeData.remainingDatapoints) == 0 {
   318  		nodeData.invalidated = true
   319  	}
   320  }
   321  
   322  // consume marks the received dataponts as finished
   323  func (nodeData *CollectionFinisherNodeData) consume(from NodeID, data *envelope) {
   324  	if len(nodeData.remainingDatapoints) == 0 {
   325  		return
   326  	}
   327  	log.Debug().Msgf("%s finished", from)
   328  	delete(nodeData.remainingDatapoints, from)
   329  	nodeData.invalidated = true
   330  }
   331  
   332  // recalculate closes the completion channel if all the data has been received
   333  func (nodeData *CollectionFinisherNodeData) recalculate() *envelope {
   334  	if !nodeData.invalidated {
   335  		return nil
   336  	}
   337  	nodeData.progressReporter.Progress(nodeData.totalDatapoints-len(nodeData.remainingDatapoints), nodeData.totalDatapoints)
   338  	nodeData.invalidated = false
   339  	if len(nodeData.remainingDatapoints) == 0 {
   340  		log.Debug().Msg("graph has received all datapoints")
   341  		close(nodeData.doneChan)
   342  	}
   343  	return nil
   344  }
   345  
// DatapointCollectorNodeData is the data for nodes of type DatapointCollectorNodeType
//
// unreported buffers the results received since the last recalculate,
// indexed by the result's CodeID. recalculate passes the buffered results
// to every entry in collectors via SinkData and then empties the buffer.
// invalidated is set while there are buffered results to hand over.
type DatapointCollectorNodeData struct {
	collectors  []DatapointCollector
	unreported  map[string]*llx.RawResult
	invalidated bool
}
   352  
   353  func (nodeData *DatapointCollectorNodeData) initialize() {
   354  	if len(nodeData.unreported) > 0 {
   355  		nodeData.invalidated = true
   356  	}
   357  }
   358  
   359  // consume collects datapoints
   360  func (nodeData *DatapointCollectorNodeData) consume(from NodeID, data *envelope) {
   361  	if data.res != nil {
   362  		nodeData.unreported[data.res.CodeID] = data.res
   363  		nodeData.invalidated = true
   364  	}
   365  }
   366  
   367  // recalculate passes the newly collected datapoints to the configured collectors
   368  func (nodeData *DatapointCollectorNodeData) recalculate() *envelope {
   369  	if !nodeData.invalidated {
   370  		return nil
   371  	}
   372  	nodeData.invalidated = false
   373  	arr := make([]*llx.RawResult, len(nodeData.unreported))
   374  	i := 0
   375  	for _, rr := range nodeData.unreported {
   376  		arr[i] = rr
   377  		i++
   378  	}
   379  	for _, dc := range nodeData.collectors {
   380  		dc.SinkData(arr)
   381  	}
   382  	for k := range nodeData.unreported {
   383  		delete(nodeData.unreported, k)
   384  	}
   385  	return nil
   386  }