go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/builder.go (about)

     1  // Copyright (c) Mondoo, Inc.
     2  // SPDX-License-Identifier: BUSL-1.1
     3  
     4  package internal
     5  
     6  import (
     7  	"fmt"
     8  	"math"
     9  	"sort"
    10  	"time"
    11  
    12  	vrs "github.com/hashicorp/go-version"
    13  	"github.com/rs/zerolog/log"
    14  	"go.mondoo.com/cnquery"
    15  	"go.mondoo.com/cnquery/llx"
    16  )
    17  
    18  type query struct {
    19  	codeBundle         *llx.CodeBundle
    20  	requiredProps      map[string]string
    21  	resolvedProperties map[string]*llx.Primitive
    22  }
    23  
    24  type GraphBuilder struct {
    25  	// queries is a map of QrID to query
    26  	queries []query
    27  	// datapointCollectors contains the collectors which will receive
    28  	// datapoints
    29  	datapointCollectors []DatapointCollector
    30  	// collectDatapointChecksums specifies additional datapoints outside
    31  	// the reporting job to collect
    32  	collectDatapointChecksums []string
    33  	// datapointType is a map of checksum to type for datapoint type
    34  	// conversion. This is sourced from the compiled query
    35  	datapointType map[string]string
    36  	// progressReporter is a configured interface to receive progress
    37  	// updates
    38  	progressReporter ProgressReporter
    39  	// mondooVersion is the version of mondoo. This is generally sourced
    40  	// from the binary, but is configurable to make testing easier
    41  	mondooVersion string
    42  	// queryTimeout is the amount of time to wait for the underlying lumi
    43  	// runtime to send all the expected datapoints.
    44  	queryTimeout time.Duration
    45  
    46  	featureBoolAssertions bool
    47  }
    48  
    49  func NewBuilder() *GraphBuilder {
    50  	return &GraphBuilder{
    51  		queries:                   []query{},
    52  		datapointCollectors:       []DatapointCollector{},
    53  		collectDatapointChecksums: []string{},
    54  		datapointType:             map[string]string{},
    55  		progressReporter:          NoopProgressReporter{},
    56  		mondooVersion:             cnquery.GetCoreVersion(),
    57  		queryTimeout:              5 * time.Minute,
    58  	}
    59  }
    60  
    61  // AddQuery adds the provided code to be executed to the graph
    62  func (b *GraphBuilder) AddQuery(c *llx.CodeBundle, propertyChecksums map[string]string, resolvedProperties map[string]*llx.Primitive) {
    63  	b.queries = append(b.queries, query{
    64  		codeBundle:         c,
    65  		requiredProps:      propertyChecksums,
    66  		resolvedProperties: resolvedProperties,
    67  	})
    68  }
    69  
    70  func (b *GraphBuilder) AddDatapointType(datapointChecksum string, typ string) {
    71  	b.datapointType[datapointChecksum] = typ
    72  }
    73  
    74  // CollectDatapoint requests the provided checksum be collected and sent to
    75  // the configured datapoint collectors
    76  func (b *GraphBuilder) CollectDatapoint(datapointChecksum string) {
    77  	b.collectDatapointChecksums = append(b.collectDatapointChecksums, datapointChecksum)
    78  }
    79  
    80  // AddDatapointCollector adds a datapoint collector. Collected datapoints
    81  // will be sent to all the provided datapoint collectors
    82  func (b *GraphBuilder) AddDatapointCollector(c DatapointCollector) {
    83  	b.datapointCollectors = append(b.datapointCollectors, c)
    84  }
    85  
    86  // WithProgressReporter sets the interface which will receive progress updates
    87  func (b *GraphBuilder) WithProgressReporter(r ProgressReporter) {
    88  	b.progressReporter = r
    89  }
    90  
    91  // WithMondooVersion sets the version of mondoo
    92  func (b *GraphBuilder) WithMondooVersion(mondooVersion string) {
    93  	b.mondooVersion = mondooVersion
    94  }
    95  
    96  // WithMondooVersion sets the version of mondoo
    97  func (b *GraphBuilder) WithQueryTimeout(timeout time.Duration) {
    98  	b.queryTimeout = timeout
    99  }
   100  
   101  func (b *GraphBuilder) WithFeatureBoolAssertions(featureBoolAssertions bool) {
   102  	b.featureBoolAssertions = featureBoolAssertions
   103  }
   104  
   105  func (b *GraphBuilder) Build(schema llx.Schema, runtime llx.Runtime, assetMrn string) (*GraphExecutor, error) {
   106  	resultChan := make(chan *llx.RawResult, 128)
   107  
   108  	queries := make(map[string]query, len(b.queries))
   109  	for _, q := range b.queries {
   110  		queries[q.codeBundle.GetCodeV2().GetId()] = q
   111  	}
   112  
   113  	ge := &GraphExecutor{
   114  		nodes:        map[NodeID]*Node{},
   115  		edges:        map[NodeID][]NodeID{},
   116  		priorityMap:  map[NodeID]int{},
   117  		queryTimeout: b.queryTimeout,
   118  		executionManager: newExecutionManager(schema, runtime, make(chan runQueueItem, len(queries)),
   119  			resultChan, b.queryTimeout),
   120  		resultChan: resultChan,
   121  		doneChan:   make(chan struct{}),
   122  	}
   123  
   124  	ge.nodes[DatapointCollectorID] = &Node{
   125  		id:       DatapointCollectorID,
   126  		nodeType: DatapointCollectorNodeType,
   127  		data: &DatapointCollectorNodeData{
   128  			unreported: map[string]*llx.RawResult{},
   129  			collectors: b.datapointCollectors,
   130  		},
   131  	}
   132  
   133  	unrunnableQueries := []query{}
   134  
   135  	var mondooVersion *vrs.Version
   136  	if b.mondooVersion != "" && b.mondooVersion != "unstable" {
   137  		var err error
   138  		mondooVersion, err = vrs.NewVersion(b.mondooVersion)
   139  		if err != nil {
   140  			log.Warn().Err(err).Str("version", b.mondooVersion).Msg("unable to parse mondoo version")
   141  		}
   142  	}
   143  
   144  	for queryID, q := range queries {
   145  		canRun := checkVersion(q.codeBundle, mondooVersion)
   146  		if canRun {
   147  			ge.addExecutionQueryNode(queryID, q, q.resolvedProperties, b.datapointType)
   148  		} else {
   149  			unrunnableQueries = append(unrunnableQueries, q)
   150  		}
   151  	}
   152  
   153  	datapointsToCollect := make([]string, len(b.collectDatapointChecksums))
   154  	copy(datapointsToCollect, b.collectDatapointChecksums)
   155  
   156  	for _, datapointChecksum := range datapointsToCollect {
   157  		ge.addEdge(NodeID(datapointChecksum), DatapointCollectorID)
   158  	}
   159  
   160  	ge.handleUnrunnableQueries(unrunnableQueries)
   161  
   162  	ge.createFinisherNode(b.progressReporter)
   163  
   164  	for nodeID := range ge.nodes {
   165  		prioritizeNode(ge.nodes, ge.edges, ge.priorityMap, nodeID)
   166  	}
   167  
   168  	// The finisher is the lowest priority node. This makes it so that
   169  	// when a recalculation is triggered through a datapoint being reported,
   170  	// the finisher only gets notified after all other intermediate nodes are
   171  	// notified
   172  	ge.priorityMap[CollectionFinisherID] = math.MinInt
   173  
   174  	return ge, nil
   175  }
   176  
   177  // handleUnrunnableQueries takes the queries for which the running version does
   178  // to meet the minimum version requirement and marks the datapoints as error.
   179  // This is only done for datapoints which will not be reported by a runnable query
   180  func (ge *GraphExecutor) handleUnrunnableQueries(unrunnableQueries []query) {
   181  	for _, q := range unrunnableQueries {
   182  		for _, checksum := range CodepointChecksums(q.codeBundle) {
   183  			if _, ok := ge.nodes[NodeID(checksum)]; ok {
   184  				// If the datapoint will be reported by another query, skip
   185  				// handling it
   186  				continue
   187  			}
   188  
   189  			ge.addDatapointNode(
   190  				checksum,
   191  				nil,
   192  				&llx.RawResult{
   193  					CodeID: checksum,
   194  					Data: &llx.RawData{
   195  						Error: fmt.Errorf("Unable to run query, cnquery version %s required", q.codeBundle.MinMondooVersion),
   196  					},
   197  				})
   198  		}
   199  	}
   200  }
   201  
   202  func (ge *GraphExecutor) addEdge(from NodeID, to NodeID) {
   203  	ge.edges[from] = insertSorted(ge.edges[from], to)
   204  }
   205  
   206  func (ge *GraphExecutor) createFinisherNode(r ProgressReporter) {
   207  	nodeID := CollectionFinisherID
   208  	nodeData := &CollectionFinisherNodeData{
   209  		remainingDatapoints: make(map[string]struct{}, len(ge.nodes)),
   210  		doneChan:            ge.doneChan,
   211  		progressReporter:    r,
   212  	}
   213  
   214  	for datapointNodeID, n := range ge.nodes {
   215  		if n.nodeType == DatapointNodeType {
   216  			ge.addEdge(datapointNodeID, nodeID)
   217  			nodeData.remainingDatapoints[datapointNodeID] = struct{}{}
   218  		}
   219  	}
   220  	totalDatapoints := len(nodeData.remainingDatapoints)
   221  	nodeData.totalDatapoints = totalDatapoints
   222  
   223  	ge.nodes[nodeID] = &Node{
   224  		id:       nodeID,
   225  		nodeType: CollectionFinisherNodeType,
   226  		data:     nodeData,
   227  	}
   228  }
   229  
   230  func (ge *GraphExecutor) addExecutionQueryNode(queryID string, q query, resolvedProperties map[string]*llx.Primitive, datapointTypeMap map[string]string) {
   231  	n, ok := ge.nodes[NodeID(queryID)]
   232  	if ok {
   233  		return
   234  	}
   235  
   236  	codeBundle := q.codeBundle
   237  
   238  	nodeData := &ExecutionQueryNodeData{
   239  		queryID:            queryID,
   240  		codeBundle:         codeBundle,
   241  		requiredProperties: map[string]*executionQueryProperty{},
   242  		runState:           notReadyQueryNotReady,
   243  		runQueue:           ge.executionManager.runQueue,
   244  	}
   245  
   246  	n = &Node{
   247  		id:       NodeID(string(ExecutionQueryNodeType) + "/" + queryID),
   248  		nodeType: ExecutionQueryNodeType,
   249  		data:     nodeData,
   250  	}
   251  
   252  	// These don't report anything, but they make the graph connected
   253  	for _, checksum := range CodepointChecksums(codeBundle) {
   254  		var expectedType *string
   255  		if t, ok := datapointTypeMap[checksum]; ok {
   256  			expectedType = &t
   257  		}
   258  		ge.addDatapointNode(checksum, expectedType, nil)
   259  		ge.addEdge(n.id, NodeID(checksum))
   260  	}
   261  
   262  	for name, checksum := range q.requiredProps {
   263  		nodeData.requiredProperties[name] = &executionQueryProperty{
   264  			name:     name,
   265  			checksum: checksum,
   266  			resolved: false,
   267  			value:    nil,
   268  		}
   269  		ge.addEdge(NodeID(checksum), n.id)
   270  	}
   271  
   272  	for name, val := range resolvedProperties {
   273  		if rp, ok := nodeData.requiredProperties[name]; !ok {
   274  			nodeData.requiredProperties[name] = &executionQueryProperty{
   275  				name:     name,
   276  				checksum: "",
   277  				resolved: true,
   278  				value: &llx.Result{
   279  					Data: val,
   280  				},
   281  			}
   282  		} else {
   283  			rp.value = &llx.Result{
   284  				Data: val,
   285  			}
   286  			rp.resolved = true
   287  		}
   288  	}
   289  
   290  	ge.nodes[n.id] = n
   291  }
   292  
   293  func (ge *GraphExecutor) addDatapointNode(datapointChecksum string, expectedType *string, res *llx.RawResult) {
   294  	n, ok := ge.nodes[NodeID(datapointChecksum)]
   295  	if ok {
   296  		return
   297  	}
   298  
   299  	nodeData := &DatapointNodeData{
   300  		expectedType: expectedType,
   301  		isReported:   res != nil,
   302  		res:          res,
   303  	}
   304  	n = &Node{
   305  		id:       NodeID(datapointChecksum),
   306  		nodeType: DatapointNodeType,
   307  		data:     nodeData,
   308  	}
   309  
   310  	ge.nodes[NodeID(datapointChecksum)] = n
   311  }
   312  
   313  // prioritizeNode assigns each node in the graph a priority. The priority makes graph traversal
   314  // act like a breadth-first search, minimizing the number of recalculations needed for each node.
   315  // For example, the reporting job with a query id of the asset will have a lower priority than
   316  // reporting jobs which have a query id of a policy mrn. In a similar way, the reporting jobs
   317  // that have a query id of policy mrns have a lower priority than reporting jobs for queries.
   318  // This means that if a batch of data arrives, all query reporting jobs will be recalculated first.
   319  // The policy reporting jobs will be calculated after that, and then the asset reporting job.
   320  func prioritizeNode(nodes map[NodeID]*Node, edges map[NodeID][]NodeID, priorityMap map[NodeID]int, n NodeID) int {
   321  	if d, ok := priorityMap[n]; ok {
   322  		return d
   323  	}
   324  	childrenMaxDepth := 0
   325  	for _, v := range edges[n] {
   326  		childDepth := prioritizeNode(nodes, edges, priorityMap, v)
   327  		if childDepth > childrenMaxDepth {
   328  			childrenMaxDepth = childDepth
   329  		}
   330  	}
   331  	myDepth := childrenMaxDepth + 1
   332  	priorityMap[n] = myDepth
   333  	return myDepth
   334  }
   335  
   336  func checkVersion(codeBundle *llx.CodeBundle, curMin *vrs.Version) bool {
   337  	if curMin != nil && codeBundle.MinMondooVersion != "" {
   338  		requiredVer := codeBundle.MinMondooVersion
   339  		reqMin, err := vrs.NewVersion(requiredVer)
   340  		if err == nil && curMin.LessThan(reqMin) {
   341  			return false
   342  		}
   343  	}
   344  	return true
   345  }
   346  
   347  func insertSorted(ss []string, s string) []string {
   348  	i := sort.SearchStrings(ss, s)
   349  	if i < len(ss) && ss[i] == s {
   350  		return ss
   351  	}
   352  	ss = append(ss, "")
   353  	copy(ss[i+1:], ss[i:])
   354  	ss[i] = s
   355  	return ss
   356  }
   357  
   358  func CodepointChecksums(codeBundle *llx.CodeBundle) []string {
   359  	return append(EntrypointChecksums(codeBundle),
   360  		DatapointChecksums(codeBundle)...)
   361  }
   362  
   363  func EntrypointChecksums(codeBundle *llx.CodeBundle) []string {
   364  	var checksums []string
   365  
   366  	checksums = make([]string, len(codeBundle.CodeV2.Blocks[0].Entrypoints))
   367  	for i, ref := range codeBundle.CodeV2.Blocks[0].Entrypoints {
   368  		checksums[i] = codeBundle.CodeV2.Checksums[ref]
   369  	}
   370  
   371  	return checksums
   372  }
   373  
   374  func DatapointChecksums(codeBundle *llx.CodeBundle) []string {
   375  	var checksums []string
   376  
   377  	checksums = make([]string, len(codeBundle.CodeV2.Blocks[0].Datapoints))
   378  	for i, ref := range codeBundle.CodeV2.Blocks[0].Datapoints {
   379  		checksums[i] = codeBundle.CodeV2.Checksums[ref]
   380  	}
   381  
   382  	return checksums
   383  }