go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/graph.go (about)

     1  // Copyright (c) Mondoo, Inc.
     2  // SPDX-License-Identifier: BUSL-1.1
     3  
     4  package internal
     5  
     6  import (
     7  	"container/heap"
     8  	"fmt"
     9  	"os"
    10  	"time"
    11  
    12  	vrs "github.com/hashicorp/go-version"
    13  	"github.com/rs/zerolog/log"
    14  	"go.mondoo.com/cnquery/llx"
    15  )
    16  
    17  type (
    18  	NodeType string
    19  	NodeID   = string
    20  )
    21  
// Node is a single vertex of the execution graph. It combines an
// identifier and a type tag with the type-specific behaviour stored
// in data.
type Node struct {
	// id is reported as the sender when this node's recalculated
	// data is handed to the nodes on its out edges.
	id       NodeID
	// nodeType tags the kind of node; Debug uses it to choose how the
	// node is drawn.
	nodeType NodeType
	// data holds the node's implementation of initialize, consume and
	// recalculate.
	data     nodeData
}
    27  
// envelope represents data that can be passed
// between nodes
type envelope struct {
	// res is the raw result carried by the envelope. It may be nil:
	// the executor logs whether a result is present before handing
	// the envelope to a consumer.
	res *llx.RawResult
}
    33  
// nodeData must be implemented by each node type and will
// be attached to the Node struct. Pushing data through the
// graph involves 2 calls for each node. First, the graph
// will ask the node to consume any new data it has from
// the node's dependants (in edges). Once all the data has
// been sent, it will ask the node to recalculate and return
// any data that it should send from this node to its out
// edges.
type nodeData interface {
	// initialize is called once for every node at the start of
	// execution, before any data is sent through the graph.
	initialize()
	// consume sends data to this node from a dependant node.
	// consume should defer as much work to recalculate as
	// possible, as recalculate will only be called after all
	// available dependant data has been sent
	consume(from NodeID, data *envelope)
	// recalculate is used to recalculate data for this node.
	// If nothing has changed and the out edges do not need
	// to be notified, this function should return nil
	recalculate() *envelope
}
    54  
// GraphExecutor holds the graph of nodes together with the channels
// and execution manager that Execute uses to drive data through it.
type GraphExecutor struct {
	// nodes contains every node of the graph, keyed by its ID.
	nodes         map[NodeID]*Node
	// edges maps a node ID to the IDs of the nodes on its out edges,
	// i.e. the nodes that consume what it returns from recalculate.
	edges         map[NodeID][]NodeID
	// priorityMap holds the queue priority used when a node is
	// scheduled because another node sent it data.
	priorityMap   map[NodeID]int
	// NOTE(review): queryTimeout and mondooVersion are not read by the
	// methods in this part of the file; presumably they are consumed
	// elsewhere. Confirm against the constructor.
	queryTimeout  time.Duration
	mondooVersion *vrs.Version

	// executionManager is started at the beginning of Execute and
	// stopped at its end; a value on its Err channel aborts execution.
	executionManager *executionManager
	// resultChan delivers results to Execute. Each result is routed to
	// the node whose ID equals the result's CodeID.
	resultChan       chan *llx.RawResult
	// doneChan tells Execute to finish: after a receive on it, the
	// pending work is processed one last time and Execute returns.
	doneChan         chan struct{}
}
    66  
    67  // Execute executes the graph
    68  //
    69  // The algorithm:
    70  // Tell the nodes to initialize themselves. This invalidates
    71  // them if needed before any messages are sent. For example,
    72  // execution nodes become invalidated if all their property
    73  // dependencies are specified or they have no property
    74  // dependencies.
    75  //
    76  // The execution happens in rounds of asking nodes to consume,
    77  // and then recalculate, starting with datapoint nodes. A round
    78  // starts when a batch of datapoints has been received
    79  //
    80  // The execution of queries reports datapoints. The nodes that represent
    81  // these datapoints are looked up. We first ask these nodes to consume
    82  // the results that were received, and put each on in a priority queue.
    83  //
    84  // For each node in the priority, we ask it to recalculate itself. If
    85  // recalculate returns non-nil, we call consume on each out edge and
    86  // put those nodes in the priority queue.
    87  //
    88  // The round ends when the priority queue is empty. At the end of the round,
    89  // the reporting graph will be fully up-to-date. Because the graph is acyclic
    90  // and we assign a priority to each node, each node in the graph should only
    91  // recalculate at most once in each round
    92  func (ge *GraphExecutor) Execute() error {
    93  	ge.executionManager.Start()
    94  
    95  	// Trigger the execution nodes
    96  	maxPriority := len(ge.nodes) + 1
    97  	q := make(PriorityQueue, 0, len(ge.nodes))
    98  	heap.Init(&q)
    99  	for nodeID, n := range ge.nodes {
   100  		n.data.initialize()
   101  		heap.Push(&q, &Item{
   102  			priority: maxPriority,
   103  			receiver: nodeID,
   104  			sender:   "__initialize__",
   105  		})
   106  	}
   107  
   108  	done := false
   109  	var err error
   110  OUTER:
   111  	for {
   112  		// process queue
   113  		for q.Len() > 0 {
   114  			item := heap.Pop(&q).(*Item)
   115  
   116  			n := ge.nodes[item.receiver]
   117  			dataToSend := n.data.recalculate()
   118  			log.Trace().
   119  				Str("from", item.sender).
   120  				Str("to", item.receiver).
   121  				Msg("recalculate result")
   122  
   123  			if dataToSend != nil {
   124  				edges := ge.edges[item.receiver]
   125  				for _, v := range edges {
   126  					log.Trace().
   127  						Str("from", item.receiver).
   128  						Str("to", v).
   129  						Bool("hasResult", dataToSend.res != nil).
   130  						Msg("consume result")
   131  					childNode := ge.nodes[v]
   132  					childNode.data.consume(n.id, dataToSend)
   133  					heap.Push(&q, &Item{
   134  						priority: ge.priorityMap[v],
   135  						receiver: v,
   136  						sender:   item.receiver,
   137  					})
   138  				}
   139  			}
   140  		}
   141  
   142  		if done {
   143  			break OUTER
   144  		}
   145  
   146  		// Wait for message
   147  		select {
   148  		case res := <-ge.resultChan:
   149  			nodeID := res.CodeID
   150  			n := ge.nodes[nodeID]
   151  			n.data.consume("", &envelope{res: res})
   152  			heap.Push(&q, &Item{
   153  				priority: maxPriority,
   154  				receiver: nodeID,
   155  				sender:   "",
   156  			})
   157  		case <-ge.doneChan:
   158  			done = true
   159  		case err = <-ge.executionManager.Err():
   160  			break OUTER
   161  		}
   162  		// drain all available messages
   163  	DRAIN:
   164  		for {
   165  			select {
   166  			case res := <-ge.resultChan:
   167  				nodeID := res.CodeID
   168  				n := ge.nodes[nodeID]
   169  				n.data.consume("", &envelope{res: res})
   170  				heap.Push(&q, &Item{
   171  					priority: maxPriority,
   172  					receiver: nodeID,
   173  					sender:   "",
   174  				})
   175  			default:
   176  				break DRAIN
   177  			}
   178  		}
   179  	}
   180  
   181  	ge.executionManager.Stop()
   182  	return err
   183  }
   184  
   185  func (ge *GraphExecutor) Debug() {
   186  	if val, ok := os.LookupEnv("DEBUG"); ok && (val == "1" || val == "true") {
   187  	} else {
   188  		return
   189  	}
   190  	f, err := os.Create("mondoo-debug-resolved-policy.dot")
   191  	if err != nil {
   192  		log.Error().Err(err).Msg("failed to write debug graph")
   193  		return
   194  	}
   195  	defer f.Close()
   196  
   197  	f.WriteString("digraph \"resolvedpolicy\" {\n")
   198  	for k, n := range ge.nodes {
   199  		var shape string
   200  		label := fmt.Sprintf("priority\n%d\ntype\n%s\n", ge.priorityMap[k], n.nodeType)
   201  		switch n.nodeType {
   202  		case ExecutionQueryNodeType:
   203  			shape = "circle"
   204  			nodeData := n.data.(*ExecutionQueryNodeData)
   205  			label = fmt.Sprintf("%squery_id\n%s", label, nodeData.queryID)
   206  		case DatapointNodeType:
   207  			shape = "invtriangle"
   208  			maxLen := 6
   209  			if len(k) < 6 {
   210  				maxLen = len(k)
   211  			}
   212  			label = fmt.Sprintf("%schecksum\n%s...", label, k[:maxLen])
   213  		case DatapointCollectorNodeType:
   214  			shape = "cds"
   215  		case CollectionFinisherNodeType:
   216  			shape = "hexagon"
   217  		}
   218  		fmt.Fprintf(f, "\t%q [group=%s shape=%s label=%q]\n", k, n.nodeType, shape, label)
   219  	}
   220  
   221  	for from, tos := range ge.edges {
   222  		for _, to := range tos {
   223  			fmt.Fprintf(f, "\t%q -> %q\n", from, to)
   224  		}
   225  	}
   226  	f.WriteString("}")
   227  
   228  	if err := f.Close(); err != nil {
   229  		log.Error().Err(err).Msg("failed to write debug graph")
   230  		return
   231  	}
   232  
   233  	return
   234  }