go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/graph.go (about) 1 // Copyright (c) Mondoo, Inc. 2 // SPDX-License-Identifier: BUSL-1.1 3 4 package internal 5 6 import ( 7 "container/heap" 8 "fmt" 9 "os" 10 "time" 11 12 vrs "github.com/hashicorp/go-version" 13 "github.com/rs/zerolog/log" 14 "go.mondoo.com/cnquery/llx" 15 ) 16 17 type ( 18 NodeType string 19 NodeID = string 20 ) 21 22 type Node struct { 23 id NodeID 24 nodeType NodeType 25 data nodeData 26 } 27 28 // envelope represents data that can be passed 29 // between nodes 30 type envelope struct { 31 res *llx.RawResult 32 } 33 34 // nodeData must be implemented by each node type and will 35 // be attached to the Node struct. Pushing data through the 36 // graph involves 2 calls for each node. First, the graph 37 // will ask the node to consume any new data is has from 38 // the nodes dependants (in edges). Once all the data has 39 // been sent, it will ask the node to recalculate and return 40 // any data that it should send from this node to its out 41 // edges. 42 type nodeData interface { 43 initialize() 44 // consume sends data to this node from a dependant node. 45 // consume should be defer as much work to recalculate as 46 // possible, as recalculate will only be called after all 47 // available dependant data has been sent 48 consume(from NodeID, data *envelope) 49 // recalculate is used to recalculate data for this node. 50 // If nothing has changed and the out edges do not need 51 // to be notified, this function should return nil 52 recalculate() *envelope 53 } 54 55 type GraphExecutor struct { 56 nodes map[NodeID]*Node 57 edges map[NodeID][]NodeID 58 priorityMap map[NodeID]int 59 queryTimeout time.Duration 60 mondooVersion *vrs.Version 61 62 executionManager *executionManager 63 resultChan chan *llx.RawResult 64 doneChan chan struct{} 65 } 66 67 // Execute executes the graph 68 // 69 // The algorithm: 70 // Tell the nodes to initialize themselves. This invalidates 71 // them if needed before any messages are sent. For example, 72 // execution nodes become invalidated if all their property 73 // dependencies are specified or they have no property 74 // dependencies. 75 // 76 // The execution happens in rounds of asking nodes to consume, 77 // and then recalculate, starting with datapoint nodes. A round 78 // starts when a batch of datapoints has been received 79 // 80 // The execution of queries reports datapoints. The nodes that represent 81 // these datapoints are looked up. We first ask these nodes to consume 82 // the results that were received, and put each on in a priority queue. 83 // 84 // For each node in the priority, we ask it to recalculate itself. If 85 // recalculate returns non-nil, we call consume on each out edge and 86 // put those nodes in the priority queue. 87 // 88 // The round ends when the priority queue is empty. At the end of the round, 89 // the reporting graph will be fully up-to-date. Because the graph is acyclic 90 // and we assign a priority to each node, each node in the graph should only 91 // recalculate at most once in each round 92 func (ge *GraphExecutor) Execute() error { 93 ge.executionManager.Start() 94 95 // Trigger the execution nodes 96 maxPriority := len(ge.nodes) + 1 97 q := make(PriorityQueue, 0, len(ge.nodes)) 98 heap.Init(&q) 99 for nodeID, n := range ge.nodes { 100 n.data.initialize() 101 heap.Push(&q, &Item{ 102 priority: maxPriority, 103 receiver: nodeID, 104 sender: "__initialize__", 105 }) 106 } 107 108 done := false 109 var err error 110 OUTER: 111 for { 112 // process queue 113 for q.Len() > 0 { 114 item := heap.Pop(&q).(*Item) 115 116 n := ge.nodes[item.receiver] 117 dataToSend := n.data.recalculate() 118 log.Trace(). 119 Str("from", item.sender). 120 Str("to", item.receiver). 121 Msg("recalculate result") 122 123 if dataToSend != nil { 124 edges := ge.edges[item.receiver] 125 for _, v := range edges { 126 log.Trace(). 127 Str("from", item.receiver). 128 Str("to", v). 129 Bool("hasResult", dataToSend.res != nil). 130 Msg("consume result") 131 childNode := ge.nodes[v] 132 childNode.data.consume(n.id, dataToSend) 133 heap.Push(&q, &Item{ 134 priority: ge.priorityMap[v], 135 receiver: v, 136 sender: item.receiver, 137 }) 138 } 139 } 140 } 141 142 if done { 143 break OUTER 144 } 145 146 // Wait for message 147 select { 148 case res := <-ge.resultChan: 149 nodeID := res.CodeID 150 n := ge.nodes[nodeID] 151 n.data.consume("", &envelope{res: res}) 152 heap.Push(&q, &Item{ 153 priority: maxPriority, 154 receiver: nodeID, 155 sender: "", 156 }) 157 case <-ge.doneChan: 158 done = true 159 case err = <-ge.executionManager.Err(): 160 break OUTER 161 } 162 // drain all available messages 163 DRAIN: 164 for { 165 select { 166 case res := <-ge.resultChan: 167 nodeID := res.CodeID 168 n := ge.nodes[nodeID] 169 n.data.consume("", &envelope{res: res}) 170 heap.Push(&q, &Item{ 171 priority: maxPriority, 172 receiver: nodeID, 173 sender: "", 174 }) 175 default: 176 break DRAIN 177 } 178 } 179 } 180 181 ge.executionManager.Stop() 182 return err 183 } 184 185 func (ge *GraphExecutor) Debug() { 186 if val, ok := os.LookupEnv("DEBUG"); ok && (val == "1" || val == "true") { 187 } else { 188 return 189 } 190 f, err := os.Create("mondoo-debug-resolved-policy.dot") 191 if err != nil { 192 log.Error().Err(err).Msg("failed to write debug graph") 193 return 194 } 195 defer f.Close() 196 197 f.WriteString("digraph \"resolvedpolicy\" {\n") 198 for k, n := range ge.nodes { 199 var shape string 200 label := fmt.Sprintf("priority\n%d\ntype\n%s\n", ge.priorityMap[k], n.nodeType) 201 switch n.nodeType { 202 case ExecutionQueryNodeType: 203 shape = "circle" 204 nodeData := n.data.(*ExecutionQueryNodeData) 205 label = fmt.Sprintf("%squery_id\n%s", label, nodeData.queryID) 206 case DatapointNodeType: 207 shape = "invtriangle" 208 maxLen := 6 209 if len(k) < 6 { 210 maxLen = len(k) 211 } 212 label = fmt.Sprintf("%schecksum\n%s...", label, k[:maxLen]) 213 case DatapointCollectorNodeType: 214 shape = "cds" 215 case CollectionFinisherNodeType: 216 shape = "hexagon" 217 } 218 fmt.Fprintf(f, "\t%q [group=%s shape=%s label=%q]\n", k, n.nodeType, shape, label) 219 } 220 221 for from, tos := range ge.edges { 222 for _, to := range tos { 223 fmt.Fprintf(f, "\t%q -> %q\n", from, to) 224 } 225 } 226 f.WriteString("}") 227 228 if err := f.Close(); err != nil { 229 log.Error().Err(err).Msg("failed to write debug graph") 230 return 231 } 232 233 return 234 }