go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/execution_manager.go

go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/execution_manager.go (about)

     1  // Copyright (c) Mondoo, Inc.
     2  // SPDX-License-Identifier: BUSL-1.1
     3  
     4  package internal
     5  
     6  import (
     7  	"errors"
     8  	"sync"
     9  	"time"
    10  
    11  	"github.com/rs/zerolog/log"
    12  	"go.mondoo.com/cnquery/llx"
    13  )
    14  
// executionManager reads runQueueItems from runQueue, executes their code
// bundles one at a time on a single worker goroutine (see Start), and writes
// every reported result to resultChan. The wg field tracks that worker
// goroutine so Stop can wait for it to exit.
type executionManager struct {
	schema  llx.Schema
	runtime llx.Runtime
	// runQueue is the channel the execution manager will read
	// items that need to be run from
	runQueue chan runQueueItem
	// resultChan is the channel the execution manager will write
	// results to
	resultChan chan *llx.RawResult
	// errChan is used to signal an unrecoverable error. The execution
	// manager writes to this channel
	errChan chan error
	// timeout is the maximum amount of time the executor will wait for a
	// single query to report all of its results before the execution is
	// treated as timed out
	timeout time.Duration
	// stopChan is a channel that is closed when a stop is requested
	stopChan chan struct{}
	wg       sync.WaitGroup
}
    34  
// runQueueItem is one unit of work for the execution manager: the code bundle
// to execute and the property results it is executed with, keyed by property
// name. If any property result carries a non-empty Error, the bundle is not
// executed and error results are reported for its datapoints instead.
type runQueueItem struct {
	codeBundle *llx.CodeBundle
	props      map[string]*llx.Result
}
    39  
    40  func newExecutionManager(schema llx.Schema, runtime llx.Runtime, runQueue chan runQueueItem,
    41  	resultChan chan *llx.RawResult, timeout time.Duration,
    42  ) *executionManager {
    43  	return &executionManager{
    44  		runQueue:   runQueue,
    45  		schema:     schema,
    46  		runtime:    runtime,
    47  		resultChan: resultChan,
    48  		errChan:    make(chan error, 1),
    49  		stopChan:   make(chan struct{}),
    50  		timeout:    timeout,
    51  	}
    52  }
    53  
    54  func (em *executionManager) Start() {
    55  	em.wg.Add(1)
    56  	go func() {
    57  		defer em.wg.Done()
    58  		for {
    59  			// Prioritize stopChan
    60  			select {
    61  			case <-em.stopChan:
    62  				return
    63  			default:
    64  			}
    65  
    66  			select {
    67  			case item, ok := <-em.runQueue:
    68  				if !ok {
    69  					return
    70  				}
    71  				props := make(map[string]*llx.Primitive)
    72  				errMsg := ""
    73  				for k, r := range item.props {
    74  					if r.Error != "" {
    75  						// This case is tricky to handle. If we cannot run the query at
    76  						// all, its unclear what to report for the datapoint. If we
    77  						// report them in, then another query cant report them, at least
    78  						// with the way things are right now. If we don't report them,
    79  						// things will wait around for datapoint results that will never
    80  						// arrive.
    81  						errMsg = "property " + k + " errored: " + r.Error
    82  						break
    83  					}
    84  					props[k] = r.Data
    85  				}
    86  
    87  				if err := em.executeCodeBundle(item.codeBundle, props, errMsg); err != nil {
    88  					// an error is returned if we cannot execute a query. This happens
    89  					// if the lumi runtime doesn't report back expected data, there is
    90  					// a problem with the lumi runtime, or the query is somehow invalid.
    91  					// We need to give up here because the underlying runtime is in a bad
    92  					// state and/or we will not be able to report certain datapoints and
    93  					// we cannot be confident about which ones
    94  					select {
    95  					case em.errChan <- err:
    96  					default:
    97  					}
    98  					return
    99  				}
   100  			case <-em.stopChan:
   101  				return
   102  			}
   103  		}
   104  	}()
   105  }
   106  
// Err returns the channel on which the execution manager signals an
// unrecoverable error. The worker goroutine started by Start offers an error
// to this channel with a non-blocking send and then exits, so it delivers at
// most one error.
func (em *executionManager) Err() chan error {
	return em.errChan
}
   110  
// Stop requests shutdown by closing stopChan and then blocks until the
// goroutine launched by Start has exited. The final wait in executeCodeBundle
// does not observe stopChan, so Stop may block until an in-flight bundle
// finishes or times out.
//
// Stop must be called at most once: a second call would close the
// already-closed stopChan and panic.
func (em *executionManager) Stop() {
	close(em.stopChan)
	em.wg.Wait()
}
   115  
   116  func (em *executionManager) executeCodeBundle(codeBundle *llx.CodeBundle, props map[string]*llx.Primitive, errMsg string) error {
   117  	wg := NewWaitGroup()
   118  
   119  	sendResult := func(rr *llx.RawResult) {
   120  		log.Trace().Str("codeID", rr.CodeID).Msg("received result from executor")
   121  		wg.Done(rr.CodeID)
   122  		select {
   123  		case em.resultChan <- rr:
   124  		case <-em.stopChan:
   125  		}
   126  	}
   127  
   128  	checksums := map[string]struct{}{}
   129  	// Find the list of things we must wait for before execution of this codebundle is considered done
   130  	for _, checksum := range CodepointChecksums(codeBundle) {
   131  		if _, ok := checksums[checksum]; !ok {
   132  			checksums[checksum] = struct{}{}
   133  			// We must use a synchronization primitive because the llx.Run callback
   134  			// is not guaranteed to happen in a single thread
   135  			wg.Add(checksum)
   136  			if errMsg != "" {
   137  				// TODO: this is not entirely correct when looking at things as a whole.
   138  				// Its possible that another query executing will produce a non error.
   139  				// However, datapoint nodes take the first data that was reported. This
   140  				// issue exists in general for any query that errors
   141  				sendResult(&llx.RawResult{
   142  					CodeID: checksum,
   143  					Data: &llx.RawData{
   144  						Error: errors.New(errMsg),
   145  					},
   146  				})
   147  			}
   148  		}
   149  	}
   150  
   151  	if errMsg != "" {
   152  		return nil
   153  	}
   154  
   155  	var executor iExecutor
   156  	var err error
   157  	var codeID string
   158  
   159  	codeID = codeBundle.CodeV2.GetId()
   160  	log.Debug().Str("qrid", codeID).Msg("starting query execution")
   161  	defer func() {
   162  		log.Debug().Str("qrid", codeID).Msg("finished query execution")
   163  	}()
   164  
   165  	// TODO(jaym): sendResult may not be correct. We may need to fill in the
   166  	// checksum
   167  	x, err := llx.NewExecutorV2(codeBundle.CodeV2, em.runtime, props, sendResult)
   168  	if err == nil {
   169  		x.Run()
   170  	}
   171  	executor = x
   172  
   173  	if err != nil {
   174  		return err
   175  	}
   176  
   177  	execDoneChan := make(chan struct{})
   178  	go func() {
   179  		wg.Wait()
   180  		close(execDoneChan)
   181  	}()
   182  
   183  	var errOut error
   184  
   185  	timer := time.NewTimer(em.timeout)
   186  	defer timer.Stop()
   187  	select {
   188  	case <-timer.C:
   189  		log.Error().Dur("timeout", em.timeout).Str("qrid", codeID).Msg("execution timed out")
   190  		errOut = errQueryTimeout
   191  	case <-execDoneChan:
   192  	}
   193  
   194  	unreported := wg.Decommission()
   195  	if len(unreported) > 0 {
   196  		log.Warn().Strs("missing", unreported).Str("qrid", codeID).Msg("unreported datapoints")
   197  	}
   198  
   199  	if err := executor.Unregister(); err != nil {
   200  		return err
   201  	}
   202  
   203  	return errOut
   204  }
   205  
// errQueryTimeout is returned by executeCodeBundle when a query does not
// report all of its expected datapoints within the configured timeout.
var errQueryTimeout = errors.New("query execution timed out")
   207  
// iExecutor is the part of an executor that executeCodeBundle relies on once
// a run has completed or timed out.
type iExecutor interface {
	// Unregister is called exactly once per executed bundle, after waiting
	// for results has ended. NOTE(review): presumably detaches the executor's
	// callbacks from the runtime — confirm against the llx implementation.
	Unregister() error
}