go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/collector.go (about)

     1  // Copyright (c) Mondoo, Inc.
     2  // SPDX-License-Identifier: BUSL-1.1
     3  
     4  package internal
     5  
     6  import (
     7  	"sync"
     8  	"time"
     9  
    10  	"github.com/rs/zerolog/log"
    11  	"go.mondoo.com/cnquery/llx"
    12  )
    13  
    14  const (
    15  	// MAX_DATAPOINT is the limit in bytes of any data field. The limit
    16  	// is used to prevent sending data upstream that is too large for the
    17  	// server to store. The limit is specified in bytes.
    18  	// TODO: needed to increase the size for vulnerability reports
    19  	// we need to size down the vulnerability reports with just current cves and advisories
    20  	MAX_DATAPOINT = 2 * (1 << 20)
    21  )
    22  
    23  type DatapointCollector interface {
    24  	SinkData([]*llx.RawResult)
    25  }
    26  
    27  type Collector interface {
    28  	DatapointCollector
    29  }
    30  
    31  type BufferedCollector struct {
    32  	results   map[string]*llx.RawResult
    33  	lock      sync.Mutex
    34  	collector Collector
    35  	duration  time.Duration
    36  	stopChan  chan struct{}
    37  	wg        sync.WaitGroup
    38  }
    39  
    40  type BufferedCollectorOpt func(*BufferedCollector)
    41  
    42  func NewBufferedCollector(collector Collector, opts ...BufferedCollectorOpt) *BufferedCollector {
    43  	c := &BufferedCollector{
    44  		results:   map[string]*llx.RawResult{},
    45  		duration:  5 * time.Second,
    46  		collector: collector,
    47  		stopChan:  make(chan struct{}),
    48  	}
    49  	c.run()
    50  	return c
    51  }
    52  
    53  func (c *BufferedCollector) run() {
    54  	c.wg.Add(1)
    55  	go func() {
    56  		defer c.wg.Done()
    57  
    58  		done := false
    59  		results := []*llx.RawResult{}
    60  		for {
    61  
    62  			c.lock.Lock()
    63  			for _, rr := range c.results {
    64  				results = append(results, rr)
    65  			}
    66  			for k := range c.results {
    67  				delete(c.results, k)
    68  			}
    69  
    70  			c.lock.Unlock()
    71  
    72  			if len(results) > 0 {
    73  				c.collector.SinkData(results)
    74  			}
    75  
    76  			results = results[:0]
    77  
    78  			if done {
    79  				return
    80  			}
    81  
    82  			// TODO: we should only use one timer
    83  			timer := time.NewTimer(c.duration)
    84  			select {
    85  			case <-c.stopChan:
    86  				done = true
    87  			case <-timer.C:
    88  			}
    89  			timer.Stop()
    90  		}
    91  	}()
    92  }
    93  
    94  func (c *BufferedCollector) FlushAndStop() {
    95  	close(c.stopChan)
    96  	c.wg.Wait()
    97  }
    98  
    99  func (c *BufferedCollector) SinkData(results []*llx.RawResult) {
   100  	c.lock.Lock()
   101  	defer c.lock.Unlock()
   102  	for _, rr := range results {
   103  		c.results[rr.CodeID] = rr
   104  	}
   105  }
   106  
   107  type ResultCollector struct {
   108  	assetMrn string
   109  }
   110  
   111  func (c *ResultCollector) toResult(rr *llx.RawResult) *llx.Result {
   112  	v := rr.Result()
   113  	if v.Data.Size() > MAX_DATAPOINT {
   114  		log.Warn().
   115  			Str("asset", c.assetMrn).
   116  			Str("id", rr.CodeID).
   117  			Msg("executor.scoresheet> not storing datafield because it is too large")
   118  
   119  		v = &llx.Result{
   120  			Error:  "datafield was removed because it is too large",
   121  			CodeId: v.CodeId,
   122  		}
   123  	}
   124  	return v
   125  }
   126  
   127  func (c *ResultCollector) SinkData(results []*llx.RawResult) {
   128  	if len(results) == 0 {
   129  		return
   130  	}
   131  	resultsToSend := make(map[string]*llx.Result, len(results))
   132  	for _, rr := range results {
   133  		resultsToSend[rr.CodeID] = c.toResult(rr)
   134  	}
   135  
   136  	log.Debug().Msg("Sending datapoints")
   137  	// TODO
   138  }
   139  
   140  type FuncCollector struct {
   141  	SinkDataFunc func(results []*llx.RawResult)
   142  }
   143  
   144  func (c *FuncCollector) SinkData(results []*llx.RawResult) {
   145  	if len(results) == 0 || c.SinkDataFunc == nil {
   146  		return
   147  	}
   148  	c.SinkDataFunc(results)
   149  }