go.mondoo.com/cnquery@v0.0.0-20231005093811-59568235f6ea/mql/internal/collector.go (about) 1 // Copyright (c) Mondoo, Inc. 2 // SPDX-License-Identifier: BUSL-1.1 3 4 package internal 5 6 import ( 7 "sync" 8 "time" 9 10 "github.com/rs/zerolog/log" 11 "go.mondoo.com/cnquery/llx" 12 ) 13 14 const ( 15 // MAX_DATAPOINT is the limit in bytes of any data field. The limit 16 // is used to prevent sending data upstream that is too large for the 17 // server to store. The limit is specified in bytes. 18 // TODO: needed to increase the size for vulnerability reports 19 // we need to size down the vulnerability reports with just current cves and advisories 20 MAX_DATAPOINT = 2 * (1 << 20) 21 ) 22 23 type DatapointCollector interface { 24 SinkData([]*llx.RawResult) 25 } 26 27 type Collector interface { 28 DatapointCollector 29 } 30 31 type BufferedCollector struct { 32 results map[string]*llx.RawResult 33 lock sync.Mutex 34 collector Collector 35 duration time.Duration 36 stopChan chan struct{} 37 wg sync.WaitGroup 38 } 39 40 type BufferedCollectorOpt func(*BufferedCollector) 41 42 func NewBufferedCollector(collector Collector, opts ...BufferedCollectorOpt) *BufferedCollector { 43 c := &BufferedCollector{ 44 results: map[string]*llx.RawResult{}, 45 duration: 5 * time.Second, 46 collector: collector, 47 stopChan: make(chan struct{}), 48 } 49 c.run() 50 return c 51 } 52 53 func (c *BufferedCollector) run() { 54 c.wg.Add(1) 55 go func() { 56 defer c.wg.Done() 57 58 done := false 59 results := []*llx.RawResult{} 60 for { 61 62 c.lock.Lock() 63 for _, rr := range c.results { 64 results = append(results, rr) 65 } 66 for k := range c.results { 67 delete(c.results, k) 68 } 69 70 c.lock.Unlock() 71 72 if len(results) > 0 { 73 c.collector.SinkData(results) 74 } 75 76 results = results[:0] 77 78 if done { 79 return 80 } 81 82 // TODO: we should only use one timer 83 timer := time.NewTimer(c.duration) 84 select { 85 case <-c.stopChan: 86 done = true 87 case <-timer.C: 88 } 89 timer.Stop() 90 } 91 }() 92 } 93 94 func (c *BufferedCollector) FlushAndStop() { 95 close(c.stopChan) 96 c.wg.Wait() 97 } 98 99 func (c *BufferedCollector) SinkData(results []*llx.RawResult) { 100 c.lock.Lock() 101 defer c.lock.Unlock() 102 for _, rr := range results { 103 c.results[rr.CodeID] = rr 104 } 105 } 106 107 type ResultCollector struct { 108 assetMrn string 109 } 110 111 func (c *ResultCollector) toResult(rr *llx.RawResult) *llx.Result { 112 v := rr.Result() 113 if v.Data.Size() > MAX_DATAPOINT { 114 log.Warn(). 115 Str("asset", c.assetMrn). 116 Str("id", rr.CodeID). 117 Msg("executor.scoresheet> not storing datafield because it is too large") 118 119 v = &llx.Result{ 120 Error: "datafield was removed because it is too large", 121 CodeId: v.CodeId, 122 } 123 } 124 return v 125 } 126 127 func (c *ResultCollector) SinkData(results []*llx.RawResult) { 128 if len(results) == 0 { 129 return 130 } 131 resultsToSend := make(map[string]*llx.Result, len(results)) 132 for _, rr := range results { 133 resultsToSend[rr.CodeID] = c.toResult(rr) 134 } 135 136 log.Debug().Msg("Sending datapoints") 137 // TODO 138 } 139 140 type FuncCollector struct { 141 SinkDataFunc func(results []*llx.RawResult) 142 } 143 144 func (c *FuncCollector) SinkData(results []*llx.RawResult) { 145 if len(results) == 0 || c.SinkDataFunc == nil { 146 return 147 } 148 c.SinkDataFunc(results) 149 }