package processor

import (
	"math"
	"time"

	"github.com/Jeffail/benthos/v3/internal/docs"
	"github.com/Jeffail/benthos/v3/lib/log"
	"github.com/Jeffail/benthos/v3/lib/metrics"
	"github.com/Jeffail/benthos/v3/lib/response"
	"github.com/Jeffail/benthos/v3/lib/types"
	"github.com/OneOfOne/xxhash"
)

//------------------------------------------------------------------------------

// init registers the hash_sample processor constructor, its deprecated status,
// and its field documentation with the global Constructors table.
func init() {
	Constructors[TypeHashSample] = TypeSpec{
		constructor: NewHashSample,
		Status:      docs.StatusDeprecated,
		Footnotes: `
## Alternatives

All functionality of this processor has been superseded by the
[bloblang](/docs/components/processors/bloblang) processor.`,
		FieldSpecs: docs.FieldSpecs{
			docs.FieldCommon("retain_min", "The lower percentage of the sample range."),
			docs.FieldCommon("retain_max", "The upper percentage of the sample range."),
			docs.FieldAdvanced("parts", "An array of message indexes within the batch to sample based on. If left empty all messages are included. This field is only applicable when batching messages [at the input level](/docs/configuration/batching).").Array(),
		},
	}
}

//------------------------------------------------------------------------------

// hashSamplingNorm is the constant factor to normalise a uint64 into the
// [0.0, 100.0] range (Sum64 may yield 0 or math.MaxUint64, so both ends are
// reachable).
const hashSamplingNorm = 100.0 / float64(math.MaxUint64)

// scaleNum maps a raw 64-bit hash value onto a percentage in [0.0, 100.0].
func scaleNum(n uint64) float64 {
	return float64(n) * hashSamplingNorm
}

//------------------------------------------------------------------------------

// HashSampleConfig contains configuration fields for the HashSample processor.
47 type HashSampleConfig struct { 48 RetainMin float64 `json:"retain_min" yaml:"retain_min"` 49 RetainMax float64 `json:"retain_max" yaml:"retain_max"` 50 Parts []int `json:"parts" yaml:"parts"` // message parts to hash 51 } 52 53 // NewHashSampleConfig returns a HashSampleConfig with default values. 54 func NewHashSampleConfig() HashSampleConfig { 55 return HashSampleConfig{ 56 RetainMin: 0.0, 57 RetainMax: 10.0, // retain the first [0, 10%) interval 58 Parts: []int{0}, // only consider the 1st part 59 } 60 } 61 62 //------------------------------------------------------------------------------ 63 64 // HashSample is a processor that removes messages based on a sample factor by 65 // hashing its contents. 66 type HashSample struct { 67 conf Config 68 log log.Modular 69 stats metrics.Type 70 71 mCount metrics.StatCounter 72 mDropOOB metrics.StatCounter 73 mDropped metrics.StatCounter 74 mErr metrics.StatCounter 75 mSent metrics.StatCounter 76 mBatchSent metrics.StatCounter 77 } 78 79 // NewHashSample returns a HashSample processor. 80 func NewHashSample( 81 conf Config, mgr types.Manager, log log.Modular, stats metrics.Type, 82 ) (Type, error) { 83 return &HashSample{ 84 conf: conf, 85 log: log, 86 stats: stats, 87 88 mCount: stats.GetCounter("count"), 89 mDropOOB: stats.GetCounter("dropped_part_out_of_bounds"), 90 mDropped: stats.GetCounter("dropped"), 91 mErr: stats.GetCounter("error"), 92 mSent: stats.GetCounter("sent"), 93 mBatchSent: stats.GetCounter("batch.sent"), 94 }, nil 95 } 96 97 //------------------------------------------------------------------------------ 98 99 // ProcessMessage applies the processor to a message, either creating >0 100 // resulting messages or a response to be sent back to the message source. 
101 func (s *HashSample) ProcessMessage(msg types.Message) ([]types.Message, types.Response) { 102 s.mCount.Incr(1) 103 104 hash := xxhash.New64() 105 106 lParts := msg.Len() 107 for _, index := range s.conf.HashSample.Parts { 108 if index < 0 { 109 // Negative indexes count backwards from the end. 110 index = lParts + index 111 } 112 113 // Check boundary of part index. 114 if index < 0 || index >= lParts { 115 s.mDropOOB.Incr(1) 116 s.mDropped.Incr(1) 117 s.log.Debugf("Cannot sample message part %v for parts count: %v\n", index, lParts) 118 return nil, response.NewAck() 119 } 120 121 // Attempt to add part to hash. 122 if _, err := hash.Write(msg.Get(index).Get()); err != nil { 123 s.mErr.Incr(1) 124 s.log.Debugf("Cannot hash message part for sampling: %v\n", err) 125 return nil, response.NewAck() 126 } 127 } 128 129 rate := scaleNum(hash.Sum64()) 130 if rate >= s.conf.HashSample.RetainMin && rate < s.conf.HashSample.RetainMax { 131 s.mBatchSent.Incr(1) 132 s.mSent.Incr(int64(msg.Len())) 133 msgs := [1]types.Message{msg} 134 return msgs[:], nil 135 } 136 137 s.mDropped.Incr(int64(msg.Len())) 138 return nil, response.NewAck() 139 } 140 141 // CloseAsync shuts down the processor and stops processing requests. 142 func (s *HashSample) CloseAsync() { 143 } 144 145 // WaitForClose blocks until the processor has closed down. 146 func (s *HashSample) WaitForClose(timeout time.Duration) error { 147 return nil 148 } 149 150 //------------------------------------------------------------------------------