github.com/Jeffail/benthos/v3@v3.65.0/lib/processor/hash_sample_test.go (about)

     1  package processor
     2  
     3  import (
     4  	"reflect"
     5  	"testing"
     6  
     7  	"github.com/Jeffail/benthos/v3/lib/log"
     8  	"github.com/Jeffail/benthos/v3/lib/message"
     9  	"github.com/Jeffail/benthos/v3/lib/metrics"
    10  	"github.com/Jeffail/benthos/v3/lib/response"
    11  )
    12  
    13  func TestHashSample(t *testing.T) {
    14  	doc1 := []byte(`some text`)       // hashed to 44.82100
    15  	doc2 := []byte(`some other text`) // hashed to 94.99035
    16  	doc3 := []byte(`abc`)             // hashed to 26.84963
    17  
    18  	tt := []struct {
    19  		name     string
    20  		input    []byte
    21  		min      float64
    22  		max      float64
    23  		expected []byte
    24  	}{
    25  		{"100% sample", doc1, 0.0, 101.0, doc1},
    26  		{"0% sample", doc1, 0.0, 0.0, nil},
    27  
    28  		{"lower 50% sample", doc1, 0.0, 50.0, doc1},
    29  		{"upper 50% sample", doc1, 50.0, 101.0, nil},
    30  
    31  		{"lower 33% sample", doc1, 0.0, 33.0, nil},
    32  		{"mid 33% sample", doc1, 33.0, 66.0, doc1},
    33  		{"upper 33% sample", doc1, 66.0, 101.0, nil},
    34  
    35  		// -----
    36  
    37  		{"100% sample", doc2, 0.0, 101.0, doc2},
    38  		{"0% sample", doc2, 0.0, 0.0, nil},
    39  
    40  		{"lower 50% sample", doc2, 0.0, 50.0, nil},
    41  		{"upper 50% sample", doc2, 50.0, 101.0, doc2},
    42  
    43  		{"lower 33% sample", doc2, 0.0, 33.0, nil},
    44  		{"mid 33% sample", doc2, 33.0, 66.0, nil},
    45  		{"upper 33% sample", doc2, 66.0, 101.0, doc2},
    46  
    47  		// -----
    48  
    49  		{"100% sample", doc3, 0.0, 101.0, doc3},
    50  		{"0% sample", doc3, 0.0, 0.0, nil},
    51  
    52  		{"lower 50% sample", doc3, 0.0, 50.0, doc3},
    53  		{"upper 50% sample", doc3, 50.0, 101.0, nil},
    54  
    55  		{"lower 33% sample", doc3, 0.0, 33.0, doc3},
    56  		{"mid 33% sample", doc3, 33.0, 66.0, nil},
    57  		{"upper 33% sample", doc3, 66.0, 101.0, nil},
    58  	}
    59  
    60  	for _, tc := range tt {
    61  		t.Run(tc.name, func(t *testing.T) {
    62  			conf := NewConfig()
    63  			conf.HashSample.RetainMin = tc.min
    64  			conf.HashSample.RetainMax = tc.max
    65  			conf.HashSample.Parts = []int{0}
    66  
    67  			testLog := log.Noop()
    68  			proc, err := NewHashSample(conf, nil, testLog, metrics.Noop())
    69  			if err != nil {
    70  				t.Error(err)
    71  				return
    72  			}
    73  
    74  			msgIn := message.New([][]byte{tc.input})
    75  			msgs, _ := proc.ProcessMessage(msgIn)
    76  
    77  			if tc.expected != nil && len(msgs) == 0 {
    78  				t.Error("Message told not to propagate even if it was expected to propagate")
    79  			}
    80  			if tc.expected == nil && len(msgs) != 0 {
    81  				t.Error("Message told to propagate even if it was not expected to propagate")
    82  			}
    83  			if tc.expected != nil && len(msgs) > 0 {
    84  				if !reflect.DeepEqual(message.GetAllBytes(msgs[0])[0], tc.expected) {
    85  					t.Errorf("Unexpected sampling: EXPECTED: %v, ACTUAL: %v", tc.expected, message.GetAllBytes(msgs[0])[0])
    86  				}
    87  			}
    88  		})
    89  	}
    90  }
    91  
    92  func TestHashSamplePartSelection(t *testing.T) {
    93  	doc1 := []byte(`some text`) // hashed to 44.82100
    94  
    95  	tt := []struct {
    96  		name       string
    97  		insertPart int
    98  		selectPart int
    99  	}{
   100  		{"index 0", 0, 0},
   101  		{"index 1", 1, 1},
   102  		{"index 2", 2, 2},
   103  		{"index 3", 3, 3},
   104  		{"index 4", 4, 4},
   105  		{"index -1", 4, -1},
   106  		{"index -2", 3, -2},
   107  		{"index -3", 2, -3},
   108  		{"index -4", 1, -4},
   109  		{"index -5", 0, -5},
   110  	}
   111  
   112  	for _, tc := range tt {
   113  		t.Run(tc.name, func(t *testing.T) {
   114  			conf := NewConfig()
   115  			conf.HashSample.RetainMin = 44.8
   116  			conf.HashSample.RetainMax = 44.9
   117  			conf.HashSample.Parts = []int{tc.selectPart}
   118  
   119  			testLog := log.Noop()
   120  			proc, err := NewHashSample(conf, nil, testLog, metrics.Noop())
   121  			if err != nil {
   122  				t.Error(err)
   123  				return
   124  			}
   125  
   126  			parts := make([][]byte, 5)
   127  			for i := range parts {
   128  				parts[i] = []byte("FOO")
   129  			}
   130  			parts[tc.insertPart] = doc1
   131  
   132  			msgIn := message.New(parts)
   133  			msgs, _ := proc.ProcessMessage(msgIn)
   134  			if len(msgs) > 0 {
   135  				if !reflect.DeepEqual(msgIn, msgs[0]) {
   136  					t.Error("Message told to propagate but not given")
   137  				}
   138  			} else {
   139  				t.Error("Message told not to propagate")
   140  			}
   141  		})
   142  	}
   143  }
   144  
   145  func TestHashSampleBoundsCheck(t *testing.T) {
   146  	conf := NewConfig()
   147  	conf.HashSample.Parts = []int{5}
   148  
   149  	testLog := log.Noop()
   150  	proc, err := NewHashSample(conf, nil, testLog, metrics.Noop())
   151  	if err != nil {
   152  		t.Fatal(err)
   153  	}
   154  
   155  	msgIn := message.New([][]byte{})
   156  	msgs, res := proc.ProcessMessage(msgIn)
   157  	if len(msgs) > 0 {
   158  		t.Error("OOB message told to propagate")
   159  	}
   160  
   161  	if exp, act := response.NewAck(), res; !reflect.DeepEqual(exp, act) {
   162  		t.Errorf("Wrong response returned: %v != %v", act, exp)
   163  	}
   164  }
   165  
   166  func TestHashSampleNegBoundsCheck(t *testing.T) {
   167  	conf := NewConfig()
   168  	conf.HashSample.Parts = []int{-5}
   169  
   170  	testLog := log.Noop()
   171  	proc, err := NewHashSample(conf, nil, testLog, metrics.Noop())
   172  	if err != nil {
   173  		t.Fatal(err)
   174  	}
   175  
   176  	msgIn := message.New([][]byte{})
   177  	msgs, res := proc.ProcessMessage(msgIn)
   178  	if len(msgs) > 0 {
   179  		t.Error("OOB message told to propagate")
   180  	}
   181  
   182  	if exp, act := response.NewAck(), res; !reflect.DeepEqual(exp, act) {
   183  		t.Errorf("Wrong response returned: %v != %v", act, exp)
   184  	}
   185  }