github.com/Jeffail/benthos/v3@v3.65.0/lib/input/sequence_test.go (about) 1 package input 2 3 import ( 4 "fmt" 5 "os" 6 "path/filepath" 7 "sort" 8 "testing" 9 "time" 10 11 "github.com/Jeffail/benthos/v3/lib/log" 12 "github.com/Jeffail/benthos/v3/lib/metrics" 13 "github.com/Jeffail/benthos/v3/lib/response" 14 "github.com/Jeffail/benthos/v3/lib/types" 15 "github.com/stretchr/testify/assert" 16 "github.com/stretchr/testify/require" 17 ) 18 19 func writeFiles(t *testing.T, dir string, nameToContent map[string]string) { 20 t.Helper() 21 22 for k, v := range nameToContent { 23 require.NoError(t, os.WriteFile(filepath.Join(dir, k), []byte(v), 0o600)) 24 } 25 } 26 27 func TestSequenceHappy(t *testing.T) { 28 t.Parallel() 29 30 tmpDir := t.TempDir() 31 32 files := map[string]string{ 33 "f1": "foo\nbar\nbaz", 34 "f2": "buz\nbev\nbif\n", 35 "f3": "qux\nquz\nqev", 36 } 37 38 writeFiles(t, tmpDir, files) 39 40 conf := NewConfig() 41 conf.Type = TypeSequence 42 43 for _, k := range []string{"f1", "f2", "f3"} { 44 inConf := NewConfig() 45 inConf.Type = TypeFile 46 inConf.File.Path = filepath.Join(tmpDir, k) 47 conf.Sequence.Inputs = append(conf.Sequence.Inputs, inConf) 48 } 49 50 rdr, err := New(conf, types.NoopMgr(), log.Noop(), metrics.Noop()) 51 require.NoError(t, err) 52 53 exp, act := []string{ 54 "foo", "bar", "baz", "buz", "bev", "bif", "qux", "quz", "qev", 55 }, []string{} 56 57 consumeLoop: 58 for { 59 select { 60 case tran, open := <-rdr.TransactionChan(): 61 if !open { 62 break consumeLoop 63 } 64 assert.Equal(t, 1, tran.Payload.Len()) 65 act = append(act, string(tran.Payload.Get(0).Get())) 66 select { 67 case tran.ResponseChan <- response.NewAck(): 68 case <-time.After(time.Minute): 69 t.Fatalf("failed to ack after: %v", act) 70 } 71 case <-time.After(time.Minute): 72 t.Fatalf("Failed to consume message after: %v", act) 73 } 74 } 75 76 assert.Equal(t, exp, act) 77 78 rdr.CloseAsync() 79 assert.NoError(t, rdr.WaitForClose(time.Second)) 80 } 81 82 func TestSequenceJoins(t *testing.T) { 83 t.Parallel() 84 85 tmpDir := t.TempDir() 86 87 files := map[string]string{ 88 "csv1": "id,name,age\naaa,A,20\nbbb,B,21\nccc,B,22\n", 89 "csv2": "id,hobby\nccc,fencing\naaa,running\naaa,gaming\n", 90 "ndjson1": `{"id":"aaa","stuff":{"first":"foo"}} 91 {"id":"bbb","stuff":{"first":"bar"}} 92 {"id":"aaa","stuff":{"second":"baz"}}`, 93 } 94 95 writeFiles(t, tmpDir, files) 96 97 conf := NewConfig() 98 conf.Type = TypeSequence 99 conf.Sequence.ShardedJoin.IDPath = "id" 100 conf.Sequence.ShardedJoin.Iterations = 1 101 conf.Sequence.ShardedJoin.Type = "full-outter" 102 103 csvConf := NewConfig() 104 csvConf.Type = TypeCSVFile 105 csvConf.CSVFile.Paths = []string{ 106 filepath.Join(tmpDir, "csv1"), 107 filepath.Join(tmpDir, "csv2"), 108 } 109 conf.Sequence.Inputs = append(conf.Sequence.Inputs, csvConf) 110 for _, k := range []string{"ndjson1"} { 111 inConf := NewConfig() 112 inConf.Type = TypeFile 113 inConf.File.Path = filepath.Join(tmpDir, k) 114 conf.Sequence.Inputs = append(conf.Sequence.Inputs, inConf) 115 } 116 117 rdr, err := New(conf, types.NoopMgr(), log.Noop(), metrics.Noop()) 118 require.NoError(t, err) 119 120 exp, act := []string{ 121 `{"age":"20","hobby":["running","gaming"],"id":"aaa","name":"A","stuff":{"first":"foo","second":"baz"}}`, 122 `{"age":"21","id":"bbb","name":"B","stuff":{"first":"bar"}}`, 123 `{"age":"22","hobby":"fencing","id":"ccc","name":"B"}`, 124 }, []string{} 125 126 consumeLoop: 127 for { 128 select { 129 case tran, open := <-rdr.TransactionChan(): 130 if !open { 131 break consumeLoop 132 } 133 assert.Equal(t, 1, tran.Payload.Len()) 134 act = append(act, string(tran.Payload.Get(0).Get())) 135 select { 136 case tran.ResponseChan <- response.NewAck(): 137 case <-time.After(time.Minute): 138 t.Fatalf("failed to ack after: %v", act) 139 } 140 case <-time.After(time.Minute): 141 t.Fatalf("Failed to consume message after: %v", act) 142 } 143 } 144 145 sort.Strings(exp) 146 sort.Strings(act) 147 assert.Equal(t, exp, act) 148 149 rdr.CloseAsync() 150 assert.NoError(t, rdr.WaitForClose(time.Second)) 151 } 152 153 func TestSequenceJoinsMergeStrategies(t *testing.T) { 154 t.Parallel() 155 156 testCases := []struct { 157 name string 158 flushOnFinal bool 159 mergeStrat string 160 files map[string]string 161 finalFile string 162 result []string 163 }{ 164 { 165 name: "array from final", 166 flushOnFinal: true, 167 mergeStrat: "array", 168 files: map[string]string{ 169 "csv1": "id,name,age\naaa,A,20\nbbb,B,21\nccc,B,22\n", 170 "csv2": "id,hobby\nccc,fencing\naaa,running\naaa,gaming\n", 171 }, 172 finalFile: "id,stuff\naaa,first\nccc,second\naaa,third\n", 173 result: []string{ 174 `{"age":"20","hobby":["running","gaming"],"id":"aaa","name":"A","stuff":"first"}`, 175 `{"age":"22","hobby":"fencing","id":"ccc","name":"B","stuff":"second"}`, 176 `{"age":"20","hobby":["running","gaming"],"id":"aaa","name":"A","stuff":["first","third"]}`, 177 }, 178 }, 179 { 180 name: "replace from final", 181 flushOnFinal: true, 182 mergeStrat: "replace", 183 files: map[string]string{ 184 "csv1": "id,name,age\naaa,A,20\nbbb,B,21\nccc,B,22\n", 185 "csv2": "id,hobby\nccc,fencing\naaa,running\naaa,gaming\n", 186 }, 187 finalFile: "id,stuff\naaa,first\nccc,second\naaa,third\n", 188 result: []string{ 189 `{"age":"20","hobby":"gaming","id":"aaa","name":"A","stuff":"first"}`, 190 `{"age":"20","hobby":"gaming","id":"aaa","name":"A","stuff":"third"}`, 191 `{"age":"22","hobby":"fencing","id":"ccc","name":"B","stuff":"second"}`, 192 }, 193 }, 194 { 195 name: "keep from final", 196 flushOnFinal: true, 197 mergeStrat: "keep", 198 files: map[string]string{ 199 "csv1": "id,name,age\naaa,A,20\nbbb,B,21\nccc,B,22\n", 200 "csv2": "id,hobby\nccc,fencing\naaa,running\naaa,gaming\n", 201 }, 202 finalFile: "id,stuff\naaa,first\nccc,second\naaa,third\n", 203 result: []string{ 204 `{"age":"20","hobby":"running","id":"aaa","name":"A","stuff":"first"}`, 205 `{"age":"20","hobby":"running","id":"aaa","name":"A","stuff":"first"}`, 206 `{"age":"22","hobby":"fencing","id":"ccc","name":"B","stuff":"second"}`, 207 }, 208 }, 209 } 210 211 for _, test := range testCases { 212 test := test 213 t.Run(test.name, func(t *testing.T) { 214 tmpDir := t.TempDir() 215 216 writeFiles(t, tmpDir, test.files) 217 writeFiles(t, tmpDir, map[string]string{ 218 "final.csv": test.finalFile, 219 }) 220 221 conf := NewConfig() 222 conf.Type = TypeSequence 223 conf.Sequence.ShardedJoin.IDPath = "id" 224 conf.Sequence.ShardedJoin.MergeStrategy = test.mergeStrat 225 if test.flushOnFinal { 226 conf.Sequence.ShardedJoin.Type = "outter" 227 } else { 228 conf.Sequence.ShardedJoin.Type = "full-outter" 229 } 230 conf.Sequence.ShardedJoin.Iterations = 1 231 232 csvConf := NewConfig() 233 csvConf.Type = TypeCSVFile 234 for k := range test.files { 235 csvConf.CSVFile.Paths = append(csvConf.CSVFile.Paths, filepath.Join(tmpDir, k)) 236 } 237 conf.Sequence.Inputs = append(conf.Sequence.Inputs, csvConf) 238 239 finalConf := NewConfig() 240 finalConf.Type = TypeCSVFile 241 finalConf.CSVFile.Paths = []string{filepath.Join(tmpDir, "final.csv")} 242 conf.Sequence.Inputs = append(conf.Sequence.Inputs, finalConf) 243 244 rdr, err := New(conf, types.NoopMgr(), log.Noop(), metrics.Noop()) 245 require.NoError(t, err) 246 247 exp, act := test.result, []string{} 248 249 consumeLoop: 250 for { 251 select { 252 case tran, open := <-rdr.TransactionChan(): 253 if !open { 254 break consumeLoop 255 } 256 assert.Equal(t, 1, tran.Payload.Len()) 257 act = append(act, string(tran.Payload.Get(0).Get())) 258 select { 259 case tran.ResponseChan <- response.NewAck(): 260 case <-time.After(time.Minute): 261 t.Fatalf("failed to ack after: %v", act) 262 } 263 case <-time.After(time.Minute): 264 t.Fatalf("Failed to consume message after: %v", act) 265 } 266 } 267 268 sort.Strings(exp) 269 sort.Strings(act) 270 assert.Equal(t, exp, act) 271 272 rdr.CloseAsync() 273 assert.NoError(t, rdr.WaitForClose(time.Second)) 274 }) 275 } 276 } 277 278 func TestSequenceJoinsBig(t *testing.T) { 279 t.Skip() 280 t.Parallel() 281 282 tmpDir := t.TempDir() 283 284 jsonPath := filepath.Join(tmpDir, "one.ndjson") 285 csvPath := filepath.Join(tmpDir, "two.csv") 286 287 ndjsonFile, err := os.Create(jsonPath) 288 require.NoError(t, err) 289 290 csvFile, err := os.Create(csvPath) 291 require.NoError(t, err) 292 293 conf := NewConfig() 294 conf.Type = TypeSequence 295 conf.Sequence.ShardedJoin.IDPath = "id" 296 conf.Sequence.ShardedJoin.Iterations = 5 297 conf.Sequence.ShardedJoin.Type = "full-outter" 298 299 csvConf := NewConfig() 300 csvConf.Type = TypeCSVFile 301 csvConf.CSVFile.Paths = []string{csvPath} 302 conf.Sequence.Inputs = append(conf.Sequence.Inputs, csvConf) 303 304 jsonConf := NewConfig() 305 jsonConf.Type = TypeFile 306 jsonConf.File.Paths = []string{jsonPath} 307 jsonConf.File.Codec = "lines" 308 conf.Sequence.Inputs = append(conf.Sequence.Inputs, jsonConf) 309 310 totalRows := 1000 311 312 exp, act := []string{}, []string{} 313 314 _, err = csvFile.WriteString("id,bar\n") 315 require.NoError(t, err) 316 for i := 0; i < totalRows; i++ { 317 exp = append(exp, fmt.Sprintf(`{"bar":["bar%v","baz%v"],"foo":"foo%v","id":"%v"}`, i, i, i, i)) 318 319 _, err = fmt.Fprintf(ndjsonFile, "{\"id\":\"%v\",\"foo\":\"foo%v\"}\n", i, i) 320 require.NoError(t, err) 321 322 _, err = fmt.Fprintf(csvFile, "%v,bar%v\n", i, i) 323 require.NoError(t, err) 324 } 325 for i := 0; i < totalRows; i++ { 326 _, err = fmt.Fprintf(csvFile, "%v,baz%v\n", i, i) 327 require.NoError(t, err) 328 } 329 require.NoError(t, ndjsonFile.Close()) 330 require.NoError(t, csvFile.Close()) 331 332 rdr, err := New(conf, types.NoopMgr(), log.Noop(), metrics.Noop()) 333 require.NoError(t, err) 334 335 consumeLoop: 336 for { 337 select { 338 case tran, open := <-rdr.TransactionChan(): 339 if !open { 340 break consumeLoop 341 } 342 assert.Equal(t, 1, tran.Payload.Len()) 343 act = append(act, string(tran.Payload.Get(0).Get())) 344 select { 345 case tran.ResponseChan <- response.NewAck(): 346 case <-time.After(time.Minute): 347 t.Fatalf("failed to ack after: %v", act) 348 } 349 case <-time.After(time.Minute): 350 t.Fatalf("Failed to consume message after: %v", act) 351 } 352 } 353 354 sort.Strings(exp) 355 sort.Strings(act) 356 assert.Equal(t, exp, act) 357 358 rdr.CloseAsync() 359 assert.NoError(t, rdr.WaitForClose(time.Second)) 360 } 361 362 func TestSequenceSad(t *testing.T) { 363 t.Parallel() 364 365 tmpDir := t.TempDir() 366 367 files := map[string]string{ 368 "f1": "foo\nbar\nbaz", 369 "f4": "buz\nbev\nbif\n", 370 } 371 372 writeFiles(t, tmpDir, files) 373 374 conf := NewConfig() 375 conf.Type = TypeSequence 376 377 for _, k := range []string{"f1", "f2", "f3"} { 378 inConf := NewConfig() 379 inConf.Type = TypeFile 380 inConf.File.Path = filepath.Join(tmpDir, k) 381 conf.Sequence.Inputs = append(conf.Sequence.Inputs, inConf) 382 } 383 384 rdr, err := New(conf, types.NoopMgr(), log.Noop(), metrics.Noop()) 385 require.NoError(t, err) 386 387 exp := []string{ 388 "foo", "bar", "baz", 389 } 390 391 for i, str := range exp { 392 select { 393 case tran, open := <-rdr.TransactionChan(): 394 if !open { 395 t.Fatal("closed earlier than expected") 396 } 397 assert.Equal(t, 1, tran.Payload.Len()) 398 assert.Equal(t, str, string(tran.Payload.Get(0).Get())) 399 select { 400 case tran.ResponseChan <- response.NewAck(): 401 case <-time.After(time.Minute): 402 t.Fatalf("failed to ack after: %v", str) 403 } 404 case <-time.After(time.Minute): 405 t.Fatalf("Failed to consume message %v", i) 406 } 407 } 408 409 select { 410 case <-rdr.TransactionChan(): 411 t.Fatal("unexpected transaction") 412 case <-time.After(100 * time.Millisecond): 413 } 414 415 exp = []string{ 416 "buz", "bev", "bif", 417 } 418 419 require.NoError(t, os.Rename(filepath.Join(tmpDir, "f4"), filepath.Join(tmpDir, "f2"))) 420 421 for i, str := range exp { 422 select { 423 case tran, open := <-rdr.TransactionChan(): 424 if !open { 425 t.Fatal("closed earlier than expected") 426 } 427 assert.Equal(t, 1, tran.Payload.Len()) 428 assert.Equal(t, str, string(tran.Payload.Get(0).Get())) 429 select { 430 case tran.ResponseChan <- response.NewAck(): 431 case <-time.After(time.Minute): 432 t.Fatalf("failed to ack after: %v", str) 433 } 434 case <-time.After(time.Minute): 435 t.Fatalf("Failed to consume message %v", i) 436 } 437 } 438 439 rdr.CloseAsync() 440 assert.NoError(t, rdr.WaitForClose(time.Second)) 441 } 442 443 func TestSequenceEarlyTermination(t *testing.T) { 444 t.Parallel() 445 446 tmpDir := t.TempDir() 447 448 writeFiles(t, tmpDir, map[string]string{ 449 "f1": "foo\nbar\nbaz", 450 }) 451 452 conf := NewConfig() 453 conf.Type = TypeSequence 454 455 inConf := NewConfig() 456 inConf.Type = TypeFile 457 inConf.File.Path = filepath.Join(tmpDir, "f1") 458 conf.Sequence.Inputs = append(conf.Sequence.Inputs, inConf) 459 460 rdr, err := New(conf, types.NoopMgr(), log.Noop(), metrics.Noop()) 461 require.NoError(t, err) 462 463 select { 464 case tran, open := <-rdr.TransactionChan(): 465 if !open { 466 t.Fatal("closed earlier than expected") 467 } 468 assert.Equal(t, 1, tran.Payload.Len()) 469 assert.Equal(t, "foo", string(tran.Payload.Get(0).Get())) 470 case <-time.After(time.Minute): 471 t.Fatal("timed out") 472 } 473 474 rdr.CloseAsync() 475 assert.NoError(t, rdr.WaitForClose(time.Second*5)) 476 }