github.com/Jeffail/benthos/v3@v3.65.0/lib/input/csv_test.go (about) 1 package input 2 3 import ( 4 "bytes" 5 "context" 6 "errors" 7 "io" 8 "os" 9 "path" 10 "path/filepath" 11 "testing" 12 "time" 13 14 "github.com/Jeffail/benthos/v3/lib/input/reader" 15 "github.com/Jeffail/benthos/v3/lib/log" 16 "github.com/Jeffail/benthos/v3/lib/metrics" 17 "github.com/Jeffail/benthos/v3/lib/response" 18 "github.com/Jeffail/benthos/v3/lib/types" 19 "github.com/stretchr/testify/assert" 20 "github.com/stretchr/testify/require" 21 ) 22 23 func TestCSVReaderHappy(t *testing.T) { 24 var handle bytes.Buffer 25 26 for _, msg := range []string{ 27 "header1,header2,header3", 28 "foo1,foo2,foo3", 29 "bar1,bar2,bar3", 30 "baz1,baz2,baz3", 31 } { 32 handle.Write([]byte(msg)) 33 handle.Write([]byte("\n")) 34 } 35 36 ctored := false 37 f, err := newCSVReader( 38 func(ctx context.Context) (io.Reader, error) { 39 if ctored { 40 return nil, io.EOF 41 } 42 ctored = true 43 return &handle, nil 44 }, 45 func(ctx context.Context) {}, 46 ) 47 require.NoError(t, err) 48 49 t.Cleanup(func() { 50 f.CloseAsync() 51 require.NoError(t, f.WaitForClose(time.Second)) 52 }) 53 54 require.NoError(t, f.ConnectWithContext(context.Background())) 55 56 for _, exp := range []string{ 57 `{"header1":"foo1","header2":"foo2","header3":"foo3"}`, 58 `{"header1":"bar1","header2":"bar2","header3":"bar3"}`, 59 `{"header1":"baz1","header2":"baz2","header3":"baz3"}`, 60 } { 61 var resMsg types.Message 62 resMsg, _, err = f.ReadWithContext(context.Background()) 63 require.NoError(t, err) 64 65 assert.Equal(t, exp, string(resMsg.Get(0).Get())) 66 } 67 68 _, _, err = f.ReadWithContext(context.Background()) 69 assert.Equal(t, types.ErrNotConnected, err) 70 71 err = f.ConnectWithContext(context.Background()) 72 assert.Equal(t, types.ErrTypeClosed, err) 73 } 74 75 func TestCSVGPaths(t *testing.T) { 76 dir := t.TempDir() 77 78 require.NoError(t, os.WriteFile(filepath.Join(dir, "a.csv"), []byte(`header1,header2,header3 79 foo1,bar1,baz1 80 foo2,bar2,baz2 81 foo3,bar3,baz3 82 `), 0o777)) 83 require.NoError(t, os.WriteFile(filepath.Join(dir, "b.csv"), []byte(`header4,header5,header6 84 foo4,bar4,baz4 85 foo5,bar5,baz5 86 foo6,bar6,baz6 87 `), 0o777)) 88 89 conf := NewConfig() 90 conf.Type = TypeCSVFile 91 conf.CSVFile.Paths = []string{ 92 path.Join(dir, "a.csv"), 93 path.Join(dir, "b.csv"), 94 } 95 96 f, err := New(conf, nil, log.Noop(), metrics.Noop()) 97 require.NoError(t, err) 98 99 t.Cleanup(func() { 100 require.NoError(t, f.WaitForClose(time.Second)) 101 }) 102 103 for _, exp := range []string{ 104 `{"header1":"foo1","header2":"bar1","header3":"baz1"}`, 105 `{"header1":"foo2","header2":"bar2","header3":"baz2"}`, 106 `{"header1":"foo3","header2":"bar3","header3":"baz3"}`, 107 `{"header4":"foo4","header5":"bar4","header6":"baz4"}`, 108 `{"header4":"foo5","header5":"bar5","header6":"baz5"}`, 109 `{"header4":"foo6","header5":"bar6","header6":"baz6"}`, 110 } { 111 m := readMsg(t, f.TransactionChan()) 112 assert.Equal(t, exp, string(m.Get(0).Get())) 113 } 114 } 115 116 func TestCSVGlobPaths(t *testing.T) { 117 dir := t.TempDir() 118 119 require.NoError(t, os.WriteFile(filepath.Join(dir, "a.csv"), []byte(`header1,header2,header3 120 foo1,bar1,baz1 121 foo2,bar2,baz2 122 foo3,bar3,baz3 123 `), 0o777)) 124 require.NoError(t, os.WriteFile(filepath.Join(dir, "b.csv"), []byte(`header4,header5,header6 125 foo4,bar4,baz4 126 foo5,bar5,baz5 127 foo6,bar6,baz6 128 `), 0o777)) 129 130 conf := NewConfig() 131 conf.Type = TypeCSVFile 132 conf.CSVFile.Paths = []string{dir + "/*.csv"} 133 134 f, err := New(conf, nil, log.Noop(), metrics.Noop()) 135 require.NoError(t, err) 136 137 t.Cleanup(func() { 138 require.NoError(t, f.WaitForClose(time.Second)) 139 }) 140 141 for _, exp := range []string{ 142 `{"header1":"foo1","header2":"bar1","header3":"baz1"}`, 143 `{"header1":"foo2","header2":"bar2","header3":"baz2"}`, 144 `{"header1":"foo3","header2":"bar3","header3":"baz3"}`, 145 `{"header4":"foo4","header5":"bar4","header6":"baz4"}`, 146 `{"header4":"foo5","header5":"bar5","header6":"baz5"}`, 147 `{"header4":"foo6","header5":"bar6","header6":"baz6"}`, 148 } { 149 m := readMsg(t, f.TransactionChan()) 150 assert.Equal(t, exp, string(m.Get(0).Get())) 151 } 152 } 153 154 func TestCSVReaderGroupCount(t *testing.T) { 155 var handle bytes.Buffer 156 157 for _, msg := range []string{ 158 "foo,bar,baz", 159 "foo1,bar1,baz1", 160 "foo2,bar2,baz2", 161 "foo3,bar3,baz3", 162 "foo4,bar4,baz4", 163 "foo5,bar5,baz5", 164 "foo6,bar6,baz6", 165 "foo7,bar7,baz7", 166 } { 167 handle.Write([]byte(msg)) 168 handle.Write([]byte("\n")) 169 } 170 171 ctored := false 172 f, err := newCSVReader( 173 func(ctx context.Context) (io.Reader, error) { 174 if ctored { 175 return nil, io.EOF 176 } 177 ctored = true 178 return &handle, nil 179 }, 180 func(ctx context.Context) {}, 181 optCSVSetGroupCount(3), 182 ) 183 require.NoError(t, err) 184 185 t.Cleanup(func() { 186 f.CloseAsync() 187 require.NoError(t, f.WaitForClose(time.Second)) 188 }) 189 190 require.NoError(t, f.ConnectWithContext(context.Background())) 191 192 for _, exp := range [][]string{ 193 { 194 `{"bar":"bar1","baz":"baz1","foo":"foo1"}`, 195 `{"bar":"bar2","baz":"baz2","foo":"foo2"}`, 196 `{"bar":"bar3","baz":"baz3","foo":"foo3"}`, 197 }, 198 { 199 `{"bar":"bar4","baz":"baz4","foo":"foo4"}`, 200 `{"bar":"bar5","baz":"baz5","foo":"foo5"}`, 201 `{"bar":"bar6","baz":"baz6","foo":"foo6"}`, 202 }, 203 { 204 `{"bar":"bar7","baz":"baz7","foo":"foo7"}`, 205 }, 206 } { 207 var resMsg types.Message 208 resMsg, _, err = f.ReadWithContext(context.Background()) 209 require.NoError(t, err) 210 211 require.Equal(t, len(exp), resMsg.Len()) 212 for i := 0; i < len(exp); i++ { 213 assert.Equal(t, exp[i], string(resMsg.Get(i).Get())) 214 } 215 } 216 217 _, _, err = f.ReadWithContext(context.Background()) 218 assert.Equal(t, types.ErrNotConnected, err) 219 220 err = f.ConnectWithContext(context.Background()) 221 assert.Equal(t, types.ErrTypeClosed, err) 222 } 223 224 func TestCSVReadersTwoFiles(t *testing.T) { 225 var handleOne, handleTwo bytes.Buffer 226 227 for _, msg := range []string{ 228 "header1,header2,header3", 229 "foo1,foo2,foo3", 230 "bar1,bar2,bar3", 231 "baz1,baz2,baz3", 232 } { 233 handleOne.Write([]byte(msg)) 234 handleOne.Write([]byte("\n")) 235 } 236 237 for _, msg := range []string{ 238 "header4,header5,header6", 239 "foo1,foo2,foo3", 240 "bar1,bar2,bar3", 241 "baz1,baz2,baz3", 242 } { 243 handleTwo.Write([]byte(msg)) 244 handleTwo.Write([]byte("\n")) 245 } 246 247 consumedFirst, consumedSecond := false, false 248 249 f, err := newCSVReader( 250 func(ctx context.Context) (io.Reader, error) { 251 if !consumedFirst { 252 consumedFirst = true 253 return &handleOne, nil 254 } else if !consumedSecond { 255 consumedSecond = true 256 return &handleTwo, nil 257 } 258 return nil, io.EOF 259 }, 260 func(ctx context.Context) {}, 261 ) 262 require.NoError(t, err) 263 264 t.Cleanup(func() { 265 f.CloseAsync() 266 require.NoError(t, f.WaitForClose(time.Second)) 267 }) 268 269 require.NoError(t, f.ConnectWithContext(context.Background())) 270 271 for i, exp := range []string{ 272 `{"header1":"foo1","header2":"foo2","header3":"foo3"}`, 273 `{"header1":"bar1","header2":"bar2","header3":"bar3"}`, 274 `{"header1":"baz1","header2":"baz2","header3":"baz3"}`, 275 `{"header4":"foo1","header5":"foo2","header6":"foo3"}`, 276 `{"header4":"bar1","header5":"bar2","header6":"bar3"}`, 277 `{"header4":"baz1","header5":"baz2","header6":"baz3"}`, 278 } { 279 var resMsg types.Message 280 var ackFn reader.AsyncAckFn 281 resMsg, ackFn, err = f.ReadWithContext(context.Background()) 282 if err == types.ErrNotConnected { 283 require.NoError(t, f.ConnectWithContext(context.Background())) 284 resMsg, ackFn, err = f.ReadWithContext(context.Background()) 285 } 286 require.NoError(t, err, i) 287 assert.Equal(t, exp, string(resMsg.Get(0).Get()), i) 288 _ = ackFn(context.Background(), response.NewAck()) 289 } 290 291 _, _, err = f.ReadWithContext(context.Background()) 292 assert.Equal(t, types.ErrNotConnected, err) 293 294 err = f.ConnectWithContext(context.Background()) 295 assert.Equal(t, types.ErrTypeClosed, err) 296 } 297 298 func TestCSVReaderCustomComma(t *testing.T) { 299 var handle bytes.Buffer 300 301 for _, msg := range []string{ 302 "header1|header2|header3", 303 "foo1|foo2|foo3", 304 "bar1|bar2|bar3", 305 "baz1|baz2|baz3", 306 } { 307 handle.Write([]byte(msg)) 308 handle.Write([]byte("\n")) 309 } 310 311 ctored := false 312 f, err := newCSVReader( 313 func(ctx context.Context) (io.Reader, error) { 314 if ctored { 315 return nil, io.EOF 316 } 317 ctored = true 318 return &handle, nil 319 }, 320 func(ctx context.Context) {}, 321 optCSVSetComma('|'), 322 ) 323 require.NoError(t, err) 324 325 t.Cleanup(func() { 326 f.CloseAsync() 327 require.NoError(t, f.WaitForClose(time.Second)) 328 }) 329 330 require.NoError(t, f.ConnectWithContext(context.Background())) 331 332 for _, exp := range []string{ 333 `{"header1":"foo1","header2":"foo2","header3":"foo3"}`, 334 `{"header1":"bar1","header2":"bar2","header3":"bar3"}`, 335 `{"header1":"baz1","header2":"baz2","header3":"baz3"}`, 336 } { 337 var resMsg types.Message 338 resMsg, _, err = f.ReadWithContext(context.Background()) 339 require.NoError(t, err) 340 341 assert.Equal(t, exp, string(resMsg.Get(0).Get())) 342 } 343 344 _, _, err = f.ReadWithContext(context.Background()) 345 assert.Equal(t, types.ErrNotConnected, err) 346 347 err = f.ConnectWithContext(context.Background()) 348 assert.Equal(t, types.ErrTypeClosed, err) 349 } 350 351 func TestCSVReaderRelaxed(t *testing.T) { 352 var handle bytes.Buffer 353 354 for _, msg := range []string{ 355 "header1,header2,header3", 356 "foo1,foo2,foo3", 357 "bar1,bar2,bar3,bar4", 358 "baz1,baz2,baz3", 359 "buz1,buz2", 360 } { 361 handle.Write([]byte(msg)) 362 handle.Write([]byte("\n")) 363 } 364 365 ctored := false 366 f, err := newCSVReader( 367 func(ctx context.Context) (io.Reader, error) { 368 if ctored { 369 return nil, io.EOF 370 } 371 ctored = true 372 return &handle, nil 373 }, 374 func(ctx context.Context) {}, 375 optCSVSetStrict(false), 376 ) 377 require.NoError(t, err) 378 379 t.Cleanup(func() { 380 f.CloseAsync() 381 require.NoError(t, f.WaitForClose(time.Second)) 382 }) 383 384 require.NoError(t, f.ConnectWithContext(context.Background())) 385 386 for _, exp := range []string{ 387 `{"header1":"foo1","header2":"foo2","header3":"foo3"}`, 388 `["bar1","bar2","bar3","bar4"]`, 389 `{"header1":"baz1","header2":"baz2","header3":"baz3"}`, 390 `{"header1":"buz1","header2":"buz2"}`, 391 } { 392 var resMsg types.Message 393 resMsg, _, err = f.ReadWithContext(context.Background()) 394 require.NoError(t, err) 395 396 assert.Equal(t, exp, string(resMsg.Get(0).Get())) 397 } 398 399 _, _, err = f.ReadWithContext(context.Background()) 400 assert.Equal(t, types.ErrNotConnected, err) 401 402 err = f.ConnectWithContext(context.Background()) 403 assert.Equal(t, types.ErrTypeClosed, err) 404 } 405 406 func TestCSVReaderStrict(t *testing.T) { 407 var handle bytes.Buffer 408 409 for _, msg := range []string{ 410 "header1,header2,header3", 411 "foo1,foo2,foo3", 412 "bar1,bar2,bar3,bar4", 413 "baz1,baz2,baz3", 414 "buz1,buz2", 415 } { 416 handle.Write([]byte(msg)) 417 handle.Write([]byte("\n")) 418 } 419 420 ctored := false 421 f, err := newCSVReader( 422 func(ctx context.Context) (io.Reader, error) { 423 if ctored { 424 return nil, io.EOF 425 } 426 ctored = true 427 return &handle, nil 428 }, 429 func(ctx context.Context) {}, 430 optCSVSetStrict(true), 431 ) 432 require.NoError(t, err) 433 434 t.Cleanup(func() { 435 f.CloseAsync() 436 require.NoError(t, f.WaitForClose(time.Second)) 437 }) 438 439 require.NoError(t, f.ConnectWithContext(context.Background())) 440 441 for _, exp := range []interface{}{ 442 `{"header1":"foo1","header2":"foo2","header3":"foo3"}`, 443 errors.New("record on line 3: wrong number of fields"), 444 `{"header1":"baz1","header2":"baz2","header3":"baz3"}`, 445 errors.New("record on line 5: wrong number of fields"), 446 } { 447 var resMsg types.Message 448 resMsg, _, err = f.ReadWithContext(context.Background()) 449 450 switch expT := exp.(type) { 451 case string: 452 require.NoError(t, err) 453 assert.Equal(t, expT, string(resMsg.Get(0).Get())) 454 case error: 455 assert.EqualError(t, err, expT.Error()) 456 } 457 } 458 459 _, _, err = f.ReadWithContext(context.Background()) 460 assert.Equal(t, types.ErrNotConnected, err) 461 462 err = f.ConnectWithContext(context.Background()) 463 assert.Equal(t, types.ErrTypeClosed, err) 464 }