github.com/Jeffail/benthos/v3@v3.65.0/lib/input/csv_test.go (about)

     1  package input
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"errors"
     7  	"io"
     8  	"os"
     9  	"path"
    10  	"path/filepath"
    11  	"testing"
    12  	"time"
    13  
    14  	"github.com/Jeffail/benthos/v3/lib/input/reader"
    15  	"github.com/Jeffail/benthos/v3/lib/log"
    16  	"github.com/Jeffail/benthos/v3/lib/metrics"
    17  	"github.com/Jeffail/benthos/v3/lib/response"
    18  	"github.com/Jeffail/benthos/v3/lib/types"
    19  	"github.com/stretchr/testify/assert"
    20  	"github.com/stretchr/testify/require"
    21  )
    22  
    23  func TestCSVReaderHappy(t *testing.T) {
    24  	var handle bytes.Buffer
    25  
    26  	for _, msg := range []string{
    27  		"header1,header2,header3",
    28  		"foo1,foo2,foo3",
    29  		"bar1,bar2,bar3",
    30  		"baz1,baz2,baz3",
    31  	} {
    32  		handle.Write([]byte(msg))
    33  		handle.Write([]byte("\n"))
    34  	}
    35  
    36  	ctored := false
    37  	f, err := newCSVReader(
    38  		func(ctx context.Context) (io.Reader, error) {
    39  			if ctored {
    40  				return nil, io.EOF
    41  			}
    42  			ctored = true
    43  			return &handle, nil
    44  		},
    45  		func(ctx context.Context) {},
    46  	)
    47  	require.NoError(t, err)
    48  
    49  	t.Cleanup(func() {
    50  		f.CloseAsync()
    51  		require.NoError(t, f.WaitForClose(time.Second))
    52  	})
    53  
    54  	require.NoError(t, f.ConnectWithContext(context.Background()))
    55  
    56  	for _, exp := range []string{
    57  		`{"header1":"foo1","header2":"foo2","header3":"foo3"}`,
    58  		`{"header1":"bar1","header2":"bar2","header3":"bar3"}`,
    59  		`{"header1":"baz1","header2":"baz2","header3":"baz3"}`,
    60  	} {
    61  		var resMsg types.Message
    62  		resMsg, _, err = f.ReadWithContext(context.Background())
    63  		require.NoError(t, err)
    64  
    65  		assert.Equal(t, exp, string(resMsg.Get(0).Get()))
    66  	}
    67  
    68  	_, _, err = f.ReadWithContext(context.Background())
    69  	assert.Equal(t, types.ErrNotConnected, err)
    70  
    71  	err = f.ConnectWithContext(context.Background())
    72  	assert.Equal(t, types.ErrTypeClosed, err)
    73  }
    74  
    75  func TestCSVGPaths(t *testing.T) {
    76  	dir := t.TempDir()
    77  
    78  	require.NoError(t, os.WriteFile(filepath.Join(dir, "a.csv"), []byte(`header1,header2,header3
    79  foo1,bar1,baz1
    80  foo2,bar2,baz2
    81  foo3,bar3,baz3
    82  `), 0o777))
    83  	require.NoError(t, os.WriteFile(filepath.Join(dir, "b.csv"), []byte(`header4,header5,header6
    84  foo4,bar4,baz4
    85  foo5,bar5,baz5
    86  foo6,bar6,baz6
    87  `), 0o777))
    88  
    89  	conf := NewConfig()
    90  	conf.Type = TypeCSVFile
    91  	conf.CSVFile.Paths = []string{
    92  		path.Join(dir, "a.csv"),
    93  		path.Join(dir, "b.csv"),
    94  	}
    95  
    96  	f, err := New(conf, nil, log.Noop(), metrics.Noop())
    97  	require.NoError(t, err)
    98  
    99  	t.Cleanup(func() {
   100  		require.NoError(t, f.WaitForClose(time.Second))
   101  	})
   102  
   103  	for _, exp := range []string{
   104  		`{"header1":"foo1","header2":"bar1","header3":"baz1"}`,
   105  		`{"header1":"foo2","header2":"bar2","header3":"baz2"}`,
   106  		`{"header1":"foo3","header2":"bar3","header3":"baz3"}`,
   107  		`{"header4":"foo4","header5":"bar4","header6":"baz4"}`,
   108  		`{"header4":"foo5","header5":"bar5","header6":"baz5"}`,
   109  		`{"header4":"foo6","header5":"bar6","header6":"baz6"}`,
   110  	} {
   111  		m := readMsg(t, f.TransactionChan())
   112  		assert.Equal(t, exp, string(m.Get(0).Get()))
   113  	}
   114  }
   115  
   116  func TestCSVGlobPaths(t *testing.T) {
   117  	dir := t.TempDir()
   118  
   119  	require.NoError(t, os.WriteFile(filepath.Join(dir, "a.csv"), []byte(`header1,header2,header3
   120  foo1,bar1,baz1
   121  foo2,bar2,baz2
   122  foo3,bar3,baz3
   123  `), 0o777))
   124  	require.NoError(t, os.WriteFile(filepath.Join(dir, "b.csv"), []byte(`header4,header5,header6
   125  foo4,bar4,baz4
   126  foo5,bar5,baz5
   127  foo6,bar6,baz6
   128  `), 0o777))
   129  
   130  	conf := NewConfig()
   131  	conf.Type = TypeCSVFile
   132  	conf.CSVFile.Paths = []string{dir + "/*.csv"}
   133  
   134  	f, err := New(conf, nil, log.Noop(), metrics.Noop())
   135  	require.NoError(t, err)
   136  
   137  	t.Cleanup(func() {
   138  		require.NoError(t, f.WaitForClose(time.Second))
   139  	})
   140  
   141  	for _, exp := range []string{
   142  		`{"header1":"foo1","header2":"bar1","header3":"baz1"}`,
   143  		`{"header1":"foo2","header2":"bar2","header3":"baz2"}`,
   144  		`{"header1":"foo3","header2":"bar3","header3":"baz3"}`,
   145  		`{"header4":"foo4","header5":"bar4","header6":"baz4"}`,
   146  		`{"header4":"foo5","header5":"bar5","header6":"baz5"}`,
   147  		`{"header4":"foo6","header5":"bar6","header6":"baz6"}`,
   148  	} {
   149  		m := readMsg(t, f.TransactionChan())
   150  		assert.Equal(t, exp, string(m.Get(0).Get()))
   151  	}
   152  }
   153  
   154  func TestCSVReaderGroupCount(t *testing.T) {
   155  	var handle bytes.Buffer
   156  
   157  	for _, msg := range []string{
   158  		"foo,bar,baz",
   159  		"foo1,bar1,baz1",
   160  		"foo2,bar2,baz2",
   161  		"foo3,bar3,baz3",
   162  		"foo4,bar4,baz4",
   163  		"foo5,bar5,baz5",
   164  		"foo6,bar6,baz6",
   165  		"foo7,bar7,baz7",
   166  	} {
   167  		handle.Write([]byte(msg))
   168  		handle.Write([]byte("\n"))
   169  	}
   170  
   171  	ctored := false
   172  	f, err := newCSVReader(
   173  		func(ctx context.Context) (io.Reader, error) {
   174  			if ctored {
   175  				return nil, io.EOF
   176  			}
   177  			ctored = true
   178  			return &handle, nil
   179  		},
   180  		func(ctx context.Context) {},
   181  		optCSVSetGroupCount(3),
   182  	)
   183  	require.NoError(t, err)
   184  
   185  	t.Cleanup(func() {
   186  		f.CloseAsync()
   187  		require.NoError(t, f.WaitForClose(time.Second))
   188  	})
   189  
   190  	require.NoError(t, f.ConnectWithContext(context.Background()))
   191  
   192  	for _, exp := range [][]string{
   193  		{
   194  			`{"bar":"bar1","baz":"baz1","foo":"foo1"}`,
   195  			`{"bar":"bar2","baz":"baz2","foo":"foo2"}`,
   196  			`{"bar":"bar3","baz":"baz3","foo":"foo3"}`,
   197  		},
   198  		{
   199  			`{"bar":"bar4","baz":"baz4","foo":"foo4"}`,
   200  			`{"bar":"bar5","baz":"baz5","foo":"foo5"}`,
   201  			`{"bar":"bar6","baz":"baz6","foo":"foo6"}`,
   202  		},
   203  		{
   204  			`{"bar":"bar7","baz":"baz7","foo":"foo7"}`,
   205  		},
   206  	} {
   207  		var resMsg types.Message
   208  		resMsg, _, err = f.ReadWithContext(context.Background())
   209  		require.NoError(t, err)
   210  
   211  		require.Equal(t, len(exp), resMsg.Len())
   212  		for i := 0; i < len(exp); i++ {
   213  			assert.Equal(t, exp[i], string(resMsg.Get(i).Get()))
   214  		}
   215  	}
   216  
   217  	_, _, err = f.ReadWithContext(context.Background())
   218  	assert.Equal(t, types.ErrNotConnected, err)
   219  
   220  	err = f.ConnectWithContext(context.Background())
   221  	assert.Equal(t, types.ErrTypeClosed, err)
   222  }
   223  
   224  func TestCSVReadersTwoFiles(t *testing.T) {
   225  	var handleOne, handleTwo bytes.Buffer
   226  
   227  	for _, msg := range []string{
   228  		"header1,header2,header3",
   229  		"foo1,foo2,foo3",
   230  		"bar1,bar2,bar3",
   231  		"baz1,baz2,baz3",
   232  	} {
   233  		handleOne.Write([]byte(msg))
   234  		handleOne.Write([]byte("\n"))
   235  	}
   236  
   237  	for _, msg := range []string{
   238  		"header4,header5,header6",
   239  		"foo1,foo2,foo3",
   240  		"bar1,bar2,bar3",
   241  		"baz1,baz2,baz3",
   242  	} {
   243  		handleTwo.Write([]byte(msg))
   244  		handleTwo.Write([]byte("\n"))
   245  	}
   246  
   247  	consumedFirst, consumedSecond := false, false
   248  
   249  	f, err := newCSVReader(
   250  		func(ctx context.Context) (io.Reader, error) {
   251  			if !consumedFirst {
   252  				consumedFirst = true
   253  				return &handleOne, nil
   254  			} else if !consumedSecond {
   255  				consumedSecond = true
   256  				return &handleTwo, nil
   257  			}
   258  			return nil, io.EOF
   259  		},
   260  		func(ctx context.Context) {},
   261  	)
   262  	require.NoError(t, err)
   263  
   264  	t.Cleanup(func() {
   265  		f.CloseAsync()
   266  		require.NoError(t, f.WaitForClose(time.Second))
   267  	})
   268  
   269  	require.NoError(t, f.ConnectWithContext(context.Background()))
   270  
   271  	for i, exp := range []string{
   272  		`{"header1":"foo1","header2":"foo2","header3":"foo3"}`,
   273  		`{"header1":"bar1","header2":"bar2","header3":"bar3"}`,
   274  		`{"header1":"baz1","header2":"baz2","header3":"baz3"}`,
   275  		`{"header4":"foo1","header5":"foo2","header6":"foo3"}`,
   276  		`{"header4":"bar1","header5":"bar2","header6":"bar3"}`,
   277  		`{"header4":"baz1","header5":"baz2","header6":"baz3"}`,
   278  	} {
   279  		var resMsg types.Message
   280  		var ackFn reader.AsyncAckFn
   281  		resMsg, ackFn, err = f.ReadWithContext(context.Background())
   282  		if err == types.ErrNotConnected {
   283  			require.NoError(t, f.ConnectWithContext(context.Background()))
   284  			resMsg, ackFn, err = f.ReadWithContext(context.Background())
   285  		}
   286  		require.NoError(t, err, i)
   287  		assert.Equal(t, exp, string(resMsg.Get(0).Get()), i)
   288  		_ = ackFn(context.Background(), response.NewAck())
   289  	}
   290  
   291  	_, _, err = f.ReadWithContext(context.Background())
   292  	assert.Equal(t, types.ErrNotConnected, err)
   293  
   294  	err = f.ConnectWithContext(context.Background())
   295  	assert.Equal(t, types.ErrTypeClosed, err)
   296  }
   297  
   298  func TestCSVReaderCustomComma(t *testing.T) {
   299  	var handle bytes.Buffer
   300  
   301  	for _, msg := range []string{
   302  		"header1|header2|header3",
   303  		"foo1|foo2|foo3",
   304  		"bar1|bar2|bar3",
   305  		"baz1|baz2|baz3",
   306  	} {
   307  		handle.Write([]byte(msg))
   308  		handle.Write([]byte("\n"))
   309  	}
   310  
   311  	ctored := false
   312  	f, err := newCSVReader(
   313  		func(ctx context.Context) (io.Reader, error) {
   314  			if ctored {
   315  				return nil, io.EOF
   316  			}
   317  			ctored = true
   318  			return &handle, nil
   319  		},
   320  		func(ctx context.Context) {},
   321  		optCSVSetComma('|'),
   322  	)
   323  	require.NoError(t, err)
   324  
   325  	t.Cleanup(func() {
   326  		f.CloseAsync()
   327  		require.NoError(t, f.WaitForClose(time.Second))
   328  	})
   329  
   330  	require.NoError(t, f.ConnectWithContext(context.Background()))
   331  
   332  	for _, exp := range []string{
   333  		`{"header1":"foo1","header2":"foo2","header3":"foo3"}`,
   334  		`{"header1":"bar1","header2":"bar2","header3":"bar3"}`,
   335  		`{"header1":"baz1","header2":"baz2","header3":"baz3"}`,
   336  	} {
   337  		var resMsg types.Message
   338  		resMsg, _, err = f.ReadWithContext(context.Background())
   339  		require.NoError(t, err)
   340  
   341  		assert.Equal(t, exp, string(resMsg.Get(0).Get()))
   342  	}
   343  
   344  	_, _, err = f.ReadWithContext(context.Background())
   345  	assert.Equal(t, types.ErrNotConnected, err)
   346  
   347  	err = f.ConnectWithContext(context.Background())
   348  	assert.Equal(t, types.ErrTypeClosed, err)
   349  }
   350  
   351  func TestCSVReaderRelaxed(t *testing.T) {
   352  	var handle bytes.Buffer
   353  
   354  	for _, msg := range []string{
   355  		"header1,header2,header3",
   356  		"foo1,foo2,foo3",
   357  		"bar1,bar2,bar3,bar4",
   358  		"baz1,baz2,baz3",
   359  		"buz1,buz2",
   360  	} {
   361  		handle.Write([]byte(msg))
   362  		handle.Write([]byte("\n"))
   363  	}
   364  
   365  	ctored := false
   366  	f, err := newCSVReader(
   367  		func(ctx context.Context) (io.Reader, error) {
   368  			if ctored {
   369  				return nil, io.EOF
   370  			}
   371  			ctored = true
   372  			return &handle, nil
   373  		},
   374  		func(ctx context.Context) {},
   375  		optCSVSetStrict(false),
   376  	)
   377  	require.NoError(t, err)
   378  
   379  	t.Cleanup(func() {
   380  		f.CloseAsync()
   381  		require.NoError(t, f.WaitForClose(time.Second))
   382  	})
   383  
   384  	require.NoError(t, f.ConnectWithContext(context.Background()))
   385  
   386  	for _, exp := range []string{
   387  		`{"header1":"foo1","header2":"foo2","header3":"foo3"}`,
   388  		`["bar1","bar2","bar3","bar4"]`,
   389  		`{"header1":"baz1","header2":"baz2","header3":"baz3"}`,
   390  		`{"header1":"buz1","header2":"buz2"}`,
   391  	} {
   392  		var resMsg types.Message
   393  		resMsg, _, err = f.ReadWithContext(context.Background())
   394  		require.NoError(t, err)
   395  
   396  		assert.Equal(t, exp, string(resMsg.Get(0).Get()))
   397  	}
   398  
   399  	_, _, err = f.ReadWithContext(context.Background())
   400  	assert.Equal(t, types.ErrNotConnected, err)
   401  
   402  	err = f.ConnectWithContext(context.Background())
   403  	assert.Equal(t, types.ErrTypeClosed, err)
   404  }
   405  
   406  func TestCSVReaderStrict(t *testing.T) {
   407  	var handle bytes.Buffer
   408  
   409  	for _, msg := range []string{
   410  		"header1,header2,header3",
   411  		"foo1,foo2,foo3",
   412  		"bar1,bar2,bar3,bar4",
   413  		"baz1,baz2,baz3",
   414  		"buz1,buz2",
   415  	} {
   416  		handle.Write([]byte(msg))
   417  		handle.Write([]byte("\n"))
   418  	}
   419  
   420  	ctored := false
   421  	f, err := newCSVReader(
   422  		func(ctx context.Context) (io.Reader, error) {
   423  			if ctored {
   424  				return nil, io.EOF
   425  			}
   426  			ctored = true
   427  			return &handle, nil
   428  		},
   429  		func(ctx context.Context) {},
   430  		optCSVSetStrict(true),
   431  	)
   432  	require.NoError(t, err)
   433  
   434  	t.Cleanup(func() {
   435  		f.CloseAsync()
   436  		require.NoError(t, f.WaitForClose(time.Second))
   437  	})
   438  
   439  	require.NoError(t, f.ConnectWithContext(context.Background()))
   440  
   441  	for _, exp := range []interface{}{
   442  		`{"header1":"foo1","header2":"foo2","header3":"foo3"}`,
   443  		errors.New("record on line 3: wrong number of fields"),
   444  		`{"header1":"baz1","header2":"baz2","header3":"baz3"}`,
   445  		errors.New("record on line 5: wrong number of fields"),
   446  	} {
   447  		var resMsg types.Message
   448  		resMsg, _, err = f.ReadWithContext(context.Background())
   449  
   450  		switch expT := exp.(type) {
   451  		case string:
   452  			require.NoError(t, err)
   453  			assert.Equal(t, expT, string(resMsg.Get(0).Get()))
   454  		case error:
   455  			assert.EqualError(t, err, expT.Error())
   456  		}
   457  	}
   458  
   459  	_, _, err = f.ReadWithContext(context.Background())
   460  	assert.Equal(t, types.ErrNotConnected, err)
   461  
   462  	err = f.ConnectWithContext(context.Background())
   463  	assert.Equal(t, types.ErrTypeClosed, err)
   464  }