github.com/tobgu/qframe@v0.4.0/internal/fastcsv/csv_test.go (about)

     1  package fastcsv
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/csv"
     6  	"fmt"
     7  	"io"
     8  	"os"
     9  	"strings"
    10  	"testing"
    11  )
    12  
    13  func toStrings(bs [][]byte) []string {
    14  	strs := make([]string, 0, len(bs))
    15  	for _, b := range bs {
    16  		strs = append(strs, string(b))
    17  	}
    18  	return strs
    19  }
    20  
    21  func quote(strs []string) []string {
    22  	out := make([]string, 0, len(strs))
    23  	for _, s := range strs {
    24  		out = append(out, fmt.Sprintf("\"%s\"", s))
    25  	}
    26  	return out
    27  }
    28  
    29  func compareLine(line [][]byte, wanted ...string) error {
    30  	if len(line) != len(wanted) {
    31  		return fmt.Errorf(
    32  			"Wanted [%s]; got [%s]",
    33  			strings.Join(quote(wanted), ", "),
    34  			strings.Join(quote(toStrings(line)), ", "),
    35  		)
    36  	}
    37  	for i, s := range toStrings(line) {
    38  		if s != wanted[i] {
    39  			return fmt.Errorf(
    40  				"Mismatch at item %d; wanted '%s'; got '%s'",
    41  				i,
    42  				wanted[i],
    43  				s,
    44  			)
    45  		}
    46  	}
    47  	return nil
    48  }
    49  
    50  func TestRead(t *testing.T) {
    51  	testCases := []struct {
    52  		Title     string
    53  		Input     string
    54  		Wanted    [][]string
    55  		BufferCap int
    56  	}{{
    57  		Title:  "OneRow",
    58  		Input:  "abc,def,ghi",
    59  		Wanted: [][]string{{"abc", "def", "ghi"}},
    60  	}, {
    61  		Title:  "MultipleLines",
    62  		Input:  "abc,def\n1234,56",
    63  		Wanted: [][]string{{"abc", "def"}, {"1234", "56"}},
    64  	}, {
    65  		Title:  "QuotedField",
    66  		Input:  "\"abc\",\"123\",\"456\"",
    67  		Wanted: [][]string{{"abc", "123", "456"}},
    68  	}, {
    69  		Title:  "QuotedFieldMultipleLines",
    70  		Input:  "\"abc\",\"123\"\n\"def\",\"456\"",
    71  		Wanted: [][]string{{"abc", "123"}, {"def", "456"}},
    72  	}, {
    73  		Title:  "SomeQuoted",
    74  		Input:  `hello,"hello2",hello3`,
    75  		Wanted: [][]string{{"hello", "hello2", "hello3"}},
    76  	}, {
    77  		Title:  "QuotedFieldsWithComma",
    78  		Input:  "\"a,b,c\",\"d,e,f\"",
    79  		Wanted: [][]string{{"a,b,c", "d,e,f"}},
    80  	}, {
    81  		Title:  "QuotedFieldsWithNewLine",
    82  		Input:  "\"a\nb\nc\"",
    83  		Wanted: [][]string{{"a\nb\nc"}},
    84  	}, {
    85  		Title:  "QuotedFieldsWithEscapedQuotes",
    86  		Input:  "\"a\"\"b\"",
    87  		Wanted: [][]string{{"a\"b"}},
    88  	}, {
    89  		Title:  "QuotedFieldsWithConsecutiveEscapedQuotes",
    90  		Input:  "\"\"\"\"\"a\"\"\"\"\"",
    91  		Wanted: [][]string{{"\"\"a\"\""}},
    92  	}, {
    93  		Title:  "QuotedFieldsWithEscapeQuotesAndMultipleLines",
    94  		Input:  "abc,\"1\"\"\n2\"",
    95  		Wanted: [][]string{{"abc", "1\"\n2"}},
    96  	}, {
    97  		Title:  "QuotedFieldsWithConsecutiveEscapedQuotesAndMultipleLines",
    98  		Input:  "abc,\"\"\"\"\"a\"\"\"\"\nb\"",
    99  		Wanted: [][]string{{"abc", "\"\"a\"\"\nb"}},
   100  	}, {
   101  		Title:     "QuotedFieldsWithLinesLongerThanBuffer",
   102  		Input:     "\"abc\",\"def\",\"ghi\"",
   103  		Wanted:    [][]string{{"abc", "def", "ghi"}},
   104  		BufferCap: 4,
   105  	}, {
   106  		Title:  "TrailingNewline",
   107  		Input:  "a,b,c\n",
   108  		Wanted: [][]string{{"a", "b", "c"}},
   109  	}, {
   110  		Title:  "EmptyMiddleLine",
   111  		Input:  "a,b\n\nc,d",
   112  		Wanted: [][]string{{"a", "b"}, {""}, {"c", "d"}},
   113  	}, {
   114  		Title:  "CRLF",
   115  		Input:  "a,b,c\r\nd,e,f",
   116  		Wanted: [][]string{{"a", "b", "c"}, {"d", "e", "f"}},
   117  	}, {
   118  		Title:  "CRLF with quote in last column",
   119  		Input:  "\"a\"\r\n\"b\"",
   120  		Wanted: [][]string{{"a"}, {"b"}},
   121  	}, {
   122  		Title:  "CRLF with quote in last column with EOF",
   123  		Input:  "\"a\"\r\n",
   124  		Wanted: [][]string{{"a"}},
   125  	}}
   126  
   127  	for _, testCase := range testCases {
   128  		t.Run(testCase.Title, func(t *testing.T) {
   129  			r := Reader{
   130  				fields: fields{
   131  					// initialize with a deliberately small buffer so we get
   132  					// good coverage of i/o buffering
   133  					buffer: bufferedReader{
   134  						r:    strings.NewReader(testCase.Input),
   135  						data: make([]byte, 0, testCase.BufferCap),
   136  					},
   137  					delimiter: ',',
   138  				},
   139  				fieldsBuffer: make([][]byte, 0, 16),
   140  			}
   141  			for i, wantedLine := range testCase.Wanted {
   142  				fields, err := r.Read()
   143  				if err != nil {
   144  					t.Fatalf("Unexpected error on line %d: %v", i+1, err)
   145  				}
   146  				if err := compareLine(fields, wantedLine...); err != nil {
   147  					t.Fatalf("Mismatch on line %d: %v", i+1, err)
   148  				}
   149  			}
   150  			if _, err := r.Read(); err != io.EOF {
   151  				t.Fatal("Wanted io.EOF; got:", err)
   152  			}
   153  		})
   154  	}
   155  }
   156  
   157  func BenchmarkRead(b *testing.B) {
   158  	data, err := os.ReadFile("testdata/fl_insurance.csv")
   159  	if err != nil {
   160  		b.Fatal(err)
   161  	}
   162  	quotedData, err := os.ReadFile("testdata/fl_insurance_quoted.csv")
   163  	if err != nil {
   164  		b.Fatal(err)
   165  	}
   166  
   167  	b.Run("StdCSV", func(b *testing.B) {
   168  		for i := 0; i < b.N; i++ {
   169  			r := csv.NewReader(bytes.NewReader(data))
   170  			for {
   171  				if _, err := r.Read(); err != nil {
   172  					if err == io.EOF {
   173  						break
   174  					}
   175  					b.Fatal(err)
   176  				}
   177  			}
   178  		}
   179  	})
   180  	b.Run("FastCSV", func(b *testing.B) {
   181  		for i := 0; i < b.N; i++ {
   182  			r := NewReader(bytes.NewReader(data), ',')
   183  			for {
   184  				if _, err := r.Read(); err != nil {
   185  					if err == io.EOF {
   186  						break
   187  					}
   188  					b.Fatal(err)
   189  				}
   190  			}
   191  		}
   192  	})
   193  	b.Run("StdCSVQuoted", func(b *testing.B) {
   194  		for i := 0; i < b.N; i++ {
   195  			r := csv.NewReader(bytes.NewReader(quotedData))
   196  			for {
   197  				if _, err := r.Read(); err != nil {
   198  					if err == io.EOF {
   199  						break
   200  					}
   201  					b.Fatal(err)
   202  				}
   203  			}
   204  		}
   205  	})
   206  	b.Run("FastCSVQuoted", func(b *testing.B) {
   207  		for i := 0; i < b.N; i++ {
   208  			r := NewReader(bytes.NewReader(quotedData), ',')
   209  			for {
   210  				if _, err := r.Read(); err != nil {
   211  					if err == io.EOF {
   212  						break
   213  					}
   214  					b.Fatal(err)
   215  				}
   216  			}
   217  		}
   218  	})
   219  }