github.com/tobgu/qframe@v0.4.0/internal/fastcsv/csv_test.go (about) 1 package fastcsv 2 3 import ( 4 "bytes" 5 "encoding/csv" 6 "fmt" 7 "io" 8 "os" 9 "strings" 10 "testing" 11 ) 12 13 func toStrings(bs [][]byte) []string { 14 strs := make([]string, 0, len(bs)) 15 for _, b := range bs { 16 strs = append(strs, string(b)) 17 } 18 return strs 19 } 20 21 func quote(strs []string) []string { 22 out := make([]string, 0, len(strs)) 23 for _, s := range strs { 24 out = append(out, fmt.Sprintf("\"%s\"", s)) 25 } 26 return out 27 } 28 29 func compareLine(line [][]byte, wanted ...string) error { 30 if len(line) != len(wanted) { 31 return fmt.Errorf( 32 "Wanted [%s]; got [%s]", 33 strings.Join(quote(wanted), ", "), 34 strings.Join(quote(toStrings(line)), ", "), 35 ) 36 } 37 for i, s := range toStrings(line) { 38 if s != wanted[i] { 39 return fmt.Errorf( 40 "Mismatch at item %d; wanted '%s'; got '%s'", 41 i, 42 wanted[i], 43 s, 44 ) 45 } 46 } 47 return nil 48 } 49 50 func TestRead(t *testing.T) { 51 testCases := []struct { 52 Title string 53 Input string 54 Wanted [][]string 55 BufferCap int 56 }{{ 57 Title: "OneRow", 58 Input: "abc,def,ghi", 59 Wanted: [][]string{{"abc", "def", "ghi"}}, 60 }, { 61 Title: "MultipleLines", 62 Input: "abc,def\n1234,56", 63 Wanted: [][]string{{"abc", "def"}, {"1234", "56"}}, 64 }, { 65 Title: "QuotedField", 66 Input: "\"abc\",\"123\",\"456\"", 67 Wanted: [][]string{{"abc", "123", "456"}}, 68 }, { 69 Title: "QuotedFieldMultipleLines", 70 Input: "\"abc\",\"123\"\n\"def\",\"456\"", 71 Wanted: [][]string{{"abc", "123"}, {"def", "456"}}, 72 }, { 73 Title: "SomeQuoted", 74 Input: `hello,"hello2",hello3`, 75 Wanted: [][]string{{"hello", "hello2", "hello3"}}, 76 }, { 77 Title: "QuotedFieldsWithComma", 78 Input: "\"a,b,c\",\"d,e,f\"", 79 Wanted: [][]string{{"a,b,c", "d,e,f"}}, 80 }, { 81 Title: "QuotedFieldsWithNewLine", 82 Input: "\"a\nb\nc\"", 83 Wanted: [][]string{{"a\nb\nc"}}, 84 }, { 85 Title: "QuotedFieldsWithEscapedQuotes", 86 Input: "\"a\"\"b\"", 87 Wanted: [][]string{{"a\"b"}}, 88 }, { 89 Title: "QuotedFieldsWithConsecutiveEscapedQuotes", 90 Input: "\"\"\"\"\"a\"\"\"\"\"", 91 Wanted: [][]string{{"\"\"a\"\""}}, 92 }, { 93 Title: "QuotedFieldsWithEscapeQuotesAndMultipleLines", 94 Input: "abc,\"1\"\"\n2\"", 95 Wanted: [][]string{{"abc", "1\"\n2"}}, 96 }, { 97 Title: "QuotedFieldsWithConsecutiveEscapedQuotesAndMultipleLines", 98 Input: "abc,\"\"\"\"\"a\"\"\"\"\nb\"", 99 Wanted: [][]string{{"abc", "\"\"a\"\"\nb"}}, 100 }, { 101 Title: "QuotedFieldsWithLinesLongerThanBuffer", 102 Input: "\"abc\",\"def\",\"ghi\"", 103 Wanted: [][]string{{"abc", "def", "ghi"}}, 104 BufferCap: 4, 105 }, { 106 Title: "TrailingNewline", 107 Input: "a,b,c\n", 108 Wanted: [][]string{{"a", "b", "c"}}, 109 }, { 110 Title: "EmptyMiddleLine", 111 Input: "a,b\n\nc,d", 112 Wanted: [][]string{{"a", "b"}, {""}, {"c", "d"}}, 113 }, { 114 Title: "CRLF", 115 Input: "a,b,c\r\nd,e,f", 116 Wanted: [][]string{{"a", "b", "c"}, {"d", "e", "f"}}, 117 }, { 118 Title: "CRLF with quote in last column", 119 Input: "\"a\"\r\n\"b\"", 120 Wanted: [][]string{{"a"}, {"b"}}, 121 }, { 122 Title: "CRLF with quote in last column with EOF", 123 Input: "\"a\"\r\n", 124 Wanted: [][]string{{"a"}}, 125 }} 126 127 for _, testCase := range testCases { 128 t.Run(testCase.Title, func(t *testing.T) { 129 r := Reader{ 130 fields: fields{ 131 // initialize with a deliberately small buffer so we get 132 // good coverage of i/o buffering 133 buffer: bufferedReader{ 134 r: strings.NewReader(testCase.Input), 135 data: make([]byte, 0, testCase.BufferCap), 136 }, 137 delimiter: ',', 138 }, 139 fieldsBuffer: make([][]byte, 0, 16), 140 } 141 for i, wantedLine := range testCase.Wanted { 142 fields, err := r.Read() 143 if err != nil { 144 t.Fatalf("Unexpected error on line %d: %v", i+1, err) 145 } 146 if err := compareLine(fields, wantedLine...); err != nil { 147 t.Fatalf("Mismatch on line %d: %v", i+1, err) 148 } 149 } 150 if _, err := r.Read(); err != io.EOF { 151 t.Fatal("Wanted io.EOF; got:", err) 152 } 153 }) 154 } 155 } 156 157 func BenchmarkRead(b *testing.B) { 158 data, err := os.ReadFile("testdata/fl_insurance.csv") 159 if err != nil { 160 b.Fatal(err) 161 } 162 quotedData, err := os.ReadFile("testdata/fl_insurance_quoted.csv") 163 if err != nil { 164 b.Fatal(err) 165 } 166 167 b.Run("StdCSV", func(b *testing.B) { 168 for i := 0; i < b.N; i++ { 169 r := csv.NewReader(bytes.NewReader(data)) 170 for { 171 if _, err := r.Read(); err != nil { 172 if err == io.EOF { 173 break 174 } 175 b.Fatal(err) 176 } 177 } 178 } 179 }) 180 b.Run("FastCSV", func(b *testing.B) { 181 for i := 0; i < b.N; i++ { 182 r := NewReader(bytes.NewReader(data), ',') 183 for { 184 if _, err := r.Read(); err != nil { 185 if err == io.EOF { 186 break 187 } 188 b.Fatal(err) 189 } 190 } 191 } 192 }) 193 b.Run("StdCSVQuoted", func(b *testing.B) { 194 for i := 0; i < b.N; i++ { 195 r := csv.NewReader(bytes.NewReader(quotedData)) 196 for { 197 if _, err := r.Read(); err != nil { 198 if err == io.EOF { 199 break 200 } 201 b.Fatal(err) 202 } 203 } 204 } 205 }) 206 b.Run("FastCSVQuoted", func(b *testing.B) { 207 for i := 0; i < b.N; i++ { 208 r := NewReader(bytes.NewReader(quotedData), ',') 209 for { 210 if _, err := r.Read(); err != nil { 211 if err == io.EOF { 212 break 213 } 214 b.Fatal(err) 215 } 216 } 217 } 218 }) 219 }