github.com/minio/minio@v0.0.0-20240328213742-3f72439b8a27/internal/s3select/csv/reader_contrib_test.go (about) 1 /* 2 * MinIO Object Storage (c) 2021 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package csv 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "io" 24 "os" 25 "reflect" 26 "strings" 27 "testing" 28 29 "github.com/klauspost/compress/zip" 30 "github.com/minio/minio/internal/s3select/sql" 31 ) 32 33 func TestRead(t *testing.T) { 34 cases := []struct { 35 content string 36 recordDelimiter string 37 fieldDelimiter string 38 }{ 39 {"1,2,3\na,b,c\n", "\n", ","}, 40 {"1,2,3\ta,b,c\t", "\t", ","}, 41 {"1,2,3\r\na,b,c\r\n", "\r\n", ","}, 42 } 43 44 for i, c := range cases { 45 var err error 46 var record sql.Record 47 var result bytes.Buffer 48 49 r, _ := NewReader(io.NopCloser(strings.NewReader(c.content)), &ReaderArgs{ 50 FileHeaderInfo: none, 51 RecordDelimiter: c.recordDelimiter, 52 FieldDelimiter: c.fieldDelimiter, 53 QuoteCharacter: defaultQuoteCharacter, 54 QuoteEscapeCharacter: defaultQuoteEscapeCharacter, 55 CommentCharacter: defaultCommentCharacter, 56 AllowQuotedRecordDelimiter: false, 57 unmarshaled: true, 58 }) 59 60 for { 61 record, err = r.Read(record) 62 if err != nil { 63 break 64 } 65 opts := sql.WriteCSVOpts{ 66 FieldDelimiter: []rune(c.fieldDelimiter)[0], 67 Quote: '"', 68 QuoteEscape: '"', 69 AlwaysQuote: false, 70 } 71 record.WriteCSV(&result, opts) 72 result.Truncate(result.Len() - 1) 73 
result.WriteString(c.recordDelimiter) 74 } 75 r.Close() 76 if err != io.EOF { 77 t.Fatalf("Case %d failed with %s", i, err) 78 } 79 80 if result.String() != c.content { 81 t.Errorf("Case %d failed: expected %v result %v", i, c.content, result.String()) 82 } 83 } 84 } 85 86 type tester interface { 87 Fatal(...interface{}) 88 } 89 90 func openTestFile(t tester, file string) []byte { 91 f, err := os.ReadFile("testdata/testdata.zip") 92 if err != nil { 93 t.Fatal(err) 94 } 95 z, err := zip.NewReader(bytes.NewReader(f), int64(len(f))) 96 if err != nil { 97 t.Fatal(err) 98 } 99 for _, f := range z.File { 100 if f.Name == file { 101 rc, err := f.Open() 102 if err != nil { 103 t.Fatal(err) 104 } 105 defer rc.Close() 106 b, err := io.ReadAll(rc) 107 if err != nil { 108 t.Fatal(err) 109 } 110 return b 111 } 112 } 113 t.Fatal(file, "not found in testdata/testdata.zip") 114 return nil 115 } 116 117 func TestReadExtended(t *testing.T) { 118 cases := []struct { 119 file string 120 recordDelimiter string 121 fieldDelimiter string 122 header bool 123 wantColumns []string 124 wantTenFields string 125 totalFields int 126 }{ 127 { 128 file: "nyc-taxi-data-100k.csv", 129 recordDelimiter: "\n", 130 fieldDelimiter: ",", 131 header: true, 132 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", 
"dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 133 wantTenFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 134 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 135 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 136 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 137 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 138 3389229,2,2014-03-07 09:46:32,2014-03-07 
09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 139 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 140 3389231,1,2014-03-19 19:09:36,2014-03-19 19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 141 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 142 3389233,2,2014-03-29 09:38:12,2014-03-29 09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 143 `, 144 totalFields: 308*2 + 1, 145 }, { 146 file: "nyc-taxi-data-tabs-100k.csv", 147 recordDelimiter: "\n", 148 fieldDelimiter: "\t", 149 header: true, 150 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", 
"passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 151 wantTenFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 152 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 153 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 154 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem 
South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 155 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 156 3389229,2,2014-03-07 09:46:32,2014-03-07 09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 157 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 158 3389231,1,2014-03-19 19:09:36,2014-03-19 19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 159 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 160 3389233,2,2014-03-29 09:38:12,2014-03-29 
09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 161 `, 162 totalFields: 308*2 + 1, 163 }, { 164 file: "nyc-taxi-data-100k-single-delim.csv", 165 recordDelimiter: "^", 166 fieldDelimiter: ",", 167 header: true, 168 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 169 wantTenFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 170 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem 
South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 171 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 172 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 173 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 174 3389229,2,2014-03-07 09:46:32,2014-03-07 09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 175 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 176 3389231,1,2014-03-19 19:09:36,2014-03-19 
19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 177 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 178 3389233,2,2014-03-29 09:38:12,2014-03-29 09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 179 `, 180 totalFields: 308*2 + 1, 181 }, { 182 file: "nyc-taxi-data-100k-multi-delim.csv", 183 recordDelimiter: "^Y", 184 fieldDelimiter: ",", 185 header: true, 186 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", 
"dropoff_puma"}, 187 wantTenFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 188 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 189 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 190 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 191 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 192 3389229,2,2014-03-07 09:46:32,2014-03-07 09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East 
Harlem South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 193 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 194 3389231,1,2014-03-19 19:09:36,2014-03-19 19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 195 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 196 3389233,2,2014-03-29 09:38:12,2014-03-29 09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 197 `, 198 totalFields: 308*2 + 1, 199 }, { 200 file: "nyc-taxi-data-noheader-100k.csv", 201 recordDelimiter: "\n", 202 fieldDelimiter: ",", 203 header: false, 204 wantColumns: []string{"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9", "_10", "_11", "_12", "_13", "_14", "_15", "_16", "_17", "_18", "_19", "_20", "_21", "_22", "_23", "_24", "_25", "_26", "_27", "_28", "_29", "_30", "_31", "_32", "_33", "_34", "_35", "_36", "_37", "_38", "_39", "_40", "_41", "_42", "_43", "_44", "_45", "_46", "_47", "_48", "_49", "_50", "_51"}, 205 wantTenFields: `3389224,2,2014-03-26 
00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 206 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 207 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 208 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 209 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 210 3389229,2,2014-03-07 09:46:32,2014-03-07 09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem 
South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 211 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 212 3389231,1,2014-03-19 19:09:36,2014-03-19 19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 213 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 214 3389233,2,2014-03-29 09:38:12,2014-03-29 09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 215 `, 216 totalFields: 308 * 2, 217 }, 218 } 219 220 for i, c := range cases { 221 t.Run(c.file, func(t *testing.T) { 222 var err error 223 var record sql.Record 224 var result bytes.Buffer 225 input := openTestFile(t, c.file) 226 // Get above block size. 227 input = append(input, input...) 
228 args := ReaderArgs{ 229 FileHeaderInfo: use, 230 RecordDelimiter: c.recordDelimiter, 231 FieldDelimiter: c.fieldDelimiter, 232 QuoteCharacter: defaultQuoteCharacter, 233 QuoteEscapeCharacter: defaultQuoteEscapeCharacter, 234 CommentCharacter: defaultCommentCharacter, 235 AllowQuotedRecordDelimiter: false, 236 unmarshaled: true, 237 } 238 if !c.header { 239 args.FileHeaderInfo = none 240 } 241 r, _ := NewReader(io.NopCloser(bytes.NewReader(input)), &args) 242 fields := 0 243 for { 244 record, err = r.Read(record) 245 if err != nil { 246 break 247 } 248 if fields < 10 { 249 opts := sql.WriteCSVOpts{ 250 FieldDelimiter: ',', 251 Quote: '"', 252 QuoteEscape: '"', 253 AlwaysQuote: false, 254 } 255 // Write with fixed delimiters, newlines. 256 err := record.WriteCSV(&result, opts) 257 if err != nil { 258 t.Error(err) 259 } 260 } 261 fields++ 262 } 263 r.Close() 264 if err != io.EOF { 265 t.Fatalf("Case %d failed with %s", i, err) 266 } 267 if !reflect.DeepEqual(r.columnNames, c.wantColumns) { 268 t.Errorf("Case %d failed: expected %#v, got result %#v", i, c.wantColumns, r.columnNames) 269 } 270 if result.String() != c.wantTenFields { 271 t.Errorf("Case %d failed: expected %v, got result %v", i, c.wantTenFields, result.String()) 272 } 273 if fields != c.totalFields { 274 t.Errorf("Case %d failed: expected %v results %v", i, c.totalFields, fields) 275 } 276 }) 277 } 278 } 279 280 type errReader struct { 281 err error 282 } 283 284 func (e errReader) Read(p []byte) (n int, err error) { 285 return 0, e.err 286 } 287 288 func TestReadFailures(t *testing.T) { 289 customErr := errors.New("unable to read file :(") 290 cases := []struct { 291 file string 292 recordDelimiter string 293 fieldDelimiter string 294 sendErr error 295 header bool 296 wantColumns []string 297 wantFields string 298 wantErr error 299 }{ 300 { 301 file: "truncated-records.csv", 302 recordDelimiter: "^Y", 303 fieldDelimiter: ",", 304 header: true, 305 wantColumns: []string{"trip_id", "vendor_id", 
"pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 306 wantFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 307 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100 308 `, 309 wantErr: io.EOF, 310 }, 311 { 312 file: "truncated-records.csv", 313 recordDelimiter: "^Y", 314 fieldDelimiter: ",", 315 sendErr: customErr, 316 header: true, 317 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", 
"improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 318 wantFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 319 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100 320 `, 321 wantErr: customErr, 322 }, 323 { 324 // This works since LazyQuotes is true: 325 file: "invalid-badbarequote.csv", 326 recordDelimiter: "\n", 327 fieldDelimiter: ",", 328 sendErr: nil, 329 header: true, 330 wantColumns: []string{"header1", "header2", "header3"}, 331 wantFields: "ok1,ok2,ok3\n" + `"a ""word""",b` + "\n", 332 wantErr: io.EOF, 333 }, 334 { 335 // This works since LazyQuotes is true: 336 file: "invalid-baddoubleq.csv", 337 recordDelimiter: "\n", 338 fieldDelimiter: ",", 339 sendErr: nil, 340 header: true, 341 wantColumns: []string{"header1", "header2", "header3"}, 342 wantFields: "ok1,ok2,ok3\n" + `"a""""b",c` + "\n", 343 wantErr: io.EOF, 344 }, 345 { 346 // This works since LazyQuotes is true: 347 file: "invalid-badextraq.csv", 348 recordDelimiter: 
"\n", 349 fieldDelimiter: ",", 350 sendErr: nil, 351 header: true, 352 wantColumns: []string{"header1", "header2", "header3"}, 353 wantFields: "ok1,ok2,ok3\n" + `a word,"b"""` + "\n", 354 wantErr: io.EOF, 355 }, 356 { 357 // This works since LazyQuotes is true: 358 file: "invalid-badstartline.csv", 359 recordDelimiter: "\n", 360 fieldDelimiter: ",", 361 sendErr: nil, 362 header: true, 363 wantColumns: []string{"header1", "header2", "header3"}, 364 wantFields: "ok1,ok2,ok3\n" + `a,"b` + "\n" + `c""d,e` + "\n\"\n", 365 wantErr: io.EOF, 366 }, 367 { 368 // This works since LazyQuotes is true: 369 file: "invalid-badstartline2.csv", 370 recordDelimiter: "\n", 371 fieldDelimiter: ",", 372 sendErr: nil, 373 header: true, 374 wantColumns: []string{"header1", "header2", "header3"}, 375 wantFields: "ok1,ok2,ok3\n" + `a,b` + "\n" + `"d` + "\n\ne\"\n", 376 wantErr: io.EOF, 377 }, 378 { 379 // This works since LazyQuotes is true: 380 file: "invalid-badtrailingq.csv", 381 recordDelimiter: "\n", 382 fieldDelimiter: ",", 383 sendErr: nil, 384 header: true, 385 wantColumns: []string{"header1", "header2", "header3"}, 386 wantFields: "ok1,ok2,ok3\n" + `a word,"b"""` + "\n", 387 wantErr: io.EOF, 388 }, 389 { 390 // This works since LazyQuotes is true: 391 file: "invalid-crlfquoted.csv", 392 recordDelimiter: "\n", 393 fieldDelimiter: ",", 394 sendErr: nil, 395 header: true, 396 wantColumns: []string{"header1", "header2", "header3"}, 397 wantFields: "ok1,ok2,ok3\n" + `"foo""bar"` + "\n", 398 wantErr: io.EOF, 399 }, 400 { 401 // This works since LazyQuotes is true: 402 file: "invalid-csv.csv", 403 recordDelimiter: "\n", 404 fieldDelimiter: ",", 405 sendErr: nil, 406 header: true, 407 wantColumns: []string{"header1", "header2", "header3"}, 408 wantFields: "ok1,ok2,ok3\n" + `"a""""b",c` + "\n", 409 wantErr: io.EOF, 410 }, 411 { 412 // This works since LazyQuotes is true, but output is very weird. 
413 file: "invalid-oddquote.csv", 414 recordDelimiter: "\n", 415 fieldDelimiter: ",", 416 sendErr: nil, 417 header: true, 418 wantColumns: []string{"header1", "header2", "header3"}, 419 wantFields: "ok1,ok2,ok3\n" + `""""""",b,c` + "\n\"\n", 420 wantErr: io.EOF, 421 }, 422 { 423 // Test when file ends with a half separator 424 file: "endswithhalfsep.csv", 425 recordDelimiter: "%!", 426 fieldDelimiter: ",", 427 sendErr: nil, 428 header: false, 429 wantColumns: []string{"_1", "_2", "_3"}, 430 wantFields: "a,b,c\na2,b2,c2%\n", 431 wantErr: io.EOF, 432 }, 433 } 434 435 for i, c := range cases { 436 t.Run(c.file, func(t *testing.T) { 437 var err error 438 var record sql.Record 439 var result bytes.Buffer 440 input := openTestFile(t, c.file) 441 args := ReaderArgs{ 442 FileHeaderInfo: use, 443 RecordDelimiter: c.recordDelimiter, 444 FieldDelimiter: c.fieldDelimiter, 445 QuoteCharacter: defaultQuoteCharacter, 446 QuoteEscapeCharacter: defaultQuoteEscapeCharacter, 447 CommentCharacter: defaultCommentCharacter, 448 AllowQuotedRecordDelimiter: false, 449 unmarshaled: true, 450 } 451 if !c.header { 452 args.FileHeaderInfo = none 453 } 454 inr := io.Reader(bytes.NewReader(input)) 455 if c.sendErr != nil { 456 inr = io.MultiReader(inr, errReader{c.sendErr}) 457 } 458 r, _ := NewReader(io.NopCloser(inr), &args) 459 fields := 0 460 for { 461 record, err = r.Read(record) 462 if err != nil { 463 break 464 } 465 466 opts := sql.WriteCSVOpts{ 467 FieldDelimiter: ',', 468 Quote: '"', 469 QuoteEscape: '"', 470 AlwaysQuote: false, 471 } 472 // Write with fixed delimiters, newlines. 
473 err := record.WriteCSV(&result, opts) 474 if err != nil { 475 t.Error(err) 476 } 477 fields++ 478 } 479 r.Close() 480 if err != c.wantErr { 481 t.Fatalf("Case %d failed with %s", i, err) 482 } 483 if !reflect.DeepEqual(r.columnNames, c.wantColumns) { 484 t.Errorf("Case %d failed: expected \n%#v, got result \n%#v", i, c.wantColumns, r.columnNames) 485 } 486 if result.String() != c.wantFields { 487 t.Errorf("Case %d failed: expected \n%v\nGot result \n%v", i, c.wantFields, result.String()) 488 } 489 }) 490 } 491 } 492

// benchmarkReaderArgs builds the ReaderArgs shared by the reader benchmarks.
// Only the record delimiter varies; the field delimiter is ",",
// FileHeaderInfo is set to use, the quote, quote-escape and comment
// characters are the package defaults, and AllowQuotedRecordDelimiter is off.
func benchmarkReaderArgs(recordDelimiter string) ReaderArgs {
	return ReaderArgs{
		RecordDelimiter:            recordDelimiter,
		FieldDelimiter:             ",",
		FileHeaderInfo:             use,
		QuoteCharacter:             defaultQuoteCharacter,
		QuoteEscapeCharacter:       defaultQuoteEscapeCharacter,
		CommentCharacter:           defaultCommentCharacter,
		AllowQuotedRecordDelimiter: false,
		unmarshaled:                true,
	}
}

// BenchmarkReaderBasic times a full pass over "nyc-taxi-data-100k.csv" with
// "\n" as the record delimiter. Every iteration builds a new Reader over the
// same in-memory bytes and calls Read until it returns an error; any error
// other than io.EOF aborts the benchmark.
func BenchmarkReaderBasic(b *testing.B) {
	args := benchmarkReaderArgs("\n")
	data := openTestFile(b, "nyc-taxi-data-100k.csv")

	// This reader is built before the timer is reset and is never read
	// from; it is closed when the benchmark returns.
	first, err := NewReader(io.NopCloser(bytes.NewBuffer(data)), &args)
	if err != nil {
		b.Fatalf("Reading init failed with %s", err)
	}
	defer first.Close()

	b.SetBytes(int64(len(data)))
	b.ReportAllocs()
	b.ResetTimer()

	var record sql.Record
	for iter := 0; iter < b.N; iter++ {
		rd, err := NewReader(io.NopCloser(bytes.NewBuffer(data)), &args)
		if err != nil {
			b.Fatalf("Reading init failed with %s", err)
		}
		for {
			record, err = rd.Read(record)
			if err == nil {
				continue
			}
			if err != io.EOF {
				b.Fatalf("Reading failed with %s", err)
			}
			break
		}
		rd.Close()
	}
}

// BenchmarkReaderHuge runs one sub-benchmark per input size. For n = 0..10
// the bytes of "nyc-taxi-data-100k.csv" are appended to themselves n times,
// doubling the input each time, and the sub-benchmark is named after the
// resulting size in KiB. Each iteration counts its Read calls, including the
// final one that returns io.EOF, and reports an error when the count differs
// from 309 doubled n times.
func BenchmarkReaderHuge(b *testing.B) {
	args := benchmarkReaderArgs("\n")

	for n := 0; n < 11; n++ {
		data := openTestFile(b, "nyc-taxi-data-100k.csv")
		want := 309
		for doubled := 0; doubled < n; doubled++ {
			data = append(data, data...)
			want += want
		}

		name := fmt.Sprint(len(data)/(1<<10), "K")
		b.Run(name, func(b *testing.B) {
			b.ReportAllocs()
			b.SetBytes(int64(len(data)))
			b.ResetTimer()

			var record sql.Record
			for iter := 0; iter < b.N; iter++ {
				rd, err := NewReader(io.NopCloser(bytes.NewBuffer(data)), &args)
				if err != nil {
					b.Fatalf("Reading init failed with %s", err)
				}

				got := 0
				for {
					record, err = rd.Read(record)
					if err != nil && err != io.EOF {
						b.Fatalf("Reading failed with %s", err)
					}
					// The call that returns io.EOF is counted as well.
					got++
					if err != nil {
						break
					}
				}
				rd.Close()

				if got != want {
					b.Errorf("want %d records, got %d", want, got)
				}
			}
		})
	}
}

// BenchmarkReaderReplace times a full pass over
// "nyc-taxi-data-100k-single-delim.csv" using the one-character record
// delimiter "^". Every iteration builds a new Reader over the same bytes and
// calls Read until it returns an error; any error other than io.EOF aborts
// the benchmark.
func BenchmarkReaderReplace(b *testing.B) {
	args := benchmarkReaderArgs("^")
	data := openTestFile(b, "nyc-taxi-data-100k-single-delim.csv")

	// Built before the timer is reset and never read from; closed when the
	// benchmark returns.
	first, err := NewReader(io.NopCloser(bytes.NewBuffer(data)), &args)
	if err != nil {
		b.Fatalf("Reading init failed with %s", err)
	}
	defer first.Close()

	b.SetBytes(int64(len(data)))
	b.ReportAllocs()
	b.ResetTimer()

	var record sql.Record
	for iter := 0; iter < b.N; iter++ {
		rd, err := NewReader(io.NopCloser(bytes.NewBuffer(data)), &args)
		if err != nil {
			b.Fatalf("Reading init failed with %s", err)
		}
		for {
			record, err = rd.Read(record)
			if err == nil {
				continue
			}
			if err != io.EOF {
				b.Fatalf("Reading failed with %s", err)
			}
			break
		}
		rd.Close()
	}
}

// BenchmarkReaderReplaceTwo times a full pass over
// "nyc-taxi-data-100k-multi-delim.csv" using the two-character record
// delimiter "^Y". Every iteration builds a new Reader over the same bytes and
// calls Read until it returns an error; any error other than io.EOF aborts
// the benchmark.
func BenchmarkReaderReplaceTwo(b *testing.B) {
	args := benchmarkReaderArgs("^Y")
	data := openTestFile(b, "nyc-taxi-data-100k-multi-delim.csv")

	// Built before the timer is reset and never read from; closed when the
	// benchmark returns.
	first, err := NewReader(io.NopCloser(bytes.NewBuffer(data)), &args)
	if err != nil {
		b.Fatalf("Reading init failed with %s", err)
	}
	defer first.Close()

	b.SetBytes(int64(len(data)))
	b.ReportAllocs()
	b.ResetTimer()

	var record sql.Record
	for iter := 0; iter < b.N; iter++ {
		rd, err := NewReader(io.NopCloser(bytes.NewBuffer(data)), &args)
		if err != nil {
			b.Fatalf("Reading init failed with %s", err)
		}
		for {
			record, err = rd.Read(record)
			if err == nil {
				continue
			}
			if err != io.EOF {
				b.Fatalf("Reading failed with %s", err)
			}
			break
		}
		rd.Close()
	}
}