storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/csv/reader_test.go (about) 1 /* 2 * MinIO Cloud Storage, (C) 2019 MinIO, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package csv 18 19 import ( 20 "bytes" 21 "errors" 22 "fmt" 23 "io" 24 "io/ioutil" 25 "path/filepath" 26 "reflect" 27 "strings" 28 "testing" 29 30 "github.com/klauspost/compress/zip" 31 32 "storj.io/minio/pkg/s3select/sql" 33 ) 34 35 func TestRead(t *testing.T) { 36 cases := []struct { 37 content string 38 recordDelimiter string 39 fieldDelimiter string 40 }{ 41 {"1,2,3\na,b,c\n", "\n", ","}, 42 {"1,2,3\ta,b,c\t", "\t", ","}, 43 {"1,2,3\r\na,b,c\r\n", "\r\n", ","}, 44 } 45 46 for i, c := range cases { 47 var err error 48 var record sql.Record 49 var result bytes.Buffer 50 51 r, _ := NewReader(ioutil.NopCloser(strings.NewReader(c.content)), &ReaderArgs{ 52 FileHeaderInfo: none, 53 RecordDelimiter: c.recordDelimiter, 54 FieldDelimiter: c.fieldDelimiter, 55 QuoteCharacter: defaultQuoteCharacter, 56 QuoteEscapeCharacter: defaultQuoteEscapeCharacter, 57 CommentCharacter: defaultCommentCharacter, 58 AllowQuotedRecordDelimiter: false, 59 unmarshaled: true, 60 }) 61 62 for { 63 record, err = r.Read(record) 64 if err != nil { 65 break 66 } 67 opts := sql.WriteCSVOpts{ 68 FieldDelimiter: []rune(c.fieldDelimiter)[0], 69 Quote: '"', 70 QuoteEscape: '"', 71 AlwaysQuote: false, 72 } 73 record.WriteCSV(&result, opts) 74 result.Truncate(result.Len() - 1) 75 
result.WriteString(c.recordDelimiter) 76 } 77 r.Close() 78 if err != io.EOF { 79 t.Fatalf("Case %d failed with %s", i, err) 80 } 81 82 if result.String() != c.content { 83 t.Errorf("Case %d failed: expected %v result %v", i, c.content, result.String()) 84 } 85 } 86 } 87 88 type tester interface { 89 Fatal(...interface{}) 90 } 91 92 func openTestFile(t tester, file string) []byte { 93 f, err := ioutil.ReadFile(filepath.Join("testdata/testdata.zip")) 94 if err != nil { 95 t.Fatal(err) 96 } 97 z, err := zip.NewReader(bytes.NewReader(f), int64(len(f))) 98 if err != nil { 99 t.Fatal(err) 100 } 101 for _, f := range z.File { 102 if f.Name == file { 103 rc, err := f.Open() 104 if err != nil { 105 t.Fatal(err) 106 } 107 defer rc.Close() 108 b, err := ioutil.ReadAll(rc) 109 if err != nil { 110 t.Fatal(err) 111 } 112 return b 113 } 114 } 115 t.Fatal(file, "not found in testdata/testdata.zip") 116 return nil 117 } 118 119 func TestReadExtended(t *testing.T) { 120 cases := []struct { 121 file string 122 recordDelimiter string 123 fieldDelimiter string 124 header bool 125 wantColumns []string 126 wantTenFields string 127 totalFields int 128 }{ 129 { 130 file: "nyc-taxi-data-100k.csv", 131 recordDelimiter: "\n", 132 fieldDelimiter: ",", 133 header: true, 134 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", 
"dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 135 wantTenFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 136 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 137 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 138 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 139 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 140 3389229,2,2014-03-07 09:46:32,2014-03-07 
09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 141 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 142 3389231,1,2014-03-19 19:09:36,2014-03-19 19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 143 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 144 3389233,2,2014-03-29 09:38:12,2014-03-29 09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 145 `, 146 totalFields: 308*2 + 1, 147 }, { 148 file: "nyc-taxi-data-tabs-100k.csv", 149 recordDelimiter: "\n", 150 fieldDelimiter: "\t", 151 header: true, 152 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", 
"passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 153 wantTenFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 154 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 155 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 156 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem 
South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 157 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 158 3389229,2,2014-03-07 09:46:32,2014-03-07 09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 159 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 160 3389231,1,2014-03-19 19:09:36,2014-03-19 19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 161 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 162 3389233,2,2014-03-29 09:38:12,2014-03-29 
09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 163 `, 164 totalFields: 308*2 + 1, 165 }, { 166 file: "nyc-taxi-data-100k-single-delim.csv", 167 recordDelimiter: "^", 168 fieldDelimiter: ",", 169 header: true, 170 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 171 wantTenFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 172 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem 
South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 173 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 174 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 175 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 176 3389229,2,2014-03-07 09:46:32,2014-03-07 09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 177 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 178 3389231,1,2014-03-19 19:09:36,2014-03-19 
19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 179 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 180 3389233,2,2014-03-29 09:38:12,2014-03-29 09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 181 `, 182 totalFields: 308*2 + 1, 183 }, { 184 file: "nyc-taxi-data-100k-multi-delim.csv", 185 recordDelimiter: "^Y", 186 fieldDelimiter: ",", 187 header: true, 188 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", 
"dropoff_puma"}, 189 wantTenFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 190 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 191 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 192 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 193 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 194 3389229,2,2014-03-07 09:46:32,2014-03-07 09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East 
Harlem South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 195 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 196 3389231,1,2014-03-19 19:09:36,2014-03-19 19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 197 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 198 3389233,2,2014-03-29 09:38:12,2014-03-29 09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 199 `, 200 totalFields: 308*2 + 1, 201 }, { 202 file: "nyc-taxi-data-noheader-100k.csv", 203 recordDelimiter: "\n", 204 fieldDelimiter: ",", 205 header: false, 206 wantColumns: []string{"_1", "_2", "_3", "_4", "_5", "_6", "_7", "_8", "_9", "_10", "_11", "_12", "_13", "_14", "_15", "_16", "_17", "_18", "_19", "_20", "_21", "_22", "_23", "_24", "_25", "_26", "_27", "_28", "_29", "_30", "_31", "_32", "_33", "_34", "_35", "_36", "_37", "_38", "_39", "_40", "_41", "_42", "_43", "_44", "_45", "_46", "_47", "_48", "_49", "_50", "_51"}, 207 wantTenFields: `3389224,2,2014-03-26 
00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 208 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100,1025100,E,MN36,Washington Heights South,3801 209 3389226,2,2014-03-26 17:13:28,2014-03-26 17:19:07,N,1,-73.949493408203125,40.793506622314453,-73.943374633789063,40.786155700683594,1,0.82,5.5,1,0.5,0,0,,,7,1,1,75,75,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1387,164,1,Manhattan,016400,1016400,E,MN33,East Harlem South,3804 210 3389227,2,2014-03-14 21:07:19,2014-03-14 21:11:41,N,1,-73.950538635253906,40.792228698730469,-73.940811157226563,40.809253692626953,1,1.40,6,0.5,0.5,0,0,,,7,2,1,75,42,green,0.00,0.0,0.0,46,22,5.59,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1184,208,1,Manhattan,020800,1020800,E,MN03,Central Harlem North-Polo Grounds,3803 211 3389228,1,2014-03-28 13:52:56,2014-03-28 14:29:01,N,1,-73.950569152832031,40.792312622070313,-73.868507385253906,40.688491821289063,2,16.10,46,0,0.5,0,5.33,,,51.83,2,,75,63,green,0.04,0.0,0.0,62,37,5.37,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1544,1182.02,3,Brooklyn,118202,3118202,E,BK83,Cypress Hills-City Line,4008 212 3389229,2,2014-03-07 09:46:32,2014-03-07 09:55:01,N,1,-73.952301025390625,40.789798736572266,-73.935806274414062,40.794448852539063,1,1.67,8,0,0.5,2,0,,,10.5,1,1,75,74,green,0.00,3.9,0.0,37,26,7.83,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem 
South,3804,1553,178,1,Manhattan,017800,1017800,E,MN34,East Harlem North,3804 213 3389230,2,2014-03-17 18:23:05,2014-03-17 18:28:38,N,1,-73.952346801757813,40.789844512939453,-73.946319580078125,40.783851623535156,5,0.95,5.5,1,0.5,0.65,0,,,7.65,1,1,75,263,green,0.00,0.0,0.0,35,23,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,32,156.01,1,Manhattan,015601,1015601,I,MN32,Yorkville,3805 214 3389231,1,2014-03-19 19:09:36,2014-03-19 19:12:20,N,1,-73.952377319335938,40.789779663085938,-73.947494506835938,40.796474456787109,1,0.50,4,1,0.5,1,0,,,6.5,1,,75,75,green,0.92,0.0,0.0,46,32,7.16,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1401,174.02,1,Manhattan,017402,1017402,E,MN33,East Harlem South,3804 215 3389232,2,2014-03-20 19:06:28,2014-03-20 19:21:35,N,1,-73.952583312988281,40.789516448974609,-73.985870361328125,40.776973724365234,2,3.04,13,1,0.5,2.8,0,,,17.3,1,1,75,143,green,0.00,0.0,0.0,54,40,8.05,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1742,155,1,Manhattan,015500,1015500,I,MN14,Lincoln Square,3806 216 3389233,2,2014-03-29 09:38:12,2014-03-29 09:44:16,N,1,-73.952728271484375,40.789501190185547,-73.950935363769531,40.775600433349609,1,1.10,6.5,0,0.5,1.3,0,,,8.3,1,1,75,263,green,1.81,0.0,0.0,59,43,10.74,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,2048,138,1,Manhattan,013800,1013800,I,MN32,Yorkville,3805 217 `, 218 totalFields: 308 * 2, 219 }, 220 } 221 222 for i, c := range cases { 223 t.Run(c.file, func(t *testing.T) { 224 225 var err error 226 var record sql.Record 227 var result bytes.Buffer 228 input := openTestFile(t, c.file) 229 // Get above block size. 230 input = append(input, input...) 
231 args := ReaderArgs{ 232 FileHeaderInfo: use, 233 RecordDelimiter: c.recordDelimiter, 234 FieldDelimiter: c.fieldDelimiter, 235 QuoteCharacter: defaultQuoteCharacter, 236 QuoteEscapeCharacter: defaultQuoteEscapeCharacter, 237 CommentCharacter: defaultCommentCharacter, 238 AllowQuotedRecordDelimiter: false, 239 unmarshaled: true, 240 } 241 if !c.header { 242 args.FileHeaderInfo = none 243 } 244 r, _ := NewReader(ioutil.NopCloser(bytes.NewReader(input)), &args) 245 fields := 0 246 for { 247 record, err = r.Read(record) 248 if err != nil { 249 break 250 } 251 if fields < 10 { 252 opts := sql.WriteCSVOpts{ 253 FieldDelimiter: ',', 254 Quote: '"', 255 QuoteEscape: '"', 256 AlwaysQuote: false, 257 } 258 // Write with fixed delimiters, newlines. 259 err := record.WriteCSV(&result, opts) 260 if err != nil { 261 t.Error(err) 262 } 263 } 264 fields++ 265 } 266 r.Close() 267 if err != io.EOF { 268 t.Fatalf("Case %d failed with %s", i, err) 269 } 270 if !reflect.DeepEqual(r.columnNames, c.wantColumns) { 271 t.Errorf("Case %d failed: expected %#v, got result %#v", i, c.wantColumns, r.columnNames) 272 } 273 if result.String() != c.wantTenFields { 274 t.Errorf("Case %d failed: expected %v, got result %v", i, c.wantTenFields, result.String()) 275 } 276 if fields != c.totalFields { 277 t.Errorf("Case %d failed: expected %v results %v", i, c.totalFields, fields) 278 } 279 }) 280 } 281 } 282 283 type errReader struct { 284 err error 285 } 286 287 func (e errReader) Read(p []byte) (n int, err error) { 288 return 0, e.err 289 } 290 291 func TestReadFailures(t *testing.T) { 292 customErr := errors.New("unable to read file :(") 293 cases := []struct { 294 file string 295 recordDelimiter string 296 fieldDelimiter string 297 sendErr error 298 header bool 299 wantColumns []string 300 wantFields string 301 wantErr error 302 }{ 303 { 304 file: "truncated-records.csv", 305 recordDelimiter: "^Y", 306 fieldDelimiter: ",", 307 header: true, 308 wantColumns: []string{"trip_id", "vendor_id", 
"pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", "improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 309 wantFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 310 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100 311 `, 312 wantErr: io.EOF, 313 }, 314 { 315 file: "truncated-records.csv", 316 recordDelimiter: "^Y", 317 fieldDelimiter: ",", 318 sendErr: customErr, 319 header: true, 320 wantColumns: []string{"trip_id", "vendor_id", "pickup_datetime", "dropoff_datetime", "store_and_fwd_flag", "rate_code_id", "pickup_longitude", "pickup_latitude", "dropoff_longitude", "dropoff_latitude", "passenger_count", "trip_distance", "fare_amount", "extra", "mta_tax", "tip_amount", "tolls_amount", "ehail_fee", 
"improvement_surcharge", "total_amount", "payment_type", "trip_type", "pickup", "dropoff", "cab_type", "precipitation", "snow_depth", "snowfall", "max_temp", "min_temp", "wind", "pickup_nyct2010_gid", "pickup_ctlabel", "pickup_borocode", "pickup_boroname", "pickup_ct2010", "pickup_boroct2010", "pickup_cdeligibil", "pickup_ntacode", "pickup_ntaname", "pickup_puma", "dropoff_nyct2010_gid", "dropoff_ctlabel", "dropoff_borocode", "dropoff_boroname", "dropoff_ct2010", "dropoff_boroct2010", "dropoff_cdeligibil", "dropoff_ntacode", "dropoff_ntaname", "dropoff_puma"}, 321 wantFields: `3389224,2,2014-03-26 00:26:15,2014-03-26 00:28:38,N,1,-73.950431823730469,40.792251586914063,-73.938949584960937,40.794425964355469,1,0.84,4.5,0.5,0.5,1,0,,,6.5,1,1,75,74,green,0.00,0.0,0.0,36,24,11.86,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,1828,180,1,Manhattan,018000,1018000,E,MN34,East Harlem North,3804 322 3389225,2,2014-03-31 09:42:15,2014-03-31 10:01:17,N,1,-73.950340270996094,40.792228698730469,-73.941970825195313,40.842235565185547,1,4.47,17.5,0,0.5,0,0,,,18,2,1,75,244,green,0.16,0.0,0.0,56,36,8.28,1267,168,1,Manhattan,016800,1016800,E,MN33,East Harlem South,3804,911,251,1,Manhattan,025100 323 `, 324 wantErr: customErr, 325 }, 326 { 327 // This works since LazyQuotes is true: 328 file: "invalid-badbarequote.csv", 329 recordDelimiter: "\n", 330 fieldDelimiter: ",", 331 sendErr: nil, 332 header: true, 333 wantColumns: []string{"header1", "header2", "header3"}, 334 wantFields: "ok1,ok2,ok3\n" + `"a ""word""",b` + "\n", 335 wantErr: io.EOF, 336 }, 337 { 338 // This works since LazyQuotes is true: 339 file: "invalid-baddoubleq.csv", 340 recordDelimiter: "\n", 341 fieldDelimiter: ",", 342 sendErr: nil, 343 header: true, 344 wantColumns: []string{"header1", "header2", "header3"}, 345 wantFields: "ok1,ok2,ok3\n" + `"a""""b",c` + "\n", 346 wantErr: io.EOF, 347 }, 348 { 349 // This works since LazyQuotes is true: 350 file: "invalid-badextraq.csv", 351 recordDelimiter: 
"\n", 352 fieldDelimiter: ",", 353 sendErr: nil, 354 header: true, 355 wantColumns: []string{"header1", "header2", "header3"}, 356 wantFields: "ok1,ok2,ok3\n" + `a word,"b"""` + "\n", 357 wantErr: io.EOF, 358 }, 359 { 360 // This works since LazyQuotes is true: 361 file: "invalid-badstartline.csv", 362 recordDelimiter: "\n", 363 fieldDelimiter: ",", 364 sendErr: nil, 365 header: true, 366 wantColumns: []string{"header1", "header2", "header3"}, 367 wantFields: "ok1,ok2,ok3\n" + `a,"b` + "\n" + `c""d,e` + "\n\"\n", 368 wantErr: io.EOF, 369 }, 370 { 371 // This works since LazyQuotes is true: 372 file: "invalid-badstartline2.csv", 373 recordDelimiter: "\n", 374 fieldDelimiter: ",", 375 sendErr: nil, 376 header: true, 377 wantColumns: []string{"header1", "header2", "header3"}, 378 wantFields: "ok1,ok2,ok3\n" + `a,b` + "\n" + `"d` + "\n\ne\"\n", 379 wantErr: io.EOF, 380 }, 381 { 382 // This works since LazyQuotes is true: 383 file: "invalid-badtrailingq.csv", 384 recordDelimiter: "\n", 385 fieldDelimiter: ",", 386 sendErr: nil, 387 header: true, 388 wantColumns: []string{"header1", "header2", "header3"}, 389 wantFields: "ok1,ok2,ok3\n" + `a word,"b"""` + "\n", 390 wantErr: io.EOF, 391 }, 392 { 393 // This works since LazyQuotes is true: 394 file: "invalid-crlfquoted.csv", 395 recordDelimiter: "\n", 396 fieldDelimiter: ",", 397 sendErr: nil, 398 header: true, 399 wantColumns: []string{"header1", "header2", "header3"}, 400 wantFields: "ok1,ok2,ok3\n" + `"foo""bar"` + "\n", 401 wantErr: io.EOF, 402 }, 403 { 404 // This works since LazyQuotes is true: 405 file: "invalid-csv.csv", 406 recordDelimiter: "\n", 407 fieldDelimiter: ",", 408 sendErr: nil, 409 header: true, 410 wantColumns: []string{"header1", "header2", "header3"}, 411 wantFields: "ok1,ok2,ok3\n" + `"a""""b",c` + "\n", 412 wantErr: io.EOF, 413 }, 414 { 415 // This works since LazyQuotes is true, but output is very weird. 
416 file: "invalid-oddquote.csv", 417 recordDelimiter: "\n", 418 fieldDelimiter: ",", 419 sendErr: nil, 420 header: true, 421 wantColumns: []string{"header1", "header2", "header3"}, 422 wantFields: "ok1,ok2,ok3\n" + `""""""",b,c` + "\n\"\n", 423 wantErr: io.EOF, 424 }, 425 { 426 // Test when file ends with a half separator 427 file: "endswithhalfsep.csv", 428 recordDelimiter: "%!", 429 fieldDelimiter: ",", 430 sendErr: nil, 431 header: false, 432 wantColumns: []string{"_1", "_2", "_3"}, 433 wantFields: "a,b,c\na2,b2,c2%\n", 434 wantErr: io.EOF, 435 }, 436 } 437 438 for i, c := range cases { 439 t.Run(c.file, func(t *testing.T) { 440 441 var err error 442 var record sql.Record 443 var result bytes.Buffer 444 input := openTestFile(t, c.file) 445 args := ReaderArgs{ 446 FileHeaderInfo: use, 447 RecordDelimiter: c.recordDelimiter, 448 FieldDelimiter: c.fieldDelimiter, 449 QuoteCharacter: defaultQuoteCharacter, 450 QuoteEscapeCharacter: defaultQuoteEscapeCharacter, 451 CommentCharacter: defaultCommentCharacter, 452 AllowQuotedRecordDelimiter: false, 453 unmarshaled: true, 454 } 455 if !c.header { 456 args.FileHeaderInfo = none 457 } 458 inr := io.Reader(bytes.NewReader(input)) 459 if c.sendErr != nil { 460 inr = io.MultiReader(inr, errReader{c.sendErr}) 461 } 462 r, _ := NewReader(ioutil.NopCloser(inr), &args) 463 fields := 0 464 for { 465 record, err = r.Read(record) 466 if err != nil { 467 break 468 } 469 470 opts := sql.WriteCSVOpts{ 471 FieldDelimiter: ',', 472 Quote: '"', 473 QuoteEscape: '"', 474 AlwaysQuote: false, 475 } 476 // Write with fixed delimiters, newlines. 
477 err := record.WriteCSV(&result, opts) 478 if err != nil { 479 t.Error(err) 480 } 481 fields++ 482 } 483 r.Close() 484 if err != c.wantErr { 485 t.Fatalf("Case %d failed with %s", i, err) 486 } 487 if !reflect.DeepEqual(r.columnNames, c.wantColumns) { 488 t.Errorf("Case %d failed: expected \n%#v, got result \n%#v", i, c.wantColumns, r.columnNames) 489 } 490 if result.String() != c.wantFields { 491 t.Errorf("Case %d failed: expected \n%v\nGot result \n%v", i, c.wantFields, result.String()) 492 } 493 }) 494 } 495 }

// benchmarkArgs returns the ReaderArgs shared by the reader benchmarks: the
// first line is used as the header, fields are comma separated, and only the
// record delimiter varies between benchmarks.
func benchmarkArgs(recordDelimiter string) ReaderArgs {
	return ReaderArgs{
		FileHeaderInfo:             use,
		RecordDelimiter:            recordDelimiter,
		FieldDelimiter:             ",",
		QuoteCharacter:             defaultQuoteCharacter,
		QuoteEscapeCharacter:       defaultQuoteEscapeCharacter,
		CommentCharacter:           defaultCommentCharacter,
		AllowQuotedRecordDelimiter: false,
		unmarshaled:                true,
	}
}

// benchmarkReadFile loads file from the testdata archive and, once per
// benchmark iteration, constructs a Reader over it and reads records until
// io.EOF. Any other error aborts the benchmark.
func benchmarkReadFile(b *testing.B, file, recordDelimiter string) {
	args := benchmarkArgs(recordDelimiter)
	f := openTestFile(b, file)

	b.ReportAllocs()
	b.SetBytes(int64(len(f)))
	b.ResetTimer()

	// record is handed back to Read on every call, as the reader API expects.
	var record sql.Record
	for i := 0; i < b.N; i++ {
		// Every iteration gets its own reader and closes it itself; no
		// reader is created outside the loop, so nothing is left for a
		// deferred Close to clean up.
		r, err := NewReader(ioutil.NopCloser(bytes.NewBuffer(f)), &args)
		if err != nil {
			b.Fatalf("Reading init failed with %s", err)
		}
		for err == nil {
			record, err = r.Read(record)
			if err != nil && err != io.EOF {
				b.Fatalf("Reading failed with %s", err)
			}
		}
		r.Close()
	}
}

// BenchmarkReaderBasic reads the taxi sample with "\n" as record delimiter.
func BenchmarkReaderBasic(b *testing.B) {
	benchmarkReadFile(b, "nyc-taxi-data-100k.csv", "\n")
}

// BenchmarkReaderHuge reads the taxi sample repeated 2^n times, for n in
// [0, 10], as one sub-benchmark per size, and checks the number of Read calls
// needed to exhaust each input.
func BenchmarkReaderHuge(b *testing.B) {
	args := benchmarkArgs("\n")

	// Load the sample once; every size below is built from this copy.
	base := openTestFile(b, "nyc-taxi-data-100k.csv")

	for n := 0; n < 11; n++ {
		// f is the sample repeated 2^n times. want is the expected number of
		// Read calls for it, counting the final call that returns io.EOF:
		// 309 for a single copy, doubling together with the data.
		f := bytes.Repeat(base, 1<<uint(n))
		want := 309 << uint(n)

		b.Run(fmt.Sprint(len(f)/(1<<10), "K"), func(b *testing.B) {
			b.ReportAllocs()
			b.SetBytes(int64(len(f)))
			b.ResetTimer()
			var record sql.Record
			for i := 0; i < b.N; i++ {
				r, err := NewReader(ioutil.NopCloser(bytes.NewBuffer(f)), &args)
				if err != nil {
					b.Fatalf("Reading init failed with %s", err)
				}

				// got is incremented after every Read, including the one
				// that ends the loop with io.EOF.
				got := 0
				for err == nil {
					record, err = r.Read(record)
					if err != nil && err != io.EOF {
						b.Fatalf("Reading failed with %s", err)
					}
					got++
				}
				r.Close()
				if got != want {
					b.Errorf("want %d records, got %d", want, got)
				}
			}
		})
	}
}

// BenchmarkReaderReplace reads the taxi sample variant that uses the single
// character "^" as record delimiter.
func BenchmarkReaderReplace(b *testing.B) {
	benchmarkReadFile(b, "nyc-taxi-data-100k-single-delim.csv", "^")
}

// BenchmarkReaderReplaceTwo reads the taxi sample variant that uses the two
// character sequence "^Y" as record delimiter.
func BenchmarkReaderReplaceTwo(b *testing.B) {
	benchmarkReadFile(b, "nyc-taxi-data-100k-multi-delim.csv", "^Y")
}