github.com/pingcap/br@v5.3.0-alpha.0.20220125034240-ec59c7b6ce30+incompatible/pkg/lightning/mydump/parquet_parser_test.go (about) 1 package mydump 2 3 import ( 4 "context" 5 "io" 6 "path/filepath" 7 "strconv" 8 "time" 9 10 . "github.com/pingcap/check" 11 "github.com/pingcap/tidb/types" 12 "github.com/xitongsys/parquet-go-source/local" 13 writer2 "github.com/xitongsys/parquet-go/writer" 14 15 "github.com/pingcap/br/pkg/storage" 16 ) 17 18 type testParquetParserSuite struct{} 19 20 var _ = Suite(testParquetParserSuite{}) 21 22 func (s testParquetParserSuite) TestParquetParser(c *C) { 23 type Test struct { 24 S string `parquet:"name=sS, type=UTF8, encoding=PLAIN_DICTIONARY"` 25 A int32 `parquet:"name=a_A, type=INT32"` 26 } 27 28 dir := c.MkDir() 29 // prepare data 30 name := "test123.parquet" 31 testPath := filepath.Join(dir, name) 32 pf, err := local.NewLocalFileWriter(testPath) 33 c.Assert(err, IsNil) 34 test := &Test{} 35 writer, err := writer2.NewParquetWriter(pf, test, 2) 36 c.Assert(err, IsNil) 37 38 for i := 0; i < 100; i++ { 39 test.A = int32(i) 40 test.S = strconv.Itoa(i) 41 c.Assert(writer.Write(test), IsNil) 42 } 43 44 c.Assert(writer.WriteStop(), IsNil) 45 c.Assert(pf.Close(), IsNil) 46 47 store, err := storage.NewLocalStorage(dir) 48 c.Assert(err, IsNil) 49 r, err := store.Open(context.TODO(), name) 50 c.Assert(err, IsNil) 51 reader, err := NewParquetParser(context.TODO(), store, r, name) 52 c.Assert(err, IsNil) 53 defer reader.Close() 54 55 c.Assert(reader.Columns(), DeepEquals, []string{"ss", "a_a"}) 56 57 verifyRow := func(i int) { 58 c.Assert(reader.lastRow.RowID, Equals, int64(i+1)) 59 c.Assert(len(reader.lastRow.Row), Equals, 2) 60 c.Assert(reader.lastRow.Row[0], DeepEquals, types.NewCollationStringDatum(strconv.Itoa(i), "", 0)) 61 c.Assert(reader.lastRow.Row[1], DeepEquals, types.NewIntDatum(int64(i))) 62 } 63 64 // test read some rows 65 for i := 0; i < 10; i++ { 66 c.Assert(reader.ReadRow(), IsNil) 67 verifyRow(i) 68 } 69 70 // test set pos to pos < curpos + batchReadRowSize 71 c.Assert(reader.SetPos(15, 15), IsNil) 72 c.Assert(reader.ReadRow(), IsNil) 73 verifyRow(15) 74 75 // test set pos to pos > curpos + batchReadRowSize 76 c.Assert(reader.SetPos(80, 80), IsNil) 77 for i := 80; i < 100; i++ { 78 c.Assert(reader.ReadRow(), IsNil) 79 verifyRow(i) 80 } 81 82 c.Assert(reader.ReadRow(), Equals, io.EOF) 83 } 84 85 func (s testParquetParserSuite) TestParquetVariousTypes(c *C) { 86 // those deprecated TIME/TIMESTAMP types depend on the local timezone! 87 prevTZ := time.Local 88 time.Local = time.FixedZone("UTC+8", 8*60*60) 89 defer func() { 90 time.Local = prevTZ 91 }() 92 93 type Test struct { 94 Date int32 `parquet:"name=date, type=DATE"` 95 TimeMillis int32 `parquet:"name=timemillis, type=TIME_MILLIS"` 96 TimeMicros int64 `parquet:"name=timemicros, type=TIME_MICROS"` 97 TimestampMillis int64 `parquet:"name=timestampmillis, type=TIMESTAMP_MILLIS"` 98 TimestampMicros int64 `parquet:"name=timestampmicros, type=TIMESTAMP_MICROS"` 99 100 Decimal1 int32 `parquet:"name=decimal1, type=DECIMAL, scale=2, precision=9, basetype=INT32"` 101 Decimal2 int32 `parquet:"name=decimal2, type=DECIMAL, scale=4, precision=4, basetype=INT32"` 102 Decimal3 int64 `parquet:"name=decimal3, type=DECIMAL, scale=2, precision=18, basetype=INT64"` 103 Decimal6 int32 `parquet:"name=decimal6, type=DECIMAL, scale=4, precision=4, basetype=INT32"` 104 } 105 106 dir := c.MkDir() 107 // prepare data 108 name := "test123.parquet" 109 testPath := filepath.Join(dir, name) 110 pf, err := local.NewLocalFileWriter(testPath) 111 c.Assert(err, IsNil) 112 test := &Test{} 113 writer, err := writer2.NewParquetWriter(pf, test, 2) 114 c.Assert(err, IsNil) 115 116 v := &Test{ 117 Date: 18564, // 2020-10-29 118 TimeMillis: 62775123, // 17:26:15.123 (note all time are in UTC+8!) 119 TimeMicros: 62775123456, // 17:26:15.123 120 TimestampMillis: 1603963672356, // 2020-10-29T09:27:52.356Z 121 TimestampMicros: 1603963672356956, // 2020-10-29T09:27:52.356956Z 122 Decimal1: -12345678, // -123456.78 123 Decimal2: 456, // 0.0456 124 Decimal3: 123456789012345678, // 1234567890123456.78 125 Decimal6: -1, // -0.0001 126 } 127 c.Assert(writer.Write(v), IsNil) 128 c.Assert(writer.WriteStop(), IsNil) 129 c.Assert(pf.Close(), IsNil) 130 131 store, err := storage.NewLocalStorage(dir) 132 c.Assert(err, IsNil) 133 r, err := store.Open(context.TODO(), name) 134 c.Assert(err, IsNil) 135 reader, err := NewParquetParser(context.TODO(), store, r, name) 136 c.Assert(err, IsNil) 137 defer reader.Close() 138 139 c.Assert(len(reader.columns), Equals, 9) 140 141 c.Assert(reader.ReadRow(), IsNil) 142 rowValue := []string{ 143 "2020-10-29", "17:26:15.123Z", "17:26:15.123456Z", "2020-10-29 09:27:52.356Z", "2020-10-29 09:27:52.356956Z", 144 "-123456.78", "0.0456", "1234567890123456.78", "-0.0001", 145 } 146 row := reader.lastRow.Row 147 c.Assert(len(rowValue), Equals, len(row)) 148 for i := 0; i < len(row); i++ { 149 c.Assert(row[i].Kind(), Equals, types.KindString) 150 c.Assert(rowValue[i], Equals, row[i].GetString()) 151 } 152 153 type TestDecimal struct { 154 Decimal1 int32 `parquet:"name=decimal1, type=DECIMAL, scale=3, precision=5, basetype=INT32"` 155 DecimalRef *int32 `parquet:"name=decimal2, type=DECIMAL, scale=3, precision=5, basetype=INT32"` 156 } 157 158 cases := [][]interface{}{ 159 {int32(0), "0.000"}, 160 {int32(1000), "1.000"}, 161 {int32(-1000), "-1.000"}, 162 {int32(999), "0.999"}, 163 {int32(-999), "-0.999"}, 164 {int32(1), "0.001"}, 165 {int32(-1), "-0.001"}, 166 } 167 168 fileName := "test.02.parquet" 169 testPath = filepath.Join(dir, fileName) 170 pf, err = local.NewLocalFileWriter(testPath) 171 td := &TestDecimal{} 172 c.Assert(err, IsNil) 173 writer, err = writer2.NewParquetWriter(pf, td, 2) 174 c.Assert(err, IsNil) 175 for i, testCase := range cases { 176 val := testCase[0].(int32) 177 td.Decimal1 = val 178 if i%2 == 0 { 179 td.DecimalRef = &val 180 } else { 181 td.DecimalRef = nil 182 } 183 c.Assert(writer.Write(td), IsNil) 184 } 185 c.Assert(writer.WriteStop(), IsNil) 186 c.Assert(pf.Close(), IsNil) 187 188 r, err = store.Open(context.TODO(), fileName) 189 c.Assert(err, IsNil) 190 reader, err = NewParquetParser(context.TODO(), store, r, fileName) 191 c.Assert(err, IsNil) 192 defer reader.Close() 193 194 for i, testCase := range cases { 195 c.Assert(reader.ReadRow(), IsNil) 196 vals := []types.Datum{types.NewCollationStringDatum(testCase[1].(string), "", 0)} 197 if i%2 == 0 { 198 vals = append(vals, vals[0]) 199 } else { 200 vals = append(vals, types.Datum{}) 201 } 202 // because we always reuse the datums in reader.lastRow.Row, so we can't directly 203 // compare will `DeepEqual` here 204 c.Assert(len(reader.lastRow.Row), Equals, len(vals)) 205 for i, val := range vals { 206 c.Assert(reader.lastRow.Row[i].Kind(), Equals, val.Kind()) 207 c.Assert(reader.lastRow.Row[i].GetValue(), Equals, val.GetValue()) 208 } 209 } 210 } 211 212 func (s testParquetParserSuite) TestParquetAurora(c *C) { 213 store, err := storage.NewLocalStorage("examples") 214 c.Assert(err, IsNil) 215 216 fileName := "test.parquet" 217 r, err := store.Open(context.TODO(), fileName) 218 c.Assert(err, IsNil) 219 parser, err := NewParquetParser(context.TODO(), store, r, fileName) 220 c.Assert(err, IsNil) 221 222 c.Assert(parser.Columns(), DeepEquals, []string{"id", "val1", "val2", "d1", "d2", "d3", "d4", "d5", "d6"}) 223 224 expectedRes := [][]interface{}{ 225 {int64(1), int64(1), "0", int64(123), "1.23", "0.00000001", "1234567890", "123", "1.23000000"}, 226 { 227 int64(2), int64(123456), "0", int64(123456), "9999.99", "0.12345678", "99999999999999999999", 228 "999999999999999999999999999999999999", "99999999999999999999.99999999", 229 }, 230 { 231 int64(3), int64(123456), "0", int64(-123456), "-9999.99", "-0.12340000", "-99999999999999999999", 232 "-999999999999999999999999999999999999", "-99999999999999999999.99999999", 233 }, 234 { 235 int64(4), int64(1), "0", int64(123), "1.23", "0.00000001", "1234567890", "123", "1.23000000", 236 }, 237 { 238 int64(5), int64(123456), "0", int64(123456), "9999.99", "0.12345678", "12345678901234567890", 239 "123456789012345678901234567890123456", "99999999999999999999.99999999", 240 }, 241 { 242 int64(6), int64(123456), "0", int64(-123456), "-9999.99", "-0.12340000", 243 "-12345678901234567890", "-123456789012345678901234567890123456", 244 "-99999999999999999999.99999999", 245 }, 246 } 247 248 for i := 0; i < len(expectedRes); i++ { 249 err = parser.ReadRow() 250 c.Assert(err, IsNil) 251 expectedValues := expectedRes[i] 252 row := parser.LastRow().Row 253 c.Assert(len(expectedValues), Equals, len(row)) 254 for j := 0; j < len(row); j++ { 255 switch v := expectedValues[j].(type) { 256 case int64: 257 c.Assert(v, Equals, row[j].GetInt64()) 258 case string: 259 c.Assert(v, Equals, row[j].GetString()) 260 default: 261 c.Error("unexpected value: ", expectedValues[j]) 262 } 263 } 264 } 265 266 c.Assert(parser.ReadRow(), Equals, io.EOF) 267 }