storj.io/minio@v0.0.0-20230509071714-0cbc90f649b1/pkg/s3select/internal/parquet-go/data/column.go (about) 1 /* 2 * Minio Cloud Storage, (C) 2019 Minio, Inc. 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package data 18 19 import ( 20 "bytes" 21 "context" 22 "fmt" 23 "strings" 24 25 "git.apache.org/thrift.git/lib/go/thrift" 26 "github.com/tidwall/gjson" 27 "github.com/tidwall/sjson" 28 29 "storj.io/minio/pkg/s3select/internal/parquet-go/common" 30 "storj.io/minio/pkg/s3select/internal/parquet-go/encoding" 31 "storj.io/minio/pkg/s3select/internal/parquet-go/gen-go/parquet" 32 "storj.io/minio/pkg/s3select/internal/parquet-go/schema" 33 ) 34 35 func getDefaultEncoding(parquetType parquet.Type) parquet.Encoding { 36 switch parquetType { 37 case parquet.Type_BOOLEAN: 38 return parquet.Encoding_PLAIN 39 case parquet.Type_INT32, parquet.Type_INT64, parquet.Type_FLOAT, parquet.Type_DOUBLE: 40 return parquet.Encoding_RLE_DICTIONARY 41 case parquet.Type_BYTE_ARRAY: 42 return parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY 43 } 44 45 return parquet.Encoding_PLAIN 46 } 47 48 func getFirstValueElement(tree *schema.Tree) (valueElement *schema.Element) { 49 tree.Range(func(name string, element *schema.Element) bool { 50 if element.Children == nil { 51 valueElement = element 52 } else { 53 valueElement = getFirstValueElement(element.Children) 54 } 55 56 return false 57 }) 58 59 return valueElement 60 } 61 62 func populate(columnDataMap map[string]*Column, input *jsonValue, tree *schema.Tree, firstValueRL int64) (map[string]*Column, error) { 63 var err error 64 65 pos := 0 66 handleElement := func(name string, element *schema.Element) bool { 67 pos++ 68 69 dataPath := element.PathInTree 70 71 if *element.RepetitionType == parquet.FieldRepetitionType_REPEATED { 72 panic(fmt.Errorf("%v: repetition type must be REQUIRED or OPTIONAL type", dataPath)) 73 } 74 75 inputValue := input.Get(name) 76 if *element.RepetitionType == parquet.FieldRepetitionType_REQUIRED && inputValue.IsNull() { 77 err = fmt.Errorf("%v: nil value for required field", dataPath) 78 return false 79 } 80 81 add := func(element *schema.Element, value interface{}, DL, RL int64) { 82 columnData := columnDataMap[element.PathInSchema] 83 if columnData == nil { 84 columnData = NewColumn(*element.Type) 85 } 86 columnData.add(value, DL, RL) 87 columnDataMap[element.PathInSchema] = columnData 88 } 89 90 // Handle primitive type element. 91 if element.Type != nil { 92 var value interface{} 93 if value, err = inputValue.GetValue(*element.Type, element.ConvertedType); err != nil { 94 return false 95 } 96 97 DL := element.MaxDefinitionLevel 98 if value == nil && DL > 0 { 99 DL-- 100 } 101 102 RL := element.MaxRepetitionLevel 103 if pos == 1 { 104 RL = firstValueRL 105 } 106 107 add(element, value, DL, RL) 108 return true 109 } 110 111 addNull := func() { 112 valueElement := getFirstValueElement(element.Children) 113 114 DL := element.MaxDefinitionLevel 115 if DL > 0 { 116 DL-- 117 } 118 119 RL := element.MaxRepetitionLevel 120 if RL > 0 { 121 RL-- 122 } 123 124 add(valueElement, nil, DL, RL) 125 } 126 127 // Handle group type element. 128 if element.ConvertedType == nil { 129 if inputValue.IsNull() { 130 addNull() 131 return true 132 } 133 134 columnDataMap, err = populate(columnDataMap, inputValue, element.Children, firstValueRL) 135 return (err == nil) 136 } 137 138 // Handle list type element. 139 if *element.ConvertedType == parquet.ConvertedType_LIST { 140 if inputValue.IsNull() { 141 addNull() 142 return true 143 } 144 145 var results []gjson.Result 146 if results, err = inputValue.GetArray(); err != nil { 147 return false 148 } 149 150 listElement, _ := element.Children.Get("list") 151 valueElement, _ := listElement.Children.Get("element") 152 for i := range results { 153 rl := valueElement.MaxRepetitionLevel 154 if i == 0 { 155 rl = firstValueRL 156 } 157 158 var jsonData []byte 159 if jsonData, err = sjson.SetBytes([]byte{}, "element", results[i].Value()); err != nil { 160 return false 161 } 162 163 var jv *jsonValue 164 if jv, err = bytesToJSONValue(jsonData); err != nil { 165 return false 166 } 167 168 if columnDataMap, err = populate(columnDataMap, jv, listElement.Children, rl); err != nil { 169 return false 170 } 171 } 172 return true 173 } 174 175 if *element.ConvertedType == parquet.ConvertedType_MAP { 176 if inputValue.IsNull() { 177 addNull() 178 return true 179 } 180 181 keyValueElement, _ := element.Children.Get("key_value") 182 var rerr error 183 err = inputValue.Range(func(key, value gjson.Result) bool { 184 if !key.Exists() || key.Type == gjson.Null { 185 rerr = fmt.Errorf("%v.key_value.key: not found or null", dataPath) 186 return false 187 } 188 189 var jsonData []byte 190 if jsonData, rerr = sjson.SetBytes([]byte{}, "key", key.Value()); rerr != nil { 191 return false 192 } 193 194 if jsonData, rerr = sjson.SetBytes(jsonData, "value", value.Value()); rerr != nil { 195 return false 196 } 197 198 var jv *jsonValue 199 if jv, rerr = bytesToJSONValue(jsonData); rerr != nil { 200 return false 201 } 202 203 if columnDataMap, rerr = populate(columnDataMap, jv, keyValueElement.Children, firstValueRL); rerr != nil { 204 return false 205 } 206 207 return true 208 }) 209 210 if err != nil { 211 return false 212 } 213 214 err = rerr 215 return (err == nil) 216 } 217 218 err = fmt.Errorf("%v: unsupported converted type %v in %v field type", dataPath, *element.ConvertedType, *element.RepetitionType) 219 return false 220 } 221 222 tree.Range(handleElement) 223 return columnDataMap, err 224 } 225 226 // Column - denotes values of a column. 227 type Column struct { 228 parquetType parquet.Type // value type. 229 values []interface{} // must be a slice of parquet typed values. 230 definitionLevels []int64 // exactly same length of values. 231 repetitionLevels []int64 // exactly same length of values. 232 rowCount int32 233 maxBitWidth int32 234 minValue interface{} 235 maxValue interface{} 236 } 237 238 func (column *Column) updateMinMaxValue(value interface{}) { 239 if column.minValue == nil && column.maxValue == nil { 240 column.minValue = value 241 column.maxValue = value 242 return 243 } 244 245 switch column.parquetType { 246 case parquet.Type_BOOLEAN: 247 if column.minValue.(bool) && !value.(bool) { 248 column.minValue = value 249 } 250 251 if !column.maxValue.(bool) && value.(bool) { 252 column.maxValue = value 253 } 254 255 case parquet.Type_INT32: 256 if column.minValue.(int32) > value.(int32) { 257 column.minValue = value 258 } 259 260 if column.maxValue.(int32) < value.(int32) { 261 column.maxValue = value 262 } 263 264 case parquet.Type_INT64: 265 if column.minValue.(int64) > value.(int64) { 266 column.minValue = value 267 } 268 269 if column.maxValue.(int64) < value.(int64) { 270 column.maxValue = value 271 } 272 273 case parquet.Type_FLOAT: 274 if column.minValue.(float32) > value.(float32) { 275 column.minValue = value 276 } 277 278 if column.maxValue.(float32) < value.(float32) { 279 column.maxValue = value 280 } 281 282 case parquet.Type_DOUBLE: 283 if column.minValue.(float64) > value.(float64) { 284 column.minValue = value 285 } 286 287 if column.maxValue.(float64) < value.(float64) { 288 column.maxValue = value 289 } 290 291 case parquet.Type_BYTE_ARRAY: 292 if bytes.Compare(column.minValue.([]byte), value.([]byte)) > 0 { 293 column.minValue = value 294 } 295 296 if bytes.Compare(column.minValue.([]byte), value.([]byte)) < 0 { 297 column.maxValue = value 298 } 299 } 300 } 301 302 func (column *Column) updateStats(value interface{}, DL, RL int64) { 303 if RL == 0 { 304 column.rowCount++ 305 } 306 307 if value == nil { 308 return 309 } 310 311 var bitWidth int32 312 switch column.parquetType { 313 case parquet.Type_BOOLEAN: 314 bitWidth = 1 315 case parquet.Type_INT32: 316 bitWidth = common.BitWidth(uint64(value.(int32))) 317 case parquet.Type_INT64: 318 bitWidth = common.BitWidth(uint64(value.(int64))) 319 case parquet.Type_FLOAT: 320 bitWidth = 32 321 case parquet.Type_DOUBLE: 322 bitWidth = 64 323 case parquet.Type_BYTE_ARRAY: 324 bitWidth = int32(len(value.([]byte))) 325 } 326 if column.maxBitWidth < bitWidth { 327 column.maxBitWidth = bitWidth 328 } 329 330 column.updateMinMaxValue(value) 331 } 332 333 func (column *Column) add(value interface{}, DL, RL int64) { 334 column.values = append(column.values, value) 335 column.definitionLevels = append(column.definitionLevels, DL) 336 column.repetitionLevels = append(column.repetitionLevels, RL) 337 column.updateStats(value, DL, RL) 338 } 339 340 // AddNull - adds nil value. 341 func (column *Column) AddNull(DL, RL int64) { 342 column.add(nil, DL, RL) 343 } 344 345 // AddBoolean - adds boolean value. 346 func (column *Column) AddBoolean(value bool, DL, RL int64) { 347 if column.parquetType != parquet.Type_BOOLEAN { 348 panic(fmt.Errorf("expected %v value", column.parquetType)) 349 } 350 351 column.add(value, DL, RL) 352 } 353 354 // AddInt32 - adds int32 value. 355 func (column *Column) AddInt32(value int32, DL, RL int64) { 356 if column.parquetType != parquet.Type_INT32 { 357 panic(fmt.Errorf("expected %v value", column.parquetType)) 358 } 359 360 column.add(value, DL, RL) 361 } 362 363 // AddInt64 - adds int64 value. 364 func (column *Column) AddInt64(value int64, DL, RL int64) { 365 if column.parquetType != parquet.Type_INT64 { 366 panic(fmt.Errorf("expected %v value", column.parquetType)) 367 } 368 369 column.add(value, DL, RL) 370 } 371 372 // AddFloat - adds float32 value. 373 func (column *Column) AddFloat(value float32, DL, RL int64) { 374 if column.parquetType != parquet.Type_FLOAT { 375 panic(fmt.Errorf("expected %v value", column.parquetType)) 376 } 377 378 column.add(value, DL, RL) 379 } 380 381 // AddDouble - adds float64 value. 382 func (column *Column) AddDouble(value float64, DL, RL int64) { 383 if column.parquetType != parquet.Type_DOUBLE { 384 panic(fmt.Errorf("expected %v value", column.parquetType)) 385 } 386 387 column.add(value, DL, RL) 388 } 389 390 // AddByteArray - adds byte array value. 391 func (column *Column) AddByteArray(value []byte, DL, RL int64) { 392 if column.parquetType != parquet.Type_BYTE_ARRAY { 393 panic(fmt.Errorf("expected %v value", column.parquetType)) 394 } 395 396 column.add(value, DL, RL) 397 } 398 399 // Merge - merges columns. 400 func (column *Column) Merge(column2 *Column) { 401 if column.parquetType != column2.parquetType { 402 panic(fmt.Errorf("merge differs in parquet type")) 403 } 404 405 column.values = append(column.values, column2.values...) 406 column.definitionLevels = append(column.definitionLevels, column2.definitionLevels...) 407 column.repetitionLevels = append(column.repetitionLevels, column2.repetitionLevels...) 408 409 column.rowCount += column2.rowCount 410 if column.maxBitWidth < column2.maxBitWidth { 411 column.maxBitWidth = column2.maxBitWidth 412 } 413 414 column.updateMinMaxValue(column2.minValue) 415 column.updateMinMaxValue(column2.maxValue) 416 } 417 418 func (column *Column) String() string { 419 var strs []string 420 strs = append(strs, fmt.Sprintf("parquetType: %v", column.parquetType)) 421 strs = append(strs, fmt.Sprintf("values: %v", column.values)) 422 strs = append(strs, fmt.Sprintf("definitionLevels: %v", column.definitionLevels)) 423 strs = append(strs, fmt.Sprintf("repetitionLevels: %v", column.repetitionLevels)) 424 strs = append(strs, fmt.Sprintf("rowCount: %v", column.rowCount)) 425 strs = append(strs, fmt.Sprintf("maxBitWidth: %v", column.maxBitWidth)) 426 strs = append(strs, fmt.Sprintf("minValue: %v", column.minValue)) 427 strs = append(strs, fmt.Sprintf("maxValue: %v", column.maxValue)) 428 return "{" + strings.Join(strs, ", ") + "}" 429 } 430 431 func (column *Column) encodeValue(value interface{}, element *schema.Element) []byte { 432 if value == nil { 433 return nil 434 } 435 436 valueData := encoding.PlainEncode(common.ToSliceValue([]interface{}{value}, column.parquetType), column.parquetType) 437 if column.parquetType == parquet.Type_BYTE_ARRAY && element.ConvertedType != nil { 438 switch *element.ConvertedType { 439 case parquet.ConvertedType_UTF8, parquet.ConvertedType_DECIMAL: 440 valueData = valueData[4:] 441 } 442 } 443 444 return valueData 445 } 446 447 func (column *Column) toDataPageV2(element *schema.Element, parquetEncoding parquet.Encoding) *ColumnChunk { 448 var definedValues []interface{} 449 for _, value := range column.values { 450 if value != nil { 451 definedValues = append(definedValues, value) 452 } 453 } 454 455 var encodedData []byte 456 switch parquetEncoding { 457 case parquet.Encoding_PLAIN: 458 encodedData = encoding.PlainEncode(common.ToSliceValue(definedValues, column.parquetType), column.parquetType) 459 460 case parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: 461 var bytesSlices [][]byte 462 for _, value := range column.values { 463 bytesSlices = append(bytesSlices, value.([]byte)) 464 } 465 encodedData = encoding.DeltaLengthByteArrayEncode(bytesSlices) 466 } 467 468 compressionType := parquet.CompressionCodec_SNAPPY 469 if element.CompressionType != nil { 470 compressionType = *element.CompressionType 471 } 472 473 compressedData, err := common.Compress(compressionType, encodedData) 474 if err != nil { 475 panic(err) 476 } 477 478 DLData := encoding.RLEBitPackedHybridEncode( 479 column.definitionLevels, 480 common.BitWidth(uint64(element.MaxDefinitionLevel)), 481 parquet.Type_INT64, 482 ) 483 484 RLData := encoding.RLEBitPackedHybridEncode( 485 column.repetitionLevels, 486 common.BitWidth(uint64(element.MaxRepetitionLevel)), 487 parquet.Type_INT64, 488 ) 489 490 pageHeader := parquet.NewPageHeader() 491 pageHeader.Type = parquet.PageType_DATA_PAGE_V2 492 pageHeader.CompressedPageSize = int32(len(compressedData) + len(DLData) + len(RLData)) 493 pageHeader.UncompressedPageSize = int32(len(encodedData) + len(DLData) + len(RLData)) 494 pageHeader.DataPageHeaderV2 = parquet.NewDataPageHeaderV2() 495 pageHeader.DataPageHeaderV2.NumValues = int32(len(column.values)) 496 pageHeader.DataPageHeaderV2.NumNulls = int32(len(column.values) - len(definedValues)) 497 pageHeader.DataPageHeaderV2.NumRows = column.rowCount 498 pageHeader.DataPageHeaderV2.Encoding = parquetEncoding 499 pageHeader.DataPageHeaderV2.DefinitionLevelsByteLength = int32(len(DLData)) 500 pageHeader.DataPageHeaderV2.RepetitionLevelsByteLength = int32(len(RLData)) 501 pageHeader.DataPageHeaderV2.IsCompressed = true 502 pageHeader.DataPageHeaderV2.Statistics = parquet.NewStatistics() 503 pageHeader.DataPageHeaderV2.Statistics.Min = column.encodeValue(column.minValue, element) 504 pageHeader.DataPageHeaderV2.Statistics.Max = column.encodeValue(column.maxValue, element) 505 506 ts := thrift.NewTSerializer() 507 ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) 508 rawData, err := ts.Write(context.TODO(), pageHeader) 509 if err != nil { 510 panic(err) 511 } 512 rawData = append(rawData, RLData...) 513 rawData = append(rawData, DLData...) 514 rawData = append(rawData, compressedData...) 515 516 metadata := parquet.NewColumnMetaData() 517 metadata.Type = column.parquetType 518 metadata.Encodings = []parquet.Encoding{ 519 parquet.Encoding_PLAIN, 520 parquet.Encoding_RLE, 521 parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY, 522 } 523 metadata.Codec = compressionType 524 metadata.NumValues = int64(pageHeader.DataPageHeaderV2.NumValues) 525 metadata.TotalCompressedSize = int64(len(rawData)) 526 metadata.TotalUncompressedSize = int64(pageHeader.UncompressedPageSize) + int64(len(rawData)) - int64(pageHeader.CompressedPageSize) 527 metadata.PathInSchema = strings.Split(element.PathInSchema, ".") 528 metadata.Statistics = parquet.NewStatistics() 529 metadata.Statistics.Min = pageHeader.DataPageHeaderV2.Statistics.Min 530 metadata.Statistics.Max = pageHeader.DataPageHeaderV2.Statistics.Max 531 532 chunk := new(ColumnChunk) 533 chunk.ColumnChunk.MetaData = metadata 534 chunk.dataPageLen = int64(len(rawData)) 535 chunk.dataLen = int64(len(rawData)) 536 chunk.data = rawData 537 538 return chunk 539 } 540 541 func (column *Column) toRLEDictPage(element *schema.Element) *ColumnChunk { 542 dictPageData, dataPageData, dictValueCount, indexBitWidth := encoding.RLEDictEncode(column.values, column.parquetType, column.maxBitWidth) 543 544 compressionType := parquet.CompressionCodec_SNAPPY 545 if element.CompressionType != nil { 546 compressionType = *element.CompressionType 547 } 548 549 compressedData, err := common.Compress(compressionType, dictPageData) 550 if err != nil { 551 panic(err) 552 } 553 554 dictPageHeader := parquet.NewPageHeader() 555 dictPageHeader.Type = parquet.PageType_DICTIONARY_PAGE 556 dictPageHeader.CompressedPageSize = int32(len(compressedData)) 557 dictPageHeader.UncompressedPageSize = int32(len(dictPageData)) 558 dictPageHeader.DictionaryPageHeader = parquet.NewDictionaryPageHeader() 559 dictPageHeader.DictionaryPageHeader.NumValues = dictValueCount 560 dictPageHeader.DictionaryPageHeader.Encoding = parquet.Encoding_PLAIN 561 562 ts := thrift.NewTSerializer() 563 ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) 564 dictPageRawData, err := ts.Write(context.TODO(), dictPageHeader) 565 if err != nil { 566 panic(err) 567 } 568 dictPageRawData = append(dictPageRawData, compressedData...) 569 570 RLData := encoding.RLEBitPackedHybridEncode( 571 column.repetitionLevels, 572 common.BitWidth(uint64(element.MaxRepetitionLevel)), 573 parquet.Type_INT64, 574 ) 575 encodedData := RLData 576 577 DLData := encoding.RLEBitPackedHybridEncode( 578 column.definitionLevels, 579 common.BitWidth(uint64(element.MaxDefinitionLevel)), 580 parquet.Type_INT64, 581 ) 582 encodedData = append(encodedData, DLData...) 583 584 encodedData = append(encodedData, indexBitWidth) 585 encodedData = append(encodedData, dataPageData...) 586 587 compressedData, err = common.Compress(compressionType, encodedData) 588 if err != nil { 589 panic(err) 590 } 591 592 dataPageHeader := parquet.NewPageHeader() 593 dataPageHeader.Type = parquet.PageType_DATA_PAGE 594 dataPageHeader.CompressedPageSize = int32(len(compressedData)) 595 dataPageHeader.UncompressedPageSize = int32(len(encodedData)) 596 dataPageHeader.DataPageHeader = parquet.NewDataPageHeader() 597 dataPageHeader.DataPageHeader.NumValues = int32(len(column.values)) 598 dataPageHeader.DataPageHeader.DefinitionLevelEncoding = parquet.Encoding_RLE 599 dataPageHeader.DataPageHeader.RepetitionLevelEncoding = parquet.Encoding_RLE 600 dataPageHeader.DataPageHeader.Encoding = parquet.Encoding_RLE_DICTIONARY 601 602 ts = thrift.NewTSerializer() 603 ts.Protocol = thrift.NewTCompactProtocolFactory().GetProtocol(ts.Transport) 604 dataPageRawData, err := ts.Write(context.TODO(), dataPageHeader) 605 if err != nil { 606 panic(err) 607 } 608 dataPageRawData = append(dataPageRawData, compressedData...) 609 610 metadata := parquet.NewColumnMetaData() 611 metadata.Type = column.parquetType 612 metadata.Encodings = []parquet.Encoding{ 613 parquet.Encoding_PLAIN, 614 parquet.Encoding_RLE, 615 parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY, 616 parquet.Encoding_RLE_DICTIONARY, 617 } 618 metadata.Codec = compressionType 619 metadata.NumValues = int64(dataPageHeader.DataPageHeader.NumValues) 620 metadata.TotalCompressedSize = int64(len(dictPageRawData)) + int64(len(dataPageRawData)) 621 uncompressedSize := int64(dictPageHeader.UncompressedPageSize) + int64(len(dictPageData)) - int64(dictPageHeader.CompressedPageSize) 622 uncompressedSize += int64(dataPageHeader.UncompressedPageSize) + int64(len(dataPageData)) - int64(dataPageHeader.CompressedPageSize) 623 metadata.TotalUncompressedSize = uncompressedSize 624 metadata.PathInSchema = strings.Split(element.PathInSchema, ".") 625 metadata.Statistics = parquet.NewStatistics() 626 metadata.Statistics.Min = column.encodeValue(column.minValue, element) 627 metadata.Statistics.Max = column.encodeValue(column.maxValue, element) 628 629 chunk := new(ColumnChunk) 630 chunk.ColumnChunk.MetaData = metadata 631 chunk.isDictPage = true 632 chunk.dictPageLen = int64(len(dictPageRawData)) 633 chunk.dataPageLen = int64(len(dataPageRawData)) 634 chunk.dataLen = chunk.dictPageLen + chunk.dataPageLen 635 chunk.data = append(dictPageRawData, dataPageRawData...) 636 637 return chunk 638 } 639 640 // Encode an element. 641 func (column *Column) Encode(element *schema.Element) *ColumnChunk { 642 parquetEncoding := getDefaultEncoding(column.parquetType) 643 if element.Encoding != nil { 644 parquetEncoding = *element.Encoding 645 } 646 647 switch parquetEncoding { 648 case parquet.Encoding_PLAIN, parquet.Encoding_DELTA_LENGTH_BYTE_ARRAY: 649 return column.toDataPageV2(element, parquetEncoding) 650 } 651 652 return column.toRLEDictPage(element) 653 } 654 655 // NewColumn - creates new column data 656 func NewColumn(parquetType parquet.Type) *Column { 657 switch parquetType { 658 case parquet.Type_BOOLEAN, parquet.Type_INT32, parquet.Type_INT64, parquet.Type_FLOAT, parquet.Type_DOUBLE, parquet.Type_BYTE_ARRAY: 659 default: 660 panic(fmt.Errorf("unsupported parquet type %v", parquetType)) 661 } 662 663 return &Column{ 664 parquetType: parquetType, 665 } 666 } 667 668 // UnmarshalJSON - decodes JSON data into map of Column. 669 func UnmarshalJSON(data []byte, tree *schema.Tree) (map[string]*Column, error) { 670 if !tree.ReadOnly() { 671 return nil, fmt.Errorf("tree must be read only") 672 } 673 674 inputValue, err := bytesToJSONValue(data) 675 if err != nil { 676 return nil, err 677 } 678 679 columnDataMap := make(map[string]*Column) 680 return populate(columnDataMap, inputValue, tree, 0) 681 }