github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/buffer.go (about) 1 package parquet 2 3 import ( 4 "log" 5 "reflect" 6 "runtime" 7 "sort" 8 "sync" 9 "sync/atomic" 10 11 "github.com/parquet-go/parquet-go/internal/debug" 12 ) 13 14 // GenericBuffer is similar to a Buffer but uses a type parameter to define the 15 // Go type representing the schema of rows in the buffer. 16 // 17 // See GenericWriter for details about the benefits over the classic Buffer API. 18 type GenericBuffer[T any] struct { 19 base Buffer 20 write bufferFunc[T] 21 } 22 23 // NewGenericBuffer is like NewBuffer but returns a GenericBuffer[T] suited to write 24 // rows of Go type T. 25 // 26 // The type parameter T should be a map, struct, or any. Any other types will 27 // cause a panic at runtime. Type checking is a lot more effective when the 28 // generic parameter is a struct type, using map and interface types is somewhat 29 // similar to using a Writer. If using an interface type for the type parameter, 30 // then providing a schema at instantiation is required. 31 // 32 // If the option list may explicitly declare a schema, it must be compatible 33 // with the schema generated from T. 34 func NewGenericBuffer[T any](options ...RowGroupOption) *GenericBuffer[T] { 35 config, err := NewRowGroupConfig(options...) 36 if err != nil { 37 panic(err) 38 } 39 40 t := typeOf[T]() 41 if config.Schema == nil && t != nil { 42 config.Schema = schemaOf(dereference(t)) 43 } 44 45 if config.Schema == nil { 46 panic("generic buffer must be instantiated with schema or concrete type.") 47 } 48 49 buf := &GenericBuffer[T]{ 50 base: Buffer{config: config}, 51 } 52 buf.base.configure(config.Schema) 53 buf.write = bufferFuncOf[T](t, config.Schema) 54 return buf 55 } 56 57 func typeOf[T any]() reflect.Type { 58 var v T 59 return reflect.TypeOf(v) 60 } 61 62 type bufferFunc[T any] func(*GenericBuffer[T], []T) (int, error) 63 64 func bufferFuncOf[T any](t reflect.Type, schema *Schema) bufferFunc[T] { 65 if t == nil { 66 return (*GenericBuffer[T]).writeRows 67 } 68 switch t.Kind() { 69 case reflect.Interface, reflect.Map: 70 return (*GenericBuffer[T]).writeRows 71 72 case reflect.Struct: 73 return makeBufferFunc[T](t, schema) 74 75 case reflect.Pointer: 76 if e := t.Elem(); e.Kind() == reflect.Struct { 77 return makeBufferFunc[T](t, schema) 78 } 79 } 80 panic("cannot create buffer for values of type " + t.String()) 81 } 82 83 func makeBufferFunc[T any](t reflect.Type, schema *Schema) bufferFunc[T] { 84 writeRows := writeRowsFuncOf(t, schema, nil) 85 return func(buf *GenericBuffer[T], rows []T) (n int, err error) { 86 err = writeRows(buf.base.columns, makeArrayOf(rows), columnLevels{}) 87 if err == nil { 88 n = len(rows) 89 } 90 return n, err 91 } 92 } 93 94 func (buf *GenericBuffer[T]) Size() int64 { 95 return buf.base.Size() 96 } 97 98 func (buf *GenericBuffer[T]) NumRows() int64 { 99 return buf.base.NumRows() 100 } 101 102 func (buf *GenericBuffer[T]) ColumnChunks() []ColumnChunk { 103 return buf.base.ColumnChunks() 104 } 105 106 func (buf *GenericBuffer[T]) ColumnBuffers() []ColumnBuffer { 107 return buf.base.ColumnBuffers() 108 } 109 110 func (buf *GenericBuffer[T]) SortingColumns() []SortingColumn { 111 return buf.base.SortingColumns() 112 } 113 114 func (buf *GenericBuffer[T]) Len() int { 115 return buf.base.Len() 116 } 117 118 func (buf *GenericBuffer[T]) Less(i, j int) bool { 119 return buf.base.Less(i, j) 120 } 121 122 func (buf *GenericBuffer[T]) Swap(i, j int) { 123 buf.base.Swap(i, j) 124 } 125 126 func (buf *GenericBuffer[T]) Reset() { 127 buf.base.Reset() 128 } 129 130 func (buf *GenericBuffer[T]) Write(rows []T) (int, error) { 131 if len(rows) == 0 { 132 return 0, nil 133 } 134 return buf.write(buf, rows) 135 } 136 137 func (buf *GenericBuffer[T]) WriteRows(rows []Row) (int, error) { 138 return buf.base.WriteRows(rows) 139 } 140 141 func (buf *GenericBuffer[T]) WriteRowGroup(rowGroup RowGroup) (int64, error) { 142 return buf.base.WriteRowGroup(rowGroup) 143 } 144 145 func (buf *GenericBuffer[T]) Rows() Rows { 146 return buf.base.Rows() 147 } 148 149 func (buf *GenericBuffer[T]) Schema() *Schema { 150 return buf.base.Schema() 151 } 152 153 func (buf *GenericBuffer[T]) writeRows(rows []T) (int, error) { 154 if cap(buf.base.rowbuf) < len(rows) { 155 buf.base.rowbuf = make([]Row, len(rows)) 156 } else { 157 buf.base.rowbuf = buf.base.rowbuf[:len(rows)] 158 } 159 defer clearRows(buf.base.rowbuf) 160 161 schema := buf.base.Schema() 162 for i := range rows { 163 buf.base.rowbuf[i] = schema.Deconstruct(buf.base.rowbuf[i], &rows[i]) 164 } 165 166 return buf.base.WriteRows(buf.base.rowbuf) 167 } 168 169 var ( 170 _ RowGroup = (*GenericBuffer[any])(nil) 171 _ RowGroupWriter = (*GenericBuffer[any])(nil) 172 _ sort.Interface = (*GenericBuffer[any])(nil) 173 174 _ RowGroup = (*GenericBuffer[struct{}])(nil) 175 _ RowGroupWriter = (*GenericBuffer[struct{}])(nil) 176 _ sort.Interface = (*GenericBuffer[struct{}])(nil) 177 178 _ RowGroup = (*GenericBuffer[map[struct{}]struct{}])(nil) 179 _ RowGroupWriter = (*GenericBuffer[map[struct{}]struct{}])(nil) 180 _ sort.Interface = (*GenericBuffer[map[struct{}]struct{}])(nil) 181 ) 182 183 // Buffer represents an in-memory group of parquet rows. 184 // 185 // The main purpose of the Buffer type is to provide a way to sort rows before 186 // writing them to a parquet file. Buffer implements sort.Interface as a way 187 // to support reordering the rows that have been written to it. 188 type Buffer struct { 189 config *RowGroupConfig 190 schema *Schema 191 rowbuf []Row 192 colbuf [][]Value 193 chunks []ColumnChunk 194 columns []ColumnBuffer 195 sorted []ColumnBuffer 196 } 197 198 // NewBuffer constructs a new buffer, using the given list of buffer options 199 // to configure the buffer returned by the function. 200 // 201 // The function panics if the buffer configuration is invalid. Programs that 202 // cannot guarantee the validity of the options passed to NewBuffer should 203 // construct the buffer configuration independently prior to calling this 204 // function: 205 // 206 // config, err := parquet.NewRowGroupConfig(options...) 207 // if err != nil { 208 // // handle the configuration error 209 // ... 210 // } else { 211 // // this call to create a buffer is guaranteed not to panic 212 // buffer := parquet.NewBuffer(config) 213 // ... 214 // } 215 func NewBuffer(options ...RowGroupOption) *Buffer { 216 config, err := NewRowGroupConfig(options...) 217 if err != nil { 218 panic(err) 219 } 220 buf := &Buffer{ 221 config: config, 222 } 223 if config.Schema != nil { 224 buf.configure(config.Schema) 225 } 226 return buf 227 } 228 229 func (buf *Buffer) configure(schema *Schema) { 230 if schema == nil { 231 return 232 } 233 sortingColumns := buf.config.Sorting.SortingColumns 234 buf.sorted = make([]ColumnBuffer, len(sortingColumns)) 235 236 forEachLeafColumnOf(schema, func(leaf leafColumn) { 237 nullOrdering := nullsGoLast 238 columnIndex := int(leaf.columnIndex) 239 columnType := leaf.node.Type() 240 bufferCap := buf.config.ColumnBufferCapacity 241 dictionary := (Dictionary)(nil) 242 encoding := encodingOf(leaf.node) 243 244 if isDictionaryEncoding(encoding) { 245 estimatedDictBufferSize := columnType.EstimateSize(bufferCap) 246 dictBuffer := columnType.NewValues( 247 make([]byte, 0, estimatedDictBufferSize), 248 nil, 249 ) 250 dictionary = columnType.NewDictionary(columnIndex, 0, dictBuffer) 251 columnType = dictionary.Type() 252 } 253 254 sortingIndex := searchSortingColumn(sortingColumns, leaf.path) 255 if sortingIndex < len(sortingColumns) && sortingColumns[sortingIndex].NullsFirst() { 256 nullOrdering = nullsGoFirst 257 } 258 259 column := columnType.NewColumnBuffer(columnIndex, bufferCap) 260 switch { 261 case leaf.maxRepetitionLevel > 0: 262 column = newRepeatedColumnBuffer(column, leaf.maxRepetitionLevel, leaf.maxDefinitionLevel, nullOrdering) 263 case leaf.maxDefinitionLevel > 0: 264 column = newOptionalColumnBuffer(column, leaf.maxDefinitionLevel, nullOrdering) 265 } 266 buf.columns = append(buf.columns, column) 267 268 if sortingIndex < len(sortingColumns) { 269 if sortingColumns[sortingIndex].Descending() { 270 column = &reversedColumnBuffer{column} 271 } 272 buf.sorted[sortingIndex] = column 273 } 274 }) 275 276 buf.schema = schema 277 buf.rowbuf = make([]Row, 0, 1) 278 buf.colbuf = make([][]Value, len(buf.columns)) 279 buf.chunks = make([]ColumnChunk, len(buf.columns)) 280 281 for i, column := range buf.columns { 282 buf.chunks[i] = column 283 } 284 } 285 286 // Size returns the estimated size of the buffer in memory (in bytes). 287 func (buf *Buffer) Size() int64 { 288 size := int64(0) 289 for _, col := range buf.columns { 290 size += col.Size() 291 } 292 return size 293 } 294 295 // NumRows returns the number of rows written to the buffer. 296 func (buf *Buffer) NumRows() int64 { return int64(buf.Len()) } 297 298 // ColumnChunks returns the buffer columns. 299 func (buf *Buffer) ColumnChunks() []ColumnChunk { return buf.chunks } 300 301 // ColumnBuffer returns the buffer columns. 302 // 303 // This method is similar to ColumnChunks, but returns a list of ColumnBuffer 304 // instead of a ColumnChunk values (the latter being read-only); calling 305 // ColumnBuffers or ColumnChunks with the same index returns the same underlying 306 // objects, but with different types, which removes the need for making a type 307 // assertion if the program needed to write directly to the column buffers. 308 // The presence of the ColumnChunks method is still required to satisfy the 309 // RowGroup interface. 310 func (buf *Buffer) ColumnBuffers() []ColumnBuffer { return buf.columns } 311 312 // Schema returns the schema of the buffer. 313 // 314 // The schema is either configured by passing a Schema in the option list when 315 // constructing the buffer, or lazily discovered when the first row is written. 316 func (buf *Buffer) Schema() *Schema { return buf.schema } 317 318 // SortingColumns returns the list of columns by which the buffer will be 319 // sorted. 320 // 321 // The sorting order is configured by passing a SortingColumns option when 322 // constructing the buffer. 323 func (buf *Buffer) SortingColumns() []SortingColumn { return buf.config.Sorting.SortingColumns } 324 325 // Len returns the number of rows written to the buffer. 326 func (buf *Buffer) Len() int { 327 if len(buf.columns) == 0 { 328 return 0 329 } else { 330 // All columns have the same number of rows. 331 return buf.columns[0].Len() 332 } 333 } 334 335 // Less returns true if row[i] < row[j] in the buffer. 336 func (buf *Buffer) Less(i, j int) bool { 337 for _, col := range buf.sorted { 338 switch { 339 case col.Less(i, j): 340 return true 341 case col.Less(j, i): 342 return false 343 } 344 } 345 return false 346 } 347 348 // Swap exchanges the rows at indexes i and j. 349 func (buf *Buffer) Swap(i, j int) { 350 for _, col := range buf.columns { 351 col.Swap(i, j) 352 } 353 } 354 355 // Reset clears the content of the buffer, allowing it to be reused. 356 func (buf *Buffer) Reset() { 357 for _, col := range buf.columns { 358 col.Reset() 359 } 360 } 361 362 // Write writes a row held in a Go value to the buffer. 363 func (buf *Buffer) Write(row interface{}) error { 364 if buf.schema == nil { 365 buf.configure(SchemaOf(row)) 366 } 367 368 buf.rowbuf = buf.rowbuf[:1] 369 defer clearRows(buf.rowbuf) 370 371 buf.rowbuf[0] = buf.schema.Deconstruct(buf.rowbuf[0], row) 372 _, err := buf.WriteRows(buf.rowbuf) 373 return err 374 } 375 376 // WriteRows writes parquet rows to the buffer. 377 func (buf *Buffer) WriteRows(rows []Row) (int, error) { 378 defer func() { 379 for i, colbuf := range buf.colbuf { 380 clearValues(colbuf) 381 buf.colbuf[i] = colbuf[:0] 382 } 383 }() 384 385 if buf.schema == nil { 386 return 0, ErrRowGroupSchemaMissing 387 } 388 389 for _, row := range rows { 390 for _, value := range row { 391 columnIndex := value.Column() 392 buf.colbuf[columnIndex] = append(buf.colbuf[columnIndex], value) 393 } 394 } 395 396 for columnIndex, values := range buf.colbuf { 397 if _, err := buf.columns[columnIndex].WriteValues(values); err != nil { 398 // TODO: an error at this stage will leave the buffer in an invalid 399 // state since the row was partially written. Applications are not 400 // expected to continue using the buffer after getting an error, 401 // maybe we can enforce it? 402 return 0, err 403 } 404 } 405 406 return len(rows), nil 407 } 408 409 // WriteRowGroup satisfies the RowGroupWriter interface. 410 func (buf *Buffer) WriteRowGroup(rowGroup RowGroup) (int64, error) { 411 rowGroupSchema := rowGroup.Schema() 412 switch { 413 case rowGroupSchema == nil: 414 return 0, ErrRowGroupSchemaMissing 415 case buf.schema == nil: 416 buf.configure(rowGroupSchema) 417 case !nodesAreEqual(buf.schema, rowGroupSchema): 418 return 0, ErrRowGroupSchemaMismatch 419 } 420 if !sortingColumnsHavePrefix(rowGroup.SortingColumns(), buf.SortingColumns()) { 421 return 0, ErrRowGroupSortingColumnsMismatch 422 } 423 n := buf.NumRows() 424 r := rowGroup.Rows() 425 defer r.Close() 426 _, err := CopyRows(bufferWriter{buf}, r) 427 return buf.NumRows() - n, err 428 } 429 430 // Rows returns a reader exposing the current content of the buffer. 431 // 432 // The buffer and the returned reader share memory. Mutating the buffer 433 // concurrently to reading rows may result in non-deterministic behavior. 434 func (buf *Buffer) Rows() Rows { return newRowGroupRows(buf, ReadModeSync) } 435 436 // bufferWriter is an adapter for Buffer which implements both RowWriter and 437 // PageWriter to enable optimizations in CopyRows for types that support writing 438 // rows by copying whole pages instead of calling WriteRow repeatedly. 439 type bufferWriter struct{ buf *Buffer } 440 441 func (w bufferWriter) WriteRows(rows []Row) (int, error) { 442 return w.buf.WriteRows(rows) 443 } 444 445 func (w bufferWriter) WriteValues(values []Value) (int, error) { 446 return w.buf.columns[values[0].Column()].WriteValues(values) 447 } 448 449 func (w bufferWriter) WritePage(page Page) (int64, error) { 450 return CopyValues(w.buf.columns[page.Column()], page.Values()) 451 } 452 453 var ( 454 _ RowGroup = (*Buffer)(nil) 455 _ RowGroupWriter = (*Buffer)(nil) 456 _ sort.Interface = (*Buffer)(nil) 457 458 _ RowWriter = (*bufferWriter)(nil) 459 _ PageWriter = (*bufferWriter)(nil) 460 _ ValueWriter = (*bufferWriter)(nil) 461 ) 462 463 type buffer struct { 464 data []byte 465 refc uintptr 466 pool *bufferPool 467 stack []byte 468 } 469 470 func (b *buffer) refCount() int { 471 return int(atomic.LoadUintptr(&b.refc)) 472 } 473 474 func (b *buffer) ref() { 475 atomic.AddUintptr(&b.refc, +1) 476 } 477 478 func (b *buffer) unref() { 479 if atomic.AddUintptr(&b.refc, ^uintptr(0)) == 0 { 480 if b.pool != nil { 481 b.pool.put(b) 482 } 483 } 484 } 485 486 func monitorBufferRelease(b *buffer) { 487 if rc := b.refCount(); rc != 0 { 488 log.Printf("PARQUETGODEBUG: buffer garbage collected with non-zero reference count\n%s", string(b.stack)) 489 } 490 } 491 492 type bufferPool struct { 493 // Buckets are split in two groups for short and large buffers. In the short 494 // buffer group (below 256KB), the growth rate between each bucket is 2. The 495 // growth rate changes to 1.5 in the larger buffer group. 496 // 497 // Short buffer buckets: 498 // --------------------- 499 // 4K, 8K, 16K, 32K, 64K, 128K, 256K 500 // 501 // Large buffer buckets: 502 // --------------------- 503 // 364K, 546K, 819K ... 504 // 505 buckets [bufferPoolBucketCount]sync.Pool 506 } 507 508 func (p *bufferPool) newBuffer(bufferSize, bucketSize int) *buffer { 509 b := &buffer{ 510 data: make([]byte, bufferSize, bucketSize), 511 refc: 1, 512 pool: p, 513 } 514 if debug.TRACEBUF > 0 { 515 b.stack = make([]byte, 4096) 516 runtime.SetFinalizer(b, monitorBufferRelease) 517 } 518 return b 519 } 520 521 // get returns a buffer from the levelled buffer pool. size is used to choose 522 // the appropriate pool. 523 func (p *bufferPool) get(bufferSize int) *buffer { 524 bucketIndex, bucketSize := bufferPoolBucketIndexAndSizeOfGet(bufferSize) 525 526 b := (*buffer)(nil) 527 if bucketIndex >= 0 { 528 b, _ = p.buckets[bucketIndex].Get().(*buffer) 529 } 530 531 if b == nil { 532 b = p.newBuffer(bufferSize, bucketSize) 533 } else { 534 b.data = b.data[:bufferSize] 535 b.ref() 536 } 537 538 if debug.TRACEBUF > 0 { 539 b.stack = b.stack[:runtime.Stack(b.stack[:cap(b.stack)], false)] 540 } 541 return b 542 } 543 544 func (p *bufferPool) put(b *buffer) { 545 if b.pool != p { 546 panic("BUG: buffer returned to a different pool than the one it was allocated from") 547 } 548 if b.refCount() != 0 { 549 panic("BUG: buffer returned to pool with a non-zero reference count") 550 } 551 if bucketIndex, _ := bufferPoolBucketIndexAndSizeOfPut(cap(b.data)); bucketIndex >= 0 { 552 p.buckets[bucketIndex].Put(b) 553 } 554 } 555 556 const ( 557 bufferPoolBucketCount = 32 558 bufferPoolMinSize = 4096 559 bufferPoolLastShortBucketSize = 262144 560 ) 561 562 func bufferPoolNextSize(size int) int { 563 if size < bufferPoolLastShortBucketSize { 564 return size * 2 565 } else { 566 return size + (size / 2) 567 } 568 } 569 570 func bufferPoolBucketIndexAndSizeOfGet(size int) (int, int) { 571 limit := bufferPoolMinSize 572 573 for i := 0; i < bufferPoolBucketCount; i++ { 574 if size <= limit { 575 return i, limit 576 } 577 limit = bufferPoolNextSize(limit) 578 } 579 580 return -1, size 581 } 582 583 func bufferPoolBucketIndexAndSizeOfPut(size int) (int, int) { 584 // When releasing buffers, some may have a capacity that is not one of the 585 // bucket sizes (due to the use of append for example). In this case, we 586 // have to put the buffer is the highest bucket with a size less or equal 587 // to the buffer capacity. 588 if limit := bufferPoolMinSize; size >= limit { 589 for i := 0; i < bufferPoolBucketCount; i++ { 590 n := bufferPoolNextSize(limit) 591 if size < n { 592 return i, limit 593 } 594 limit = n 595 } 596 } 597 return -1, size 598 } 599 600 var ( 601 buffers bufferPool 602 ) 603 604 type bufferedPage struct { 605 Page 606 values *buffer 607 offsets *buffer 608 repetitionLevels *buffer 609 definitionLevels *buffer 610 } 611 612 func newBufferedPage(page Page, values, offsets, definitionLevels, repetitionLevels *buffer) *bufferedPage { 613 p := &bufferedPage{ 614 Page: page, 615 values: values, 616 offsets: offsets, 617 definitionLevels: definitionLevels, 618 repetitionLevels: repetitionLevels, 619 } 620 bufferRef(values) 621 bufferRef(offsets) 622 bufferRef(definitionLevels) 623 bufferRef(repetitionLevels) 624 return p 625 } 626 627 func (p *bufferedPage) Slice(i, j int64) Page { 628 return newBufferedPage( 629 p.Page.Slice(i, j), 630 p.values, 631 p.offsets, 632 p.definitionLevels, 633 p.repetitionLevels, 634 ) 635 } 636 637 func (p *bufferedPage) Retain() { 638 bufferRef(p.values) 639 bufferRef(p.offsets) 640 bufferRef(p.definitionLevels) 641 bufferRef(p.repetitionLevels) 642 } 643 644 func (p *bufferedPage) Release() { 645 bufferUnref(p.values) 646 bufferUnref(p.offsets) 647 bufferUnref(p.definitionLevels) 648 bufferUnref(p.repetitionLevels) 649 } 650 651 func bufferRef(buf *buffer) { 652 if buf != nil { 653 buf.ref() 654 } 655 } 656 657 func bufferUnref(buf *buffer) { 658 if buf != nil { 659 buf.unref() 660 } 661 } 662 663 // Retain is a helper function to increment the reference counter of pages 664 // backed by memory which can be granularly managed by the application. 665 // 666 // Usage of this function is optional and with Release, is intended to allow 667 // finer grain memory management in the application. Most programs should be 668 // able to rely on automated memory management provided by the Go garbage 669 // collector instead. 670 // 671 // The function should be called when a page lifetime is about to be shared 672 // between multiple goroutines or layers of an application, and the program 673 // wants to express "sharing ownership" of the page. 674 // 675 // Calling this function on pages that do not embed a reference counter does 676 // nothing. 677 func Retain(page Page) { 678 if p, _ := page.(retainable); p != nil { 679 p.Retain() 680 } 681 } 682 683 // Release is a helper function to decrement the reference counter of pages 684 // backed by memory which can be granularly managed by the application. 685 // 686 // Usage of this is optional and with Retain, is intended to allow finer grained 687 // memory management in the application, at the expense of potentially causing 688 // panics if the page is used after its reference count has reached zero. Most 689 // programs should be able to rely on automated memory management provided by 690 // the Go garbage collector instead. 691 // 692 // The function should be called to return a page to the internal buffer pool, 693 // when a goroutine "releases ownership" it acquired either by being the single 694 // owner (e.g. capturing the return value from a ReadPage call) or having gotten 695 // shared ownership by calling Retain. 696 // 697 // Calling this function on pages that do not embed a reference counter does 698 // nothing. 699 func Release(page Page) { 700 if p, _ := page.(releasable); p != nil { 701 p.Release() 702 } 703 } 704 705 type retainable interface { 706 Retain() 707 } 708 709 type releasable interface { 710 Release() 711 } 712 713 var ( 714 _ retainable = (*bufferedPage)(nil) 715 _ releasable = (*bufferedPage)(nil) 716 )