github.com/parquet-go/parquet-go@v0.20.0/column_buffer.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"encoding/json"
     6  	"fmt"
     7  	"io"
     8  	"math/bits"
     9  	"reflect"
    10  	"sort"
    11  	"time"
    12  	"unsafe"
    13  
    14  	"github.com/parquet-go/parquet-go/deprecated"
    15  	"github.com/parquet-go/parquet-go/encoding/plain"
    16  	"github.com/parquet-go/parquet-go/internal/bitpack"
    17  	"github.com/parquet-go/parquet-go/internal/unsafecast"
    18  	"github.com/parquet-go/parquet-go/sparse"
    19  )
    20  
    21  // ColumnBuffer is an interface representing columns of a row group.
    22  //
    23  // ColumnBuffer implements sort.Interface as a way to support reordering the
    24  // rows that have been written to it.
    25  //
    26  // The current implementation has a limitation which prevents applications from
    27  // providing custom versions of this interface because it contains unexported
    28  // methods. The only way to create ColumnBuffer values is to call the
    29  // NewColumnBuffer of Type instances. This limitation may be lifted in future
    30  // releases.
// ColumnBuffer is an interface representing columns of a row group.
//
// ColumnBuffer implements sort.Interface as a way to support reordering the
// rows that have been written to it.
//
// The current implementation has a limitation which prevents applications from
// providing custom versions of this interface because it contains unexported
// methods. The only way to create ColumnBuffer values is to call the
// NewColumnBuffer of Type instances. This limitation may be lifted in future
// releases.
type ColumnBuffer interface {
	// Exposes a read-only view of the column buffer.
	ColumnChunk

	// The column implements ValueReaderAt as a mechanism to read values at
	// specific locations within the buffer.
	ValueReaderAt

	// The column implements ValueWriter as a mechanism to optimize the copy
	// of values into the buffer in contexts where the row information is
	// provided by the values because the repetition and definition levels
	// are set.
	ValueWriter

	// For indexed columns, returns the underlying dictionary holding the column
	// values. If the column is not indexed, nil is returned.
	Dictionary() Dictionary

	// Returns a copy of the column. The returned copy shares no memory with
	// the original, mutations of either column will not modify the other.
	Clone() ColumnBuffer

	// Returns the column as a Page.
	Page() Page

	// Clears all rows written to the column.
	Reset()

	// Returns the current capacity of the column (rows).
	Cap() int

	// Returns the number of rows currently written to the column.
	Len() int

	// Compares rows at index i and j and reports whether i < j.
	Less(i, j int) bool

	// Swaps rows at index i and j.
	Swap(i, j int)

	// Returns the size of the column buffer in bytes.
	Size() int64

	// This method is employed to write rows from arrays of Go values into the
	// column buffer. The method is currently unexported because it uses unsafe
	// APIs which would be difficult for applications to leverage, increasing
	// the risk of introducing bugs in the code. As a consequence, applications
	// cannot use custom implementations of the ColumnBuffer interface since
	// they cannot declare an unexported method that would match this signature.
	// It means that in order to create a ColumnBuffer value, programs need to
	// go through a call to NewColumnBuffer on a Type instance. We make this
	// trade off for now as it is preferable to optimize for safety over
	// extensibility in the public APIs, we might revisit in the future if we
	// learn about valid use cases for custom column buffer types.
	writeValues(rows sparse.Array, levels columnLevels)
}
    87  
// columnLevels carries the repetition and definition levels attached to the
// values being written into a column buffer.
type columnLevels struct {
	// repetitionDepth is not referenced in this file; its semantics are
	// defined by the callers of writeValues — TODO confirm at call sites.
	repetitionDepth byte
	// Repetition level of the values being written.
	repetitionLevel byte
	// Definition level of the values being written; values are non-null when
	// it equals the column's max definition level.
	definitionLevel byte
}
    93  
    94  func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitionLevels []byte) (ColumnIndex, error) {
    95  	index, err := base.ColumnIndex()
    96  	if err != nil {
    97  		return nil, err
    98  	}
    99  	return &nullableColumnIndex{
   100  		ColumnIndex:        index,
   101  		maxDefinitionLevel: maxDefinitionLevel,
   102  		definitionLevels:   definitionLevels,
   103  	}, nil
   104  }
   105  
// nullableColumnIndex decorates the column index of a base column to derive
// null statistics from the definition levels of the nullable view, since the
// base column itself never stores null values.
type nullableColumnIndex struct {
	ColumnIndex
	maxDefinitionLevel byte
	definitionLevels   []byte
}

// NullPage reports whether page i holds only null values, i.e. every
// definition level falls short of the max definition level.
func (index *nullableColumnIndex) NullPage(i int) bool {
	return index.NullCount(i) == int64(len(index.definitionLevels))
}

// NullCount returns the number of null values, counted as definition levels
// below the max. The page index i is ignored — presumably because column
// buffers materialize as a single page (see onePage usage in this file);
// confirm before relying on multi-page behavior.
func (index *nullableColumnIndex) NullCount(i int) int64 {
	return int64(countLevelsNotEqual(index.definitionLevels, index.maxDefinitionLevel))
}
   119  
   120  type nullOrdering func(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool
   121  
   122  func nullsGoFirst(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool {
   123  	if definitionLevel1 != maxDefinitionLevel {
   124  		return definitionLevel2 == maxDefinitionLevel
   125  	} else {
   126  		return definitionLevel2 == maxDefinitionLevel && column.Less(i, j)
   127  	}
   128  }
   129  
   130  func nullsGoLast(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool {
   131  	return definitionLevel1 == maxDefinitionLevel && (definitionLevel2 != maxDefinitionLevel || column.Less(i, j))
   132  }
   133  
// reversedColumnBuffer is an adapter of ColumnBuffer which inverses the order
// in which rows are ordered when the column gets sorted.
//
// This type is used when buffers are constructed with sorting columns ordering
// values in descending order.
type reversedColumnBuffer struct{ ColumnBuffer }

// Less swaps the operands of the underlying comparison to produce the
// descending order.
func (col *reversedColumnBuffer) Less(i, j int) bool { return col.ColumnBuffer.Less(j, i) }
   142  
// optionalColumnBuffer is an implementation of the ColumnBuffer interface used
// as a wrapper to an underlying ColumnBuffer to manage the creation of
// definition levels.
//
// Null values are not written to the underlying column; instead, the buffer
// tracks offsets of row values in the column, null row values are represented
// by the value -1 and a definition level less than the max.
//
// This column buffer type is used for all leaf columns that have a non-zero
// max definition level and a zero repetition level, which may be because the
// column or one of its parent(s) are marked optional.
type optionalColumnBuffer struct {
	base               ColumnBuffer // holds the non-null values only
	reordered          bool         // set by Swap; cleared once Page restores order
	maxDefinitionLevel byte
	rows               []int32 // per-row index into base, or -1 for null rows
	sortIndex          []int32 // scratch space used by Page for the cyclic sort
	definitionLevels   []byte  // one definition level per row
	nullOrdering       nullOrdering
}
   163  
   164  func newOptionalColumnBuffer(base ColumnBuffer, maxDefinitionLevel byte, nullOrdering nullOrdering) *optionalColumnBuffer {
   165  	n := base.Cap()
   166  	return &optionalColumnBuffer{
   167  		base:               base,
   168  		maxDefinitionLevel: maxDefinitionLevel,
   169  		rows:               make([]int32, 0, n),
   170  		definitionLevels:   make([]byte, 0, n),
   171  		nullOrdering:       nullOrdering,
   172  	}
   173  }
   174  
   175  func (col *optionalColumnBuffer) Clone() ColumnBuffer {
   176  	return &optionalColumnBuffer{
   177  		base:               col.base.Clone(),
   178  		reordered:          col.reordered,
   179  		maxDefinitionLevel: col.maxDefinitionLevel,
   180  		rows:               append([]int32{}, col.rows...),
   181  		definitionLevels:   append([]byte{}, col.definitionLevels...),
   182  		nullOrdering:       col.nullOrdering,
   183  	}
   184  }
   185  
// Type returns the parquet type of the underlying column.
func (col *optionalColumnBuffer) Type() Type {
	return col.base.Type()
}

// NumValues returns the number of values written, counting null rows.
func (col *optionalColumnBuffer) NumValues() int64 {
	return int64(len(col.definitionLevels))
}

// ColumnIndex returns the base column index adjusted for null values.
func (col *optionalColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels)
}

// OffsetIndex delegates to the underlying column.
func (col *optionalColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return col.base.OffsetIndex()
}

// BloomFilter delegates to the underlying column.
func (col *optionalColumnBuffer) BloomFilter() BloomFilter {
	return col.base.BloomFilter()
}

// Dictionary delegates to the underlying column.
func (col *optionalColumnBuffer) Dictionary() Dictionary {
	return col.base.Dictionary()
}

// Column delegates to the underlying column.
func (col *optionalColumnBuffer) Column() int {
	return col.base.Column()
}

// Pages exposes the buffer as a sequence holding a single page.
func (col *optionalColumnBuffer) Pages() Pages {
	return onePage(col.Page())
}
   217  
// Page materializes the buffer into a page view, first restoring the order of
// values in the base column if rows were swapped since the last call.
func (col *optionalColumnBuffer) Page() Page {
	// No need for any cyclic sorting if the rows have not been reordered.
	// This case is also important because the cyclic sorting modifies the
	// buffer which makes it unsafe to read the buffer concurrently.
	if col.reordered {
		numNulls := countLevelsNotEqual(col.definitionLevels, col.maxDefinitionLevel)
		numValues := len(col.rows) - numNulls

		if numValues > 0 {
			if cap(col.sortIndex) < numValues {
				col.sortIndex = make([]int32, numValues)
			}
			// Invert the rows mapping: for each target position in the base
			// column, record the current position of the value that belongs
			// there. Null rows (negative indexes) are skipped.
			sortIndex := col.sortIndex[:numValues]
			i := 0
			for _, j := range col.rows {
				if j >= 0 {
					sortIndex[j] = int32(i)
					i++
				}
			}

			// Cyclic sort: O(N)
			for i := range sortIndex {
				for j := int(sortIndex[i]); i != j; j = int(sortIndex[i]) {
					col.base.Swap(i, j)
					sortIndex[i], sortIndex[j] = sortIndex[j], sortIndex[i]
				}
			}
		}

		// The base column values are sequential again; reassign sequential
		// row indexes to the non-null rows.
		i := 0
		for _, r := range col.rows {
			if r >= 0 {
				col.rows[i] = int32(i)
				i++
			}
		}

		col.reordered = false
	}

	return newOptionalPage(col.base.Page(), col.maxDefinitionLevel, col.definitionLevels)
}
   261  
// Reset clears all rows written to the column, retaining allocated capacity.
func (col *optionalColumnBuffer) Reset() {
	col.base.Reset()
	col.rows = col.rows[:0]
	col.definitionLevels = col.definitionLevels[:0]
}

// Size returns the memory footprint of the buffer in bytes, including the
// underlying column.
func (col *optionalColumnBuffer) Size() int64 {
	return int64(4*len(col.rows)+4*len(col.sortIndex)+len(col.definitionLevels)) + col.base.Size()
}

// Cap returns the row capacity of the column.
func (col *optionalColumnBuffer) Cap() int { return cap(col.rows) }

// Len returns the number of rows written to the column.
func (col *optionalColumnBuffer) Len() int { return len(col.rows) }
   275  
// Less compares rows i and j through the null ordering, which decides how
// null rows sort relative to the non-null values of the base column.
func (col *optionalColumnBuffer) Less(i, j int) bool {
	return col.nullOrdering(
		col.base,
		int(col.rows[i]),
		int(col.rows[j]),
		col.maxDefinitionLevel,
		col.definitionLevels[i],
		col.definitionLevels[j],
	)
}
   286  
// Swap exchanges rows i and j.
func (col *optionalColumnBuffer) Swap(i, j int) {
	// Because the underlying column does not contain null values, we cannot
	// swap its values at indexes i and j. We swap the row indexes only, then
	// reorder the underlying buffer using a cyclic sort when the buffer is
	// materialized into a page view.
	col.reordered = true
	col.rows[i], col.rows[j] = col.rows[j], col.rows[i]
	col.definitionLevels[i], col.definitionLevels[j] = col.definitionLevels[j], col.definitionLevels[i]
}
   296  
// WriteValues writes values to the buffer, splitting the input into runs of
// null and non-null values; only non-null values are forwarded to the base
// column, while a definition level is recorded for every value.
func (col *optionalColumnBuffer) WriteValues(values []Value) (n int, err error) {
	rowIndex := int32(col.base.Len())

	for n < len(values) {
		// Collect index range of contiguous null values, from i to n. If this
		// for loop exhausts the values, all remaining if statements and for
		// loops will be no-ops and the loop will terminate.
		i := n
		for n < len(values) && values[n].definitionLevel != col.maxDefinitionLevel {
			n++
		}

		// Write the contiguous null values up until the first non-null value
		// obtained in the for loop above.
		for _, v := range values[i:n] {
			col.rows = append(col.rows, -1)
			col.definitionLevels = append(col.definitionLevels, v.definitionLevel)
		}

		// Collect index range of contiguous non-null values, from i to n.
		i = n
		for n < len(values) && values[n].definitionLevel == col.maxDefinitionLevel {
			n++
		}

		// As long as i < n we have non-null values still to write. It is
		// possible that we just exhausted the input values in which case i == n
		// and the outer for loop will terminate.
		if i < n {
			count, err := col.base.WriteValues(values[i:n])
			// Record levels and row indexes for the values the base column
			// actually accepted before surfacing any error.
			col.definitionLevels = appendLevel(col.definitionLevels, col.maxDefinitionLevel, count)

			for count > 0 {
				col.rows = append(col.rows, rowIndex)
				rowIndex++
				count--
			}

			if err != nil {
				return n, err
			}
		}
	}
	return n, nil
}
   342  
// writeValues writes a run of values sharing the same definition level,
// recording one row index per value; null runs are recorded without touching
// the base column.
func (col *optionalColumnBuffer) writeValues(rows sparse.Array, levels columnLevels) {
	// The row count is zero when writing a null optional value, in which case
	// we still need to output a row to the buffer to record the definition
	// level.
	if rows.Len() == 0 {
		col.definitionLevels = append(col.definitionLevels, levels.definitionLevel)
		col.rows = append(col.rows, -1)
		return
	}

	col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, rows.Len())

	i := len(col.rows)
	j := len(col.rows) + rows.Len()

	// Grow rows to hold one index per value, doubling the allocation when a
	// reallocation is needed.
	if j <= cap(col.rows) {
		col.rows = col.rows[:j]
	} else {
		tmp := make([]int32, j, 2*j)
		copy(tmp, col.rows)
		col.rows = tmp
	}

	if levels.definitionLevel != col.maxDefinitionLevel {
		// Null values: mark the rows with -1 and skip the base column.
		broadcastValueInt32(col.rows[i:], -1)
	} else {
		// Non-null values: record sequential base indexes and forward the
		// values to the base column.
		broadcastRangeInt32(col.rows[i:], int32(col.base.Len()))
		col.base.writeValues(rows, levels)
	}
}
   373  
   374  func (col *optionalColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) {
   375  	length := int64(len(col.definitionLevels))
   376  	if offset < 0 {
   377  		return 0, errRowIndexOutOfBounds(offset, length)
   378  	}
   379  	if offset >= length {
   380  		return 0, io.EOF
   381  	}
   382  	if length -= offset; length < int64(len(values)) {
   383  		values = values[:length]
   384  	}
   385  
   386  	numNulls1 := int64(countLevelsNotEqual(col.definitionLevels[:offset], col.maxDefinitionLevel))
   387  	numNulls2 := int64(countLevelsNotEqual(col.definitionLevels[offset:offset+length], col.maxDefinitionLevel))
   388  
   389  	if numNulls2 < length {
   390  		n, err := col.base.ReadValuesAt(values[:length-numNulls2], offset-numNulls1)
   391  		if err != nil {
   392  			return n, err
   393  		}
   394  	}
   395  
   396  	if numNulls2 > 0 {
   397  		columnIndex := ^int16(col.Column())
   398  		i := numNulls2 - 1
   399  		j := length - 1
   400  		definitionLevels := col.definitionLevels[offset : offset+length]
   401  		maxDefinitionLevel := col.maxDefinitionLevel
   402  
   403  		for n := len(definitionLevels) - 1; n >= 0 && j > i; n-- {
   404  			if definitionLevels[n] != maxDefinitionLevel {
   405  				values[j] = Value{definitionLevel: definitionLevels[n], columnIndex: columnIndex}
   406  			} else {
   407  				values[j] = values[i]
   408  				i--
   409  			}
   410  			j--
   411  		}
   412  	}
   413  
   414  	return int(length), nil
   415  }
   416  
// repeatedColumnBuffer is an implementation of the ColumnBuffer interface used
// as a wrapper to an underlying ColumnBuffer to manage the creation of
// repetition levels, definition levels, and map rows to the region of the
// underlying buffer that contains their sequence of values.
//
// Null values are not written to the underlying column; instead, the buffer
// tracks offsets of row values in the column, null row values are represented
// by the value -1 and a definition level less than the max.
//
// This column buffer type is used for all leaf columns that have a non-zero
// max repetition level, which may be because the column or one of its parent(s)
// are marked repeated.
type repeatedColumnBuffer struct {
	base               ColumnBuffer // holds the non-null values only
	reordered          bool         // set by Swap; cleared once Page rebuilds the base
	maxRepetitionLevel byte
	maxDefinitionLevel byte
	rows               []offsetMapping // one entry per row
	repetitionLevels   []byte          // one level per value
	definitionLevels   []byte          // one level per value
	buffer             []Value         // scratch space for WriteValues and Page
	reordering         *repeatedColumnBuffer // lazily-created clone used by Page to reorder rows
	nullOrdering       nullOrdering
}
   441  
// The offsetMapping type maps the logical offset of rows within the repetition
// and definition levels, to the base offsets in the underlying column buffers
// where the non-null values have been written.
type offsetMapping struct {
	offset     uint32 // index of the row's first level in repetitionLevels/definitionLevels
	baseOffset uint32 // index of the row's first non-null value in the base column
}
   449  
   450  func newRepeatedColumnBuffer(base ColumnBuffer, maxRepetitionLevel, maxDefinitionLevel byte, nullOrdering nullOrdering) *repeatedColumnBuffer {
   451  	n := base.Cap()
   452  	return &repeatedColumnBuffer{
   453  		base:               base,
   454  		maxRepetitionLevel: maxRepetitionLevel,
   455  		maxDefinitionLevel: maxDefinitionLevel,
   456  		rows:               make([]offsetMapping, 0, n/8),
   457  		repetitionLevels:   make([]byte, 0, n),
   458  		definitionLevels:   make([]byte, 0, n),
   459  		nullOrdering:       nullOrdering,
   460  	}
   461  }
   462  
   463  func (col *repeatedColumnBuffer) Clone() ColumnBuffer {
   464  	return &repeatedColumnBuffer{
   465  		base:               col.base.Clone(),
   466  		reordered:          col.reordered,
   467  		maxRepetitionLevel: col.maxRepetitionLevel,
   468  		maxDefinitionLevel: col.maxDefinitionLevel,
   469  		rows:               append([]offsetMapping{}, col.rows...),
   470  		repetitionLevels:   append([]byte{}, col.repetitionLevels...),
   471  		definitionLevels:   append([]byte{}, col.definitionLevels...),
   472  		nullOrdering:       col.nullOrdering,
   473  	}
   474  }
   475  
// Type returns the parquet type of the underlying column.
func (col *repeatedColumnBuffer) Type() Type {
	return col.base.Type()
}

// NumValues returns the number of values written, counting null slots.
func (col *repeatedColumnBuffer) NumValues() int64 {
	return int64(len(col.definitionLevels))
}

// ColumnIndex returns the base column index adjusted for null values.
func (col *repeatedColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels)
}

// OffsetIndex delegates to the underlying column.
func (col *repeatedColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return col.base.OffsetIndex()
}

// BloomFilter delegates to the underlying column.
func (col *repeatedColumnBuffer) BloomFilter() BloomFilter {
	return col.base.BloomFilter()
}

// Dictionary delegates to the underlying column.
func (col *repeatedColumnBuffer) Dictionary() Dictionary {
	return col.base.Dictionary()
}

// Column delegates to the underlying column.
func (col *repeatedColumnBuffer) Column() int {
	return col.base.Column()
}

// Pages exposes the buffer as a sequence holding a single page.
func (col *repeatedColumnBuffer) Pages() Pages {
	return onePage(col.Page())
}
   507  
// Page materializes the buffer into a page view. If rows were swapped since
// the last call, the base column is rebuilt in row order: each row's values
// are read from the old base and rewritten into a lazily-created reordering
// clone, whose buffers are then swapped back into the receiver.
func (col *repeatedColumnBuffer) Page() Page {
	if col.reordered {
		if col.reordering == nil {
			col.reordering = col.Clone().(*repeatedColumnBuffer)
		}

		column := col.reordering
		column.Reset()
		maxNumValues := 0
		// Clear the scratch buffer on exit so it does not retain copied
		// values past this call.
		defer func() {
			clearValues(col.buffer[:maxNumValues])
		}()

		baseOffset := 0

		for _, row := range col.rows {
			rowOffset := int(row.offset)
			rowLength := repeatedRowLength(col.repetitionLevels[rowOffset:])
			numNulls := countLevelsNotEqual(col.definitionLevels[rowOffset:rowOffset+rowLength], col.maxDefinitionLevel)
			numValues := rowLength - numNulls

			if numValues > 0 {
				if numValues > cap(col.buffer) {
					col.buffer = make([]Value, numValues)
				} else {
					col.buffer = col.buffer[:numValues]
				}
				// Copy the row's non-null values from the old base into the
				// reordered base.
				n, err := col.base.ReadValuesAt(col.buffer, int64(row.baseOffset))
				if err != nil && n < numValues {
					return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err)
				}
				if _, err := column.base.WriteValues(col.buffer); err != nil {
					return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err)
				}
				if numValues > maxNumValues {
					maxNumValues = numValues
				}
			}

			column.rows = append(column.rows, offsetMapping{
				offset:     uint32(len(column.repetitionLevels)),
				baseOffset: uint32(baseOffset),
			})

			column.repetitionLevels = append(column.repetitionLevels, col.repetitionLevels[rowOffset:rowOffset+rowLength]...)
			column.definitionLevels = append(column.definitionLevels, col.definitionLevels[rowOffset:rowOffset+rowLength]...)
			baseOffset += numValues
		}

		col.swapReorderingBuffer(column)
		col.reordered = false
	}

	return newRepeatedPage(
		col.base.Page(),
		col.maxRepetitionLevel,
		col.maxDefinitionLevel,
		col.repetitionLevels,
		col.definitionLevels,
	)
}
   569  
// swapReorderingBuffer exchanges the storage of col and buf, committing the
// reordered copy built by Page while keeping the previous buffers around for
// reuse on the next reordering.
func (col *repeatedColumnBuffer) swapReorderingBuffer(buf *repeatedColumnBuffer) {
	col.base, buf.base = buf.base, col.base
	col.rows, buf.rows = buf.rows, col.rows
	col.repetitionLevels, buf.repetitionLevels = buf.repetitionLevels, col.repetitionLevels
	col.definitionLevels, buf.definitionLevels = buf.definitionLevels, col.definitionLevels
}
   576  
// Reset clears all rows written to the column, retaining allocated capacity.
func (col *repeatedColumnBuffer) Reset() {
	col.base.Reset()
	col.rows = col.rows[:0]
	col.repetitionLevels = col.repetitionLevels[:0]
	col.definitionLevels = col.definitionLevels[:0]
}

// Size returns the memory footprint of the buffer in bytes, including the
// underlying column.
func (col *repeatedColumnBuffer) Size() int64 {
	return int64(8*len(col.rows)+len(col.repetitionLevels)+len(col.definitionLevels)) + col.base.Size()
}

// Cap returns the row capacity of the column.
func (col *repeatedColumnBuffer) Cap() int { return cap(col.rows) }

// Len returns the number of rows written to the column.
func (col *repeatedColumnBuffer) Len() int { return len(col.rows) }
   591  
// Less compares two repeated rows element by element through the null
// ordering; when one row is a strict prefix of the other, the shorter row
// sorts first.
func (col *repeatedColumnBuffer) Less(i, j int) bool {
	row1 := col.rows[i]
	row2 := col.rows[j]
	less := col.nullOrdering
	row1Length := repeatedRowLength(col.repetitionLevels[row1.offset:])
	row2Length := repeatedRowLength(col.repetitionLevels[row2.offset:])

	for k := 0; k < row1Length && k < row2Length; k++ {
		// NOTE(review): x and y stay at the rows' base offsets instead of
		// advancing with k, so comparisons past the first element reuse the
		// same base values — confirm whether this is intended.
		x := int(row1.baseOffset)
		y := int(row2.baseOffset)
		definitionLevel1 := col.definitionLevels[int(row1.offset)+k]
		definitionLevel2 := col.definitionLevels[int(row2.offset)+k]
		switch {
		case less(col.base, x, y, col.maxDefinitionLevel, definitionLevel1, definitionLevel2):
			return true
		case less(col.base, y, x, col.maxDefinitionLevel, definitionLevel2, definitionLevel1):
			return false
		}
	}

	return row1Length < row2Length
}
   614  
// Swap exchanges rows i and j.
func (col *repeatedColumnBuffer) Swap(i, j int) {
	// Because the underlying column does not contain null values, and may hold
	// an arbitrary number of values per row, we cannot swap its values at
	// indexes i and j. We swap the row indexes only, then reorder the base
	// column buffer when its view is materialized into a page by creating a
	// copy and writing rows back to it following the order of rows in the
	// repeated column buffer.
	col.reordered = true
	col.rows[i], col.rows[j] = col.rows[j], col.rows[i]
}
   625  
// WriteValues writes values to the buffer, splitting the input into rows at
// each value whose repetition level is zero.
func (col *repeatedColumnBuffer) WriteValues(values []Value) (numValues int, err error) {
	maxRowLen := 0
	// Clear the scratch buffer on exit so it does not retain copied values
	// past this call.
	defer func() {
		clearValues(col.buffer[:maxRowLen])
	}()

	for i := 0; i < len(values); {
		j := i

		// A zero repetition level marks the start of a row; skip over it so
		// the scan below stops at the next row boundary.
		if values[j].repetitionLevel == 0 {
			j++
		}

		for j < len(values) && values[j].repetitionLevel != 0 {
			j++
		}

		if err := col.writeRow(values[i:j]); err != nil {
			return numValues, err
		}

		// writeRow leaves the non-null values of the row in col.buffer.
		if len(col.buffer) > maxRowLen {
			maxRowLen = len(col.buffer)
		}

		numValues += j - i
		i = j
	}

	return numValues, nil
}
   657  
// writeRow writes one row of values: non-null values are forwarded to the
// base column, repetition and definition levels are recorded for every value,
// and a new offset mapping is recorded when the row starts a new record
// (repetition level zero on its first value).
func (col *repeatedColumnBuffer) writeRow(row []Value) error {
	col.buffer = col.buffer[:0]

	// Only values at the max definition level are non-null and stored in the
	// base column.
	for _, v := range row {
		if v.definitionLevel == col.maxDefinitionLevel {
			col.buffer = append(col.buffer, v)
		}
	}

	baseOffset := col.base.NumValues()
	if len(col.buffer) > 0 {
		if _, err := col.base.WriteValues(col.buffer); err != nil {
			return err
		}
	}

	if row[0].repetitionLevel == 0 {
		col.rows = append(col.rows, offsetMapping{
			offset:     uint32(len(col.repetitionLevels)),
			baseOffset: uint32(baseOffset),
		})
	}

	for _, v := range row {
		col.repetitionLevels = append(col.repetitionLevels, v.repetitionLevel)
		col.definitionLevels = append(col.definitionLevels, v.definitionLevel)
	}

	return nil
}
   688  
// writeValues writes a run of values sharing the same repetition and
// definition levels, recording a new row mapping when the repetition level
// is zero.
func (col *repeatedColumnBuffer) writeValues(row sparse.Array, levels columnLevels) {
	if levels.repetitionLevel == 0 {
		col.rows = append(col.rows, offsetMapping{
			offset:     uint32(len(col.repetitionLevels)),
			baseOffset: uint32(col.base.NumValues()),
		})
	}

	// An empty run still records one level pair so the row is represented in
	// the levels even though no value reaches the base column.
	if row.Len() == 0 {
		col.repetitionLevels = append(col.repetitionLevels, levels.repetitionLevel)
		col.definitionLevels = append(col.definitionLevels, levels.definitionLevel)
		return
	}

	col.repetitionLevels = appendLevel(col.repetitionLevels, levels.repetitionLevel, row.Len())
	col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, row.Len())

	// Only non-null values (at the max definition level) reach the base
	// column.
	if levels.definitionLevel == col.maxDefinitionLevel {
		col.base.writeValues(row, levels)
	}
}
   710  
// ReadValuesAt is not implemented for repeated columns; calling it panics.
func (col *repeatedColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) {
	// TODO:
	panic("NOT IMPLEMENTED")
}
   715  
   716  // repeatedRowLength gives the length of the repeated row starting at the
   717  // beginning of the repetitionLevels slice.
   718  func repeatedRowLength(repetitionLevels []byte) int {
   719  	// If a repetition level exists, at least one value is required to represent
   720  	// the column.
   721  	if len(repetitionLevels) > 0 {
   722  		// The subsequent levels will represent the start of a new record when
   723  		// they go back to zero.
   724  		if i := bytes.IndexByte(repetitionLevels[1:], 0); i >= 0 {
   725  			return i + 1
   726  		}
   727  	}
   728  	return len(repetitionLevels)
   729  }
   730  
   731  // =============================================================================
   732  // The types below are in-memory implementations of the ColumnBuffer interface
   733  // for each parquet type.
   734  //
   735  // These column buffers are created by calling NewColumnBuffer on parquet.Type
   736  // instances; each parquet type manages to construct column buffers of the
   737  // appropriate type, which ensures that we are packing as many values as we
   738  // can in memory.
   739  //
   740  // See Type.NewColumnBuffer for details about how these types get created.
   741  // =============================================================================
   742  
   743  type booleanColumnBuffer struct{ booleanPage }
   744  
   745  func newBooleanColumnBuffer(typ Type, columnIndex int16, numValues int32) *booleanColumnBuffer {
   746  	// Boolean values are bit-packed, we can fit up to 8 values per byte.
   747  	bufferSize := (numValues + 7) / 8
   748  	return &booleanColumnBuffer{
   749  		booleanPage: booleanPage{
   750  			typ:         typ,
   751  			bits:        make([]byte, 0, bufferSize),
   752  			columnIndex: ^columnIndex,
   753  		},
   754  	}
   755  }
   756  
   757  func (col *booleanColumnBuffer) Clone() ColumnBuffer {
   758  	return &booleanColumnBuffer{
   759  		booleanPage: booleanPage{
   760  			typ:         col.typ,
   761  			bits:        append([]byte{}, col.bits...),
   762  			offset:      col.offset,
   763  			numValues:   col.numValues,
   764  			columnIndex: col.columnIndex,
   765  		},
   766  	}
   767  }
   768  
// ColumnIndex returns a column index view over the buffer's single page.
func (col *booleanColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return booleanColumnIndex{&col.booleanPage}, nil
}

// OffsetIndex returns an offset index view over the buffer's single page.
func (col *booleanColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return booleanOffsetIndex{&col.booleanPage}, nil
}

// BloomFilter returns nil: no bloom filter is maintained by this buffer.
func (col *booleanColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: boolean buffers are not dictionary-indexed.
func (col *booleanColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffer as a sequence holding a single page.
func (col *booleanColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffer as a read-only page view.
func (col *booleanColumnBuffer) Page() Page { return &col.booleanPage }
   784  
// Reset clears all values written to the column, retaining allocated capacity.
func (col *booleanColumnBuffer) Reset() {
	col.bits = col.bits[:0]
	col.offset = 0
	col.numValues = 0
}

// Cap returns the value capacity: 8 bit-packed values per byte of bits.
func (col *booleanColumnBuffer) Cap() int { return 8 * cap(col.bits) }

// Len returns the number of values written to the column.
func (col *booleanColumnBuffer) Len() int { return int(col.numValues) }
   794  
   795  func (col *booleanColumnBuffer) Less(i, j int) bool {
   796  	a := col.valueAt(i)
   797  	b := col.valueAt(j)
   798  	return a != b && !a
   799  }
   800  
// valueAt returns the bit-packed boolean value at index i.
func (col *booleanColumnBuffer) valueAt(i int) bool {
	j := uint32(i) / 8
	k := uint32(i) % 8
	return ((col.bits[j] >> k) & 1) != 0
}

// setValueAt stores the boolean value v at index i.
func (col *booleanColumnBuffer) setValueAt(i int, v bool) {
	// `offset` is always zero in the page of a column buffer
	j := uint32(i) / 8
	k := uint32(i) % 8
	x := byte(0)
	if v {
		x = 1
	}
	// Clear the target bit, then or-in the new value.
	col.bits[j] = (col.bits[j] & ^(1 << k)) | (x << k)
}

// Swap exchanges the values at indexes i and j.
func (col *booleanColumnBuffer) Swap(i, j int) {
	a := col.valueAt(i)
	b := col.valueAt(j)
	col.setValueAt(i, b)
	col.setValueAt(j, a)
}
   824  
// WriteBooleans bit-packs the boolean values into the buffer; it never fails.
func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) {
	col.writeValues(sparse.MakeBoolArray(values).UnsafeArray(), columnLevels{})
	return len(values), nil
}

// WriteValues writes the boolean content of values (read from their u64
// field) into the buffer; it never fails.
func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
   835  
// writeValues bit-packs the boolean values of the sparse array into the
// buffer. Levels are ignored (leaf column of non-null values at this layer).
func (col *booleanColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	// Grow the storage to hold all current and incoming bits, at least
	// doubling the capacity to amortize reallocations.
	numBytes := bitpack.ByteCount(uint(col.numValues) + uint(rows.Len()))
	if cap(col.bits) < numBytes {
		col.bits = append(make([]byte, 0, max(numBytes, 2*cap(col.bits))), col.bits...)
	}
	col.bits = col.bits[:numBytes]
	i := 0
	// Number of bits needed to reach the next byte boundary.
	r := 8 - (int(col.numValues) % 8)
	bytes := rows.Uint8Array()

	if r <= bytes.Len() {
		// First we attempt to write enough bits to align the number of values
		// in the column buffer on a byte boundary. After this step the next
		// bit should be written at the zero'th index of a byte of the buffer.
		if r < 8 {
			var b byte
			for i < r {
				v := bytes.Index(i)
				b |= (v & 1) << uint(i)
				i++
			}
			x := uint(col.numValues) / 8
			y := uint(col.numValues) % 8
			// Merge the packed bits into the partially-filled last byte.
			col.bits[x] = (b << y) | (col.bits[x] & ^(0xFF << y))
			col.numValues += int32(i)
		}

		if n := ((bytes.Len() - i) / 8) * 8; n > 0 {
			// At this stage, we know that we have at least 8 bits to write
			// and the bits will be aligned on the address of a byte in the
			// output buffer. We can work on 8 values per loop iteration,
			// packing them into a single byte and writing it to the output
			// buffer. This effectively reduces by 87.5% the number of memory
			// stores that the program needs to perform to generate the values.
			i += sparse.GatherBits(col.bits[col.numValues/8:], bytes.Slice(i, i+n))
			col.numValues += int32(n)
		}
	}

	// Write any remaining (fewer than 8) trailing values one bit at a time.
	for i < bytes.Len() {
		x := uint(col.numValues) / 8
		y := uint(col.numValues) % 8
		b := bytes.Index(i)
		col.bits[x] = ((b & 1) << y) | (col.bits[x] & ^(1 << y))
		col.numValues++
		i++
	}

	// Trim the storage to the exact number of bytes now in use.
	col.bits = col.bits[:bitpack.ByteCount(uint(col.numValues))]
}
   886  
   887  func (col *booleanColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
   888  	i := int(offset)
   889  	switch {
   890  	case i < 0:
   891  		return 0, errRowIndexOutOfBounds(offset, int64(col.numValues))
   892  	case i >= int(col.numValues):
   893  		return 0, io.EOF
   894  	default:
   895  		for n < len(values) && i < int(col.numValues) {
   896  			values[n] = col.makeValue(col.valueAt(i))
   897  			n++
   898  			i++
   899  		}
   900  		if n < len(values) {
   901  			err = io.EOF
   902  		}
   903  		return n, err
   904  	}
   905  }
   906  
// int32ColumnBuffer is an in-memory, writable column of INT32 values.
type int32ColumnBuffer struct{ int32Page }

// newInt32ColumnBuffer builds an INT32 column buffer pre-sized for
// numValues values. The column index is stored bit-complemented, which
// is the convention this file uses to tag buffers (vs. read-only pages).
func newInt32ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int32ColumnBuffer {
	return &int32ColumnBuffer{
		int32Page: int32Page{
			typ:         typ,
			values:      make([]int32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
   918  
   919  func (col *int32ColumnBuffer) Clone() ColumnBuffer {
   920  	return &int32ColumnBuffer{
   921  		int32Page: int32Page{
   922  			typ:         col.typ,
   923  			values:      append([]int32{}, col.values...),
   924  			columnIndex: col.columnIndex,
   925  		},
   926  	}
   927  }
   928  
// ColumnIndex exposes the page as a ColumnIndex; it never fails.
func (col *int32ColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return int32ColumnIndex{&col.int32Page}, nil
}

// OffsetIndex exposes the page as an OffsetIndex; it never fails.
func (col *int32ColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return int32OffsetIndex{&col.int32Page}, nil
}

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *int32ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-indexed.
func (col *int32ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a sequence of exactly one page.
func (col *int32ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a single page.
func (col *int32ColumnBuffer) Page() Page { return &col.int32Page }

// Reset truncates the buffer for reuse, keeping the allocated storage.
func (col *int32ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *int32ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered.
func (col *int32ColumnBuffer) Len() int { return len(col.values) }

// Less orders values numerically (sort.Interface).
func (col *int32ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *int32ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
   956  
// Write appends raw INT32 values; len(b) must be a multiple of 4. The
// bytes are reinterpreted in place (native byte order) via unsafecast.
func (col *int32ColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 4) != 0 {
		return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToInt32(b)...)
	return len(b), nil
}
   964  
// WriteInt32s appends the given values to the buffer; it always succeeds.
func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

// WriteValues appends the int32 content of the given values, reading
// each Value's inline u64 field via a sparse array view.
func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
   975  
   976  func (col *int32ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
   977  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
   978  		col.values = append(make([]int32, 0, max(n, 2*cap(col.values))), col.values...)
   979  	}
   980  	n := len(col.values)
   981  	col.values = col.values[:n+rows.Len()]
   982  	sparse.GatherInt32(col.values[n:], rows.Int32Array())
   983  
   984  }
   985  
   986  func (col *int32ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
   987  	i := int(offset)
   988  	switch {
   989  	case i < 0:
   990  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
   991  	case i >= len(col.values):
   992  		return 0, io.EOF
   993  	default:
   994  		for n < len(values) && i < len(col.values) {
   995  			values[n] = col.makeValue(col.values[i])
   996  			n++
   997  			i++
   998  		}
   999  		if n < len(values) {
  1000  			err = io.EOF
  1001  		}
  1002  		return n, err
  1003  	}
  1004  }
  1005  
// int64ColumnBuffer is an in-memory, writable column of INT64 values.
type int64ColumnBuffer struct{ int64Page }

// newInt64ColumnBuffer builds an INT64 column buffer pre-sized for
// numValues values; the column index is stored bit-complemented.
func newInt64ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int64ColumnBuffer {
	return &int64ColumnBuffer{
		int64Page: int64Page{
			typ:         typ,
			values:      make([]int64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1017  
  1018  func (col *int64ColumnBuffer) Clone() ColumnBuffer {
  1019  	return &int64ColumnBuffer{
  1020  		int64Page: int64Page{
  1021  			typ:         col.typ,
  1022  			values:      append([]int64{}, col.values...),
  1023  			columnIndex: col.columnIndex,
  1024  		},
  1025  	}
  1026  }
  1027  
// ColumnIndex exposes the page as a ColumnIndex; it never fails.
func (col *int64ColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return int64ColumnIndex{&col.int64Page}, nil
}

// OffsetIndex exposes the page as an OffsetIndex; it never fails.
func (col *int64ColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return int64OffsetIndex{&col.int64Page}, nil
}

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *int64ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-indexed.
func (col *int64ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a sequence of exactly one page.
func (col *int64ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a single page.
func (col *int64ColumnBuffer) Page() Page { return &col.int64Page }

// Reset truncates the buffer for reuse, keeping the allocated storage.
func (col *int64ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *int64ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered.
func (col *int64ColumnBuffer) Len() int { return len(col.values) }

// Less orders values numerically (sort.Interface).
func (col *int64ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *int64ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1055  
// Write appends raw INT64 values; len(b) must be a multiple of 8. The
// bytes are reinterpreted in place (native byte order) via unsafecast.
func (col *int64ColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 8) != 0 {
		return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToInt64(b)...)
	return len(b), nil
}

// WriteInt64s appends the given values to the buffer; it always succeeds.
func (col *int64ColumnBuffer) WriteInt64s(values []int64) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

// WriteValues appends the int64 content of the given values, reading
// each Value's inline u64 field via a sparse array view.
func (col *int64ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
  1074  
  1075  func (col *int64ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1076  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1077  		col.values = append(make([]int64, 0, max(n, 2*cap(col.values))), col.values...)
  1078  	}
  1079  	n := len(col.values)
  1080  	col.values = col.values[:n+rows.Len()]
  1081  	sparse.GatherInt64(col.values[n:], rows.Int64Array())
  1082  }
  1083  
// ReadValuesAt copies buffered values into the given slice starting at
// the row offset; io.EOF is returned when the column ends before the
// slice is filled.
func (col *int64ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}
  1103  
// int96ColumnBuffer is an in-memory, writable column of (deprecated)
// INT96 values.
type int96ColumnBuffer struct{ int96Page }

// newInt96ColumnBuffer builds an INT96 column buffer pre-sized for
// numValues values; the column index is stored bit-complemented.
func newInt96ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int96ColumnBuffer {
	return &int96ColumnBuffer{
		int96Page: int96Page{
			typ:         typ,
			values:      make([]deprecated.Int96, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1115  
  1116  func (col *int96ColumnBuffer) Clone() ColumnBuffer {
  1117  	return &int96ColumnBuffer{
  1118  		int96Page: int96Page{
  1119  			typ:         col.typ,
  1120  			values:      append([]deprecated.Int96{}, col.values...),
  1121  			columnIndex: col.columnIndex,
  1122  		},
  1123  	}
  1124  }
  1125  
// ColumnIndex exposes the page as a ColumnIndex; it never fails.
func (col *int96ColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return int96ColumnIndex{&col.int96Page}, nil
}

// OffsetIndex exposes the page as an OffsetIndex; it never fails.
func (col *int96ColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return int96OffsetIndex{&col.int96Page}, nil
}

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *int96ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-indexed.
func (col *int96ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a sequence of exactly one page.
func (col *int96ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a single page.
func (col *int96ColumnBuffer) Page() Page { return &col.int96Page }

// Reset truncates the buffer for reuse, keeping the allocated storage.
func (col *int96ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *int96ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered.
func (col *int96ColumnBuffer) Len() int { return len(col.values) }

// Less delegates ordering to deprecated.Int96.Less (sort.Interface).
func (col *int96ColumnBuffer) Less(i, j int) bool { return col.values[i].Less(col.values[j]) }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *int96ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1153  
// Write appends raw INT96 values; len(b) must be a multiple of 12.
func (col *int96ColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 12) != 0 {
		return 0, fmt.Errorf("cannot write INT96 values from input of size %d", len(b))
	}
	col.values = append(col.values, deprecated.BytesToInt96(b)...)
	return len(b), nil
}

// WriteInt96s appends the given values to the buffer; it always succeeds.
func (col *int96ColumnBuffer) WriteInt96s(values []deprecated.Int96) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

// WriteValues appends the INT96 content of the given values one at a
// time (no sparse gather fast path exists for this deprecated type).
func (col *int96ColumnBuffer) WriteValues(values []Value) (int, error) {
	for _, v := range values {
		col.values = append(col.values, v.Int96())
	}
	return len(values), nil
}
  1173  
  1174  func (col *int96ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1175  	for i := 0; i < rows.Len(); i++ {
  1176  		p := rows.Index(i)
  1177  		col.values = append(col.values, *(*deprecated.Int96)(p))
  1178  	}
  1179  }
  1180  
// ReadValuesAt copies buffered values into the given slice starting at
// the row offset; io.EOF is returned when the column ends before the
// slice is filled.
func (col *int96ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}
  1200  
// floatColumnBuffer is an in-memory, writable column of FLOAT values.
type floatColumnBuffer struct{ floatPage }

// newFloatColumnBuffer builds a FLOAT column buffer pre-sized for
// numValues values; the column index is stored bit-complemented.
func newFloatColumnBuffer(typ Type, columnIndex int16, numValues int32) *floatColumnBuffer {
	return &floatColumnBuffer{
		floatPage: floatPage{
			typ:         typ,
			values:      make([]float32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1212  
  1213  func (col *floatColumnBuffer) Clone() ColumnBuffer {
  1214  	return &floatColumnBuffer{
  1215  		floatPage: floatPage{
  1216  			typ:         col.typ,
  1217  			values:      append([]float32{}, col.values...),
  1218  			columnIndex: col.columnIndex,
  1219  		},
  1220  	}
  1221  }
  1222  
// ColumnIndex exposes the page as a ColumnIndex; it never fails.
func (col *floatColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return floatColumnIndex{&col.floatPage}, nil
}

// OffsetIndex exposes the page as an OffsetIndex; it never fails.
func (col *floatColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return floatOffsetIndex{&col.floatPage}, nil
}

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *floatColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-indexed.
func (col *floatColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a sequence of exactly one page.
func (col *floatColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a single page.
func (col *floatColumnBuffer) Page() Page { return &col.floatPage }

// Reset truncates the buffer for reuse, keeping the allocated storage.
func (col *floatColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *floatColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered.
func (col *floatColumnBuffer) Len() int { return len(col.values) }

// Less orders values with Go's < operator; note NaN compares as
// not-less against everything.
func (col *floatColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *floatColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1250  
// Write appends raw FLOAT values; len(b) must be a multiple of 4. The
// bytes are reinterpreted in place (native byte order) via unsafecast.
func (col *floatColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 4) != 0 {
		return 0, fmt.Errorf("cannot write FLOAT values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToFloat32(b)...)
	return len(b), nil
}

// WriteFloats appends the given values to the buffer; it always succeeds.
func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

// WriteValues appends the float32 content of the given values, reading
// each Value's inline u64 field via a sparse array view.
func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
  1269  
  1270  func (col *floatColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1271  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1272  		col.values = append(make([]float32, 0, max(n, 2*cap(col.values))), col.values...)
  1273  	}
  1274  	n := len(col.values)
  1275  	col.values = col.values[:n+rows.Len()]
  1276  	sparse.GatherFloat32(col.values[n:], rows.Float32Array())
  1277  }
  1278  
// ReadValuesAt copies buffered values into the given slice starting at
// the row offset; io.EOF is returned when the column ends before the
// slice is filled.
func (col *floatColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}
  1298  
// doubleColumnBuffer is an in-memory, writable column of DOUBLE values.
type doubleColumnBuffer struct{ doublePage }

// newDoubleColumnBuffer builds a DOUBLE column buffer pre-sized for
// numValues values; the column index is stored bit-complemented.
func newDoubleColumnBuffer(typ Type, columnIndex int16, numValues int32) *doubleColumnBuffer {
	return &doubleColumnBuffer{
		doublePage: doublePage{
			typ:         typ,
			values:      make([]float64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1310  
  1311  func (col *doubleColumnBuffer) Clone() ColumnBuffer {
  1312  	return &doubleColumnBuffer{
  1313  		doublePage: doublePage{
  1314  			typ:         col.typ,
  1315  			values:      append([]float64{}, col.values...),
  1316  			columnIndex: col.columnIndex,
  1317  		},
  1318  	}
  1319  }
  1320  
// ColumnIndex exposes the page as a ColumnIndex; it never fails.
func (col *doubleColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return doubleColumnIndex{&col.doublePage}, nil
}

// OffsetIndex exposes the page as an OffsetIndex; it never fails.
func (col *doubleColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return doubleOffsetIndex{&col.doublePage}, nil
}

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *doubleColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-indexed.
func (col *doubleColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a sequence of exactly one page.
func (col *doubleColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a single page.
func (col *doubleColumnBuffer) Page() Page { return &col.doublePage }

// Reset truncates the buffer for reuse, keeping the allocated storage.
func (col *doubleColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *doubleColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered.
func (col *doubleColumnBuffer) Len() int { return len(col.values) }

// Less orders values with Go's < operator; note NaN compares as
// not-less against everything.
func (col *doubleColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *doubleColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1348  
// Write appends raw DOUBLE values; len(b) must be a multiple of 8. The
// bytes are reinterpreted in place (native byte order) via unsafecast.
func (col *doubleColumnBuffer) Write(b []byte) (int, error) {
	if (len(b) % 8) != 0 {
		return 0, fmt.Errorf("cannot write DOUBLE values from input of size %d", len(b))
	}
	col.values = append(col.values, unsafecast.BytesToFloat64(b)...)
	return len(b), nil
}

// WriteDoubles appends the given values to the buffer; it always succeeds.
func (col *doubleColumnBuffer) WriteDoubles(values []float64) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

// WriteValues appends the float64 content of the given values, reading
// each Value's inline u64 field via a sparse array view.
func (col *doubleColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
  1367  
  1368  func (col *doubleColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1369  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1370  		col.values = append(make([]float64, 0, max(n, 2*cap(col.values))), col.values...)
  1371  	}
  1372  	n := len(col.values)
  1373  	col.values = col.values[:n+rows.Len()]
  1374  	sparse.GatherFloat64(col.values[n:], rows.Float64Array())
  1375  }
  1376  
// ReadValuesAt copies buffered values into the given slice starting at
// the row offset; io.EOF is returned when the column ends before the
// slice is filled.
func (col *doubleColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
	case i >= len(col.values):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.values) {
			values[n] = col.makeValue(col.values[i])
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}
  1396  
// byteArrayColumnBuffer is an in-memory, writable column of BYTE_ARRAY
// values. Values are concatenated in `values` and addressed per row by
// parallel `offsets`/`lengths` slices, so sorting only permutes the two
// index slices. `scratch` is reused by Page to compact reordered values.
type byteArrayColumnBuffer struct {
	byteArrayPage
	lengths []uint32
	scratch []byte
}

// newByteArrayColumnBuffer builds a BYTE_ARRAY column buffer pre-sized
// for numValues values; the column index is stored bit-complemented.
func newByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int32) *byteArrayColumnBuffer {
	return &byteArrayColumnBuffer{
		byteArrayPage: byteArrayPage{
			typ:         typ,
			values:      make([]byte, 0, typ.EstimateSize(int(numValues))),
			offsets:     make([]uint32, 0, numValues+1),
			columnIndex: ^columnIndex,
		},
		lengths: make([]uint32, 0, numValues),
	}
}
  1414  
// Clone returns a deep copy of the buffer sharing no memory with the
// receiver (values, offsets and lengths are all copied).
func (col *byteArrayColumnBuffer) Clone() ColumnBuffer {
	return &byteArrayColumnBuffer{
		byteArrayPage: byteArrayPage{
			typ:         col.typ,
			values:      col.cloneValues(),
			offsets:     col.cloneOffsets(),
			columnIndex: col.columnIndex,
		},
		lengths: col.cloneLengths(),
	}
}
  1426  
  1427  func (col *byteArrayColumnBuffer) cloneLengths() []uint32 {
  1428  	lengths := make([]uint32, len(col.lengths))
  1429  	copy(lengths, col.lengths)
  1430  	return lengths
  1431  }
  1432  
// ColumnIndex exposes the page as a ColumnIndex; it never fails.
func (col *byteArrayColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return byteArrayColumnIndex{&col.byteArrayPage}, nil
}

// OffsetIndex exposes the page as an OffsetIndex; it never fails.
func (col *byteArrayColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return byteArrayOffsetIndex{&col.byteArrayPage}, nil
}

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *byteArrayColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-indexed.
func (col *byteArrayColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a sequence of exactly one page.
func (col *byteArrayColumnBuffer) Pages() Pages { return onePage(col.Page()) }
  1446  
// Page exposes the buffered values as a single page. If rows were
// reordered (e.g. by sorting), the offsets are no longer monotonically
// increasing, so the values are first compacted back into row order.
func (col *byteArrayColumnBuffer) Page() Page {
	if len(col.lengths) > 0 && orderOfUint32(col.offsets) < 1 { // unordered?
		if cap(col.scratch) < len(col.values) {
			col.scratch = make([]byte, 0, cap(col.values))
		} else {
			col.scratch = col.scratch[:0]
		}

		// Rewrite each value sequentially into scratch and point its
		// offset at the new location.
		for i := range col.lengths {
			n := len(col.scratch)
			col.scratch = append(col.scratch, col.index(i)...)
			col.offsets[i] = uint32(n)
		}

		// Swap buffers so the old values slice becomes the scratch space
		// of the next compaction.
		col.values, col.scratch = col.scratch, col.values
	}
	// The offsets have the total length as the last item. Since we are about to
	// expose the column buffer's internal state as a Page value we ensure that
	// the last offset is the total length of all values.
	col.offsets = append(col.offsets[:len(col.lengths)], uint32(len(col.values)))
	return &col.byteArrayPage
}
  1469  
// Reset truncates all three slices for reuse, keeping their storage.
func (col *byteArrayColumnBuffer) Reset() {
	col.values = col.values[:0]
	col.offsets = col.offsets[:0]
	col.lengths = col.lengths[:0]
}

// NumRows returns the number of rows (one value per row here).
func (col *byteArrayColumnBuffer) NumRows() int64 { return int64(col.Len()) }

// NumValues returns the number of values currently buffered.
func (col *byteArrayColumnBuffer) NumValues() int64 { return int64(col.Len()) }

// Cap returns the value capacity of the buffer.
func (col *byteArrayColumnBuffer) Cap() int { return cap(col.lengths) }

// Len returns the number of values currently buffered.
func (col *byteArrayColumnBuffer) Len() int { return len(col.lengths) }

// Less orders values lexicographically by their byte content.
func (col *byteArrayColumnBuffer) Less(i, j int) bool {
	return bytes.Compare(col.index(i), col.index(j)) < 0
}

// Swap exchanges rows i and j by swapping their offset/length entries;
// the values bytes themselves are not moved (Page compacts later).
func (col *byteArrayColumnBuffer) Swap(i, j int) {
	col.offsets[i], col.offsets[j] = col.offsets[j], col.offsets[i]
	col.lengths[i], col.lengths[j] = col.lengths[j], col.lengths[i]
}
  1492  
// Write appends PLAIN-encoded byte arrays, returning the number of
// bytes consumed.
func (col *byteArrayColumnBuffer) Write(b []byte) (int, error) {
	_, n, err := col.writeByteArrays(b)
	return n, err
}

// WriteByteArrays appends PLAIN-encoded byte arrays, returning the
// number of values written.
func (col *byteArrayColumnBuffer) WriteByteArrays(values []byte) (int, error) {
	n, _, err := col.writeByteArrays(values)
	return n, err
}

// writeByteArrays decodes the PLAIN (length-prefixed) representation
// and appends each value, reporting both the value count and the number
// of encoded bytes consumed.
func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes int, err error) {
	baseCount := len(col.lengths)
	baseBytes := len(col.values) + (plain.ByteArrayLengthSize * len(col.lengths))

	err = plain.RangeByteArray(values, func(value []byte) error {
		col.append(unsafecast.BytesToString(value))
		return nil
	})

	count = len(col.lengths) - baseCount
	bytes = (len(col.values) - baseBytes) + (plain.ByteArrayLengthSize * count)
	return count, bytes, err
}
  1516  
// WriteValues appends the byte-array content of the given values,
// reading each Value's inline ptr field via a sparse array view.
func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.ptr)), columnLevels{})
	return len(values), nil
}

// writeValues appends one value per row, treating each row's pointer as
// a string header (pointer + length).
func (col *byteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	for i := 0; i < rows.Len(); i++ {
		p := rows.Index(i)
		col.append(*(*string)(p))
	}
}
  1529  
// ReadValuesAt copies buffered values into the given slice starting at
// the row offset; io.EOF is returned when the column ends before the
// slice is filled.
func (col *byteArrayColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset)
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.lengths)))
	case i >= len(col.lengths):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.lengths) {
			values[n] = col.makeValueBytes(col.index(i))
			n++
			i++
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}
  1549  
  1550  func (col *byteArrayColumnBuffer) append(value string) {
  1551  	col.offsets = append(col.offsets, uint32(len(col.values)))
  1552  	col.lengths = append(col.lengths, uint32(len(value)))
  1553  	col.values = append(col.values, value...)
  1554  }
  1555  
  1556  func (col *byteArrayColumnBuffer) index(i int) []byte {
  1557  	offset := col.offsets[i]
  1558  	length := col.lengths[i]
  1559  	end := offset + length
  1560  	return col.values[offset:end:end]
  1561  }
  1562  
// fixedLenByteArrayColumnBuffer is an in-memory, writable column of
// FIXED_LEN_BYTE_ARRAY values stored back-to-back in `data`; `tmp` is a
// one-value scratch buffer used by Swap.
type fixedLenByteArrayColumnBuffer struct {
	fixedLenByteArrayPage
	tmp []byte
}

// newFixedLenByteArrayColumnBuffer builds a FIXED_LEN_BYTE_ARRAY column
// buffer pre-sized for numValues values of typ.Length() bytes each; the
// column index is stored bit-complemented.
func newFixedLenByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int32) *fixedLenByteArrayColumnBuffer {
	size := typ.Length()
	return &fixedLenByteArrayColumnBuffer{
		fixedLenByteArrayPage: fixedLenByteArrayPage{
			typ:         typ,
			size:        size,
			data:        make([]byte, 0, typ.EstimateSize(int(numValues))),
			columnIndex: ^columnIndex,
		},
		tmp: make([]byte, size),
	}
}
  1580  
  1581  func (col *fixedLenByteArrayColumnBuffer) Clone() ColumnBuffer {
  1582  	return &fixedLenByteArrayColumnBuffer{
  1583  		fixedLenByteArrayPage: fixedLenByteArrayPage{
  1584  			typ:         col.typ,
  1585  			size:        col.size,
  1586  			data:        append([]byte{}, col.data...),
  1587  			columnIndex: col.columnIndex,
  1588  		},
  1589  		tmp: make([]byte, col.size),
  1590  	}
  1591  }
  1592  
// ColumnIndex exposes the page as a ColumnIndex; it never fails.
func (col *fixedLenByteArrayColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return fixedLenByteArrayColumnIndex{&col.fixedLenByteArrayPage}, nil
}

// OffsetIndex exposes the page as an OffsetIndex; it never fails.
func (col *fixedLenByteArrayColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return fixedLenByteArrayOffsetIndex{&col.fixedLenByteArrayPage}, nil
}

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *fixedLenByteArrayColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-indexed.
func (col *fixedLenByteArrayColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a sequence of exactly one page.
func (col *fixedLenByteArrayColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a single page.
func (col *fixedLenByteArrayColumnBuffer) Page() Page { return &col.fixedLenByteArrayPage }
  1608  
// Reset truncates the buffer for reuse, keeping the allocated storage.
func (col *fixedLenByteArrayColumnBuffer) Reset() { col.data = col.data[:0] }

// Cap returns the value capacity of the buffer (bytes / value size).
func (col *fixedLenByteArrayColumnBuffer) Cap() int { return cap(col.data) / col.size }

// Len returns the number of values currently buffered.
func (col *fixedLenByteArrayColumnBuffer) Len() int { return len(col.data) / col.size }

// Less orders values lexicographically by their byte content.
func (col *fixedLenByteArrayColumnBuffer) Less(i, j int) bool {
	return bytes.Compare(col.index(i), col.index(j)) < 0
}

// Swap exchanges the byte content of values i and j through the tmp
// scratch buffer (three copies).
func (col *fixedLenByteArrayColumnBuffer) Swap(i, j int) {
	t, u, v := col.tmp[:col.size], col.index(i), col.index(j)
	copy(t, u)
	copy(u, v)
	copy(v, t)
}

// index returns the i-th value as a capacity-limited sub-slice of data.
func (col *fixedLenByteArrayColumnBuffer) index(i int) []byte {
	j := (i + 0) * col.size
	k := (i + 1) * col.size
	return col.data[j:k:k]
}
  1631  
// Write appends fixed-length values, returning the number of bytes
// consumed (count * size).
func (col *fixedLenByteArrayColumnBuffer) Write(b []byte) (int, error) {
	n, err := col.WriteFixedLenByteArrays(b)
	return n * col.size, err
}

// WriteFixedLenByteArrays appends fixed-length values, returning the
// number of values written; len(values) must be a multiple of the size.
func (col *fixedLenByteArrayColumnBuffer) WriteFixedLenByteArrays(values []byte) (int, error) {
	d, m := len(values)/col.size, len(values)%col.size
	if m != 0 {
		return 0, fmt.Errorf("cannot write FIXED_LEN_BYTE_ARRAY values of size %d from input of size %d", col.size, len(values))
	}
	col.data = append(col.data, values...)
	return d, nil
}
  1645  
// WriteValues appends the byte content of the given values one at a
// time; it assumes each value's byte array is exactly col.size bytes
// (not validated here — TODO confirm callers guarantee this).
func (col *fixedLenByteArrayColumnBuffer) WriteValues(values []Value) (int, error) {
	for _, v := range values {
		col.data = append(col.data, v.byteArray()...)
	}
	return len(values), nil
}
  1652  
// writeValues copies col.size bytes per row of the sparse array into
// the data buffer, growing the storage (at least doubling) when needed.
func (col *fixedLenByteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	n := col.size * rows.Len()
	i := len(col.data)
	j := len(col.data) + n

	if cap(col.data) < j {
		col.data = append(make([]byte, 0, max(i+n, 2*cap(col.data))), col.data...)
	}

	col.data = col.data[:j]
	newData := col.data[i:]

	// Each row's pointer is treated as the start of a col.size-byte value.
	for i := 0; i < rows.Len(); i++ {
		p := rows.Index(i)
		copy(newData[i*col.size:], unsafe.Slice((*byte)(p), col.size))
	}
}
  1670  
// ReadValuesAt copies buffered values into the given slice starting at
// the row offset; io.EOF is returned when the column ends before the
// slice is filled. Note that i iterates in byte units (offset * size).
func (col *fixedLenByteArrayColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
	i := int(offset) * col.size
	switch {
	case i < 0:
		return 0, errRowIndexOutOfBounds(offset, int64(len(col.data)/col.size))
	case i >= len(col.data):
		return 0, io.EOF
	default:
		for n < len(values) && i < len(col.data) {
			values[n] = col.makeValueBytes(col.data[i : i+col.size])
			n++
			i += col.size
		}
		if n < len(values) {
			err = io.EOF
		}
		return n, err
	}
}
  1690  
// uint32ColumnBuffer is a writable, in-memory column of uint32 values built
// on top of the read-only uint32Page representation.
type uint32ColumnBuffer struct{ uint32Page }

// newUint32ColumnBuffer creates a buffer pre-sized for numValues values.
// NOTE(review): the column index is stored bitwise-complemented
// (^columnIndex), matching the other buffer constructors in this file —
// presumably to distinguish buffers from finalized pages; confirm against
// the page implementations.
func newUint32ColumnBuffer(typ Type, columnIndex int16, numValues int32) *uint32ColumnBuffer {
	return &uint32ColumnBuffer{
		uint32Page: uint32Page{
			typ:         typ,
			values:      make([]uint32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1702  
  1703  func (col *uint32ColumnBuffer) Clone() ColumnBuffer {
  1704  	return &uint32ColumnBuffer{
  1705  		uint32Page: uint32Page{
  1706  			typ:         col.typ,
  1707  			values:      append([]uint32{}, col.values...),
  1708  			columnIndex: col.columnIndex,
  1709  		},
  1710  	}
  1711  }
  1712  
// ColumnIndex returns a read-only view of the column's index statistics.
func (col *uint32ColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return uint32ColumnIndex{&col.uint32Page}, nil
}

// OffsetIndex returns a read-only view of the column's offset index.
func (col *uint32ColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return uint32OffsetIndex{&col.uint32Page}, nil
}

// BloomFilter returns nil: plain column buffers carry no bloom filter.
func (col *uint32ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-encoded.
func (col *uint32ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a single-page sequence.
func (col *uint32ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a page sharing the buffer's memory.
func (col *uint32ColumnBuffer) Page() Page { return &col.uint32Page }

// Reset clears the buffer while retaining its allocated capacity.
func (col *uint32ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the buffer's current capacity in values.
func (col *uint32ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered.
func (col *uint32ColumnBuffer) Len() int { return len(col.values) }

// Less, with Len and Swap, implements sort.Interface in value order.
func (col *uint32ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j.
func (col *uint32ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1740  
  1741  func (col *uint32ColumnBuffer) Write(b []byte) (int, error) {
  1742  	if (len(b) % 4) != 0 {
  1743  		return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b))
  1744  	}
  1745  	col.values = append(col.values, unsafecast.BytesToUint32(b)...)
  1746  	return len(b), nil
  1747  }
  1748  
  1749  func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) {
  1750  	col.values = append(col.values, values...)
  1751  	return len(values), nil
  1752  }
  1753  
// WriteValues appends the uint32 content of each Value to the column.
// The sparse array is built at the offset of the Value's u64 field;
// NOTE(review): reading only 4 bytes at that offset assumes the low half of
// u64 holds the uint32 payload (little-endian layout) — confirm against
// sparse.Array.Uint32Array and the Value layout.
func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
  1759  
// writeValues gathers rows.Len() uint32 values from the sparse array into
// the column. Levels are ignored; only the raw values are stored.
func (col *uint32ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	// Grow by at least 2x to amortize reallocations across calls.
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([]uint32, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherUint32(col.values[n:], rows.Uint32Array())
}
  1768  
  1769  func (col *uint32ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1770  	i := int(offset)
  1771  	switch {
  1772  	case i < 0:
  1773  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1774  	case i >= len(col.values):
  1775  		return 0, io.EOF
  1776  	default:
  1777  		for n < len(values) && i < len(col.values) {
  1778  			values[n] = col.makeValue(col.values[i])
  1779  			n++
  1780  			i++
  1781  		}
  1782  		if n < len(values) {
  1783  			err = io.EOF
  1784  		}
  1785  		return n, err
  1786  	}
  1787  }
  1788  
// uint64ColumnBuffer is a writable, in-memory column of uint64 values built
// on top of the read-only uint64Page representation.
type uint64ColumnBuffer struct{ uint64Page }

// newUint64ColumnBuffer creates a buffer pre-sized for numValues values.
// NOTE(review): the column index is stored bitwise-complemented
// (^columnIndex), matching the other buffer constructors in this file —
// confirm the convention against the page implementations.
func newUint64ColumnBuffer(typ Type, columnIndex int16, numValues int32) *uint64ColumnBuffer {
	return &uint64ColumnBuffer{
		uint64Page: uint64Page{
			typ:         typ,
			values:      make([]uint64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1800  
  1801  func (col *uint64ColumnBuffer) Clone() ColumnBuffer {
  1802  	return &uint64ColumnBuffer{
  1803  		uint64Page: uint64Page{
  1804  			typ:         col.typ,
  1805  			values:      append([]uint64{}, col.values...),
  1806  			columnIndex: col.columnIndex,
  1807  		},
  1808  	}
  1809  }
  1810  
// ColumnIndex returns a read-only view of the column's index statistics.
func (col *uint64ColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return uint64ColumnIndex{&col.uint64Page}, nil
}

// OffsetIndex returns a read-only view of the column's offset index.
func (col *uint64ColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return uint64OffsetIndex{&col.uint64Page}, nil
}

// BloomFilter returns nil: plain column buffers carry no bloom filter.
func (col *uint64ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-encoded.
func (col *uint64ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a single-page sequence.
func (col *uint64ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a page sharing the buffer's memory.
func (col *uint64ColumnBuffer) Page() Page { return &col.uint64Page }

// Reset clears the buffer while retaining its allocated capacity.
func (col *uint64ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the buffer's current capacity in values.
func (col *uint64ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered.
func (col *uint64ColumnBuffer) Len() int { return len(col.values) }

// Less, with Len and Swap, implements sort.Interface in value order.
func (col *uint64ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j.
func (col *uint64ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1838  
  1839  func (col *uint64ColumnBuffer) Write(b []byte) (int, error) {
  1840  	if (len(b) % 8) != 0 {
  1841  		return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b))
  1842  	}
  1843  	col.values = append(col.values, unsafecast.BytesToUint64(b)...)
  1844  	return len(b), nil
  1845  }
  1846  
  1847  func (col *uint64ColumnBuffer) WriteUint64s(values []uint64) (int, error) {
  1848  	col.values = append(col.values, values...)
  1849  	return len(values), nil
  1850  }
  1851  
// WriteValues appends the uint64 content of each Value to the column, reading
// each value directly at the offset of the Value's u64 field.
func (col *uint64ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
  1857  
// writeValues gathers rows.Len() uint64 values from the sparse array into
// the column. Levels are ignored; only the raw values are stored.
func (col *uint64ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	// Grow by at least 2x to amortize reallocations across calls.
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([]uint64, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherUint64(col.values[n:], rows.Uint64Array())
}
  1866  
  1867  func (col *uint64ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1868  	i := int(offset)
  1869  	switch {
  1870  	case i < 0:
  1871  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1872  	case i >= len(col.values):
  1873  		return 0, io.EOF
  1874  	default:
  1875  		for n < len(values) && i < len(col.values) {
  1876  			values[n] = col.makeValue(col.values[i])
  1877  			n++
  1878  			i++
  1879  		}
  1880  		if n < len(values) {
  1881  			err = io.EOF
  1882  		}
  1883  		return n, err
  1884  	}
  1885  }
  1886  
// be128ColumnBuffer is a writable, in-memory column of 16-byte values
// (compared big-endian, per lessBE128), built on the read-only be128Page.
type be128ColumnBuffer struct{ be128Page }

// newBE128ColumnBuffer creates a buffer pre-sized for numValues values.
// NOTE(review): the column index is stored bitwise-complemented
// (^columnIndex), matching the other buffer constructors in this file —
// confirm the convention against the page implementations.
func newBE128ColumnBuffer(typ Type, columnIndex int16, numValues int32) *be128ColumnBuffer {
	return &be128ColumnBuffer{
		be128Page: be128Page{
			typ:         typ,
			values:      make([][16]byte, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1898  
  1899  func (col *be128ColumnBuffer) Clone() ColumnBuffer {
  1900  	return &be128ColumnBuffer{
  1901  		be128Page: be128Page{
  1902  			typ:         col.typ,
  1903  			values:      append([][16]byte{}, col.values...),
  1904  			columnIndex: col.columnIndex,
  1905  		},
  1906  	}
  1907  }
  1908  
// ColumnIndex returns a read-only view of the column's index statistics.
func (col *be128ColumnBuffer) ColumnIndex() (ColumnIndex, error) {
	return be128ColumnIndex{&col.be128Page}, nil
}

// OffsetIndex returns a read-only view of the column's offset index.
func (col *be128ColumnBuffer) OffsetIndex() (OffsetIndex, error) {
	return be128OffsetIndex{&col.be128Page}, nil
}

// BloomFilter returns nil: plain column buffers carry no bloom filter.
func (col *be128ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this buffer is not dictionary-encoded.
func (col *be128ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffered values as a single-page sequence.
func (col *be128ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a page sharing the buffer's memory.
func (col *be128ColumnBuffer) Page() Page { return &col.be128Page }

// Reset clears the buffer while retaining its allocated capacity.
func (col *be128ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the buffer's current capacity in values.
func (col *be128ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered.
func (col *be128ColumnBuffer) Len() int { return len(col.values) }

// Less, with Len and Swap, implements sort.Interface using big-endian
// 128-bit comparison.
func (col *be128ColumnBuffer) Less(i, j int) bool {
	return lessBE128(&col.values[i], &col.values[j])
}

// Swap exchanges the values at indexes i and j.
func (col *be128ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1938  
// WriteValues appends the 16-byte representation of each value to the column.
func (col *be128ColumnBuffer) WriteValues(values []Value) (int, error) {
	// Grow by at least 2x to amortize reallocations across calls.
	if n := len(col.values) + len(values); n > cap(col.values) {
		col.values = append(make([][16]byte, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+len(values)]
	newValues := col.values[n:]
	for i, v := range values {
		copy(newValues[i][:], v.byteArray())
	}
	return len(values), nil
}
  1951  
// writeValues gathers rows.Len() 16-byte values from the sparse array into
// the column. Levels are ignored; only the raw values are stored.
func (col *be128ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	// Grow by at least 2x to amortize reallocations across calls.
	if n := len(col.values) + rows.Len(); n > cap(col.values) {
		col.values = append(make([][16]byte, 0, max(n, 2*cap(col.values))), col.values...)
	}
	n := len(col.values)
	col.values = col.values[:n+rows.Len()]
	sparse.GatherUint128(col.values[n:], rows.Uint128Array())
}
  1960  
  1961  func (col *be128ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1962  	i := int(offset)
  1963  	switch {
  1964  	case i < 0:
  1965  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1966  	case i >= len(col.values):
  1967  		return 0, io.EOF
  1968  	default:
  1969  		for n < len(values) && i < len(col.values) {
  1970  			values[n] = col.makeValue(&col.values[i])
  1971  			n++
  1972  			i++
  1973  		}
  1974  		if n < len(values) {
  1975  			err = io.EOF
  1976  		}
  1977  		return n, err
  1978  	}
  1979  }
  1980  
// Compile-time assertions that the column buffer types satisfy the
// interfaces callers rely on.
var (
	_ sort.Interface = (ColumnBuffer)(nil)
	_ io.Writer      = (*byteArrayColumnBuffer)(nil)
	_ io.Writer      = (*fixedLenByteArrayColumnBuffer)(nil)
)
  1986  
// writeRowsFunc is the type of functions that apply rows to a set of column
// buffers.
//
//   - columns is the array of column buffers where the rows are written.
//
//   - rows is the array of Go values to write to the column buffers.
//
//   - levels is used to track the column index, repetition and definition
//     levels of values when writing optional or repeated columns.
type writeRowsFunc func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error
  1997  
  1998  // writeRowsFuncOf generates a writeRowsFunc function for the given Go type and
  1999  // parquet schema. The column path indicates the column that the function is
  2000  // being generated for in the parquet schema.
// writeRowsFuncOf generates a writeRowsFunc function for the given Go type and
// parquet schema. The column path indicates the column that the function is
// being generated for in the parquet schema.
//
// Dispatch is by Go type: JSON-logical-typed leaves are handled first, then
// special-cased concrete types (Int96, time.Time), then the reflect.Kind.
// Any kind not handled below (e.g. an array of non-byte elements, which
// falls through its case) panics.
func writeRowsFuncOf(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	// A leaf whose logical type is JSON is serialized through json.Marshal
	// regardless of its Go kind.
	if leaf, exists := schema.Lookup(path...); exists && leaf.Node.Type().LogicalType() != nil && leaf.Node.Type().LogicalType().Json != nil {
		return writeRowsFuncOfJSON(t, schema, path)
	}

	switch t {
	case reflect.TypeOf(deprecated.Int96{}):
		return writeRowsFuncOfRequired(t, schema, path)
	case reflect.TypeOf(time.Time{}):
		return writeRowsFuncOfTime(t, schema, path)
	}

	switch t.Kind() {
	case reflect.Bool,
		reflect.Int,
		reflect.Uint,
		reflect.Int32,
		reflect.Uint32,
		reflect.Int64,
		reflect.Uint64,
		reflect.Float32,
		reflect.Float64,
		reflect.String:
		return writeRowsFuncOfRequired(t, schema, path)

	case reflect.Slice:
		// []byte maps to a BYTE_ARRAY leaf; any other slice is a repeated
		// column.
		if t.Elem().Kind() == reflect.Uint8 {
			return writeRowsFuncOfRequired(t, schema, path)
		} else {
			return writeRowsFuncOfSlice(t, schema, path)
		}

	case reflect.Array:
		// [N]byte maps to a fixed-length byte array leaf; other array
		// element types are unsupported and fall through to the panic.
		if t.Elem().Kind() == reflect.Uint8 {
			return writeRowsFuncOfRequired(t, schema, path)
		}

	case reflect.Pointer:
		return writeRowsFuncOfPointer(t, schema, path)

	case reflect.Struct:
		return writeRowsFuncOfStruct(t, schema, path)

	case reflect.Map:
		return writeRowsFuncOfMap(t, schema, path)
	}

	panic("cannot convert Go values of type " + typeNameOf(t) + " to parquet value")
}
  2050  
  2051  func writeRowsFuncOfRequired(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
  2052  	column := schema.mapping.lookup(path)
  2053  	columnIndex := column.columnIndex
  2054  	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
  2055  		columns[columnIndex].writeValues(rows, levels)
  2056  		return nil
  2057  	}
  2058  }
  2059  
  2060  func writeRowsFuncOfOptional(t reflect.Type, schema *Schema, path columnPath, writeRows writeRowsFunc) writeRowsFunc {
  2061  	nullIndex := nullIndexFuncOf(t)
  2062  	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
  2063  		if rows.Len() == 0 {
  2064  			return writeRows(columns, rows, levels)
  2065  		}
  2066  
  2067  		nulls := acquireBitmap(rows.Len())
  2068  		defer releaseBitmap(nulls)
  2069  		nullIndex(nulls.bits, rows)
  2070  
  2071  		nullLevels := levels
  2072  		levels.definitionLevel++
  2073  		// In this function, we are dealing with optional values which are
  2074  		// neither pointers nor slices; for example, a int32 field marked
  2075  		// "optional" in its parent struct.
  2076  		//
  2077  		// We need to find zero values, which should be represented as nulls
  2078  		// in the parquet column. In order to minimize the calls to writeRows
  2079  		// and maximize throughput, we use the nullIndex and nonNullIndex
  2080  		// functions, which are type-specific implementations of the algorithm.
  2081  		//
  2082  		// Sections of the input that are contiguous nulls or non-nulls can be
  2083  		// sent to a single call to writeRows to be written to the underlying
  2084  		// buffer since they share the same definition level.
  2085  		//
  2086  		// This optimization is defeated by inputs alternating null and non-null
  2087  		// sequences of single values, we do not expect this condition to be a
  2088  		// common case.
  2089  		for i := 0; i < rows.Len(); {
  2090  			j := 0
  2091  			x := i / 64
  2092  			y := i % 64
  2093  
  2094  			if y != 0 {
  2095  				if b := nulls.bits[x] >> uint(y); b == 0 {
  2096  					x++
  2097  					y = 0
  2098  				} else {
  2099  					y += bits.TrailingZeros64(b)
  2100  					goto writeNulls
  2101  				}
  2102  			}
  2103  
  2104  			for x < len(nulls.bits) && nulls.bits[x] == 0 {
  2105  				x++
  2106  			}
  2107  
  2108  			if x < len(nulls.bits) {
  2109  				y = bits.TrailingZeros64(nulls.bits[x]) % 64
  2110  			}
  2111  
  2112  		writeNulls:
  2113  			if j = x*64 + y; j > rows.Len() {
  2114  				j = rows.Len()
  2115  			}
  2116  
  2117  			if i < j {
  2118  				if err := writeRows(columns, rows.Slice(i, j), nullLevels); err != nil {
  2119  					return err
  2120  				}
  2121  				i = j
  2122  			}
  2123  
  2124  			if y != 0 {
  2125  				if b := nulls.bits[x] >> uint(y); b == (1<<uint64(y))-1 {
  2126  					x++
  2127  					y = 0
  2128  				} else {
  2129  					y += bits.TrailingZeros64(^b)
  2130  					goto writeNonNulls
  2131  				}
  2132  			}
  2133  
  2134  			for x < len(nulls.bits) && nulls.bits[x] == ^uint64(0) {
  2135  				x++
  2136  			}
  2137  
  2138  			if x < len(nulls.bits) {
  2139  				y = bits.TrailingZeros64(^nulls.bits[x]) % 64
  2140  			}
  2141  
  2142  		writeNonNulls:
  2143  			if j = x*64 + y; j > rows.Len() {
  2144  				j = rows.Len()
  2145  			}
  2146  
  2147  			if i < j {
  2148  				if err := writeRows(columns, rows.Slice(i, j), levels); err != nil {
  2149  					return err
  2150  				}
  2151  				i = j
  2152  			}
  2153  		}
  2154  
  2155  		return nil
  2156  	}
  2157  }
  2158  
// writeRowsFuncOfPointer generates a writeRowsFunc for pointer types. Each
// row is dereferenced and written individually; a nil pointer is written as
// an empty array (a null).
func writeRowsFuncOfPointer(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	elemType := t.Elem()
	elemSize := uintptr(elemType.Size())
	writeRows := writeRowsFuncOf(elemType, schema, path)

	if len(path) == 0 {
		// This code path is taken when generating a writeRowsFunc for a pointer
		// type. In this case, we do not need to increase the definition level
		// since we are not dealing with an optional field but a pointer to the
		// row type.
		return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
			if rows.Len() == 0 {
				return writeRows(columns, rows, levels)
			}

			for i := 0; i < rows.Len(); i++ {
				p := *(*unsafe.Pointer)(rows.Index(i))
				a := sparse.Array{}
				if p != nil {
					a = makeArray(p, 1, elemSize)
				}
				if err := writeRows(columns, a, levels); err != nil {
					return err
				}
			}

			return nil
		}
	}

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writeRows(columns, rows, levels)
		}

		for i := 0; i < rows.Len(); i++ {
			p := *(*unsafe.Pointer)(rows.Index(i))
			a := sparse.Array{}
			elemLevels := levels
			if p != nil {
				// A non-nil pointer is a defined optional value: write the
				// pointee at an incremented definition level.
				a = makeArray(p, 1, elemSize)
				elemLevels.definitionLevel++
			}
			if err := writeRows(columns, a, elemLevels); err != nil {
				return err
			}
		}

		return nil
	}
}
  2210  
// writeRowsFuncOfSlice generates a writeRowsFunc for repeated (slice)
// columns. The first element of each slice is written with repetition level
// inherited from the parent, and the remaining elements with the repetition
// level set to the current repetition depth, per the Dremel encoding.
func writeRowsFuncOfSlice(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	elemType := t.Elem()
	elemSize := uintptr(elemType.Size())
	writeRows := writeRowsFuncOf(elemType, schema, path)

	// When the element is a pointer type, the writeRows function will be an
	// instance returned by writeRowsFuncOfPointer, which handles incrementing
	// the definition level if the pointer value is not nil.
	definitionLevelIncrement := byte(0)
	if elemType.Kind() != reflect.Ptr {
		definitionLevelIncrement = 1
	}

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writeRows(columns, rows, levels)
		}

		levels.repetitionDepth++

		for i := 0; i < rows.Len(); i++ {
			p := (*sliceHeader)(rows.Index(i))
			a := makeArray(p.base, p.len, elemSize)
			b := sparse.Array{}

			// An empty slice writes an empty array (null); a non-empty slice
			// writes its first element at the incremented definition level.
			elemLevels := levels
			if a.Len() > 0 {
				b = a.Slice(0, 1)
				elemLevels.definitionLevel += definitionLevelIncrement
			}

			if err := writeRows(columns, b, elemLevels); err != nil {
				return err
			}

			// Elements after the first repeat within the same record, so they
			// carry the repetition level of this depth.
			if a.Len() > 1 {
				elemLevels.repetitionLevel = elemLevels.repetitionDepth

				if err := writeRows(columns, a.Slice(1, a.Len()), elemLevels); err != nil {
					return err
				}
			}
		}

		return nil
	}
}
  2258  
// writeRowsFuncOfStruct generates a writeRowsFunc for struct types: one
// writer per field, each applied to the rows array shifted by the field's
// byte offset within the struct.
func writeRowsFuncOfStruct(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	type column struct {
		offset    uintptr
		writeRows writeRowsFunc
	}

	fields := structFieldsOf(t)
	columns := make([]column, len(fields))

	for i, f := range fields {
		optional := false
		columnPath := path.append(f.Name)
		forEachStructTagOption(f, func(_ reflect.Type, option, _ string) {
			switch option {
			case "list":
				// The "list" tag maps the field through the standard
				// LIST<list<element>> parquet group shape.
				columnPath = columnPath.append("list", "element")
			case "optional":
				optional = true
			}
		})

		writeRows := writeRowsFuncOf(f.Type, schema, columnPath)
		if optional {
			// Pointers and slices already encode optionality through nil /
			// empty; only plain values need the optional wrapper.
			switch f.Type.Kind() {
			case reflect.Pointer, reflect.Slice:
			default:
				writeRows = writeRowsFuncOfOptional(f.Type, schema, columnPath, writeRows)
			}
		}

		columns[i] = column{
			offset:    f.Offset,
			writeRows: writeRows,
		}
	}

	return func(buffers []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		// An empty rows array must still be propagated to every field writer
		// so that leaf columns record the (possibly null) levels.
		if rows.Len() == 0 {
			for _, column := range columns {
				if err := column.writeRows(buffers, rows, levels); err != nil {
					return err
				}
			}
		} else {
			for _, column := range columns {
				if err := column.writeRows(buffers, rows.Offset(column.offset), levels); err != nil {
					return err
				}
			}
		}
		return nil
	}
}
  2312  
// writeRowsFuncOfMap generates a writeRowsFunc for map types, written through
// the standard MAP<key_value<key, value>> parquet group shape. Each map entry
// is written as a repeated key/value pair.
func writeRowsFuncOfMap(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	keyPath := path.append("key_value", "key")
	keyType := t.Key()
	keySize := uintptr(keyType.Size())
	writeKeys := writeRowsFuncOf(keyType, schema, keyPath)

	valuePath := path.append("key_value", "value")
	valueType := t.Elem()
	valueSize := uintptr(valueType.Size())
	writeValues := writeRowsFuncOf(valueType, schema, valuePath)

	writeKeyValues := func(columns []ColumnBuffer, keys, values sparse.Array, levels columnLevels) error {
		if err := writeKeys(columns, keys, levels); err != nil {
			return err
		}
		if err := writeValues(columns, values, levels); err != nil {
			return err
		}
		return nil
	}

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writeKeyValues(columns, rows, rows, levels)
		}

		levels.repetitionDepth++
		// Reusable scratch values; SetIterKey/SetIterValue overwrite them on
		// each iteration so only one allocation per call is needed.
		mapKey := reflect.New(keyType).Elem()
		mapValue := reflect.New(valueType).Elem()

		for i := 0; i < rows.Len(); i++ {
			m := reflect.NewAt(t, rows.Index(i)).Elem()

			if m.Len() == 0 {
				// An empty (or nil) map is recorded as a null entry.
				empty := sparse.Array{}
				if err := writeKeyValues(columns, empty, empty, levels); err != nil {
					return err
				}
			} else {
				elemLevels := levels
				elemLevels.definitionLevel++

				for it := m.MapRange(); it.Next(); {
					mapKey.SetIterKey(it)
					mapValue.SetIterValue(it)

					k := makeArray(unsafecast.PointerOfValue(mapKey), 1, keySize)
					v := makeArray(unsafecast.PointerOfValue(mapValue), 1, valueSize)

					if err := writeKeyValues(columns, k, v, elemLevels); err != nil {
						return err
					}

					// Entries after the first repeat within the same record.
					elemLevels.repetitionLevel = elemLevels.repetitionDepth
				}
			}
		}

		return nil
	}
}
  2374  
// writeRowsFuncOfJSON generates a writeRowsFunc for columns with a JSON
// logical type. Strings and byte slices are written as-is; any other Go type
// is serialized with json.Marshal and written as a string.
func writeRowsFuncOfJSON(t reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	// If this is a string or a byte array write directly.
	switch t.Kind() {
	case reflect.String:
		return writeRowsFuncOfRequired(t, schema, path)
	case reflect.Slice:
		if t.Elem().Kind() == reflect.Uint8 {
			return writeRowsFuncOfRequired(t, schema, path)
		}
	}

	// Otherwise handle with a json.Marshal
	asStrT := reflect.TypeOf(string(""))
	writer := writeRowsFuncOfRequired(asStrT, schema, path)

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writer(columns, rows, levels)
		}
		for i := 0; i < rows.Len(); i++ {
			// Rebuild a typed view of the row's memory so json.Marshal sees
			// the original Go type.
			val := reflect.NewAt(t, rows.Index(i))
			asI := val.Interface()

			b, err := json.Marshal(asI)
			if err != nil {
				return err
			}

			// Each serialized row is written as a one-element string array.
			asStr := string(b)
			a := sparse.MakeStringArray([]string{asStr})
			if err := writer(columns, a.UnsafeArray(), levels); err != nil {
				return err
			}
		}
		return nil
	}
}
  2412  
// writeRowsFuncOfTime generates a writeRowsFunc for time.Time columns. Each
// time is converted to an int64 timestamp in the unit declared by the
// column's TIMESTAMP logical type (defaulting to nanoseconds) and written
// through the int64 path.
func writeRowsFuncOfTime(_ reflect.Type, schema *Schema, path columnPath) writeRowsFunc {
	t := reflect.TypeOf(int64(0))
	elemSize := uintptr(t.Size())
	writeRows := writeRowsFuncOf(t, schema, path)

	col, _ := schema.Lookup(path...)
	unit := Nanosecond.TimeUnit()
	lt := col.Node.Type().LogicalType()
	if lt != nil && lt.Timestamp != nil {
		unit = lt.Timestamp.Unit
	}

	return func(columns []ColumnBuffer, rows sparse.Array, levels columnLevels) error {
		if rows.Len() == 0 {
			return writeRows(columns, rows, levels)
		}

		times := rows.TimeArray()
		for i := 0; i < times.Len(); i++ {
			t := times.Index(i)
			var val int64
			switch {
			case unit.Millis != nil:
				val = t.UnixMilli()
			case unit.Micros != nil:
				val = t.UnixMicro()
			default:
				val = t.UnixNano()
			}

			// Wrap the converted value in a one-element array and write it
			// through the generated int64 writer.
			a := makeArray(unsafecast.PointerOfValue(reflect.ValueOf(val)), 1, elemSize)
			if err := writeRows(columns, a, levels); err != nil {
				return err
			}
		}

		return nil
	}
}