github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/column_buffer.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"io"
     7  	"sort"
     8  	"unsafe"
     9  
    10  	"github.com/segmentio/parquet-go/deprecated"
    11  	"github.com/segmentio/parquet-go/encoding/plain"
    12  	"github.com/segmentio/parquet-go/internal/bitpack"
    13  	"github.com/segmentio/parquet-go/internal/unsafecast"
    14  	"github.com/segmentio/parquet-go/sparse"
    15  )
    16  
// ColumnBuffer is an interface representing columns of a row group.
//
// ColumnBuffer implements sort.Interface as a way to support reordering the
// rows that have been written to it.
//
// The current implementation has a limitation which prevents applications from
// providing custom versions of this interface because it contains unexported
// methods. The only way to create ColumnBuffer values is to call the
// NewColumnBuffer of Type instances. This limitation may be lifted in future
// releases.
type ColumnBuffer interface {
	// Exposes a read-only view of the column buffer.
	ColumnChunk

	// The column implements ValueReaderAt as a mechanism to read values at
	// specific locations within the buffer.
	ValueReaderAt

	// The column implements ValueWriter as a mechanism to optimize the copy
	// of values into the buffer in contexts where the row information is
	// provided by the values because the repetition and definition levels
	// are set.
	ValueWriter

	// For indexed columns, returns the underlying dictionary holding the column
	// values. If the column is not indexed, nil is returned.
	Dictionary() Dictionary

	// Returns a copy of the column. The returned copy shares no memory with
	// the original, mutations of either column will not modify the other.
	Clone() ColumnBuffer

	// Returns the column as a Page.
	Page() Page

	// Clears all rows written to the column.
	Reset()

	// Returns the current capacity of the column (rows).
	Cap() int

	// Returns the number of rows currently written to the column.
	Len() int

	// Compares rows at index i and j and reports whether i < j.
	Less(i, j int) bool

	// Swaps rows at index i and j.
	Swap(i, j int)

	// Returns the size of the column buffer in bytes.
	Size() int64

	// This method is employed to write rows from arrays of Go values into the
	// column buffer. The method is currently unexported because it uses unsafe
	// APIs which would be difficult for applications to leverage, increasing
	// the risk of introducing bugs in the code. As a consequence, applications
	// cannot use custom implementations of the ColumnBuffer interface since
	// they cannot declare an unexported method that would match this signature.
	// It means that in order to create a ColumnBuffer value, programs need to
	// go through a call to NewColumnBuffer on a Type instance. We make this
	// trade off for now as it is preferable to optimize for safety over
	// extensibility in the public APIs, we might revisit in the future if we
	// learn about valid use cases for custom column buffer types.
	writeValues(rows sparse.Array, levels columnLevels)
}
    83  
// columnLevels groups the repetition and definition levels that apply to a
// batch of values passed to ColumnBuffer.writeValues.
type columnLevels struct {
	repetitionDepth byte // NOTE(review): not read in this file; presumably maintained by callers — confirm
	repetitionLevel byte // repetition level recorded for each value of the batch
	definitionLevel byte // definition level; values are null when below the column's max definition level
}
    89  
    90  func columnIndexOfNullable(base ColumnBuffer, maxDefinitionLevel byte, definitionLevels []byte) ColumnIndex {
    91  	return &nullableColumnIndex{
    92  		ColumnIndex:        base.ColumnIndex(),
    93  		maxDefinitionLevel: maxDefinitionLevel,
    94  		definitionLevels:   definitionLevels,
    95  	}
    96  }
    97  
// nullableColumnIndex decorates the column index of a base column with null
// statistics computed from the definition levels of an optional or repeated
// column.
type nullableColumnIndex struct {
	ColumnIndex
	maxDefinitionLevel byte
	definitionLevels   []byte
}

// NullPage reports whether the page contains only null values, which is the
// case when every recorded definition level is below the max.
func (index *nullableColumnIndex) NullPage(i int) bool {
	return index.NullCount(i) == int64(len(index.definitionLevels))
}

// NullCount returns the number of null values, i.e. the number of definition
// levels that differ from the max definition level. The page index i is
// ignored because column buffers expose a single page (see Pages/onePage).
func (index *nullableColumnIndex) NullCount(i int) int64 {
	return int64(countLevelsNotEqual(index.definitionLevels, index.maxDefinitionLevel))
}
   111  
// nullOrdering is a comparison function which reports whether the value of
// column at index i is less than the value at index j, using the definition
// levels to decide how null values order relative to non-null values.
type nullOrdering func(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool
   113  
   114  func nullsGoFirst(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool {
   115  	if definitionLevel1 != maxDefinitionLevel {
   116  		return definitionLevel2 == maxDefinitionLevel
   117  	} else {
   118  		return definitionLevel2 == maxDefinitionLevel && column.Less(i, j)
   119  	}
   120  }
   121  
   122  func nullsGoLast(column ColumnBuffer, i, j int, maxDefinitionLevel, definitionLevel1, definitionLevel2 byte) bool {
   123  	return definitionLevel1 == maxDefinitionLevel && (definitionLevel2 != maxDefinitionLevel || column.Less(i, j))
   124  }
   125  
// reversedColumnBuffer is an adapter of ColumnBuffer which inverses the order
// in which rows are ordered when the column gets sorted.
//
// This type is used when buffers are constructed with sorting columns ordering
// values in descending order.
type reversedColumnBuffer struct{ ColumnBuffer }

// Less inverts the comparison of the wrapped column to produce a descending
// sort order.
func (col *reversedColumnBuffer) Less(i, j int) bool { return col.ColumnBuffer.Less(j, i) }
   134  
// optionalColumnBuffer is an implementation of the ColumnBuffer interface used
// as a wrapper to an underlying ColumnBuffer to manage the creation of
// definition levels.
//
// Null values are not written to the underlying column; instead, the buffer
// tracks offsets of row values in the column, null row values are represented
// by the value -1 and a definition level less than the max.
//
// This column buffer type is used for all leaf columns that have a non-zero
// max definition level and a zero repetition level, which may be because the
// column or one of its parent(s) are marked optional.
type optionalColumnBuffer struct {
	base               ColumnBuffer // stores the non-null values only
	reordered          bool         // set by Swap; cleared once Page() re-sorts the base column
	maxDefinitionLevel byte
	rows               []int32 // per row: index of its value in base, or -1 for null rows
	sortIndex          []int32 // scratch permutation used by Page() for the cyclic sort
	definitionLevels   []byte  // one definition level per row, null or not
	nullOrdering       nullOrdering
}
   155  
   156  func newOptionalColumnBuffer(base ColumnBuffer, maxDefinitionLevel byte, nullOrdering nullOrdering) *optionalColumnBuffer {
   157  	n := base.Cap()
   158  	return &optionalColumnBuffer{
   159  		base:               base,
   160  		maxDefinitionLevel: maxDefinitionLevel,
   161  		rows:               make([]int32, 0, n),
   162  		definitionLevels:   make([]byte, 0, n),
   163  		nullOrdering:       nullOrdering,
   164  	}
   165  }
   166  
   167  func (col *optionalColumnBuffer) Clone() ColumnBuffer {
   168  	return &optionalColumnBuffer{
   169  		base:               col.base.Clone(),
   170  		reordered:          col.reordered,
   171  		maxDefinitionLevel: col.maxDefinitionLevel,
   172  		rows:               append([]int32{}, col.rows...),
   173  		definitionLevels:   append([]byte{}, col.definitionLevels...),
   174  		nullOrdering:       col.nullOrdering,
   175  	}
   176  }
   177  
// Type returns the parquet type of the underlying column.
func (col *optionalColumnBuffer) Type() Type {
	return col.base.Type()
}

// NumValues returns the total number of values, including nulls: one
// definition level is recorded per value.
func (col *optionalColumnBuffer) NumValues() int64 {
	return int64(len(col.definitionLevels))
}

// ColumnIndex returns the index of the base column wrapped with null
// statistics derived from the definition levels.
func (col *optionalColumnBuffer) ColumnIndex() ColumnIndex {
	return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels)
}

// OffsetIndex delegates to the underlying column.
func (col *optionalColumnBuffer) OffsetIndex() OffsetIndex {
	return col.base.OffsetIndex()
}

// BloomFilter delegates to the underlying column.
func (col *optionalColumnBuffer) BloomFilter() BloomFilter {
	return col.base.BloomFilter()
}

// Dictionary delegates to the underlying column.
func (col *optionalColumnBuffer) Dictionary() Dictionary {
	return col.base.Dictionary()
}

// Column delegates to the underlying column.
func (col *optionalColumnBuffer) Column() int {
	return col.base.Column()
}

// Pages exposes the column buffer as a single page.
func (col *optionalColumnBuffer) Pages() Pages {
	return onePage(col.Page())
}
   209  
// Page materializes the column buffer into a page view.
//
// If rows were swapped since the last materialization, the base column (which
// stores only the non-null values) is first re-sorted in place to match the
// logical row order recorded in col.rows.
func (col *optionalColumnBuffer) Page() Page {
	// No need for any cyclic sorting if the rows have not been reordered.
	// This case is also important because the cyclic sorting modifies the
	// buffer which makes it unsafe to read the buffer concurrently.
	if col.reordered {
		numNulls := countLevelsNotEqual(col.definitionLevels, col.maxDefinitionLevel)
		numValues := len(col.rows) - numNulls

		if numValues > 0 {
			if cap(col.sortIndex) < numValues {
				col.sortIndex = make([]int32, numValues)
			}
			sortIndex := col.sortIndex[:numValues]
			// Build the permutation: sortIndex[v] is the position that the
			// base value currently stored at index v must move to, following
			// the logical order of the non-null rows.
			i := 0
			for _, j := range col.rows {
				if j >= 0 {
					sortIndex[j] = int32(i)
					i++
				}
			}

			// Cyclic sort: O(N)
			for i := range sortIndex {
				for j := int(sortIndex[i]); i != j; j = int(sortIndex[i]) {
					col.base.Swap(i, j)
					sortIndex[i], sortIndex[j] = sortIndex[j], sortIndex[i]
				}
			}
		}

		// The base column is now in logical order; renumber the row indexes
		// sequentially so they point at the re-sorted values.
		// NOTE(review): this writes the first numValues slots of col.rows
		// rather than the slots holding non-null rows — confirm this is the
		// intended layout when null and non-null rows interleave.
		i := 0
		for _, r := range col.rows {
			if r >= 0 {
				col.rows[i] = int32(i)
				i++
			}
		}

		col.reordered = false
	}

	return newOptionalPage(col.base.Page(), col.maxDefinitionLevel, col.definitionLevels)
}
   253  
   254  func (col *optionalColumnBuffer) Reset() {
   255  	col.base.Reset()
   256  	col.rows = col.rows[:0]
   257  	col.definitionLevels = col.definitionLevels[:0]
   258  }
   259  
// Size returns the memory footprint in bytes: 4 bytes per row index and per
// sort index entry, 1 byte per definition level, plus the base column size.
func (col *optionalColumnBuffer) Size() int64 {
	return int64(4*len(col.rows)+4*len(col.sortIndex)+len(col.definitionLevels)) + col.base.Size()
}

// Cap returns the row capacity of the column buffer.
func (col *optionalColumnBuffer) Cap() int { return cap(col.rows) }

// Len returns the number of rows written to the column, including null rows.
func (col *optionalColumnBuffer) Len() int { return len(col.rows) }
   267  
// Less compares rows i and j using the configured null ordering, translating
// the logical row indexes into the indexes of their values in the base column
// (-1 for null rows).
func (col *optionalColumnBuffer) Less(i, j int) bool {
	return col.nullOrdering(
		col.base,
		int(col.rows[i]),
		int(col.rows[j]),
		col.maxDefinitionLevel,
		col.definitionLevels[i],
		col.definitionLevels[j],
	)
}
   278  
// Swap exchanges rows i and j.
//
// Because the underlying column does not contain null values, we cannot
// swap its values at indexes i and j. We swap the row indexes only, then
// reorder the underlying buffer using a cyclic sort when the buffer is
// materialized into a page view.
func (col *optionalColumnBuffer) Swap(i, j int) {
	col.reordered = true
	col.rows[i], col.rows[j] = col.rows[j], col.rows[i]
	col.definitionLevels[i], col.definitionLevels[j] = col.definitionLevels[j], col.definitionLevels[i]
}
   288  
// WriteValues appends the given values to the column buffer, alternating
// between runs of null and non-null values: null values only record a row
// index of -1 and their definition level, while runs of non-null values are
// forwarded to the underlying column in a single call.
func (col *optionalColumnBuffer) WriteValues(values []Value) (n int, err error) {
	rowIndex := int32(col.base.Len())

	for n < len(values) {
		// Collect index range of contiguous null values, from i to n. If this
		// for loop exhausts the values, all remaining if statements and for
		// loops will be no-ops and the loop will terminate.
		i := n
		for n < len(values) && values[n].definitionLevel != col.maxDefinitionLevel {
			n++
		}

		// Write the contiguous null values up until the first non-null value
		// obtained in the for loop above.
		for _, v := range values[i:n] {
			col.rows = append(col.rows, -1)
			col.definitionLevels = append(col.definitionLevels, v.definitionLevel)
		}

		// Collect index range of contiguous non-null values, from i to n.
		i = n
		for n < len(values) && values[n].definitionLevel == col.maxDefinitionLevel {
			n++
		}

		// As long as i < n we have non-null values still to write. It is
		// possible that we just exhausted the input values in which case i == n
		// and the outer for loop will terminate.
		if i < n {
			count, err := col.base.WriteValues(values[i:n])
			col.definitionLevels = appendLevel(col.definitionLevels, col.maxDefinitionLevel, count)

			// Record the base index of each value written, even on error, so
			// the row/level bookkeeping matches what was actually written.
			for count > 0 {
				col.rows = append(col.rows, rowIndex)
				rowIndex++
				count--
			}

			if err != nil {
				return n, err
			}
		}
	}
	return n, nil
}
   334  
// writeValues appends a batch of values sharing the same definition level.
// Null batches (definition level below the max) only record levels and -1 row
// indexes; non-null batches are forwarded to the base column.
func (col *optionalColumnBuffer) writeValues(rows sparse.Array, levels columnLevels) {
	// The row count is zero when writing a null optional value, in which case
	// we still need to output a row to the buffer to record the definition
	// level.
	if rows.Len() == 0 {
		col.definitionLevels = append(col.definitionLevels, levels.definitionLevel)
		col.rows = append(col.rows, -1)
		return
	}

	col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, rows.Len())

	i := len(col.rows)
	j := len(col.rows) + rows.Len()

	// Grow col.rows to hold the new row indexes, doubling the target capacity
	// when a reallocation is needed.
	if j <= cap(col.rows) {
		col.rows = col.rows[:j]
	} else {
		tmp := make([]int32, j, 2*j)
		copy(tmp, col.rows)
		col.rows = tmp
	}

	if levels.definitionLevel != col.maxDefinitionLevel {
		// Null batch: mark every new row as having no value in base.
		broadcastValueInt32(col.rows[i:], -1)
	} else {
		// Non-null batch: rows map to consecutive indexes in the base column.
		broadcastRangeInt32(col.rows[i:], int32(col.base.Len()))
		col.base.writeValues(rows, levels)
	}
}
   365  
   366  func (col *optionalColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) {
   367  	length := int64(len(col.definitionLevels))
   368  	if offset < 0 {
   369  		return 0, errRowIndexOutOfBounds(offset, length)
   370  	}
   371  	if offset >= length {
   372  		return 0, io.EOF
   373  	}
   374  	if length -= offset; length < int64(len(values)) {
   375  		values = values[:length]
   376  	}
   377  
   378  	numNulls1 := int64(countLevelsNotEqual(col.definitionLevels[:offset], col.maxDefinitionLevel))
   379  	numNulls2 := int64(countLevelsNotEqual(col.definitionLevels[offset:offset+length], col.maxDefinitionLevel))
   380  
   381  	if numNulls2 < length {
   382  		n, err := col.base.ReadValuesAt(values[:length-numNulls2], offset-numNulls1)
   383  		if err != nil {
   384  			return n, err
   385  		}
   386  	}
   387  
   388  	if numNulls2 > 0 {
   389  		columnIndex := ^int16(col.Column())
   390  		i := numNulls2 - 1
   391  		j := length - 1
   392  		definitionLevels := col.definitionLevels[offset : offset+length]
   393  		maxDefinitionLevel := col.maxDefinitionLevel
   394  
   395  		for n := len(definitionLevels) - 1; n >= 0 && j > i; n-- {
   396  			if definitionLevels[n] != maxDefinitionLevel {
   397  				values[j] = Value{definitionLevel: definitionLevels[n], columnIndex: columnIndex}
   398  			} else {
   399  				values[j] = values[i]
   400  				i--
   401  			}
   402  			j--
   403  		}
   404  	}
   405  
   406  	return int(length), nil
   407  }
   408  
// repeatedColumnBuffer is an implementation of the ColumnBuffer interface used
// as a wrapper to an underlying ColumnBuffer to manage the creation of
// repetition levels, definition levels, and map rows to the region of the
// underlying buffer that contains their sequence of values.
//
// Null values are not written to the underlying column; instead, the buffer
// tracks offsets of row values in the column, null row values are represented
// by the value -1 and a definition level less than the max.
//
// This column buffer type is used for all leaf columns that have a non-zero
// max repetition level, which may be because the column or one of its parent(s)
// are marked repeated.
type repeatedColumnBuffer struct {
	base               ColumnBuffer    // stores the non-null values only
	reordered          bool            // set by Swap; cleared when Page() rebuilds the buffers in row order
	maxRepetitionLevel byte
	maxDefinitionLevel byte
	rows               []offsetMapping // one entry per row (values with repetition level zero)
	repetitionLevels   []byte          // one level per value, including nulls
	definitionLevels   []byte          // one level per value, including nulls
	buffer             []Value         // scratch space for staging row values
	reordering         *repeatedColumnBuffer // lazily-created clone reused by Page() to rebuild the column
	nullOrdering       nullOrdering
}

// The offsetMapping type maps the logical offset of rows within the repetition
// and definition levels, to the base offsets in the underlying column buffers
// where the non-null values have been written.
type offsetMapping struct {
	offset     uint32 // index of the row's first level in repetitionLevels/definitionLevels
	baseOffset uint32 // index of the row's first non-null value in the base column
}
   441  
   442  func newRepeatedColumnBuffer(base ColumnBuffer, maxRepetitionLevel, maxDefinitionLevel byte, nullOrdering nullOrdering) *repeatedColumnBuffer {
   443  	n := base.Cap()
   444  	return &repeatedColumnBuffer{
   445  		base:               base,
   446  		maxRepetitionLevel: maxRepetitionLevel,
   447  		maxDefinitionLevel: maxDefinitionLevel,
   448  		rows:               make([]offsetMapping, 0, n/8),
   449  		repetitionLevels:   make([]byte, 0, n),
   450  		definitionLevels:   make([]byte, 0, n),
   451  		nullOrdering:       nullOrdering,
   452  	}
   453  }
   454  
   455  func (col *repeatedColumnBuffer) Clone() ColumnBuffer {
   456  	return &repeatedColumnBuffer{
   457  		base:               col.base.Clone(),
   458  		reordered:          col.reordered,
   459  		maxRepetitionLevel: col.maxRepetitionLevel,
   460  		maxDefinitionLevel: col.maxDefinitionLevel,
   461  		rows:               append([]offsetMapping{}, col.rows...),
   462  		repetitionLevels:   append([]byte{}, col.repetitionLevels...),
   463  		definitionLevels:   append([]byte{}, col.definitionLevels...),
   464  		nullOrdering:       col.nullOrdering,
   465  	}
   466  }
   467  
// Type returns the parquet type of the underlying column.
func (col *repeatedColumnBuffer) Type() Type {
	return col.base.Type()
}

// NumValues returns the total number of values, including nulls: one
// definition level is recorded per value.
func (col *repeatedColumnBuffer) NumValues() int64 {
	return int64(len(col.definitionLevels))
}

// ColumnIndex returns the index of the base column wrapped with null
// statistics derived from the definition levels.
func (col *repeatedColumnBuffer) ColumnIndex() ColumnIndex {
	return columnIndexOfNullable(col.base, col.maxDefinitionLevel, col.definitionLevels)
}

// OffsetIndex delegates to the underlying column.
func (col *repeatedColumnBuffer) OffsetIndex() OffsetIndex {
	return col.base.OffsetIndex()
}

// BloomFilter delegates to the underlying column.
func (col *repeatedColumnBuffer) BloomFilter() BloomFilter {
	return col.base.BloomFilter()
}

// Dictionary delegates to the underlying column.
func (col *repeatedColumnBuffer) Dictionary() Dictionary {
	return col.base.Dictionary()
}

// Column delegates to the underlying column.
func (col *repeatedColumnBuffer) Column() int {
	return col.base.Column()
}

// Pages exposes the column buffer as a single page.
func (col *repeatedColumnBuffer) Pages() Pages {
	return onePage(col.Page())
}
   499  
// Page materializes the column buffer into a page view.
//
// If rows were swapped since the last materialization, the column is first
// rebuilt in logical row order by copying each row's values and levels into a
// scratch clone (col.reordering), then swapping storage back into col.
func (col *repeatedColumnBuffer) Page() Page {
	if col.reordered {
		if col.reordering == nil {
			col.reordering = col.Clone().(*repeatedColumnBuffer)
		}

		column := col.reordering
		column.Reset()
		maxNumValues := 0
		defer func() {
			// Release the Value references retained by the scratch buffer so
			// they can be garbage collected.
			clearValues(col.buffer[:maxNumValues])
		}()

		baseOffset := 0

		for _, row := range col.rows {
			rowOffset := int(row.offset)
			rowLength := repeatedRowLength(col.repetitionLevels[rowOffset:])
			numNulls := countLevelsNotEqual(col.definitionLevels[rowOffset:rowOffset+rowLength], col.maxDefinitionLevel)
			numValues := rowLength - numNulls

			if numValues > 0 {
				// Copy the row's non-null values from the old base column to
				// the rebuilt one, staging them through col.buffer.
				if numValues > cap(col.buffer) {
					col.buffer = make([]Value, numValues)
				} else {
					col.buffer = col.buffer[:numValues]
				}
				n, err := col.base.ReadValuesAt(col.buffer, int64(row.baseOffset))
				if err != nil && n < numValues {
					return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err)
				}
				if _, err := column.base.WriteValues(col.buffer); err != nil {
					return newErrorPage(col.Type(), col.Column(), "reordering rows of repeated column: %w", err)
				}
				if numValues > maxNumValues {
					maxNumValues = numValues
				}
			}

			column.rows = append(column.rows, offsetMapping{
				offset:     uint32(len(column.repetitionLevels)),
				baseOffset: uint32(baseOffset),
			})

			column.repetitionLevels = append(column.repetitionLevels, col.repetitionLevels[rowOffset:rowOffset+rowLength]...)
			column.definitionLevels = append(column.definitionLevels, col.definitionLevels[rowOffset:rowOffset+rowLength]...)
			baseOffset += numValues
		}

		col.swapReorderingBuffer(column)
		col.reordered = false
	}

	return newRepeatedPage(
		col.base.Page(),
		col.maxRepetitionLevel,
		col.maxDefinitionLevel,
		col.repetitionLevels,
		col.definitionLevels,
	)
}
   561  
// swapReorderingBuffer exchanges the storage of col and buf, leaving col with
// the freshly reordered buffers and buf (the scratch clone) holding the old
// ones for reuse by the next reordering.
func (col *repeatedColumnBuffer) swapReorderingBuffer(buf *repeatedColumnBuffer) {
	col.base, buf.base = buf.base, col.base
	col.rows, buf.rows = buf.rows, col.rows
	col.repetitionLevels, buf.repetitionLevels = buf.repetitionLevels, col.repetitionLevels
	col.definitionLevels, buf.definitionLevels = buf.definitionLevels, col.definitionLevels
}
   568  
   569  func (col *repeatedColumnBuffer) Reset() {
   570  	col.base.Reset()
   571  	col.rows = col.rows[:0]
   572  	col.repetitionLevels = col.repetitionLevels[:0]
   573  	col.definitionLevels = col.definitionLevels[:0]
   574  }
   575  
// Size returns the memory footprint in bytes: 8 bytes per offsetMapping, one
// byte per repetition and per definition level, plus the base column size.
func (col *repeatedColumnBuffer) Size() int64 {
	return int64(8*len(col.rows)+len(col.repetitionLevels)+len(col.definitionLevels)) + col.base.Size()
}

// Cap returns the row capacity of the column buffer.
func (col *repeatedColumnBuffer) Cap() int { return cap(col.rows) }

// Len returns the number of rows written to the column buffer.
func (col *repeatedColumnBuffer) Len() int { return len(col.rows) }
   583  
// Less compares the rows at index i and j element-wise using the configured
// null ordering; when neither row orders before the other, the shorter row
// sorts first.
func (col *repeatedColumnBuffer) Less(i, j int) bool {
	row1 := col.rows[i]
	row2 := col.rows[j]
	less := col.nullOrdering
	row1Length := repeatedRowLength(col.repetitionLevels[row1.offset:])
	row2Length := repeatedRowLength(col.repetitionLevels[row2.offset:])

	for k := 0; k < row1Length && k < row2Length; k++ {
		// NOTE(review): x and y do not advance with k, so each iteration
		// compares the values at the rows' base offsets while only the
		// definition levels vary — confirm this is the intended comparison
		// for rows with more than one non-null value.
		x := int(row1.baseOffset)
		y := int(row2.baseOffset)
		definitionLevel1 := col.definitionLevels[int(row1.offset)+k]
		definitionLevel2 := col.definitionLevels[int(row2.offset)+k]
		switch {
		case less(col.base, x, y, col.maxDefinitionLevel, definitionLevel1, definitionLevel2):
			return true
		case less(col.base, y, x, col.maxDefinitionLevel, definitionLevel2, definitionLevel1):
			return false
		}
	}

	return row1Length < row2Length
}
   606  
// Swap exchanges rows i and j.
//
// Because the underlying column does not contain null values, and may hold
// an arbitrary number of values per row, we cannot swap its values at
// indexes i and j. We swap the row indexes only, then reorder the base
// column buffer when its view is materialized into a page by creating a
// copy and writing rows back to it following the order of rows in the
// repeated column buffer.
func (col *repeatedColumnBuffer) Swap(i, j int) {
	col.reordered = true
	col.rows[i], col.rows[j] = col.rows[j], col.rows[i]
}
   617  
// WriteValues appends the given values to the column buffer, splitting the
// input into rows: a value with repetition level zero starts a new row, and
// the following values with non-zero repetition levels belong to the same row.
func (col *repeatedColumnBuffer) WriteValues(values []Value) (numValues int, err error) {
	maxRowLen := 0
	defer func() {
		// col.buffer is reused as scratch space by writeRow; release the
		// Value references it retains so they can be garbage collected.
		clearValues(col.buffer[:maxRowLen])
	}()

	for i := 0; i < len(values); {
		j := i

		// Accept the row's leading value (repetition level zero), then
		// consume the values that continue the same row.
		if values[j].repetitionLevel == 0 {
			j++
		}

		for j < len(values) && values[j].repetitionLevel != 0 {
			j++
		}

		if err := col.writeRow(values[i:j]); err != nil {
			return numValues, err
		}

		// Track the widest use of the scratch buffer for the deferred clear.
		if len(col.buffer) > maxRowLen {
			maxRowLen = len(col.buffer)
		}

		numValues += j - i
		i = j
	}

	return numValues, nil
}
   649  
// writeRow writes one row of values: non-null values (definition level equal
// to the max) are copied to the base column, and one repetition/definition
// level pair is recorded per value, null or not.
//
// The row must not be empty: row[0] is read to detect the start of a new
// record (repetition level zero), which adds an entry to col.rows.
func (col *repeatedColumnBuffer) writeRow(row []Value) error {
	// Stage the non-null values in the scratch buffer so they can be written
	// to the base column in a single call.
	col.buffer = col.buffer[:0]

	for _, v := range row {
		if v.definitionLevel == col.maxDefinitionLevel {
			col.buffer = append(col.buffer, v)
		}
	}

	// Capture the base offset before writing so the row maps to the first of
	// its values.
	baseOffset := col.base.NumValues()
	if len(col.buffer) > 0 {
		if _, err := col.base.WriteValues(col.buffer); err != nil {
			return err
		}
	}

	if row[0].repetitionLevel == 0 {
		col.rows = append(col.rows, offsetMapping{
			offset:     uint32(len(col.repetitionLevels)),
			baseOffset: uint32(baseOffset),
		})
	}

	for _, v := range row {
		col.repetitionLevels = append(col.repetitionLevels, v.repetitionLevel)
		col.definitionLevels = append(col.definitionLevels, v.definitionLevel)
	}

	return nil
}
   680  
// writeValues appends a batch of values sharing the same repetition and
// definition levels. A repetition level of zero starts a new row; an empty
// batch still records one level pair to represent the null or empty value.
func (col *repeatedColumnBuffer) writeValues(row sparse.Array, levels columnLevels) {
	if levels.repetitionLevel == 0 {
		col.rows = append(col.rows, offsetMapping{
			offset:     uint32(len(col.repetitionLevels)),
			baseOffset: uint32(col.base.NumValues()),
		})
	}

	if row.Len() == 0 {
		col.repetitionLevels = append(col.repetitionLevels, levels.repetitionLevel)
		col.definitionLevels = append(col.definitionLevels, levels.definitionLevel)
		return
	}

	col.repetitionLevels = appendLevel(col.repetitionLevels, levels.repetitionLevel, row.Len())
	col.definitionLevels = appendLevel(col.definitionLevels, levels.definitionLevel, row.Len())

	// Only non-null values (max definition level) are stored in the base
	// column; null batches are fully described by their levels.
	if levels.definitionLevel == col.maxDefinitionLevel {
		col.base.writeValues(row, levels)
	}
}
   702  
// ReadValuesAt is not implemented for repeated columns and panics if called.
func (col *repeatedColumnBuffer) ReadValuesAt(values []Value, offset int64) (int, error) {
	// TODO:
	panic("NOT IMPLEMENTED")
}
   707  
   708  // repeatedRowLength gives the length of the repeated row starting at the
   709  // beginning of the repetitionLevels slice.
   710  func repeatedRowLength(repetitionLevels []byte) int {
   711  	// If a repetition level exists, at least one value is required to represent
   712  	// the column.
   713  	if len(repetitionLevels) > 0 {
   714  		// The subsequent levels will represent the start of a new record when
   715  		// they go back to zero.
   716  		if i := bytes.IndexByte(repetitionLevels[1:], 0); i >= 0 {
   717  			return i + 1
   718  		}
   719  	}
   720  	return len(repetitionLevels)
   721  }
   722  
   723  // =============================================================================
   724  // The types below are in-memory implementations of the ColumnBuffer interface
   725  // for each parquet type.
   726  //
   727  // These column buffers are created by calling NewColumnBuffer on parquet.Type
   728  // instances; each parquet type manages to construct column buffers of the
   729  // appropriate type, which ensures that we are packing as many values as we
   730  // can in memory.
   731  //
   732  // See Type.NewColumnBuffer for details about how these types get created.
   733  // =============================================================================
   734  
// booleanColumnBuffer is an in-memory buffer of BOOLEAN values, bit-packed in
// the embedded booleanPage.
type booleanColumnBuffer struct{ booleanPage }
   736  
   737  func newBooleanColumnBuffer(typ Type, columnIndex int16, numValues int32) *booleanColumnBuffer {
   738  	// Boolean values are bit-packed, we can fit up to 8 values per byte.
   739  	bufferSize := (numValues + 7) / 8
   740  	return &booleanColumnBuffer{
   741  		booleanPage: booleanPage{
   742  			typ:         typ,
   743  			bits:        make([]byte, 0, bufferSize),
   744  			columnIndex: ^columnIndex,
   745  		},
   746  	}
   747  }
   748  
   749  func (col *booleanColumnBuffer) Clone() ColumnBuffer {
   750  	return &booleanColumnBuffer{
   751  		booleanPage: booleanPage{
   752  			typ:         col.typ,
   753  			bits:        append([]byte{}, col.bits...),
   754  			offset:      col.offset,
   755  			numValues:   col.numValues,
   756  			columnIndex: col.columnIndex,
   757  		},
   758  	}
   759  }
   760  
// ColumnIndex returns the column index over the buffer's single page.
func (col *booleanColumnBuffer) ColumnIndex() ColumnIndex {
	return booleanColumnIndex{&col.booleanPage}
}

// OffsetIndex returns the offset index over the buffer's single page.
func (col *booleanColumnBuffer) OffsetIndex() OffsetIndex {
	return booleanOffsetIndex{&col.booleanPage}
}

// BloomFilter returns nil: column buffers do not carry bloom filters.
func (col *booleanColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil: this column buffer is not dictionary-indexed.
func (col *booleanColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the column buffer as a single page.
func (col *booleanColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page returns a page view sharing the underlying bit buffer.
func (col *booleanColumnBuffer) Page() Page { return &col.booleanPage }
   776  
   777  func (col *booleanColumnBuffer) Reset() {
   778  	col.bits = col.bits[:0]
   779  	col.offset = 0
   780  	col.numValues = 0
   781  }
   782  
// Cap returns the value capacity: 8 bit-packed values per byte of storage.
func (col *booleanColumnBuffer) Cap() int { return 8 * cap(col.bits) }

// Len returns the number of values written to the buffer.
func (col *booleanColumnBuffer) Len() int { return int(col.numValues) }
   786  
   787  func (col *booleanColumnBuffer) Less(i, j int) bool {
   788  	a := col.valueAt(i)
   789  	b := col.valueAt(j)
   790  	return a != b && !a
   791  }
   792  
   793  func (col *booleanColumnBuffer) valueAt(i int) bool {
   794  	j := uint32(i) / 8
   795  	k := uint32(i) % 8
   796  	return ((col.bits[j] >> k) & 1) != 0
   797  }
   798  
   799  func (col *booleanColumnBuffer) setValueAt(i int, v bool) {
   800  	// `offset` is always zero in the page of a column buffer
   801  	j := uint32(i) / 8
   802  	k := uint32(i) % 8
   803  	x := byte(0)
   804  	if v {
   805  		x = 1
   806  	}
   807  	col.bits[j] = (col.bits[j] & ^(1 << k)) | (x << k)
   808  }
   809  
   810  func (col *booleanColumnBuffer) Swap(i, j int) {
   811  	a := col.valueAt(i)
   812  	b := col.valueAt(j)
   813  	col.setValueAt(i, b)
   814  	col.setValueAt(j, a)
   815  }
   816  
// WriteBooleans appends the given boolean values to the buffer, reading them
// through a sparse array view; it always consumes all values.
func (col *booleanColumnBuffer) WriteBooleans(values []bool) (int, error) {
	col.writeValues(sparse.MakeBoolArray(values).UnsafeArray(), columnLevels{})
	return len(values), nil
}
   821  
// WriteValues appends the boolean content of the given parquet values,
// accessing them through a sparse array laid over the u64 field of Value; it
// always consumes all values.
func (col *booleanColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
   827  
// writeValues bit-packs the boolean values referenced by rows into the
// buffer. The levels argument is unused: the buffer stores leaf values only.
// The fast path packs 8 values per byte once the write position is
// byte-aligned.
func (col *booleanColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	// Grow the bit buffer to hold the final number of values.
	numBytes := bitpack.ByteCount(uint(col.numValues) + uint(rows.Len()))
	if cap(col.bits) < numBytes {
		col.bits = append(make([]byte, 0, max(numBytes, 2*cap(col.bits))), col.bits...)
	}
	col.bits = col.bits[:numBytes]
	i := 0
	r := 8 - (int(col.numValues) % 8)
	bytes := rows.Uint8Array()

	if r <= bytes.Len() {
		// First we attempt to write enough bits to align the number of values
		// in the column buffer on 8 bytes. After this step the next bit should
		// be written at the zero'th index of a byte of the buffer.
		if r < 8 {
			var b byte
			for i < r {
				v := bytes.Index(i)
				b |= (v & 1) << uint(i)
				i++
			}
			x := uint(col.numValues) / 8
			y := uint(col.numValues) % 8
			col.bits[x] = (b << y) | (col.bits[x] & ^(0xFF << y))
			col.numValues += int32(i)
		}

		if n := ((bytes.Len() - i) / 8) * 8; n > 0 {
			// At this stage, we know that we have at least 8 bits to write
			// and the bits will be aligned on the address of a byte in the
			// output buffer. We can work on 8 values per loop iteration,
			// packing them into a single byte and writing it to the output
			// buffer. This effectively reduces by 87.5% the number of memory
			// stores that the program needs to perform to generate the values.
			i += sparse.GatherBits(col.bits[col.numValues/8:], bytes.Slice(i, i+n))
			col.numValues += int32(n)
		}
	}

	// Slow path: write the remaining values one bit at a time.
	for i < bytes.Len() {
		x := uint(col.numValues) / 8
		y := uint(col.numValues) % 8
		b := bytes.Index(i)
		col.bits[x] = ((b & 1) << y) | (col.bits[x] & ^(1 << y))
		col.numValues++
		i++
	}

	// Clamp the buffer length to the byte count of the final value count.
	col.bits = col.bits[:bitpack.ByteCount(uint(col.numValues))]
}
   878  
   879  func (col *booleanColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
   880  	i := int(offset)
   881  	switch {
   882  	case i < 0:
   883  		return 0, errRowIndexOutOfBounds(offset, int64(col.numValues))
   884  	case i >= int(col.numValues):
   885  		return 0, io.EOF
   886  	default:
   887  		for n < len(values) && i < int(col.numValues) {
   888  			values[n] = col.makeValue(col.valueAt(i))
   889  			n++
   890  			i++
   891  		}
   892  		if n < len(values) {
   893  			err = io.EOF
   894  		}
   895  		return n, err
   896  	}
   897  }
   898  
// int32ColumnBuffer buffers a leaf column of INT32 physical type; embedding
// the page type lets the buffer be exposed directly as a Page.
type int32ColumnBuffer struct{ int32Page }

// newInt32ColumnBuffer creates a buffer pre-sized for numValues values.
// The column index is stored negated (bitwise NOT) — presumably to mark the
// page as buffer-owned; confirm against the page implementation.
func newInt32ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int32ColumnBuffer {
	return &int32ColumnBuffer{
		int32Page: int32Page{
			typ:         typ,
			values:      make([]int32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy sharing no memory with the original buffer.
func (col *int32ColumnBuffer) Clone() ColumnBuffer {
	return &int32ColumnBuffer{
		int32Page: int32Page{
			typ:         col.typ,
			values:      append([]int32{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}
   920  
// ColumnIndex returns the column index of the buffered page.
func (col *int32ColumnBuffer) ColumnIndex() ColumnIndex { return int32ColumnIndex{&col.int32Page} }

// OffsetIndex returns the offset index of the buffered page.
func (col *int32ColumnBuffer) OffsetIndex() OffsetIndex { return int32OffsetIndex{&col.int32Page} }

// BloomFilter returns nil; column buffers do not maintain bloom filters.
func (col *int32ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil; this buffer is not dictionary-indexed.
func (col *int32ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages returns a sequence containing the single page held by the buffer.
func (col *int32ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffer's internal state as a read-only page.
func (col *int32ColumnBuffer) Page() Page { return &col.int32Page }

// Reset clears the buffer for reuse, retaining the allocated storage.
func (col *int32ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *int32ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of buffered values.
func (col *int32ColumnBuffer) Len() int { return len(col.values) }

// Less orders values numerically (sort.Interface).
func (col *int32ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *int32ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
   944  
   945  func (col *int32ColumnBuffer) Write(b []byte) (int, error) {
   946  	if (len(b) % 4) != 0 {
   947  		return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b))
   948  	}
   949  	col.values = append(col.values, unsafecast.BytesToInt32(b)...)
   950  	return len(b), nil
   951  }
   952  
   953  func (col *int32ColumnBuffer) WriteInt32s(values []int32) (int, error) {
   954  	col.values = append(col.values, values...)
   955  	return len(values), nil
   956  }
   957  
   958  func (col *int32ColumnBuffer) WriteValues(values []Value) (int, error) {
   959  	var model Value
   960  	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
   961  	return len(values), nil
   962  }
   963  
   964  func (col *int32ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
   965  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
   966  		col.values = append(make([]int32, 0, max(n, 2*cap(col.values))), col.values...)
   967  	}
   968  	n := len(col.values)
   969  	col.values = col.values[:n+rows.Len()]
   970  	sparse.GatherInt32(col.values[n:], rows.Int32Array())
   971  
   972  }
   973  
   974  func (col *int32ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
   975  	i := int(offset)
   976  	switch {
   977  	case i < 0:
   978  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
   979  	case i >= len(col.values):
   980  		return 0, io.EOF
   981  	default:
   982  		for n < len(values) && i < len(col.values) {
   983  			values[n] = col.makeValue(col.values[i])
   984  			n++
   985  			i++
   986  		}
   987  		if n < len(values) {
   988  			err = io.EOF
   989  		}
   990  		return n, err
   991  	}
   992  }
   993  
// int64ColumnBuffer buffers a leaf column of INT64 physical type.
type int64ColumnBuffer struct{ int64Page }

// newInt64ColumnBuffer creates a buffer pre-sized for numValues values.
// The column index is stored negated (bitwise NOT) — presumably to mark the
// page as buffer-owned; confirm against the page implementation.
func newInt64ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int64ColumnBuffer {
	return &int64ColumnBuffer{
		int64Page: int64Page{
			typ:         typ,
			values:      make([]int64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy sharing no memory with the original buffer.
func (col *int64ColumnBuffer) Clone() ColumnBuffer {
	return &int64ColumnBuffer{
		int64Page: int64Page{
			typ:         col.typ,
			values:      append([]int64{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}
  1015  
// ColumnIndex returns the column index of the buffered page.
func (col *int64ColumnBuffer) ColumnIndex() ColumnIndex { return int64ColumnIndex{&col.int64Page} }

// OffsetIndex returns the offset index of the buffered page.
func (col *int64ColumnBuffer) OffsetIndex() OffsetIndex { return int64OffsetIndex{&col.int64Page} }

// BloomFilter returns nil; column buffers do not maintain bloom filters.
func (col *int64ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil; this buffer is not dictionary-indexed.
func (col *int64ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages returns a sequence containing the single page held by the buffer.
func (col *int64ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffer's internal state as a read-only page.
func (col *int64ColumnBuffer) Page() Page { return &col.int64Page }

// Reset clears the buffer for reuse, retaining the allocated storage.
func (col *int64ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *int64ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of buffered values.
func (col *int64ColumnBuffer) Len() int { return len(col.values) }

// Less orders values numerically (sort.Interface).
func (col *int64ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *int64ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1039  
  1040  func (col *int64ColumnBuffer) Write(b []byte) (int, error) {
  1041  	if (len(b) % 8) != 0 {
  1042  		return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b))
  1043  	}
  1044  	col.values = append(col.values, unsafecast.BytesToInt64(b)...)
  1045  	return len(b), nil
  1046  }
  1047  
  1048  func (col *int64ColumnBuffer) WriteInt64s(values []int64) (int, error) {
  1049  	col.values = append(col.values, values...)
  1050  	return len(values), nil
  1051  }
  1052  
  1053  func (col *int64ColumnBuffer) WriteValues(values []Value) (int, error) {
  1054  	var model Value
  1055  	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
  1056  	return len(values), nil
  1057  }
  1058  
  1059  func (col *int64ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1060  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1061  		col.values = append(make([]int64, 0, max(n, 2*cap(col.values))), col.values...)
  1062  	}
  1063  	n := len(col.values)
  1064  	col.values = col.values[:n+rows.Len()]
  1065  	sparse.GatherInt64(col.values[n:], rows.Int64Array())
  1066  }
  1067  
  1068  func (col *int64ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1069  	i := int(offset)
  1070  	switch {
  1071  	case i < 0:
  1072  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1073  	case i >= len(col.values):
  1074  		return 0, io.EOF
  1075  	default:
  1076  		for n < len(values) && i < len(col.values) {
  1077  			values[n] = col.makeValue(col.values[i])
  1078  			n++
  1079  			i++
  1080  		}
  1081  		if n < len(values) {
  1082  			err = io.EOF
  1083  		}
  1084  		return n, err
  1085  	}
  1086  }
  1087  
// int96ColumnBuffer buffers a leaf column of the deprecated INT96 physical
// type.
type int96ColumnBuffer struct{ int96Page }

// newInt96ColumnBuffer creates a buffer pre-sized for numValues values.
// The column index is stored negated (bitwise NOT) — presumably to mark the
// page as buffer-owned; confirm against the page implementation.
func newInt96ColumnBuffer(typ Type, columnIndex int16, numValues int32) *int96ColumnBuffer {
	return &int96ColumnBuffer{
		int96Page: int96Page{
			typ:         typ,
			values:      make([]deprecated.Int96, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy sharing no memory with the original buffer.
func (col *int96ColumnBuffer) Clone() ColumnBuffer {
	return &int96ColumnBuffer{
		int96Page: int96Page{
			typ:         col.typ,
			values:      append([]deprecated.Int96{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}
  1109  
// ColumnIndex returns the column index of the buffered page.
func (col *int96ColumnBuffer) ColumnIndex() ColumnIndex { return int96ColumnIndex{&col.int96Page} }

// OffsetIndex returns the offset index of the buffered page.
func (col *int96ColumnBuffer) OffsetIndex() OffsetIndex { return int96OffsetIndex{&col.int96Page} }

// BloomFilter returns nil; column buffers do not maintain bloom filters.
func (col *int96ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil; this buffer is not dictionary-indexed.
func (col *int96ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages returns a sequence containing the single page held by the buffer.
func (col *int96ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffer's internal state as a read-only page.
func (col *int96ColumnBuffer) Page() Page { return &col.int96Page }

// Reset clears the buffer for reuse, retaining the allocated storage.
func (col *int96ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *int96ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of buffered values.
func (col *int96ColumnBuffer) Len() int { return len(col.values) }

// Less delegates ordering to deprecated.Int96.Less (sort.Interface).
func (col *int96ColumnBuffer) Less(i, j int) bool { return col.values[i].Less(col.values[j]) }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *int96ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1133  
  1134  func (col *int96ColumnBuffer) Write(b []byte) (int, error) {
  1135  	if (len(b) % 12) != 0 {
  1136  		return 0, fmt.Errorf("cannot write INT96 values from input of size %d", len(b))
  1137  	}
  1138  	col.values = append(col.values, deprecated.BytesToInt96(b)...)
  1139  	return len(b), nil
  1140  }
  1141  
  1142  func (col *int96ColumnBuffer) WriteInt96s(values []deprecated.Int96) (int, error) {
  1143  	col.values = append(col.values, values...)
  1144  	return len(values), nil
  1145  }
  1146  
  1147  func (col *int96ColumnBuffer) WriteValues(values []Value) (int, error) {
  1148  	for _, v := range values {
  1149  		col.values = append(col.values, v.Int96())
  1150  	}
  1151  	return len(values), nil
  1152  }
  1153  
  1154  func (col *int96ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1155  	for i := 0; i < rows.Len(); i++ {
  1156  		p := rows.Index(i)
  1157  		col.values = append(col.values, *(*deprecated.Int96)(p))
  1158  	}
  1159  }
  1160  
  1161  func (col *int96ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1162  	i := int(offset)
  1163  	switch {
  1164  	case i < 0:
  1165  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1166  	case i >= len(col.values):
  1167  		return 0, io.EOF
  1168  	default:
  1169  		for n < len(values) && i < len(col.values) {
  1170  			values[n] = col.makeValue(col.values[i])
  1171  			n++
  1172  			i++
  1173  		}
  1174  		if n < len(values) {
  1175  			err = io.EOF
  1176  		}
  1177  		return n, err
  1178  	}
  1179  }
  1180  
// floatColumnBuffer buffers a leaf column of FLOAT physical type.
type floatColumnBuffer struct{ floatPage }

// newFloatColumnBuffer creates a buffer pre-sized for numValues values.
// The column index is stored negated (bitwise NOT) — presumably to mark the
// page as buffer-owned; confirm against the page implementation.
func newFloatColumnBuffer(typ Type, columnIndex int16, numValues int32) *floatColumnBuffer {
	return &floatColumnBuffer{
		floatPage: floatPage{
			typ:         typ,
			values:      make([]float32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy sharing no memory with the original buffer.
func (col *floatColumnBuffer) Clone() ColumnBuffer {
	return &floatColumnBuffer{
		floatPage: floatPage{
			typ:         col.typ,
			values:      append([]float32{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}
  1202  
// ColumnIndex returns the column index of the buffered page.
func (col *floatColumnBuffer) ColumnIndex() ColumnIndex { return floatColumnIndex{&col.floatPage} }

// OffsetIndex returns the offset index of the buffered page.
func (col *floatColumnBuffer) OffsetIndex() OffsetIndex { return floatOffsetIndex{&col.floatPage} }

// BloomFilter returns nil; column buffers do not maintain bloom filters.
func (col *floatColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil; this buffer is not dictionary-indexed.
func (col *floatColumnBuffer) Dictionary() Dictionary { return nil }

// Pages returns a sequence containing the single page held by the buffer.
func (col *floatColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffer's internal state as a read-only page.
func (col *floatColumnBuffer) Page() Page { return &col.floatPage }

// Reset clears the buffer for reuse, retaining the allocated storage.
func (col *floatColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *floatColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of buffered values.
func (col *floatColumnBuffer) Len() int { return len(col.values) }

// Less orders values numerically (sort.Interface).
func (col *floatColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *floatColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1226  
  1227  func (col *floatColumnBuffer) Write(b []byte) (int, error) {
  1228  	if (len(b) % 4) != 0 {
  1229  		return 0, fmt.Errorf("cannot write FLOAT values from input of size %d", len(b))
  1230  	}
  1231  	col.values = append(col.values, unsafecast.BytesToFloat32(b)...)
  1232  	return len(b), nil
  1233  }
  1234  
  1235  func (col *floatColumnBuffer) WriteFloats(values []float32) (int, error) {
  1236  	col.values = append(col.values, values...)
  1237  	return len(values), nil
  1238  }
  1239  
  1240  func (col *floatColumnBuffer) WriteValues(values []Value) (int, error) {
  1241  	var model Value
  1242  	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
  1243  	return len(values), nil
  1244  }
  1245  
  1246  func (col *floatColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1247  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1248  		col.values = append(make([]float32, 0, max(n, 2*cap(col.values))), col.values...)
  1249  	}
  1250  	n := len(col.values)
  1251  	col.values = col.values[:n+rows.Len()]
  1252  	sparse.GatherFloat32(col.values[n:], rows.Float32Array())
  1253  }
  1254  
  1255  func (col *floatColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1256  	i := int(offset)
  1257  	switch {
  1258  	case i < 0:
  1259  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1260  	case i >= len(col.values):
  1261  		return 0, io.EOF
  1262  	default:
  1263  		for n < len(values) && i < len(col.values) {
  1264  			values[n] = col.makeValue(col.values[i])
  1265  			n++
  1266  			i++
  1267  		}
  1268  		if n < len(values) {
  1269  			err = io.EOF
  1270  		}
  1271  		return n, err
  1272  	}
  1273  }
  1274  
// doubleColumnBuffer buffers a leaf column of DOUBLE physical type.
type doubleColumnBuffer struct{ doublePage }

// newDoubleColumnBuffer creates a buffer pre-sized for numValues values.
// The column index is stored negated (bitwise NOT) — presumably to mark the
// page as buffer-owned; confirm against the page implementation.
func newDoubleColumnBuffer(typ Type, columnIndex int16, numValues int32) *doubleColumnBuffer {
	return &doubleColumnBuffer{
		doublePage: doublePage{
			typ:         typ,
			values:      make([]float64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy sharing no memory with the original buffer.
func (col *doubleColumnBuffer) Clone() ColumnBuffer {
	return &doubleColumnBuffer{
		doublePage: doublePage{
			typ:         col.typ,
			values:      append([]float64{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}
  1296  
// ColumnIndex returns the column index of the buffered page.
func (col *doubleColumnBuffer) ColumnIndex() ColumnIndex { return doubleColumnIndex{&col.doublePage} }

// OffsetIndex returns the offset index of the buffered page.
func (col *doubleColumnBuffer) OffsetIndex() OffsetIndex { return doubleOffsetIndex{&col.doublePage} }

// BloomFilter returns nil; column buffers do not maintain bloom filters.
func (col *doubleColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil; this buffer is not dictionary-indexed.
func (col *doubleColumnBuffer) Dictionary() Dictionary { return nil }

// Pages returns a sequence containing the single page held by the buffer.
func (col *doubleColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffer's internal state as a read-only page.
func (col *doubleColumnBuffer) Page() Page { return &col.doublePage }

// Reset clears the buffer for reuse, retaining the allocated storage.
func (col *doubleColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the value capacity of the buffer.
func (col *doubleColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of buffered values.
func (col *doubleColumnBuffer) Len() int { return len(col.values) }

// Less orders values numerically (sort.Interface).
func (col *doubleColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *doubleColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1320  
  1321  func (col *doubleColumnBuffer) Write(b []byte) (int, error) {
  1322  	if (len(b) % 8) != 0 {
  1323  		return 0, fmt.Errorf("cannot write DOUBLE values from input of size %d", len(b))
  1324  	}
  1325  	col.values = append(col.values, unsafecast.BytesToFloat64(b)...)
  1326  	return len(b), nil
  1327  }
  1328  
  1329  func (col *doubleColumnBuffer) WriteDoubles(values []float64) (int, error) {
  1330  	col.values = append(col.values, values...)
  1331  	return len(values), nil
  1332  }
  1333  
  1334  func (col *doubleColumnBuffer) WriteValues(values []Value) (int, error) {
  1335  	var model Value
  1336  	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
  1337  	return len(values), nil
  1338  }
  1339  
  1340  func (col *doubleColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1341  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1342  		col.values = append(make([]float64, 0, max(n, 2*cap(col.values))), col.values...)
  1343  	}
  1344  	n := len(col.values)
  1345  	col.values = col.values[:n+rows.Len()]
  1346  	sparse.GatherFloat64(col.values[n:], rows.Float64Array())
  1347  }
  1348  
  1349  func (col *doubleColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1350  	i := int(offset)
  1351  	switch {
  1352  	case i < 0:
  1353  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1354  	case i >= len(col.values):
  1355  		return 0, io.EOF
  1356  	default:
  1357  		for n < len(values) && i < len(col.values) {
  1358  			values[n] = col.makeValue(col.values[i])
  1359  			n++
  1360  			i++
  1361  		}
  1362  		if n < len(values) {
  1363  			err = io.EOF
  1364  		}
  1365  		return n, err
  1366  	}
  1367  }
  1368  
// byteArrayColumnBuffer buffers a leaf column of BYTE_ARRAY physical type.
// Values are stored concatenated in the page's values slice; offsets and
// lengths track each value's position so rows can be reordered by swapping
// the (offset, length) pairs without moving the bytes.
type byteArrayColumnBuffer struct {
	byteArrayPage
	lengths []uint32 // length of each value, parallel to offsets
	scratch []byte   // reusable buffer for compacting values in Page()
}

// newByteArrayColumnBuffer creates a buffer pre-sized for numValues values,
// using the type's size estimate for the initial byte capacity. The column
// index is stored negated (bitwise NOT) — presumably to mark the page as
// buffer-owned; confirm against the page implementation.
func newByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int32) *byteArrayColumnBuffer {
	return &byteArrayColumnBuffer{
		byteArrayPage: byteArrayPage{
			typ:         typ,
			values:      make([]byte, 0, typ.EstimateSize(int(numValues))),
			offsets:     make([]uint32, 0, numValues+1),
			columnIndex: ^columnIndex,
		},
		lengths: make([]uint32, 0, numValues),
	}
}

// Clone returns a deep copy sharing no memory with the original buffer.
func (col *byteArrayColumnBuffer) Clone() ColumnBuffer {
	return &byteArrayColumnBuffer{
		byteArrayPage: byteArrayPage{
			typ:         col.typ,
			values:      col.cloneValues(),
			offsets:     col.cloneOffsets(),
			columnIndex: col.columnIndex,
		},
		lengths: col.cloneLengths(),
	}
}

// cloneLengths returns a copy of the per-value length slice.
func (col *byteArrayColumnBuffer) cloneLengths() []uint32 {
	lengths := make([]uint32, len(col.lengths))
	copy(lengths, col.lengths)
	return lengths
}
  1404  
// ColumnIndex returns the column index of the buffered page.
func (col *byteArrayColumnBuffer) ColumnIndex() ColumnIndex {
	return byteArrayColumnIndex{&col.byteArrayPage}
}

// OffsetIndex returns the offset index of the buffered page.
func (col *byteArrayColumnBuffer) OffsetIndex() OffsetIndex {
	return byteArrayOffsetIndex{&col.byteArrayPage}
}

// BloomFilter returns nil; column buffers do not maintain bloom filters.
func (col *byteArrayColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil; this buffer is not dictionary-indexed.
func (col *byteArrayColumnBuffer) Dictionary() Dictionary { return nil }

// Pages returns a sequence containing the single page held by the buffer.
func (col *byteArrayColumnBuffer) Pages() Pages { return onePage(col.Page()) }
  1418  
// Page exposes the buffer's internal state as a read-only page. Because
// sorting swaps (offset, length) pairs rather than the bytes themselves,
// the values may be out of order relative to the offsets; in that case the
// values are compacted into sorted order via the scratch buffer first so
// the page sees a monotonically increasing offsets slice.
func (col *byteArrayColumnBuffer) Page() Page {
	if len(col.lengths) > 0 && orderOfUint32(col.offsets) < 1 { // unordered?
		if cap(col.scratch) < len(col.values) {
			col.scratch = make([]byte, 0, cap(col.values))
		} else {
			col.scratch = col.scratch[:0]
		}

		// Rewrite each value in row order into scratch, updating its offset.
		for i := range col.lengths {
			n := len(col.scratch)
			col.scratch = append(col.scratch, col.index(i)...)
			col.offsets[i] = uint32(n)
		}

		// Swap the buffers so values is ordered and scratch is reusable.
		col.values, col.scratch = col.scratch, col.values
	}
	// The offsets have the total length as the last item. Since we are about to
	// expose the column buffer's internal state as a Page value we ensure that
	// the last offset is the total length of all values.
	col.offsets = append(col.offsets[:len(col.lengths)], uint32(len(col.values)))
	return &col.byteArrayPage
}
  1441  
// Reset clears the buffer for reuse, retaining the allocated storage.
func (col *byteArrayColumnBuffer) Reset() {
	col.values = col.values[:0]
	col.offsets = col.offsets[:0]
	col.lengths = col.lengths[:0]
}

// NumRows returns the number of rows, one per buffered value.
func (col *byteArrayColumnBuffer) NumRows() int64 { return int64(col.Len()) }

// NumValues returns the number of buffered values.
func (col *byteArrayColumnBuffer) NumValues() int64 { return int64(col.Len()) }

// Cap returns the value capacity of the buffer.
func (col *byteArrayColumnBuffer) Cap() int { return cap(col.lengths) }

// Len returns the number of buffered values.
func (col *byteArrayColumnBuffer) Len() int { return len(col.lengths) }

// Less orders values lexicographically by their bytes (sort.Interface).
func (col *byteArrayColumnBuffer) Less(i, j int) bool {
	return bytes.Compare(col.index(i), col.index(j)) < 0
}

// Swap exchanges rows i and j by swapping their (offset, length) pairs;
// the value bytes themselves are not moved.
func (col *byteArrayColumnBuffer) Swap(i, j int) {
	col.offsets[i], col.offsets[j] = col.offsets[j], col.offsets[i]
	col.lengths[i], col.lengths[j] = col.lengths[j], col.lengths[i]
}
  1464  
// Write appends values from their PLAIN encoding (length-prefixed byte
// arrays), returning the number of bytes consumed.
func (col *byteArrayColumnBuffer) Write(b []byte) (int, error) {
	_, n, err := col.writeByteArrays(b)
	return n, err
}

// WriteByteArrays appends values from their PLAIN encoding, returning the
// number of values written.
func (col *byteArrayColumnBuffer) WriteByteArrays(values []byte) (int, error) {
	n, _, err := col.writeByteArrays(values)
	return n, err
}

// writeByteArrays decodes the PLAIN-encoded values and appends each one,
// returning both the number of values and the number of input bytes
// consumed (value bytes plus their length prefixes).
func (col *byteArrayColumnBuffer) writeByteArrays(values []byte) (count, bytes int, err error) {
	baseCount := len(col.lengths)
	baseBytes := len(col.values) + (plain.ByteArrayLengthSize * len(col.lengths))

	err = plain.RangeByteArray(values, func(value []byte) error {
		col.append(unsafecast.BytesToString(value))
		return nil
	})

	count = len(col.lengths) - baseCount
	bytes = (len(col.values) - baseBytes) + (plain.ByteArrayLengthSize * count)
	return count, bytes, err
}
  1488  
// WriteValues appends the byte-array payload of each Value; the sparse
// array strides over the ptr field where the string header is stored.
func (col *byteArrayColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.ptr)), columnLevels{})
	return len(values), nil
}

// writeValues appends each element of rows, reinterpreting the element
// pointers as string headers.
func (col *byteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
	for i := 0; i < rows.Len(); i++ {
		p := rows.Index(i)
		col.append(*(*string)(p))
	}
}
  1501  
  1502  func (col *byteArrayColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1503  	i := int(offset)
  1504  	switch {
  1505  	case i < 0:
  1506  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.lengths)))
  1507  	case i >= len(col.lengths):
  1508  		return 0, io.EOF
  1509  	default:
  1510  		for n < len(values) && i < len(col.lengths) {
  1511  			values[n] = col.makeValueBytes(col.index(i))
  1512  			n++
  1513  			i++
  1514  		}
  1515  		if n < len(values) {
  1516  			err = io.EOF
  1517  		}
  1518  		return n, err
  1519  	}
  1520  }
  1521  
  1522  func (col *byteArrayColumnBuffer) append(value string) {
  1523  	col.offsets = append(col.offsets, uint32(len(col.values)))
  1524  	col.lengths = append(col.lengths, uint32(len(value)))
  1525  	col.values = append(col.values, value...)
  1526  }
  1527  
  1528  func (col *byteArrayColumnBuffer) index(i int) []byte {
  1529  	offset := col.offsets[i]
  1530  	length := col.lengths[i]
  1531  	end := offset + length
  1532  	return col.values[offset:end:end]
  1533  }
  1534  
// fixedLenByteArrayColumnBuffer buffers a leaf column of
// FIXED_LEN_BYTE_ARRAY physical type; values are stored back to back in
// data, each exactly size bytes long.
type fixedLenByteArrayColumnBuffer struct {
	fixedLenByteArrayPage
	tmp []byte // size-byte scratch used by Swap
}

// newFixedLenByteArrayColumnBuffer creates a buffer pre-sized for numValues
// values of the type's fixed length. The column index is stored negated
// (bitwise NOT) — presumably to mark the page as buffer-owned; confirm
// against the page implementation.
func newFixedLenByteArrayColumnBuffer(typ Type, columnIndex int16, numValues int32) *fixedLenByteArrayColumnBuffer {
	size := typ.Length()
	return &fixedLenByteArrayColumnBuffer{
		fixedLenByteArrayPage: fixedLenByteArrayPage{
			typ:         typ,
			size:        size,
			data:        make([]byte, 0, typ.EstimateSize(int(numValues))),
			columnIndex: ^columnIndex,
		},
		tmp: make([]byte, size),
	}
}

// Clone returns a deep copy sharing no memory with the original buffer.
func (col *fixedLenByteArrayColumnBuffer) Clone() ColumnBuffer {
	return &fixedLenByteArrayColumnBuffer{
		fixedLenByteArrayPage: fixedLenByteArrayPage{
			typ:         col.typ,
			size:        col.size,
			data:        append([]byte{}, col.data...),
			columnIndex: col.columnIndex,
		},
		tmp: make([]byte, col.size),
	}
}
  1564  
// ColumnIndex returns the column index of the buffered page.
func (col *fixedLenByteArrayColumnBuffer) ColumnIndex() ColumnIndex {
	return fixedLenByteArrayColumnIndex{&col.fixedLenByteArrayPage}
}

// OffsetIndex returns the offset index of the buffered page.
func (col *fixedLenByteArrayColumnBuffer) OffsetIndex() OffsetIndex {
	return fixedLenByteArrayOffsetIndex{&col.fixedLenByteArrayPage}
}

// BloomFilter returns nil; column buffers do not maintain bloom filters.
func (col *fixedLenByteArrayColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil; this buffer is not dictionary-indexed.
func (col *fixedLenByteArrayColumnBuffer) Dictionary() Dictionary { return nil }

// Pages returns a sequence containing the single page held by the buffer.
func (col *fixedLenByteArrayColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffer's internal state as a read-only page.
func (col *fixedLenByteArrayColumnBuffer) Page() Page { return &col.fixedLenByteArrayPage }

// Reset clears the buffer for reuse, retaining the allocated storage.
func (col *fixedLenByteArrayColumnBuffer) Reset() { col.data = col.data[:0] }
  1582  
// Cap returns the value capacity of the buffer (bytes divided by size).
func (col *fixedLenByteArrayColumnBuffer) Cap() int { return cap(col.data) / col.size }

// Len returns the number of buffered values.
func (col *fixedLenByteArrayColumnBuffer) Len() int { return len(col.data) / col.size }

// Less orders values lexicographically by their bytes (sort.Interface).
func (col *fixedLenByteArrayColumnBuffer) Less(i, j int) bool {
	return bytes.Compare(col.index(i), col.index(j)) < 0
}

// Swap exchanges the values at rows i and j through the tmp scratch buffer.
func (col *fixedLenByteArrayColumnBuffer) Swap(i, j int) {
	t, u, v := col.tmp[:col.size], col.index(i), col.index(j)
	copy(t, u)
	copy(u, v)
	copy(v, t)
}

// index returns the size-byte slice holding the value at row i, capped so
// callers cannot append past the value's end.
func (col *fixedLenByteArrayColumnBuffer) index(i int) []byte {
	j := (i + 0) * col.size
	k := (i + 1) * col.size
	return col.data[j:k:k]
}
  1603  
  1604  func (col *fixedLenByteArrayColumnBuffer) Write(b []byte) (int, error) {
  1605  	n, err := col.WriteFixedLenByteArrays(b)
  1606  	return n * col.size, err
  1607  }
  1608  
  1609  func (col *fixedLenByteArrayColumnBuffer) WriteFixedLenByteArrays(values []byte) (int, error) {
  1610  	d, m := len(values)/col.size, len(values)%col.size
  1611  	if m != 0 {
  1612  		return 0, fmt.Errorf("cannot write FIXED_LEN_BYTE_ARRAY values of size %d from input of size %d", col.size, len(values))
  1613  	}
  1614  	col.data = append(col.data, values...)
  1615  	return d, nil
  1616  }
  1617  
  1618  func (col *fixedLenByteArrayColumnBuffer) WriteValues(values []Value) (int, error) {
  1619  	for _, v := range values {
  1620  		col.data = append(col.data, v.byteArray()...)
  1621  	}
  1622  	return len(values), nil
  1623  }
  1624  
  1625  func (col *fixedLenByteArrayColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1626  	n := col.size * rows.Len()
  1627  	i := len(col.data)
  1628  	j := len(col.data) + n
  1629  
  1630  	if cap(col.data) < j {
  1631  		col.data = append(make([]byte, 0, max(i+n, 2*cap(col.data))), col.data...)
  1632  	}
  1633  
  1634  	col.data = col.data[:j]
  1635  	newData := col.data[i:]
  1636  
  1637  	for i := 0; i < rows.Len(); i++ {
  1638  		p := rows.Index(i)
  1639  		copy(newData[i*col.size:], unsafe.Slice((*byte)(p), col.size))
  1640  	}
  1641  }
  1642  
  1643  func (col *fixedLenByteArrayColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1644  	i := int(offset) * col.size
  1645  	switch {
  1646  	case i < 0:
  1647  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.data)/col.size))
  1648  	case i >= len(col.data):
  1649  		return 0, io.EOF
  1650  	default:
  1651  		for n < len(values) && i < len(col.data) {
  1652  			values[n] = col.makeValueBytes(col.data[i : i+col.size])
  1653  			n++
  1654  			i += col.size
  1655  		}
  1656  		if n < len(values) {
  1657  			err = io.EOF
  1658  		}
  1659  		return n, err
  1660  	}
  1661  }
  1662  
// uint32ColumnBuffer buffers a leaf column of unsigned 32-bit values.
type uint32ColumnBuffer struct{ uint32Page }

// newUint32ColumnBuffer creates a buffer pre-sized for numValues values.
// The column index is stored negated (bitwise NOT) — presumably to mark the
// page as buffer-owned; confirm against the page implementation.
func newUint32ColumnBuffer(typ Type, columnIndex int16, numValues int32) *uint32ColumnBuffer {
	return &uint32ColumnBuffer{
		uint32Page: uint32Page{
			typ:         typ,
			values:      make([]uint32, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}

// Clone returns a deep copy sharing no memory with the original buffer.
func (col *uint32ColumnBuffer) Clone() ColumnBuffer {
	return &uint32ColumnBuffer{
		uint32Page: uint32Page{
			typ:         col.typ,
			values:      append([]uint32{}, col.values...),
			columnIndex: col.columnIndex,
		},
	}
}
  1684  
// ColumnIndex returns a column index view over the buffered page.
func (col *uint32ColumnBuffer) ColumnIndex() ColumnIndex { return uint32ColumnIndex{&col.uint32Page} }

// OffsetIndex returns an offset index view over the buffered page.
func (col *uint32ColumnBuffer) OffsetIndex() OffsetIndex { return uint32OffsetIndex{&col.uint32Page} }

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *uint32ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil because this column is not dictionary-indexed.
func (col *uint32ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffer as a sequence containing its single page.
func (col *uint32ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a page view.
func (col *uint32ColumnBuffer) Page() Page { return &col.uint32Page }

// Reset truncates the buffer to zero length, retaining capacity for reuse.
func (col *uint32ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the buffer's current capacity in number of values.
func (col *uint32ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered (sort.Interface).
func (col *uint32ColumnBuffer) Len() int { return len(col.values) }

// Less reports whether value i orders before value j (sort.Interface).
func (col *uint32ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *uint32ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1708  
  1709  func (col *uint32ColumnBuffer) Write(b []byte) (int, error) {
  1710  	if (len(b) % 4) != 0 {
  1711  		return 0, fmt.Errorf("cannot write INT32 values from input of size %d", len(b))
  1712  	}
  1713  	col.values = append(col.values, unsafecast.BytesToUint32(b)...)
  1714  	return len(b), nil
  1715  }
  1716  
// WriteUint32s appends the given uint32 values to the column buffer.
func (col *uint32ColumnBuffer) WriteUint32s(values []uint32) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

// WriteValues appends the given values by gathering 4 bytes from each
// Value at the offset of its u64 field.
//
// NOTE(review): reading a uint32 at the offset of the u64 field only yields
// the low-order word on little-endian hosts — presumably the supported
// targets are all little-endian; confirm.
func (col *uint32ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
  1727  
  1728  func (col *uint32ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1729  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1730  		col.values = append(make([]uint32, 0, max(n, 2*cap(col.values))), col.values...)
  1731  	}
  1732  	n := len(col.values)
  1733  	col.values = col.values[:n+rows.Len()]
  1734  	sparse.GatherUint32(col.values[n:], rows.Uint32Array())
  1735  }
  1736  
  1737  func (col *uint32ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1738  	i := int(offset)
  1739  	switch {
  1740  	case i < 0:
  1741  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1742  	case i >= len(col.values):
  1743  		return 0, io.EOF
  1744  	default:
  1745  		for n < len(values) && i < len(col.values) {
  1746  			values[n] = col.makeValue(col.values[i])
  1747  			n++
  1748  			i++
  1749  		}
  1750  		if n < len(values) {
  1751  			err = io.EOF
  1752  		}
  1753  		return n, err
  1754  	}
  1755  }
  1756  
// uint64ColumnBuffer is an in-memory, writable buffer of uint64 column
// values; it embeds the page representation so the buffer can be exposed
// directly as a Page.
type uint64ColumnBuffer struct{ uint64Page }

// newUint64ColumnBuffer constructs an empty uint64 column buffer with
// capacity preallocated for numValues values.
//
// The column index is stored as its bitwise complement (^columnIndex),
// matching the other column buffer constructors in this file.
func newUint64ColumnBuffer(typ Type, columnIndex int16, numValues int32) *uint64ColumnBuffer {
	return &uint64ColumnBuffer{
		uint64Page: uint64Page{
			typ:         typ,
			values:      make([]uint64, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1768  
  1769  func (col *uint64ColumnBuffer) Clone() ColumnBuffer {
  1770  	return &uint64ColumnBuffer{
  1771  		uint64Page: uint64Page{
  1772  			typ:         col.typ,
  1773  			values:      append([]uint64{}, col.values...),
  1774  			columnIndex: col.columnIndex,
  1775  		},
  1776  	}
  1777  }
  1778  
// ColumnIndex returns a column index view over the buffered page.
func (col *uint64ColumnBuffer) ColumnIndex() ColumnIndex { return uint64ColumnIndex{&col.uint64Page} }

// OffsetIndex returns an offset index view over the buffered page.
func (col *uint64ColumnBuffer) OffsetIndex() OffsetIndex { return uint64OffsetIndex{&col.uint64Page} }

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *uint64ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil because this column is not dictionary-indexed.
func (col *uint64ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffer as a sequence containing its single page.
func (col *uint64ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a page view.
func (col *uint64ColumnBuffer) Page() Page { return &col.uint64Page }

// Reset truncates the buffer to zero length, retaining capacity for reuse.
func (col *uint64ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the buffer's current capacity in number of values.
func (col *uint64ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered (sort.Interface).
func (col *uint64ColumnBuffer) Len() int { return len(col.values) }

// Less reports whether value i orders before value j (sort.Interface).
func (col *uint64ColumnBuffer) Less(i, j int) bool { return col.values[i] < col.values[j] }

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *uint64ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1802  
  1803  func (col *uint64ColumnBuffer) Write(b []byte) (int, error) {
  1804  	if (len(b) % 8) != 0 {
  1805  		return 0, fmt.Errorf("cannot write INT64 values from input of size %d", len(b))
  1806  	}
  1807  	col.values = append(col.values, unsafecast.BytesToUint64(b)...)
  1808  	return len(b), nil
  1809  }
  1810  
// WriteUint64s appends the given uint64 values to the column buffer.
func (col *uint64ColumnBuffer) WriteUint64s(values []uint64) (int, error) {
	col.values = append(col.values, values...)
	return len(values), nil
}

// WriteValues appends the given values by gathering 8 bytes from each
// Value at the offset of its u64 field.
func (col *uint64ColumnBuffer) WriteValues(values []Value) (int, error) {
	var model Value
	col.writeValues(makeArrayValue(values, unsafe.Offsetof(model.u64)), columnLevels{})
	return len(values), nil
}
  1821  
  1822  func (col *uint64ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1823  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1824  		col.values = append(make([]uint64, 0, max(n, 2*cap(col.values))), col.values...)
  1825  	}
  1826  	n := len(col.values)
  1827  	col.values = col.values[:n+rows.Len()]
  1828  	sparse.GatherUint64(col.values[n:], rows.Uint64Array())
  1829  }
  1830  
  1831  func (col *uint64ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1832  	i := int(offset)
  1833  	switch {
  1834  	case i < 0:
  1835  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1836  	case i >= len(col.values):
  1837  		return 0, io.EOF
  1838  	default:
  1839  		for n < len(values) && i < len(col.values) {
  1840  			values[n] = col.makeValue(col.values[i])
  1841  			n++
  1842  			i++
  1843  		}
  1844  		if n < len(values) {
  1845  			err = io.EOF
  1846  		}
  1847  		return n, err
  1848  	}
  1849  }
  1850  
// be128ColumnBuffer is an in-memory, writable buffer of 16-byte values
// (compared big-endian by Less via lessBE128); it embeds the page
// representation so the buffer can be exposed directly as a Page.
type be128ColumnBuffer struct{ be128Page }

// newBE128ColumnBuffer constructs an empty 16-byte-value column buffer with
// capacity preallocated for numValues values.
//
// The column index is stored as its bitwise complement (^columnIndex),
// matching the other column buffer constructors in this file.
func newBE128ColumnBuffer(typ Type, columnIndex int16, numValues int32) *be128ColumnBuffer {
	return &be128ColumnBuffer{
		be128Page: be128Page{
			typ:         typ,
			values:      make([][16]byte, 0, numValues),
			columnIndex: ^columnIndex,
		},
	}
}
  1862  
  1863  func (col *be128ColumnBuffer) Clone() ColumnBuffer {
  1864  	return &be128ColumnBuffer{
  1865  		be128Page: be128Page{
  1866  			typ:         col.typ,
  1867  			values:      append([][16]byte{}, col.values...),
  1868  			columnIndex: col.columnIndex,
  1869  		},
  1870  	}
  1871  }
  1872  
// ColumnIndex returns a column index view over the buffered page.
func (col *be128ColumnBuffer) ColumnIndex() ColumnIndex {
	return be128ColumnIndex{&col.be128Page}
}

// OffsetIndex returns an offset index view over the buffered page.
func (col *be128ColumnBuffer) OffsetIndex() OffsetIndex {
	return be128OffsetIndex{&col.be128Page}
}

// BloomFilter returns nil: column buffers carry no bloom filter.
func (col *be128ColumnBuffer) BloomFilter() BloomFilter { return nil }

// Dictionary returns nil because this column is not dictionary-indexed.
func (col *be128ColumnBuffer) Dictionary() Dictionary { return nil }

// Pages exposes the buffer as a sequence containing its single page.
func (col *be128ColumnBuffer) Pages() Pages { return onePage(col.Page()) }

// Page exposes the buffered values as a page view.
func (col *be128ColumnBuffer) Page() Page { return &col.be128Page }

// Reset truncates the buffer to zero length, retaining capacity for reuse.
func (col *be128ColumnBuffer) Reset() { col.values = col.values[:0] }

// Cap returns the buffer's current capacity in number of values.
func (col *be128ColumnBuffer) Cap() int { return cap(col.values) }

// Len returns the number of values currently buffered (sort.Interface).
func (col *be128ColumnBuffer) Len() int { return len(col.values) }

// Less orders values by big-endian comparison of their 16 bytes
// (sort.Interface).
func (col *be128ColumnBuffer) Less(i, j int) bool {
	return lessBE128(&col.values[i], &col.values[j])
}

// Swap exchanges the values at indexes i and j (sort.Interface).
func (col *be128ColumnBuffer) Swap(i, j int) {
	col.values[i], col.values[j] = col.values[j], col.values[i]
}
  1902  
  1903  func (col *be128ColumnBuffer) WriteValues(values []Value) (int, error) {
  1904  	if n := len(col.values) + len(values); n > cap(col.values) {
  1905  		col.values = append(make([][16]byte, 0, max(n, 2*cap(col.values))), col.values...)
  1906  	}
  1907  	n := len(col.values)
  1908  	col.values = col.values[:n+len(values)]
  1909  	newValues := col.values[n:]
  1910  	for i, v := range values {
  1911  		copy(newValues[i][:], v.byteArray())
  1912  	}
  1913  	return len(values), nil
  1914  }
  1915  
  1916  func (col *be128ColumnBuffer) writeValues(rows sparse.Array, _ columnLevels) {
  1917  	if n := len(col.values) + rows.Len(); n > cap(col.values) {
  1918  		col.values = append(make([][16]byte, 0, max(n, 2*cap(col.values))), col.values...)
  1919  	}
  1920  	n := len(col.values)
  1921  	col.values = col.values[:n+rows.Len()]
  1922  	sparse.GatherUint128(col.values[n:], rows.Uint128Array())
  1923  }
  1924  
  1925  func (col *be128ColumnBuffer) ReadValuesAt(values []Value, offset int64) (n int, err error) {
  1926  	i := int(offset)
  1927  	switch {
  1928  	case i < 0:
  1929  		return 0, errRowIndexOutOfBounds(offset, int64(len(col.values)))
  1930  	case i >= len(col.values):
  1931  		return 0, io.EOF
  1932  	default:
  1933  		for n < len(values) && i < len(col.values) {
  1934  			values[n] = col.makeValue(&col.values[i])
  1935  			n++
  1936  			i++
  1937  		}
  1938  		if n < len(values) {
  1939  			err = io.EOF
  1940  		}
  1941  		return n, err
  1942  	}
  1943  }
  1944  
// Compile-time checks that the buffer types satisfy the interfaces callers
// rely on.
var (
	_ sort.Interface = (ColumnBuffer)(nil)
	_ io.Writer      = (*byteArrayColumnBuffer)(nil)
	_ io.Writer      = (*fixedLenByteArrayColumnBuffer)(nil)
)