github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/column_index.go (about)

     1  package parquet
     2  
     3  import (
     4  	"github.com/vc42/parquet-go/deprecated"
     5  	"github.com/vc42/parquet-go/encoding/plain"
     6  	"github.com/vc42/parquet-go/format"
     7  	"github.com/vc42/parquet-go/internal/unsafecast"
     8  )
     9  
// ColumnIndex is the interface implemented by types exposing per-page
// statistics of a column: null counts, null-only pages, min/max bounds, and
// the ordering of those bounds across pages.
type ColumnIndex interface {
	// NumPages returns the number of pages in the column index.
	NumPages() int

	// NullCount returns the number of null values in the page at the given
	// index.
	NullCount(int) int64

	// NullPage tells whether the page at the given index contains null values
	// only.
	NullPage(int) bool

	// MinValue and MaxValue return the min/max bounds for the page at the
	// given index in the column.
	MinValue(int) Value
	MaxValue(int) Value

	// IsAscending returns true if the column index min/max values are sorted
	// in ascending order (based on the ordering rules of the column's logical
	// type).
	IsAscending() bool

	// IsDescending returns true if the column index min/max values are sorted
	// in descending order (based on the ordering rules of the column's logical
	// type).
	IsDescending() bool
}
    35  
    36  // NewColumnIndex constructs a ColumnIndex instance from the given parquet
    37  // format column index. The kind argument configures the type of values
    38  func NewColumnIndex(kind Kind, index *format.ColumnIndex) ColumnIndex {
    39  	return &formatColumnIndex{
    40  		kind:  kind,
    41  		index: index,
    42  	}
    43  }
    44  
    45  type formatColumnIndex struct {
    46  	kind  Kind
    47  	index *format.ColumnIndex
    48  }
    49  
    50  func (f *formatColumnIndex) NumPages() int {
    51  	return len(f.index.MinValues)
    52  }
    53  
    54  func (f *formatColumnIndex) NullCount(i int) int64 {
    55  	if len(f.index.NullCounts) > 0 {
    56  		return f.index.NullCounts[i]
    57  	}
    58  	return 0
    59  }
    60  
    61  func (f *formatColumnIndex) NullPage(i int) bool {
    62  	return len(f.index.NullPages) > 0 && f.index.NullPages[i]
    63  }
    64  
    65  func (f *formatColumnIndex) MinValue(i int) Value {
    66  	if f.NullPage(i) {
    67  		return Value{}
    68  	}
    69  	return f.kind.Value(f.index.MinValues[i])
    70  }
    71  
    72  func (f *formatColumnIndex) MaxValue(i int) Value {
    73  	if f.NullPage(i) {
    74  		return Value{}
    75  	}
    76  	return f.kind.Value(f.index.MaxValues[i])
    77  }
    78  
    79  func (f *formatColumnIndex) IsAscending() bool {
    80  	return f.index.BoundaryOrder == format.Ascending
    81  }
    82  
    83  func (f *formatColumnIndex) IsDescending() bool {
    84  	return f.index.BoundaryOrder == format.Descending
    85  }
    86  
    87  type fileColumnIndex struct{ chunk *fileColumnChunk }
    88  
    89  func (i fileColumnIndex) NumPages() int {
    90  	return len(i.chunk.columnIndex.NullPages)
    91  }
    92  
    93  func (i fileColumnIndex) NullCount(j int) int64 {
    94  	if len(i.chunk.columnIndex.NullCounts) > 0 {
    95  		return i.chunk.columnIndex.NullCounts[j]
    96  	}
    97  	return 0
    98  }
    99  
   100  func (i fileColumnIndex) NullPage(j int) bool {
   101  	return len(i.chunk.columnIndex.NullPages) > 0 && i.chunk.columnIndex.NullPages[j]
   102  }
   103  
   104  func (i fileColumnIndex) MinValue(j int) Value {
   105  	if i.NullPage(j) {
   106  		return Value{}
   107  	}
   108  	return i.makeValue(i.chunk.columnIndex.MinValues[j])
   109  }
   110  
   111  func (i fileColumnIndex) MaxValue(j int) Value {
   112  	if i.NullPage(j) {
   113  		return Value{}
   114  	}
   115  	return i.makeValue(i.chunk.columnIndex.MaxValues[j])
   116  }
   117  
   118  func (i fileColumnIndex) IsAscending() bool {
   119  	return i.chunk.columnIndex.BoundaryOrder == format.Ascending
   120  }
   121  
   122  func (i fileColumnIndex) IsDescending() bool {
   123  	return i.chunk.columnIndex.BoundaryOrder == format.Descending
   124  }
   125  
   126  func (i *fileColumnIndex) makeValue(b []byte) Value {
   127  	return i.chunk.column.typ.Kind().Value(b)
   128  }
   129  
   130  type emptyColumnIndex struct{}
   131  
   132  func (emptyColumnIndex) NumPages() int       { return 0 }
   133  func (emptyColumnIndex) NullCount(int) int64 { return 0 }
   134  func (emptyColumnIndex) NullPage(int) bool   { return false }
   135  func (emptyColumnIndex) MinValue(int) Value  { return Value{} }
   136  func (emptyColumnIndex) MaxValue(int) Value  { return Value{} }
   137  func (emptyColumnIndex) IsAscending() bool   { return false }
   138  func (emptyColumnIndex) IsDescending() bool  { return false }
   139  
   140  type booleanColumnIndex struct{ page *booleanPage }
   141  
   142  func (i booleanColumnIndex) NumPages() int       { return 1 }
   143  func (i booleanColumnIndex) NullCount(int) int64 { return 0 }
   144  func (i booleanColumnIndex) NullPage(int) bool   { return false }
   145  func (i booleanColumnIndex) MinValue(int) Value  { return makeValueBoolean(i.page.min()) }
   146  func (i booleanColumnIndex) MaxValue(int) Value  { return makeValueBoolean(i.page.max()) }
   147  func (i booleanColumnIndex) IsAscending() bool   { return false }
   148  func (i booleanColumnIndex) IsDescending() bool  { return false }
   149  
   150  type int32ColumnIndex struct{ page *int32Page }
   151  
   152  func (i int32ColumnIndex) NumPages() int       { return 1 }
   153  func (i int32ColumnIndex) NullCount(int) int64 { return 0 }
   154  func (i int32ColumnIndex) NullPage(int) bool   { return false }
   155  func (i int32ColumnIndex) MinValue(int) Value  { return makeValueInt32(i.page.min()) }
   156  func (i int32ColumnIndex) MaxValue(int) Value  { return makeValueInt32(i.page.max()) }
   157  func (i int32ColumnIndex) IsAscending() bool   { return false }
   158  func (i int32ColumnIndex) IsDescending() bool  { return false }
   159  
   160  type int64ColumnIndex struct{ page *int64Page }
   161  
   162  func (i int64ColumnIndex) NumPages() int       { return 1 }
   163  func (i int64ColumnIndex) NullCount(int) int64 { return 0 }
   164  func (i int64ColumnIndex) NullPage(int) bool   { return false }
   165  func (i int64ColumnIndex) MinValue(int) Value  { return makeValueInt64(i.page.min()) }
   166  func (i int64ColumnIndex) MaxValue(int) Value  { return makeValueInt64(i.page.max()) }
   167  func (i int64ColumnIndex) IsAscending() bool   { return false }
   168  func (i int64ColumnIndex) IsDescending() bool  { return false }
   169  
   170  type int96ColumnIndex struct{ page *int96Page }
   171  
   172  func (i int96ColumnIndex) NumPages() int       { return 1 }
   173  func (i int96ColumnIndex) NullCount(int) int64 { return 0 }
   174  func (i int96ColumnIndex) NullPage(int) bool   { return false }
   175  func (i int96ColumnIndex) MinValue(int) Value  { return makeValueInt96(i.page.min()) }
   176  func (i int96ColumnIndex) MaxValue(int) Value  { return makeValueInt96(i.page.max()) }
   177  func (i int96ColumnIndex) IsAscending() bool   { return false }
   178  func (i int96ColumnIndex) IsDescending() bool  { return false }
   179  
   180  type floatColumnIndex struct{ page *floatPage }
   181  
   182  func (i floatColumnIndex) NumPages() int       { return 1 }
   183  func (i floatColumnIndex) NullCount(int) int64 { return 0 }
   184  func (i floatColumnIndex) NullPage(int) bool   { return false }
   185  func (i floatColumnIndex) MinValue(int) Value  { return makeValueFloat(i.page.min()) }
   186  func (i floatColumnIndex) MaxValue(int) Value  { return makeValueFloat(i.page.max()) }
   187  func (i floatColumnIndex) IsAscending() bool   { return false }
   188  func (i floatColumnIndex) IsDescending() bool  { return false }
   189  
   190  type doubleColumnIndex struct{ page *doublePage }
   191  
   192  func (i doubleColumnIndex) NumPages() int       { return 1 }
   193  func (i doubleColumnIndex) NullCount(int) int64 { return 0 }
   194  func (i doubleColumnIndex) NullPage(int) bool   { return false }
   195  func (i doubleColumnIndex) MinValue(int) Value  { return makeValueDouble(i.page.min()) }
   196  func (i doubleColumnIndex) MaxValue(int) Value  { return makeValueDouble(i.page.max()) }
   197  func (i doubleColumnIndex) IsAscending() bool   { return false }
   198  func (i doubleColumnIndex) IsDescending() bool  { return false }
   199  
   200  type byteArrayColumnIndex struct{ page *byteArrayPage }
   201  
   202  func (i byteArrayColumnIndex) NumPages() int       { return 1 }
   203  func (i byteArrayColumnIndex) NullCount(int) int64 { return 0 }
   204  func (i byteArrayColumnIndex) NullPage(int) bool   { return false }
   205  func (i byteArrayColumnIndex) MinValue(int) Value  { return makeValueBytes(ByteArray, i.page.min()) }
   206  func (i byteArrayColumnIndex) MaxValue(int) Value  { return makeValueBytes(ByteArray, i.page.max()) }
   207  func (i byteArrayColumnIndex) IsAscending() bool   { return false }
   208  func (i byteArrayColumnIndex) IsDescending() bool  { return false }
   209  
   210  type fixedLenByteArrayColumnIndex struct{ page *fixedLenByteArrayPage }
   211  
   212  func (i fixedLenByteArrayColumnIndex) NumPages() int       { return 1 }
   213  func (i fixedLenByteArrayColumnIndex) NullCount(int) int64 { return 0 }
   214  func (i fixedLenByteArrayColumnIndex) NullPage(int) bool   { return false }
   215  func (i fixedLenByteArrayColumnIndex) MinValue(int) Value {
   216  	return makeValueBytes(FixedLenByteArray, i.page.min())
   217  }
   218  func (i fixedLenByteArrayColumnIndex) MaxValue(int) Value {
   219  	return makeValueBytes(FixedLenByteArray, i.page.max())
   220  }
   221  func (i fixedLenByteArrayColumnIndex) IsAscending() bool  { return false }
   222  func (i fixedLenByteArrayColumnIndex) IsDescending() bool { return false }
   223  
   224  type uint32ColumnIndex struct{ page *uint32Page }
   225  
   226  func (i uint32ColumnIndex) NumPages() int       { return 1 }
   227  func (i uint32ColumnIndex) NullCount(int) int64 { return 0 }
   228  func (i uint32ColumnIndex) NullPage(int) bool   { return false }
   229  func (i uint32ColumnIndex) MinValue(int) Value  { return makeValueUint32(i.page.min()) }
   230  func (i uint32ColumnIndex) MaxValue(int) Value  { return makeValueUint32(i.page.max()) }
   231  func (i uint32ColumnIndex) IsAscending() bool   { return false }
   232  func (i uint32ColumnIndex) IsDescending() bool  { return false }
   233  
   234  type uint64ColumnIndex struct{ page *uint64Page }
   235  
   236  func (i uint64ColumnIndex) NumPages() int       { return 1 }
   237  func (i uint64ColumnIndex) NullCount(int) int64 { return 0 }
   238  func (i uint64ColumnIndex) NullPage(int) bool   { return false }
   239  func (i uint64ColumnIndex) MinValue(int) Value  { return makeValueUint64(i.page.min()) }
   240  func (i uint64ColumnIndex) MaxValue(int) Value  { return makeValueUint64(i.page.max()) }
   241  func (i uint64ColumnIndex) IsAscending() bool   { return false }
   242  func (i uint64ColumnIndex) IsDescending() bool  { return false }
   243  
   244  type be128ColumnIndex struct{ page *be128Page }
   245  
   246  func (i be128ColumnIndex) NumPages() int       { return 1 }
   247  func (i be128ColumnIndex) NullCount(int) int64 { return 0 }
   248  func (i be128ColumnIndex) NullPage(int) bool   { return false }
   249  func (i be128ColumnIndex) MinValue(int) Value  { return makeValueBytes(FixedLenByteArray, i.page.min()) }
   250  func (i be128ColumnIndex) MaxValue(int) Value  { return makeValueBytes(FixedLenByteArray, i.page.max()) }
   251  func (i be128ColumnIndex) IsAscending() bool   { return false }
   252  func (i be128ColumnIndex) IsDescending() bool  { return false }
   253  
// The ColumnIndexer interface is implemented by types that support generating
// parquet column indexes.
//
// The package does not export any types that implement this interface, programs
// must call NewColumnIndexer on a Type instance to construct column indexers.
type ColumnIndexer interface {
	// Reset resets the column indexer state.
	Reset()

	// IndexPage adds a page to the column indexer. The page is described by
	// its number of values, its number of nulls, and its min/max bounds.
	IndexPage(numValues, numNulls int64, min, max Value)

	// ColumnIndex generates a format.ColumnIndex value from the current state
	// of the column indexer.
	//
	// The returned value may reference internal buffers, in which case the
	// values remain valid until the next call to IndexPage or Reset on the
	// column indexer.
	ColumnIndex() format.ColumnIndex
}
   274  
   275  type baseColumnIndexer struct {
   276  	nullPages  []bool
   277  	nullCounts []int64
   278  }
   279  
   280  func (i *baseColumnIndexer) reset() {
   281  	i.nullPages = i.nullPages[:0]
   282  	i.nullCounts = i.nullCounts[:0]
   283  }
   284  
   285  func (i *baseColumnIndexer) observe(numValues, numNulls int64) {
   286  	i.nullPages = append(i.nullPages, numValues == numNulls)
   287  	i.nullCounts = append(i.nullCounts, numNulls)
   288  }
   289  
   290  func (i *baseColumnIndexer) columnIndex(minValues, maxValues [][]byte, minOrder, maxOrder int) format.ColumnIndex {
   291  	return format.ColumnIndex{
   292  		NullPages:     i.nullPages,
   293  		NullCounts:    i.nullCounts,
   294  		MinValues:     minValues,
   295  		MaxValues:     maxValues,
   296  		BoundaryOrder: boundaryOrderOf(minOrder, maxOrder),
   297  	}
   298  }
   299  
   300  type booleanColumnIndexer struct {
   301  	baseColumnIndexer
   302  	minValues []bool
   303  	maxValues []bool
   304  }
   305  
   306  func newBooleanColumnIndexer() *booleanColumnIndexer {
   307  	return new(booleanColumnIndexer)
   308  }
   309  
   310  func (i *booleanColumnIndexer) Reset() {
   311  	i.reset()
   312  	i.minValues = i.minValues[:0]
   313  	i.maxValues = i.maxValues[:0]
   314  }
   315  
   316  func (i *booleanColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   317  	i.observe(numValues, numNulls)
   318  	i.minValues = append(i.minValues, min.Boolean())
   319  	i.maxValues = append(i.maxValues, max.Boolean())
   320  }
   321  
   322  func (i *booleanColumnIndexer) ColumnIndex() format.ColumnIndex {
   323  	return i.columnIndex(
   324  		splitFixedLenByteArrays(unsafecast.BoolToBytes(i.minValues), 1),
   325  		splitFixedLenByteArrays(unsafecast.BoolToBytes(i.maxValues), 1),
   326  		orderOfBool(i.minValues),
   327  		orderOfBool(i.maxValues),
   328  	)
   329  }
   330  
   331  type int32ColumnIndexer struct {
   332  	baseColumnIndexer
   333  	minValues []int32
   334  	maxValues []int32
   335  }
   336  
   337  func newInt32ColumnIndexer() *int32ColumnIndexer {
   338  	return new(int32ColumnIndexer)
   339  }
   340  
   341  func (i *int32ColumnIndexer) Reset() {
   342  	i.reset()
   343  	i.minValues = i.minValues[:0]
   344  	i.maxValues = i.maxValues[:0]
   345  }
   346  
   347  func (i *int32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   348  	i.observe(numValues, numNulls)
   349  	i.minValues = append(i.minValues, min.Int32())
   350  	i.maxValues = append(i.maxValues, max.Int32())
   351  }
   352  
   353  func (i *int32ColumnIndexer) ColumnIndex() format.ColumnIndex {
   354  	return i.columnIndex(
   355  		splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.minValues), 4),
   356  		splitFixedLenByteArrays(unsafecast.Int32ToBytes(i.maxValues), 4),
   357  		orderOfInt32(i.minValues),
   358  		orderOfInt32(i.maxValues),
   359  	)
   360  }
   361  
   362  type int64ColumnIndexer struct {
   363  	baseColumnIndexer
   364  	minValues []int64
   365  	maxValues []int64
   366  }
   367  
   368  func newInt64ColumnIndexer() *int64ColumnIndexer {
   369  	return new(int64ColumnIndexer)
   370  }
   371  
   372  func (i *int64ColumnIndexer) Reset() {
   373  	i.reset()
   374  	i.minValues = i.minValues[:0]
   375  	i.maxValues = i.maxValues[:0]
   376  }
   377  
   378  func (i *int64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   379  	i.observe(numValues, numNulls)
   380  	i.minValues = append(i.minValues, min.Int64())
   381  	i.maxValues = append(i.maxValues, max.Int64())
   382  }
   383  
   384  func (i *int64ColumnIndexer) ColumnIndex() format.ColumnIndex {
   385  	return i.columnIndex(
   386  		splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.minValues), 8),
   387  		splitFixedLenByteArrays(unsafecast.Int64ToBytes(i.maxValues), 8),
   388  		orderOfInt64(i.minValues),
   389  		orderOfInt64(i.maxValues),
   390  	)
   391  }
   392  
   393  type int96ColumnIndexer struct {
   394  	baseColumnIndexer
   395  	minValues []deprecated.Int96
   396  	maxValues []deprecated.Int96
   397  }
   398  
   399  func newInt96ColumnIndexer() *int96ColumnIndexer {
   400  	return new(int96ColumnIndexer)
   401  }
   402  
   403  func (i *int96ColumnIndexer) Reset() {
   404  	i.reset()
   405  	i.minValues = i.minValues[:0]
   406  	i.maxValues = i.maxValues[:0]
   407  }
   408  
   409  func (i *int96ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   410  	i.observe(numValues, numNulls)
   411  	i.minValues = append(i.minValues, min.Int96())
   412  	i.maxValues = append(i.maxValues, max.Int96())
   413  }
   414  
   415  func (i *int96ColumnIndexer) ColumnIndex() format.ColumnIndex {
   416  	return i.columnIndex(
   417  		splitFixedLenByteArrays(deprecated.Int96ToBytes(i.minValues), 12),
   418  		splitFixedLenByteArrays(deprecated.Int96ToBytes(i.maxValues), 12),
   419  		deprecated.OrderOfInt96(i.minValues),
   420  		deprecated.OrderOfInt96(i.maxValues),
   421  	)
   422  }
   423  
   424  type floatColumnIndexer struct {
   425  	baseColumnIndexer
   426  	minValues []float32
   427  	maxValues []float32
   428  }
   429  
   430  func newFloatColumnIndexer() *floatColumnIndexer {
   431  	return new(floatColumnIndexer)
   432  }
   433  
   434  func (i *floatColumnIndexer) Reset() {
   435  	i.reset()
   436  	i.minValues = i.minValues[:0]
   437  	i.maxValues = i.maxValues[:0]
   438  }
   439  
   440  func (i *floatColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   441  	i.observe(numValues, numNulls)
   442  	i.minValues = append(i.minValues, min.Float())
   443  	i.maxValues = append(i.maxValues, max.Float())
   444  }
   445  
   446  func (i *floatColumnIndexer) ColumnIndex() format.ColumnIndex {
   447  	return i.columnIndex(
   448  		splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.minValues), 4),
   449  		splitFixedLenByteArrays(unsafecast.Float32ToBytes(i.maxValues), 4),
   450  		orderOfFloat32(i.minValues),
   451  		orderOfFloat32(i.maxValues),
   452  	)
   453  }
   454  
   455  type doubleColumnIndexer struct {
   456  	baseColumnIndexer
   457  	minValues []float64
   458  	maxValues []float64
   459  }
   460  
   461  func newDoubleColumnIndexer() *doubleColumnIndexer {
   462  	return new(doubleColumnIndexer)
   463  }
   464  
   465  func (i *doubleColumnIndexer) Reset() {
   466  	i.reset()
   467  	i.minValues = i.minValues[:0]
   468  	i.maxValues = i.maxValues[:0]
   469  }
   470  
   471  func (i *doubleColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   472  	i.observe(numValues, numNulls)
   473  	i.minValues = append(i.minValues, min.Double())
   474  	i.maxValues = append(i.maxValues, max.Double())
   475  }
   476  
   477  func (i *doubleColumnIndexer) ColumnIndex() format.ColumnIndex {
   478  	return i.columnIndex(
   479  		splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.minValues), 8),
   480  		splitFixedLenByteArrays(unsafecast.Float64ToBytes(i.maxValues), 8),
   481  		orderOfFloat64(i.minValues),
   482  		orderOfFloat64(i.maxValues),
   483  	)
   484  }
   485  
   486  type byteArrayColumnIndexer struct {
   487  	baseColumnIndexer
   488  	sizeLimit int
   489  	minValues []byte
   490  	maxValues []byte
   491  }
   492  
   493  func newByteArrayColumnIndexer(sizeLimit int) *byteArrayColumnIndexer {
   494  	return &byteArrayColumnIndexer{sizeLimit: sizeLimit}
   495  }
   496  
   497  func (i *byteArrayColumnIndexer) Reset() {
   498  	i.reset()
   499  	i.minValues = i.minValues[:0]
   500  	i.maxValues = i.maxValues[:0]
   501  }
   502  
   503  func (i *byteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   504  	i.observe(numValues, numNulls)
   505  	minValue := min.ByteArray()
   506  	maxValue := max.ByteArray()
   507  	if i.sizeLimit > 0 {
   508  		minValue = truncateLargeMinByteArrayValue(minValue, i.sizeLimit)
   509  		maxValue = truncateLargeMaxByteArrayValue(maxValue, i.sizeLimit)
   510  	}
   511  	i.minValues = plain.AppendByteArray(i.minValues, minValue)
   512  	i.maxValues = plain.AppendByteArray(i.maxValues, maxValue)
   513  }
   514  
   515  func (i *byteArrayColumnIndexer) ColumnIndex() format.ColumnIndex {
   516  	minValues := splitByteArrays(i.minValues)
   517  	maxValues := splitByteArrays(i.maxValues)
   518  	return i.columnIndex(
   519  		minValues,
   520  		maxValues,
   521  		orderOfBytes(minValues),
   522  		orderOfBytes(maxValues),
   523  	)
   524  }
   525  
   526  type fixedLenByteArrayColumnIndexer struct {
   527  	baseColumnIndexer
   528  	size      int
   529  	sizeLimit int
   530  	minValues []byte
   531  	maxValues []byte
   532  }
   533  
   534  func newFixedLenByteArrayColumnIndexer(size, sizeLimit int) *fixedLenByteArrayColumnIndexer {
   535  	return &fixedLenByteArrayColumnIndexer{
   536  		size:      size,
   537  		sizeLimit: sizeLimit,
   538  	}
   539  }
   540  
   541  func (i *fixedLenByteArrayColumnIndexer) Reset() {
   542  	i.reset()
   543  	i.minValues = i.minValues[:0]
   544  	i.maxValues = i.maxValues[:0]
   545  }
   546  
   547  func (i *fixedLenByteArrayColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   548  	i.observe(numValues, numNulls)
   549  	i.minValues = append(i.minValues, min.ByteArray()...)
   550  	i.maxValues = append(i.maxValues, max.ByteArray()...)
   551  }
   552  
   553  func (i *fixedLenByteArrayColumnIndexer) ColumnIndex() format.ColumnIndex {
   554  	minValues := splitFixedLenByteArrays(i.minValues, i.size)
   555  	maxValues := splitFixedLenByteArrays(i.maxValues, i.size)
   556  	if sizeLimit := i.sizeLimit; sizeLimit > 0 {
   557  		for i, v := range minValues {
   558  			minValues[i] = truncateLargeMinByteArrayValue(v, sizeLimit)
   559  		}
   560  		for i, v := range maxValues {
   561  			maxValues[i] = truncateLargeMaxByteArrayValue(v, sizeLimit)
   562  		}
   563  	}
   564  	return i.columnIndex(
   565  		minValues,
   566  		maxValues,
   567  		orderOfBytes(minValues),
   568  		orderOfBytes(maxValues),
   569  	)
   570  }
   571  
   572  type uint32ColumnIndexer struct {
   573  	baseColumnIndexer
   574  	minValues []uint32
   575  	maxValues []uint32
   576  }
   577  
   578  func newUint32ColumnIndexer() *uint32ColumnIndexer {
   579  	return new(uint32ColumnIndexer)
   580  }
   581  
   582  func (i *uint32ColumnIndexer) Reset() {
   583  	i.reset()
   584  	i.minValues = i.minValues[:0]
   585  	i.maxValues = i.maxValues[:0]
   586  }
   587  
   588  func (i *uint32ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   589  	i.observe(numValues, numNulls)
   590  	i.minValues = append(i.minValues, min.Uint32())
   591  	i.maxValues = append(i.maxValues, max.Uint32())
   592  }
   593  
   594  func (i *uint32ColumnIndexer) ColumnIndex() format.ColumnIndex {
   595  	return i.columnIndex(
   596  		splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.minValues), 4),
   597  		splitFixedLenByteArrays(unsafecast.Uint32ToBytes(i.maxValues), 4),
   598  		orderOfUint32(i.minValues),
   599  		orderOfUint32(i.maxValues),
   600  	)
   601  }
   602  
   603  type uint64ColumnIndexer struct {
   604  	baseColumnIndexer
   605  	minValues []uint64
   606  	maxValues []uint64
   607  }
   608  
   609  func newUint64ColumnIndexer() *uint64ColumnIndexer {
   610  	return new(uint64ColumnIndexer)
   611  }
   612  
   613  func (i *uint64ColumnIndexer) Reset() {
   614  	i.reset()
   615  	i.minValues = i.minValues[:0]
   616  	i.maxValues = i.maxValues[:0]
   617  }
   618  
   619  func (i *uint64ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   620  	i.observe(numValues, numNulls)
   621  	i.minValues = append(i.minValues, min.Uint64())
   622  	i.maxValues = append(i.maxValues, max.Uint64())
   623  }
   624  
   625  func (i *uint64ColumnIndexer) ColumnIndex() format.ColumnIndex {
   626  	return i.columnIndex(
   627  		splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.minValues), 8),
   628  		splitFixedLenByteArrays(unsafecast.Uint64ToBytes(i.maxValues), 8),
   629  		orderOfUint64(i.minValues),
   630  		orderOfUint64(i.maxValues),
   631  	)
   632  }
   633  
   634  type be128ColumnIndexer struct {
   635  	baseColumnIndexer
   636  	minValues [][16]byte
   637  	maxValues [][16]byte
   638  }
   639  
   640  func newBE128ColumnIndexer() *be128ColumnIndexer {
   641  	return new(be128ColumnIndexer)
   642  }
   643  
   644  func (i *be128ColumnIndexer) Reset() {
   645  	i.reset()
   646  	i.minValues = i.minValues[:0]
   647  	i.maxValues = i.maxValues[:0]
   648  }
   649  
   650  func (i *be128ColumnIndexer) IndexPage(numValues, numNulls int64, min, max Value) {
   651  	i.observe(numValues, numNulls)
   652  	if !min.IsNull() {
   653  		i.minValues = append(i.minValues, *(*[16]byte)(min.ByteArray()))
   654  	}
   655  	if !max.IsNull() {
   656  		i.maxValues = append(i.maxValues, *(*[16]byte)(max.ByteArray()))
   657  	}
   658  }
   659  
   660  func (i *be128ColumnIndexer) ColumnIndex() format.ColumnIndex {
   661  	minValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.minValues), 16)
   662  	maxValues := splitFixedLenByteArrays(unsafecast.Uint128ToBytes(i.maxValues), 16)
   663  	return i.columnIndex(
   664  		minValues,
   665  		maxValues,
   666  		orderOfBytes(minValues),
   667  		orderOfBytes(maxValues),
   668  	)
   669  }
   670  
   671  func truncateLargeMinByteArrayValue(value []byte, sizeLimit int) []byte {
   672  	if len(value) > sizeLimit {
   673  		value = value[:sizeLimit]
   674  	}
   675  	return value
   676  }
   677  
   678  func truncateLargeMaxByteArrayValue(value []byte, sizeLimit int) []byte {
   679  	if len(value) > sizeLimit && !isMaxByteArrayValue(value) {
   680  		value = value[:sizeLimit]
   681  	}
   682  	return value
   683  }
   684  
   685  func isMaxByteArrayValue(value []byte) bool {
   686  	for i := range value {
   687  		if value[i] != 0xFF {
   688  			return false
   689  		}
   690  	}
   691  	return true
   692  }
   693  
   694  func splitByteArrays(data []byte) [][]byte {
   695  	length := 0
   696  	plain.RangeByteArray(data, func([]byte) error {
   697  		length++
   698  		return nil
   699  	})
   700  	buffer := make([]byte, 0, len(data)-(4*length))
   701  	values := make([][]byte, 0, length)
   702  	plain.RangeByteArray(data, func(value []byte) error {
   703  		offset := len(buffer)
   704  		buffer = append(buffer, value...)
   705  		values = append(values, buffer[offset:])
   706  		return nil
   707  	})
   708  	return values
   709  }
   710  
   711  func splitFixedLenByteArrays(data []byte, size int) [][]byte {
   712  	data = copyBytes(data)
   713  	values := make([][]byte, len(data)/size)
   714  	for i := range values {
   715  		j := (i + 0) * size
   716  		k := (i + 1) * size
   717  		values[i] = data[j:k:k]
   718  	}
   719  	return values
   720  }
   721  
   722  func boundaryOrderOf(minOrder, maxOrder int) format.BoundaryOrder {
   723  	if minOrder == maxOrder {
   724  		switch {
   725  		case minOrder > 0:
   726  			return format.Ascending
   727  		case minOrder < 0:
   728  			return format.Descending
   729  		}
   730  	}
   731  	return format.Unordered
   732  }