github.com/apache/arrow/go/v14@v14.0.1/arrow/compute/internal/kernels/vector_run_end_encode.go (about)

     1  // Licensed to the Apache Software Foundation (ASF) under one
     2  // or more contributor license agreements.  See the NOTICE file
     3  // distributed with this work for additional information
     4  // regarding copyright ownership.  The ASF licenses this file
     5  // to you under the Apache License, Version 2.0 (the
     6  // "License"); you may not use this file except in compliance
     7  // with the License.  You may obtain a copy of the License at
     8  //
     9  // http://www.apache.org/licenses/LICENSE-2.0
    10  //
    11  // Unless required by applicable law or agreed to in writing, software
    12  // distributed under the License is distributed on an "AS IS" BASIS,
    13  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    14  // See the License for the specific language governing permissions and
    15  // limitations under the License.
    16  
    17  //go:build go1.18
    18  
    19  package kernels
    20  
    21  import (
    22  	"bytes"
    23  	"fmt"
    24  	"sort"
    25  	"unsafe"
    26  
    27  	"github.com/apache/arrow/go/v14/arrow"
    28  	"github.com/apache/arrow/go/v14/arrow/bitutil"
    29  	"github.com/apache/arrow/go/v14/arrow/compute/exec"
    30  	"github.com/apache/arrow/go/v14/arrow/decimal128"
    31  	"github.com/apache/arrow/go/v14/arrow/decimal256"
    32  	"github.com/apache/arrow/go/v14/arrow/float16"
    33  	"github.com/apache/arrow/go/v14/arrow/internal/debug"
    34  	"github.com/apache/arrow/go/v14/arrow/memory"
    35  )
    36  
// RunEndEncodeState is the kernel state consumed by runEndEncodeExec. It
// carries the data type requested for the run ends child of the output.
type RunEndEncodeState struct {
	// RunEndType selects the run ends type; runEndEncodeExec dispatches on
	// its ID and accepts INT16, INT32 and INT64 only.
	RunEndType arrow.DataType
}
    40  
    41  func (RunEndEncodeState) TypeName() string {
    42  	return "RunEndEncodeOptions"
    43  }
    44  
// RunEndsType is the generic constraint for the integer types that can be
// used to store run ends: int16, int32 or int64.
type RunEndsType interface {
	int16 | int32 | int64
}
    48  
    49  func readFixedWidthVal[V exec.FixedWidthTypes](inputValidity, inputValues []byte, offset int64, out *V) bool {
    50  	sz := int64(unsafe.Sizeof(*out))
    51  	*out = *(*V)(unsafe.Pointer(&inputValues[offset*sz]))
    52  	return bitutil.BitIsSet(inputValidity, int(offset))
    53  }
    54  
    55  func writeFixedWidthVal[V exec.FixedWidthTypes](result *exec.ExecResult, offset int64, valid bool, value V) {
    56  	if len(result.Buffers[0].Buf) != 0 {
    57  		bitutil.SetBitTo(result.Buffers[0].Buf, int(offset), valid)
    58  	}
    59  
    60  	arr := exec.GetData[V](result.Buffers[1].Buf)
    61  	arr[offset] = value
    62  }
    63  
    64  func readBoolVal(inputValidity, inputValues []byte, offset int64, out *bool) bool {
    65  	*out = bitutil.BitIsSet(inputValues, int(offset))
    66  	return bitutil.BitIsSet(inputValidity, int(offset))
    67  }
    68  
    69  func writeBoolVal(result *exec.ExecResult, offset int64, valid bool, value bool) {
    70  	if len(result.Buffers[0].Buf) != 0 {
    71  		bitutil.SetBitTo(result.Buffers[0].Buf, int(offset), valid)
    72  	}
    73  	bitutil.SetBitTo(result.Buffers[1].Buf, int(offset), value)
    74  }
    75  
// runEndEncodeLoopFixedWidth run-end encodes inputs whose values can be loaded
// into a V and compared with ==. R is the integer type of the produced run
// ends.
type runEndEncodeLoopFixedWidth[R RunEndsType, V exec.FixedWidthTypes | bool] struct {
	// inputLen is the number of values to encode and inputOffset the index
	// of the first one; reads are addressed from inputOffset onwards.
	inputLen, inputOffset int64
	// inputValidity is the input validity bitmap; PreallocOutput only
	// allocates an output validity bitmap when it is non-empty.
	inputValidity         []byte
	// inputValues is the raw values buffer handed to readValue.
	inputValues           []byte
	// valueType is the type of the encoded values child of the output.
	valueType             arrow.DataType

	// readValue loads the value at the given index into out and returns
	// whether it is valid.
	readValue  func(inputValidity, inputValues []byte, offset int64, out *V) bool
	// writeValue stores one run's value and validity at the given index of
	// the output values child.
	writeValue func(*exec.ExecResult, int64, bool, V)
}
    85  
    86  func (re *runEndEncodeLoopFixedWidth[R, V]) WriteEncodedRuns(out *exec.ExecResult) int64 {
    87  	outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf)
    88  
    89  	readOffset := re.inputOffset
    90  	var currentRun V
    91  	curRunValid := re.readValue(re.inputValidity, re.inputValues, readOffset, &currentRun)
    92  	readOffset++
    93  
    94  	var writeOffset int64
    95  	var value V
    96  	for readOffset < re.inputOffset+re.inputLen {
    97  		valid := re.readValue(re.inputValidity, re.inputValues, readOffset, &value)
    98  		if valid != curRunValid || value != currentRun {
    99  			// close the current run by writing it out
   100  			re.writeValue(&out.Children[1], writeOffset, curRunValid, currentRun)
   101  			runEnd := R(readOffset - re.inputOffset)
   102  			outputRunEnds[writeOffset] = runEnd
   103  			writeOffset++
   104  			curRunValid, currentRun = valid, value
   105  		}
   106  		readOffset++
   107  	}
   108  
   109  	re.writeValue(&out.Children[1], writeOffset, curRunValid, currentRun)
   110  	outputRunEnds[writeOffset] = R(re.inputLen)
   111  	return writeOffset + 1
   112  }
   113  
   114  func (re *runEndEncodeLoopFixedWidth[R, V]) CountNumberOfRuns() (numValid, numOutput int64) {
   115  	offset := re.inputOffset
   116  	var currentRun V
   117  	curRunValid := re.readValue(re.inputValidity, re.inputValues, offset, &currentRun)
   118  	offset++
   119  
   120  	if curRunValid {
   121  		numValid = 1
   122  	}
   123  	numOutput = 1
   124  
   125  	var value V
   126  	for offset < re.inputOffset+re.inputLen {
   127  		valid := re.readValue(re.inputValidity, re.inputValues, offset, &value)
   128  		offset++
   129  		// new run
   130  		if valid != curRunValid || value != currentRun {
   131  			currentRun = value
   132  			curRunValid = valid
   133  
   134  			numOutput++
   135  			if valid {
   136  				numValid++
   137  			}
   138  		}
   139  	}
   140  	return
   141  }
   142  
   143  func (re *runEndEncodeLoopFixedWidth[R, V]) PreallocOutput(ctx *exec.KernelCtx, numOutput int64, out *exec.ExecResult) {
   144  	runEndsBuffer := ctx.Allocate(int(numOutput) * int(SizeOf[R]()))
   145  	var validityBuffer *memory.Buffer
   146  	if len(re.inputValidity) > 0 {
   147  		validityBuffer = ctx.AllocateBitmap(numOutput)
   148  	}
   149  
   150  	var valueBuffer *memory.Buffer
   151  	bufSpec := re.valueType.Layout().Buffers[1]
   152  	if bufSpec.Kind == arrow.KindBitmap {
   153  		valueBuffer = ctx.AllocateBitmap(numOutput)
   154  	} else {
   155  		valueBuffer = ctx.Allocate(int(numOutput) * bufSpec.ByteWidth)
   156  	}
   157  
   158  	reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType)
   159  	out.Release()
   160  
   161  	*out = exec.ExecResult{
   162  		Type:   reeType,
   163  		Len:    re.inputLen,
   164  		Nulls:  0,
   165  		Offset: 0,
   166  		Children: []exec.ArraySpan{
   167  			{
   168  				Type: reeType.RunEnds(),
   169  				Len:  numOutput,
   170  			},
   171  			{
   172  				Type: reeType.Encoded(),
   173  				Len:  numOutput,
   174  			},
   175  		},
   176  	}
   177  
   178  	out.Children[0].Buffers[1].WrapBuffer(runEndsBuffer)
   179  	if validityBuffer != nil {
   180  		out.Children[1].Buffers[0].WrapBuffer(validityBuffer)
   181  	}
   182  	out.Children[1].Buffers[1].WrapBuffer(valueBuffer)
   183  }
   184  
// runEndEncodeFSB run-end encodes fixed-size binary input, comparing values
// as byte slices of width bytes. R is the integer type of the produced run
// ends.
type runEndEncodeFSB[R RunEndsType] struct {
	// inputLen is the number of values to encode, inputOffset the index of
	// the first one.
	inputLen, inputOffset      int64
	// inputValidity may be empty, in which case every value is treated as
	// valid; inputValues holds the values back to back, width bytes each.
	inputValidity, inputValues []byte
	// valueType is the type of the encoded values child of the output.
	valueType                  arrow.DataType
	// width is the byte width of a single value.
	width                      int
}
   191  
   192  func (re *runEndEncodeFSB[R]) readValue(idx int64) ([]byte, bool) {
   193  	if len(re.inputValidity) > 0 && bitutil.BitIsNotSet(re.inputValidity, int(idx)) {
   194  		return nil, false
   195  	}
   196  
   197  	start, end := idx*int64(re.width), (idx+1)*int64(re.width)
   198  	return re.inputValues[start:end], true
   199  }
   200  
   201  func (re *runEndEncodeFSB[R]) CountNumberOfRuns() (numValid, numOutput int64) {
   202  	offset := re.inputOffset
   203  	currentRun, curRunValid := re.readValue(offset)
   204  	offset++
   205  
   206  	if curRunValid {
   207  		numValid++
   208  	}
   209  	numOutput = 1
   210  
   211  	for offset < re.inputOffset+re.inputLen {
   212  		value, valid := re.readValue(offset)
   213  		offset++
   214  		if valid != curRunValid || !bytes.Equal(value, currentRun) {
   215  			currentRun, curRunValid = value, valid
   216  			numOutput++
   217  			if valid {
   218  				numValid++
   219  			}
   220  		}
   221  	}
   222  	return
   223  }
   224  
   225  func (re *runEndEncodeFSB[R]) PreallocOutput(ctx *exec.KernelCtx, numOutput int64, out *exec.ExecResult) {
   226  	runEndsBuffer := ctx.Allocate(int(numOutput) * int(SizeOf[R]()))
   227  	var validityBuffer *memory.Buffer
   228  	if len(re.inputValidity) > 0 {
   229  		validityBuffer = ctx.AllocateBitmap(numOutput)
   230  	}
   231  
   232  	valueBuffer := ctx.Allocate(re.width * int(numOutput))
   233  	reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType)
   234  	out.Release()
   235  
   236  	*out = exec.ExecResult{
   237  		Type:   reeType,
   238  		Len:    re.inputLen,
   239  		Nulls:  0,
   240  		Offset: 0,
   241  		Children: []exec.ArraySpan{
   242  			{
   243  				Type: reeType.RunEnds(),
   244  				Len:  numOutput,
   245  			},
   246  			{
   247  				Type: reeType.Encoded(),
   248  				Len:  numOutput,
   249  			},
   250  		},
   251  	}
   252  
   253  	out.Children[0].Buffers[1].WrapBuffer(runEndsBuffer)
   254  	if validityBuffer != nil {
   255  		out.Children[1].Buffers[0].WrapBuffer(validityBuffer)
   256  	}
   257  	out.Children[1].Buffers[1].WrapBuffer(valueBuffer)
   258  }
   259  
   260  func (re *runEndEncodeFSB[R]) WriteEncodedRuns(out *exec.ExecResult) int64 {
   261  	outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf)
   262  	outputValues := out.Children[1].Buffers[1].Buf
   263  
   264  	readOffset := re.inputOffset
   265  	currentRun, curRunValid := re.readValue(readOffset)
   266  	readOffset++
   267  
   268  	var writeOffset int64
   269  	validityBuf := out.Children[1].Buffers[0].Buf
   270  	setValidity := func(valid bool) {}
   271  	if len(validityBuf) > 0 {
   272  		setValidity = func(valid bool) {
   273  			bitutil.SetBitTo(validityBuf, int(writeOffset), valid)
   274  		}
   275  	}
   276  
   277  	writeValue := func(valid bool, value []byte) {
   278  		setValidity(valid)
   279  		start := writeOffset * int64(re.width)
   280  		copy(outputValues[start:], value)
   281  	}
   282  
   283  	for readOffset < re.inputOffset+re.inputLen {
   284  		value, valid := re.readValue(readOffset)
   285  
   286  		if valid != curRunValid || !bytes.Equal(value, currentRun) {
   287  			writeValue(curRunValid, currentRun)
   288  			runEnd := R(readOffset - re.inputOffset)
   289  			outputRunEnds[writeOffset] = runEnd
   290  			writeOffset++
   291  			curRunValid, currentRun = valid, value
   292  		}
   293  
   294  		readOffset++
   295  	}
   296  
   297  	writeValue(curRunValid, currentRun)
   298  	outputRunEnds[writeOffset] = R(re.inputLen)
   299  	return writeOffset + 1
   300  }
   301  
// runEndEncodeLoopBinary run-end encodes variable-length binary/string input.
// R is the integer type of the produced run ends and O the offset type of the
// values (int32 or int64).
type runEndEncodeLoopBinary[R RunEndsType, O int32 | int64] struct {
	// inputLen is the number of values to encode; inputOffset is only added
	// when indexing the validity bitmap because offsetValues is already
	// positioned at the first value.
	inputLen, inputOffset      int64
	// inputValidity may be empty, in which case every value is treated as
	// valid; inputValues is the data buffer the offsets point into.
	inputValidity, inputValues []byte
	// offsetValues holds the value offsets, starting at the first value to
	// encode.
	offsetValues               []O
	// valueType is the type of the encoded values child of the output.
	valueType                  arrow.DataType

	// estimatedValuesLen is computed by CountNumberOfRuns: the total byte
	// length of the values of all valid runs. PreallocOutput uses it to size
	// the output data buffer.
	estimatedValuesLen int64
}
   310  
   311  func (re *runEndEncodeLoopBinary[R, O]) readValue(idx int64) ([]byte, bool) {
   312  	if len(re.inputValidity) > 0 && bitutil.BitIsNotSet(re.inputValidity, int(idx+re.inputOffset)) {
   313  		return nil, false
   314  	}
   315  
   316  	start, end := re.offsetValues[idx], re.offsetValues[idx+1]
   317  	return re.inputValues[start:end], true
   318  }
   319  
   320  func (re *runEndEncodeLoopBinary[R, O]) CountNumberOfRuns() (numValid, numOutput int64) {
   321  	re.estimatedValuesLen = 0
   322  	// re.offsetValues already accounts for the input.Offset so we don't
   323  	// need to use it as the initial value for `offset` here.
   324  	var offset int64
   325  	currentRun, curRunValid := re.readValue(offset)
   326  	offset++
   327  
   328  	if curRunValid {
   329  		numValid = 1
   330  		re.estimatedValuesLen += int64(len(currentRun))
   331  	}
   332  	numOutput = 1
   333  
   334  	for offset < re.inputLen {
   335  		value, valid := re.readValue(offset)
   336  		offset++
   337  		// new run
   338  		if valid != curRunValid || !bytes.Equal(value, currentRun) {
   339  			if valid {
   340  				re.estimatedValuesLen += int64(len(value))
   341  			}
   342  
   343  			currentRun = value
   344  			curRunValid = valid
   345  
   346  			numOutput++
   347  			if valid {
   348  				numValid++
   349  			}
   350  		}
   351  	}
   352  	return
   353  }
   354  
   355  func (re *runEndEncodeLoopBinary[R, O]) PreallocOutput(ctx *exec.KernelCtx, numOutput int64, out *exec.ExecResult) {
   356  	runEndsBuffer := ctx.Allocate(int(numOutput) * int(SizeOf[R]()))
   357  	var validityBuffer *memory.Buffer
   358  	if len(re.inputValidity) > 0 {
   359  		validityBuffer = ctx.AllocateBitmap(numOutput)
   360  	}
   361  
   362  	valueBuffer := ctx.Allocate(int(re.estimatedValuesLen))
   363  	offsetsBuffer := ctx.Allocate(int(numOutput+1) * int(SizeOf[O]()))
   364  
   365  	reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), re.valueType)
   366  	*out = exec.ExecResult{
   367  		Type:   reeType,
   368  		Len:    re.inputLen,
   369  		Nulls:  0,
   370  		Offset: 0,
   371  		Children: []exec.ArraySpan{
   372  			{
   373  				Type: reeType.RunEnds(),
   374  				Len:  numOutput,
   375  			},
   376  			{
   377  				Type: reeType.Encoded(),
   378  				Len:  numOutput,
   379  			},
   380  		},
   381  	}
   382  
   383  	out.Children[0].Buffers[1].WrapBuffer(runEndsBuffer)
   384  	if validityBuffer != nil {
   385  		out.Children[1].Buffers[0].WrapBuffer(validityBuffer)
   386  	}
   387  	out.Children[1].Buffers[1].WrapBuffer(offsetsBuffer)
   388  	out.Children[1].Buffers[2].WrapBuffer(valueBuffer)
   389  }
   390  
   391  func (re *runEndEncodeLoopBinary[R, O]) WriteEncodedRuns(out *exec.ExecResult) int64 {
   392  	outputRunEnds := exec.GetData[R](out.Children[0].Buffers[1].Buf)
   393  	outputOffsets := exec.GetSpanOffsets[O](&out.Children[1], 1)
   394  	outputValues := out.Children[1].Buffers[2].Buf
   395  
   396  	// re.offsetValues already accounts for the input.offset so we don't
   397  	// need to initalize readOffset to re.inputOffset
   398  	var readOffset int64
   399  	currentRun, curRunValid := re.readValue(readOffset)
   400  	readOffset++
   401  
   402  	var writeOffset, valueOffset int64
   403  	validityBuf := out.Children[1].Buffers[0].Buf
   404  	setValidity := func(valid bool) {}
   405  	if len(validityBuf) > 0 {
   406  		setValidity = func(valid bool) {
   407  			bitutil.SetBitTo(validityBuf, int(writeOffset), valid)
   408  		}
   409  	}
   410  
   411  	outputOffsets[0], outputOffsets = 0, outputOffsets[1:]
   412  
   413  	writeValue := func(valid bool, value []byte) {
   414  		setValidity(valid)
   415  		valueOffset += int64(copy(outputValues[valueOffset:], value))
   416  		outputOffsets[writeOffset] = O(valueOffset)
   417  	}
   418  
   419  	for readOffset < re.inputLen {
   420  		value, valid := re.readValue(readOffset)
   421  
   422  		if valid != curRunValid || !bytes.Equal(value, currentRun) {
   423  			writeValue(curRunValid, currentRun)
   424  			runEnd := R(readOffset)
   425  			outputRunEnds[writeOffset] = runEnd
   426  			writeOffset++
   427  			curRunValid, currentRun = valid, value
   428  		}
   429  		readOffset++
   430  	}
   431  
   432  	writeValue(curRunValid, currentRun)
   433  	outputRunEnds[writeOffset] = R(re.inputLen)
   434  	return writeOffset + 1
   435  }
   436  
   437  func validateRunEndType[R RunEndsType](length int64) error {
   438  	runEndMax := MaxOf[R]()
   439  	if length > int64(runEndMax) {
   440  		return fmt.Errorf("%w: cannot run-end encode arrays with more elements than the run end type can hold: %d",
   441  			arrow.ErrInvalid, runEndMax)
   442  	}
   443  	return nil
   444  }
   445  
   446  func createEncoder[R RunEndsType, V exec.FixedWidthTypes](input *exec.ArraySpan) *runEndEncodeLoopFixedWidth[R, V] {
   447  	return &runEndEncodeLoopFixedWidth[R, V]{
   448  		inputLen:      input.Len,
   449  		inputOffset:   input.Offset,
   450  		inputValidity: input.Buffers[0].Buf,
   451  		inputValues:   input.Buffers[1].Buf,
   452  		valueType:     input.Type,
   453  		readValue:     readFixedWidthVal[V],
   454  		writeValue:    writeFixedWidthVal[V],
   455  	}
   456  }
   457  
   458  func createVarBinaryEncoder[R RunEndsType, O int32 | int64](input *exec.ArraySpan) *runEndEncodeLoopBinary[R, O] {
   459  	return &runEndEncodeLoopBinary[R, O]{
   460  		inputLen:      input.Len,
   461  		inputOffset:   input.Offset,
   462  		inputValidity: input.Buffers[0].Buf,
   463  		inputValues:   input.Buffers[2].Buf,
   464  		// exec.GetSpanOffsets applies input.Offset to the resulting slice
   465  		offsetValues: exec.GetSpanOffsets[O](input, 1),
   466  		valueType:    input.Type,
   467  	}
   468  }
   469  
   470  func newEncoder[R RunEndsType](input *exec.ArraySpan) encoder {
   471  	switch input.Type.ID() {
   472  	case arrow.BOOL:
   473  		return &runEndEncodeLoopFixedWidth[R, bool]{
   474  			inputLen:      input.Len,
   475  			inputOffset:   input.Offset,
   476  			inputValidity: input.Buffers[0].Buf,
   477  			inputValues:   input.Buffers[1].Buf,
   478  			valueType:     input.Type,
   479  			readValue:     readBoolVal,
   480  			writeValue:    writeBoolVal,
   481  		}
   482  	// for the other fixed size types, we only need to
   483  	// handle the different physical representations.
   484  	case arrow.INT8, arrow.UINT8:
   485  		return createEncoder[R, uint8](input)
   486  	case arrow.INT16, arrow.UINT16:
   487  		return createEncoder[R, uint16](input)
   488  	case arrow.INT32, arrow.UINT32, arrow.DATE32,
   489  		arrow.TIME32, arrow.INTERVAL_MONTHS:
   490  		return createEncoder[R, uint32](input)
   491  	case arrow.INT64, arrow.UINT64, arrow.DATE64,
   492  		arrow.TIME64, arrow.DURATION, arrow.TIMESTAMP:
   493  		return createEncoder[R, uint64](input)
   494  	case arrow.FLOAT16:
   495  		return createEncoder[R, float16.Num](input)
   496  	case arrow.FLOAT32:
   497  		return createEncoder[R, float32](input)
   498  	case arrow.FLOAT64:
   499  		return createEncoder[R, float64](input)
   500  	case arrow.DECIMAL128:
   501  		return createEncoder[R, decimal128.Num](input)
   502  	case arrow.DECIMAL256:
   503  		return createEncoder[R, decimal256.Num](input)
   504  	case arrow.INTERVAL_DAY_TIME:
   505  		return createEncoder[R, arrow.DayTimeInterval](input)
   506  	case arrow.INTERVAL_MONTH_DAY_NANO:
   507  		return createEncoder[R, arrow.MonthDayNanoInterval](input)
   508  	case arrow.BINARY, arrow.STRING:
   509  		return createVarBinaryEncoder[R, int32](input)
   510  	case arrow.LARGE_BINARY, arrow.LARGE_STRING:
   511  		return createVarBinaryEncoder[R, int64](input)
   512  	case arrow.FIXED_SIZE_BINARY:
   513  		return &runEndEncodeFSB[R]{
   514  			inputLen:      input.Len,
   515  			inputOffset:   input.Offset,
   516  			inputValidity: input.Buffers[0].Buf,
   517  			inputValues:   input.Buffers[1].Buf,
   518  			valueType:     input.Type,
   519  			width:         input.Type.(*arrow.FixedSizeBinaryType).ByteWidth,
   520  		}
   521  	}
   522  	return nil
   523  }
   524  
// encoder is implemented by the type-specific run-end encoders.
// runEndEncodeImpl drives them in three steps, in this order.
type encoder interface {
	// CountNumberOfRuns scans the input and returns the number of valid
	// (non-null) runs and the total number of runs.
	CountNumberOfRuns() (numValid, numOutput int64)
	// PreallocOutput allocates the output for the given number of runs.
	PreallocOutput(*exec.KernelCtx, int64, *exec.ExecResult)
	// WriteEncodedRuns fills the preallocated output and returns the number
	// of runs it wrote.
	WriteEncodedRuns(*exec.ExecResult) int64
}
   530  
   531  func runEndEncodeImpl[R RunEndsType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
   532  	// first pass: count the number of runs
   533  	var (
   534  		inputArr      = &batch.Values[0].Array
   535  		inputLen      = inputArr.Len
   536  		numOutputRuns int64
   537  		numValidRuns  int64
   538  		enc           encoder
   539  	)
   540  
   541  	if inputLen == 0 {
   542  		reeType := arrow.RunEndEncodedOf(exec.GetDataType[R](), inputArr.Type)
   543  		*out = exec.ExecResult{
   544  			Type: reeType,
   545  			Children: []exec.ArraySpan{
   546  				{Type: reeType.RunEnds()}, {Type: reeType.Encoded()},
   547  			},
   548  		}
   549  		return nil
   550  	}
   551  
   552  	if err := validateRunEndType[R](inputLen); err != nil {
   553  		return err
   554  	}
   555  
   556  	enc = newEncoder[R](inputArr)
   557  	numValidRuns, numOutputRuns = enc.CountNumberOfRuns()
   558  	enc.PreallocOutput(ctx, numOutputRuns, out)
   559  
   560  	out.Children[1].Nulls = numOutputRuns - numValidRuns
   561  
   562  	written := enc.WriteEncodedRuns(out)
   563  	debug.Assert(written == numOutputRuns, "mismatch number of written values")
   564  	return nil
   565  }
   566  
   567  func runEndEncodeExec(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
   568  	reeType := ctx.State.(RunEndEncodeState).RunEndType
   569  	switch reeType.ID() {
   570  	case arrow.INT16:
   571  		return runEndEncodeImpl[int16](ctx, batch, out)
   572  	case arrow.INT32:
   573  		return runEndEncodeImpl[int32](ctx, batch, out)
   574  	case arrow.INT64:
   575  		return runEndEncodeImpl[int64](ctx, batch, out)
   576  	}
   577  
   578  	return fmt.Errorf("%w: bad run end type %s", arrow.ErrInvalid, reeType)
   579  }
   580  
// decodeBool expands a run-end encoded array with boolean values into a plain
// boolean array. R is the integer type of the run ends.
type decodeBool[R RunEndsType] struct {
	// inputLen and inputOffset are the logical length and offset of the
	// run-end encoded input.
	inputLen, inputOffset int64
	// inputRunEnds holds the values of the run ends child.
	inputRunEnds          []R

	// inputPhysicalOffset is the index of the first run whose run end is
	// greater than the logical offset (see newDecoder).
	inputPhysicalOffset int64
	// inputValidity and inputValues are the validity and value bitmaps of
	// the values child.
	inputValidity       []byte
	inputValues         []byte
	// inputValueOffset is the offset of the values child; it is added to
	// the run index when addressing inputValidity and inputValues.
	inputValueOffset    int64
}
   590  
   591  func (de *decodeBool[R]) PreallocOutput(ctx *exec.KernelCtx, out *exec.ExecResult) {
   592  	*out = exec.ExecResult{
   593  		Type: arrow.FixedWidthTypes.Boolean,
   594  		Len:  de.inputLen,
   595  	}
   596  
   597  	if len(de.inputValidity) != 0 {
   598  		out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(de.inputLen))
   599  	}
   600  
   601  	out.Buffers[1].WrapBuffer(ctx.AllocateBitmap(de.inputLen))
   602  }
   603  
   604  func (de *decodeBool[R]) ExpandAllRuns(out *exec.ExecResult) int64 {
   605  	var (
   606  		writeOffset         int64
   607  		runLength, numValid int64
   608  		outputValues        = out.Buffers[1].Buf
   609  		prevRunEnd          = R(de.inputOffset)
   610  		hasValidity         = len(de.inputValidity) != 0 && len(out.Buffers[0].Buf) != 0
   611  	)
   612  
   613  	for i, runEnd := range de.inputRunEnds[de.inputPhysicalOffset:] {
   614  		runLength, prevRunEnd = int64(runEnd-prevRunEnd), runEnd
   615  		// if this run is a null, clear the bits and update writeOffset
   616  		if hasValidity {
   617  			if bitutil.BitIsNotSet(de.inputValidity, int(de.inputValueOffset+de.inputPhysicalOffset)+i) {
   618  				bitutil.SetBitsTo(out.Buffers[0].Buf, writeOffset, runLength, false)
   619  				writeOffset += runLength
   620  				continue
   621  			}
   622  
   623  			// if the output has a validity bitmap, update it with 1s
   624  			bitutil.SetBitsTo(out.Buffers[0].Buf, writeOffset, runLength, true)
   625  		}
   626  
   627  		// get the value for this run + where to start writing
   628  		value := bitutil.BitIsSet(de.inputValues, int(de.inputValueOffset+de.inputPhysicalOffset)+i)
   629  		bitutil.SetBitsTo(outputValues, writeOffset, runLength, value)
   630  		writeOffset += runLength
   631  		numValid += runLength
   632  	}
   633  
   634  	return numValid
   635  }
   636  
// decodeFixedWidth expands a run-end encoded array with fixed-width values
// into a plain array of those values. R is the integer type of the run ends.
type decodeFixedWidth[R RunEndsType] struct {
	// inputLen and inputOffset are the logical length and offset of the
	// run-end encoded input.
	inputLen, inputOffset int64
	// inputRunEnds holds the values of the run ends child.
	inputRunEnds          []R

	// inputPhysicalOffset is the index of the first run whose run end is
	// greater than the logical offset (see newDecoder).
	inputPhysicalOffset int64
	// inputValidity and inputValues are the validity bitmap and the raw
	// values buffer of the values child.
	inputValidity       []byte
	inputValues         []byte
	// inputValueOffset is the offset of the values child; it is added to
	// the run index when addressing inputValidity and inputValues.
	inputValueOffset    int64

	// valueType is the type of the decoded output; it is asserted to
	// arrow.FixedWidthDataType to obtain the byte width of a value.
	valueType arrow.DataType
}
   648  
   649  func (de *decodeFixedWidth[R]) PreallocOutput(ctx *exec.KernelCtx, out *exec.ExecResult) {
   650  	*out = exec.ExecResult{
   651  		Type: de.valueType,
   652  		Len:  de.inputLen,
   653  	}
   654  
   655  	if len(de.inputValidity) != 0 {
   656  		out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(de.inputLen))
   657  	}
   658  
   659  	out.Buffers[1].WrapBuffer(ctx.Allocate(int(de.inputLen) * de.valueType.(arrow.FixedWidthDataType).Bytes()))
   660  }
   661  
   662  func (de *decodeFixedWidth[R]) ExpandAllRuns(out *exec.ExecResult) int64 {
   663  	var (
   664  		writeOffset         int64
   665  		runLength, numValid int64
   666  		outputValues        = out.Buffers[1].Buf
   667  		width               = de.valueType.(arrow.FixedWidthDataType).Bytes()
   668  		inputValues         = de.inputValues[(de.inputValueOffset+de.inputPhysicalOffset)*int64(width):]
   669  		prevRunEnd          = R(de.inputOffset)
   670  		hasValidity         = len(de.inputValidity) != 0 && len(out.Buffers[0].Buf) != 0
   671  	)
   672  
   673  	for i, runEnd := range de.inputRunEnds[de.inputPhysicalOffset:] {
   674  		runLength, prevRunEnd = int64(runEnd-prevRunEnd), runEnd
   675  		// if this run is a null, clear the bits and update writeOffset
   676  		if hasValidity {
   677  			if bitutil.BitIsNotSet(de.inputValidity, int(de.inputValueOffset+de.inputPhysicalOffset)+i) {
   678  				bitutil.SetBitsTo(out.Buffers[0].Buf, writeOffset, runLength, false)
   679  				writeOffset += runLength
   680  				continue
   681  			}
   682  
   683  			// if the output has a validity bitmap, update it with 1s
   684  			bitutil.SetBitsTo(out.Buffers[0].Buf, writeOffset, runLength, true)
   685  		}
   686  
   687  		// get the value for this run + where to start writing
   688  		var (
   689  			value       = inputValues[i*width : (i+1)*width]
   690  			outputStart = writeOffset * int64(width)
   691  		)
   692  		writeOffset += runLength
   693  		numValid += runLength
   694  
   695  		// get the slice of our output buffer we want to fill
   696  		// just incrementally duplicate the bytes until we've filled
   697  		// the slice with runLength copies of the value
   698  		outputSlice := outputValues[outputStart : writeOffset*int64(width)]
   699  		copy(outputSlice, value)
   700  		for j := width; j < len(outputSlice); j *= 2 {
   701  			copy(outputSlice[j:], outputSlice[:j])
   702  		}
   703  	}
   704  
   705  	return numValid
   706  }
   707  
// decodeBinary expands a run-end encoded array with variable-length
// binary/string values into a plain array. R is the integer type of the run
// ends and O the offset type of the values (int32 or int64).
type decodeBinary[R RunEndsType, O int32 | int64] struct {
	// inputLen and inputLogicalOffset are the logical length and offset of
	// the run-end encoded input.
	inputLen, inputLogicalOffset int64
	// inputRunEnds holds the values of the run ends child.
	inputRunEnds                 []R

	// inputPhysicalOffset is the index of the first run whose run end is
	// greater than the logical offset (see newDecoder).
	inputPhysicalOffset int64
	// inputValuesOffset is the offset of the values child; it is added to
	// the run index when addressing inputValidity only, because
	// inputOffsets already accounts for it.
	inputValuesOffset   int64
	// inputValidity, inputValues and inputOffsets are the validity bitmap,
	// data buffer and value offsets of the values child.
	inputValidity       []byte
	inputValues         []byte
	inputOffsets        []O

	// valueType is the type of the decoded output.
	valueType arrow.DataType
}
   720  
   721  func (de *decodeBinary[R, O]) PreallocOutput(ctx *exec.KernelCtx, out *exec.ExecResult) {
   722  	var (
   723  		runLength  int64
   724  		prevRunEnd = R(de.inputLogicalOffset)
   725  		totalSize  int
   726  	)
   727  
   728  	for i, runEnd := range de.inputRunEnds[de.inputPhysicalOffset:] {
   729  		runLength, prevRunEnd = int64(runEnd-prevRunEnd), runEnd
   730  
   731  		start := de.inputOffsets[de.inputPhysicalOffset+int64(i)]
   732  		end := de.inputOffsets[de.inputPhysicalOffset+int64(i)+1]
   733  
   734  		totalSize += int(end-start) * int(runLength)
   735  	}
   736  
   737  	*out = exec.ExecResult{
   738  		Type: de.valueType,
   739  		Len:  de.inputLen,
   740  	}
   741  
   742  	if len(de.inputValidity) != 0 {
   743  		out.Buffers[0].WrapBuffer(ctx.AllocateBitmap(de.inputLen))
   744  	}
   745  
   746  	out.Buffers[1].WrapBuffer(ctx.Allocate(int(de.inputLen+1) * int(SizeOf[O]())))
   747  	out.Buffers[2].WrapBuffer(ctx.Allocate(totalSize))
   748  }
   749  
   750  func (de *decodeBinary[R, O]) ExpandAllRuns(out *exec.ExecResult) int64 {
   751  	var (
   752  		writeOffset, valueWriteOffset int64
   753  		runLength, numValid           int64
   754  		outputOffsets                 = exec.GetSpanOffsets[O](out, 1)
   755  		outputValues                  = out.Buffers[2].Buf
   756  		prevRunEnd                    = R(de.inputLogicalOffset)
   757  		hasValidity                   = len(de.inputValidity) != 0 && len(out.Buffers[0].Buf) != 0
   758  	)
   759  
   760  	for i, runEnd := range de.inputRunEnds[de.inputPhysicalOffset:] {
   761  		runLength, prevRunEnd = int64(runEnd-prevRunEnd), runEnd
   762  
   763  		// if this run is a null, clear the bits and update writeOffset
   764  		if hasValidity && bitutil.BitIsNotSet(de.inputValidity, int(de.inputValuesOffset+de.inputPhysicalOffset)+i) {
   765  			bitutil.SetBitsTo(out.Buffers[0].Buf, writeOffset, runLength, false)
   766  		} else {
   767  			numValid += runLength
   768  			if hasValidity {
   769  				bitutil.SetBitsTo(out.Buffers[0].Buf, writeOffset, runLength, true)
   770  			}
   771  		}
   772  
   773  		// get the value for this run + where to start writing
   774  		// de.inputOffsets already accounts for inputOffset so we don't
   775  		// need to add it here, we can just use the physicaloffset and that's
   776  		// sufficient to get the correct values.
   777  		var (
   778  			start = de.inputOffsets[de.inputPhysicalOffset+int64(i)]
   779  			end   = de.inputOffsets[de.inputPhysicalOffset+int64(i)+1]
   780  			value = de.inputValues[start:end]
   781  
   782  			outputValueEnd = valueWriteOffset + int64(len(value)*int(runLength))
   783  		)
   784  
   785  		// get the slice of our output buffer we want to fill
   786  		// just incrementally duplicate the bytes until we've filled
   787  		// the slice with runLength copies of the value
   788  		outputSlice := outputValues[valueWriteOffset:outputValueEnd]
   789  		copy(outputSlice, value)
   790  		for j := len(value); j < len(outputSlice); j *= 2 {
   791  			copy(outputSlice[j:], outputSlice[:j])
   792  		}
   793  
   794  		for j := int64(0); j < runLength; j++ {
   795  			outputOffsets[writeOffset+j] = O(valueWriteOffset)
   796  			valueWriteOffset += int64(len(value))
   797  		}
   798  
   799  		writeOffset += runLength
   800  	}
   801  
   802  	outputOffsets[writeOffset] = O(valueWriteOffset)
   803  	return numValid
   804  }
   805  
// decoder is implemented by the type-specific run-end decoders.
// runEndDecodeImpl calls PreallocOutput first and ExpandAllRuns second.
type decoder interface {
	// PreallocOutput sets up the output and allocates its buffers.
	PreallocOutput(*exec.KernelCtx, *exec.ExecResult)
	// ExpandAllRuns fills the preallocated output and returns the number
	// of valid (non-null) values it wrote.
	ExpandAllRuns(*exec.ExecResult) int64
}
   810  
   811  func newDecoder[R RunEndsType](input *exec.ArraySpan) decoder {
   812  	logicalOffset := R(input.Offset)
   813  	runEnds := exec.GetSpanValues[R](&input.Children[0], 1)
   814  	physicalOffset := sort.Search(len(runEnds), func(i int) bool { return runEnds[i] > logicalOffset })
   815  
   816  	switch dt := input.Children[1].Type.(type) {
   817  	case *arrow.BooleanType:
   818  		return &decodeBool[R]{
   819  			inputLen:            input.Len,
   820  			inputOffset:         input.Offset,
   821  			inputValidity:       input.Children[1].Buffers[0].Buf,
   822  			inputValues:         input.Children[1].Buffers[1].Buf,
   823  			inputValueOffset:    input.Children[1].Offset,
   824  			inputPhysicalOffset: int64(physicalOffset),
   825  			inputRunEnds:        runEnds,
   826  		}
   827  	case *arrow.BinaryType, *arrow.StringType:
   828  		return &decodeBinary[R, int32]{
   829  			inputLen:            input.Len,
   830  			inputLogicalOffset:  input.Offset,
   831  			inputRunEnds:        runEnds,
   832  			inputPhysicalOffset: int64(physicalOffset),
   833  			inputValuesOffset:   input.Children[1].Offset,
   834  			inputValidity:       input.Children[1].Buffers[0].Buf,
   835  			inputValues:         input.Children[1].Buffers[2].Buf,
   836  			inputOffsets:        exec.GetSpanOffsets[int32](&input.Children[1], 1),
   837  			valueType:           input.Children[1].Type,
   838  		}
   839  	case *arrow.LargeBinaryType, *arrow.LargeStringType:
   840  		return &decodeBinary[R, int64]{
   841  			inputLen:            input.Len,
   842  			inputLogicalOffset:  input.Offset,
   843  			inputRunEnds:        runEnds,
   844  			inputPhysicalOffset: int64(physicalOffset),
   845  			inputValuesOffset:   input.Children[1].Offset,
   846  			inputValidity:       input.Children[1].Buffers[0].Buf,
   847  			inputValues:         input.Children[1].Buffers[2].Buf,
   848  			inputOffsets:        exec.GetSpanOffsets[int64](&input.Children[1], 1),
   849  			valueType:           input.Children[1].Type,
   850  		}
   851  	case arrow.FixedWidthDataType:
   852  		return &decodeFixedWidth[R]{
   853  			inputLen:            input.Len,
   854  			inputOffset:         input.Offset,
   855  			inputRunEnds:        runEnds,
   856  			inputPhysicalOffset: int64(physicalOffset),
   857  			inputValidity:       input.Children[1].Buffers[0].Buf,
   858  			inputValues:         input.Children[1].Buffers[1].Buf,
   859  			inputValueOffset:    input.Children[1].Offset,
   860  			valueType:           dt,
   861  		}
   862  	}
   863  
   864  	return nil
   865  }
   866  
   867  func runEndDecodeImpl[R RunEndsType](ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
   868  	inputArr := &batch.Values[0].Array
   869  
   870  	if inputArr.Len == 0 {
   871  		return nil
   872  	}
   873  
   874  	dec := newDecoder[R](inputArr)
   875  	dec.PreallocOutput(ctx, out)
   876  	out.Nulls = inputArr.Len - dec.ExpandAllRuns(out)
   877  	return nil
   878  }
   879  
   880  func runEndDecodeExec(ctx *exec.KernelCtx, batch *exec.ExecSpan, out *exec.ExecResult) error {
   881  	reeType := batch.Values[0].Type().(*arrow.RunEndEncodedType)
   882  	switch reeType.RunEnds().ID() {
   883  	case arrow.INT16:
   884  		return runEndDecodeImpl[int16](ctx, batch, out)
   885  	case arrow.INT32:
   886  		return runEndDecodeImpl[int32](ctx, batch, out)
   887  	case arrow.INT64:
   888  		return runEndDecodeImpl[int64](ctx, batch, out)
   889  	}
   890  
   891  	return fmt.Errorf("%w: bad run end type %s", arrow.ErrInvalid, reeType.RunEnds())
   892  }
   893  
   894  func runEndEncodeOutputTypeResolver(ctx *exec.KernelCtx, inputTypes []arrow.DataType) (arrow.DataType, error) {
   895  	reeType := ctx.State.(RunEndEncodeState).RunEndType
   896  	return arrow.RunEndEncodedOf(reeType, inputTypes[0]), nil
   897  }
   898  
   899  func runEndDecodeOutputTypeResolver(ctx *exec.KernelCtx, inputTypes []arrow.DataType) (arrow.DataType, error) {
   900  	reeType := inputTypes[0].(*arrow.RunEndEncodedType)
   901  	return reeType.Encoded(), nil
   902  }
   903  
   904  func GetRunEndEncodeKernels() (encodeKns, decodeKns []exec.VectorKernel) {
   905  	baseEncode := exec.VectorKernel{
   906  		NullHandling:        exec.NullNoOutput,
   907  		MemAlloc:            exec.MemNoPrealloc,
   908  		CanExecuteChunkWise: true,
   909  		ExecFn:              runEndEncodeExec,
   910  		OutputChunked:       true,
   911  	}
   912  
   913  	baseDecode := exec.VectorKernel{
   914  		NullHandling:        exec.NullNoOutput,
   915  		MemAlloc:            exec.MemNoPrealloc,
   916  		CanExecuteChunkWise: true,
   917  		ExecFn:              runEndDecodeExec,
   918  		OutputChunked:       true,
   919  	}
   920  
   921  	baseEncode.Init = exec.OptionsInit[RunEndEncodeState]
   922  
   923  	encodeKns, decodeKns = make([]exec.VectorKernel, 0), make([]exec.VectorKernel, 0)
   924  	addKernel := func(ty arrow.Type) {
   925  		baseEncode.Signature = &exec.KernelSignature{
   926  			InputTypes: []exec.InputType{exec.NewIDInput(ty)},
   927  			OutType:    exec.NewComputedOutputType(runEndEncodeOutputTypeResolver),
   928  		}
   929  		encodeKns = append(encodeKns, baseEncode)
   930  
   931  		baseDecode.Signature = &exec.KernelSignature{
   932  			InputTypes: []exec.InputType{exec.NewMatchedInput(
   933  				exec.RunEndEncoded(exec.Integer(), exec.SameTypeID(ty)))},
   934  			OutType: exec.NewComputedOutputType(runEndDecodeOutputTypeResolver),
   935  		}
   936  		decodeKns = append(decodeKns, baseDecode)
   937  	}
   938  
   939  	for _, ty := range primitiveTypes {
   940  		addKernel(ty.ID())
   941  	}
   942  	addKernel(arrow.BOOL)
   943  
   944  	nonPrimitiveSupported := []arrow.Type{
   945  		arrow.FLOAT16, arrow.DECIMAL128, arrow.DECIMAL256,
   946  		arrow.TIME32, arrow.TIME64, arrow.TIMESTAMP,
   947  		arrow.INTERVAL_DAY_TIME, arrow.INTERVAL_MONTHS,
   948  		arrow.INTERVAL_MONTH_DAY_NANO,
   949  		arrow.FIXED_SIZE_BINARY,
   950  	}
   951  
   952  	for _, ty := range nonPrimitiveSupported {
   953  		addKernel(ty)
   954  	}
   955  
   956  	return
   957  }