github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/null.go (about)

     1  package parquet
     2  
     3  import (
     4  	"reflect"
     5  	"unsafe"
     6  
     7  	"github.com/parquet-go/parquet-go/deprecated"
     8  	"github.com/parquet-go/parquet-go/internal/bytealg"
     9  	"github.com/parquet-go/parquet-go/internal/unsafecast"
    10  	"github.com/parquet-go/parquet-go/sparse"
    11  )
    12  
// nullIndexFunc is the type of functions used to detect null values in rows.
//
// For each value of the rows array, the bitmap passed as first argument is
// populated to indicate whether the value was null (0) or not (1).
//
// The function writes one bit to the output buffer for each row in the input.
// The row-scanning implementations in this file store row i in bit i%64 of
// bits[i/64], so the buffer must hold at least one uint64 word per 64 rows
// (rounded up).
//
// NOTE(review): the row-scanning implementations visible here only OR bits in
// for non-null rows and never clear bits for null rows, so callers presumably
// need to pass a zeroed buffer; confirm against the call sites.
type nullIndexFunc func(bits []uint64, rows sparse.Array)
    21  
    22  func nullIndex[T comparable](bits []uint64, rows sparse.Array) {
    23  	var zero T
    24  	for i := 0; i < rows.Len(); i++ {
    25  		v := *(*T)(rows.Index(i))
    26  		if v != zero {
    27  			x := uint(i) / 64
    28  			y := uint(i) % 64
    29  			bits[x] |= 1 << y
    30  		}
    31  	}
    32  }
    33  
    34  func nullIndexStruct(bits []uint64, rows sparse.Array) {
    35  	bytealg.Broadcast(unsafecast.Slice[byte](bits), 0xFF)
    36  }
    37  
    38  func nullIndexFuncOf(t reflect.Type) nullIndexFunc {
    39  	switch t {
    40  	case reflect.TypeOf(deprecated.Int96{}):
    41  		return nullIndex[deprecated.Int96]
    42  	}
    43  
    44  	switch t.Kind() {
    45  	case reflect.Bool:
    46  		return nullIndexBool
    47  
    48  	case reflect.Int:
    49  		return nullIndexInt
    50  
    51  	case reflect.Int32:
    52  		return nullIndexInt32
    53  
    54  	case reflect.Int64:
    55  		return nullIndexInt64
    56  
    57  	case reflect.Uint:
    58  		return nullIndexUint
    59  
    60  	case reflect.Uint32:
    61  		return nullIndexUint32
    62  
    63  	case reflect.Uint64:
    64  		return nullIndexUint64
    65  
    66  	case reflect.Float32:
    67  		return nullIndexFloat32
    68  
    69  	case reflect.Float64:
    70  		return nullIndexFloat64
    71  
    72  	case reflect.String:
    73  		return nullIndexString
    74  
    75  	case reflect.Slice:
    76  		return nullIndexSlice
    77  
    78  	case reflect.Map:
    79  		return nullIndexPointer
    80  
    81  	case reflect.Array:
    82  		if t.Elem().Kind() == reflect.Uint8 {
    83  			switch size := t.Len(); size {
    84  			case 16:
    85  				return nullIndexUint128
    86  			default:
    87  				return nullIndexFuncOfByteArray(size)
    88  			}
    89  		}
    90  
    91  	case reflect.Pointer:
    92  		return nullIndexPointer
    93  
    94  	case reflect.Struct:
    95  		return nullIndexStruct
    96  	}
    97  
    98  	panic("cannot convert Go values of type " + typeNameOf(t) + " to parquet value")
    99  }
   100  
   101  func nullIndexFuncOfByteArray(n int) nullIndexFunc {
   102  	return func(bits []uint64, rows sparse.Array) {
   103  		for i := 0; i < rows.Len(); i++ {
   104  			p := (*byte)(rows.Index(i))
   105  			b := unsafe.Slice(p, n)
   106  			if !isZero(b) {
   107  				x := uint(i) / 64
   108  				y := uint(i) % 64
   109  				bits[x] |= 1 << y
   110  			}
   111  		}
   112  	}
   113  }