github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/null.go (about)

     1  //go:build go1.18
     2  
     3  package parquet
     4  
     5  import (
     6  	"reflect"
     7  	"unsafe"
     8  
     9  	"github.com/segmentio/parquet-go/deprecated"
    10  	"github.com/segmentio/parquet-go/internal/bytealg"
    11  	"github.com/segmentio/parquet-go/internal/unsafecast"
    12  	"github.com/segmentio/parquet-go/sparse"
    13  )
    14  
// nullIndexFunc is the type of functions used to detect null values in rows.
//
// For each value of the rows array, the bitmap passed as first argument is
// populated to indicate whether the value was null (0) or not (1). The
// implementations in this file store the bit for row i at position i%64 of
// bits[i/64].
//
// The function writes one bit to the output buffer for each row in the input,
// so the buffer must be sized accordingly (at least one bit per row).
//
// The implementations visible in this file only ever set bits and never clear
// them, which means a null row leaves its bit untouched; the buffer is
// presumably expected to be zeroed by the caller (confirm against call sites).
type nullIndexFunc func(bits []uint64, rows sparse.Array)
    23  
    24  func nullIndex[T comparable](bits []uint64, rows sparse.Array) {
    25  	var zero T
    26  	for i := 0; i < rows.Len(); i++ {
    27  		v := *(*T)(rows.Index(i))
    28  		if v != zero {
    29  			x := uint(i) / 64
    30  			y := uint(i) % 64
    31  			bits[x] |= 1 << y
    32  		}
    33  	}
    34  }
    35  
    36  func nullIndexStruct(bits []uint64, rows sparse.Array) {
    37  	bytealg.Broadcast(unsafecast.Slice[byte](bits), 0xFF)
    38  }
    39  
    40  func nullIndexFuncOf(t reflect.Type) nullIndexFunc {
    41  	switch t {
    42  	case reflect.TypeOf(deprecated.Int96{}):
    43  		return nullIndex[deprecated.Int96]
    44  	}
    45  
    46  	switch t.Kind() {
    47  	case reflect.Bool:
    48  		return nullIndexBool
    49  
    50  	case reflect.Int:
    51  		return nullIndexInt
    52  
    53  	case reflect.Int32:
    54  		return nullIndexInt32
    55  
    56  	case reflect.Int64:
    57  		return nullIndexInt64
    58  
    59  	case reflect.Uint:
    60  		return nullIndexUint
    61  
    62  	case reflect.Uint32:
    63  		return nullIndexUint32
    64  
    65  	case reflect.Uint64:
    66  		return nullIndexUint64
    67  
    68  	case reflect.Float32:
    69  		return nullIndexFloat32
    70  
    71  	case reflect.Float64:
    72  		return nullIndexFloat64
    73  
    74  	case reflect.String:
    75  		return nullIndexString
    76  
    77  	case reflect.Slice:
    78  		return nullIndexSlice
    79  
    80  	case reflect.Map:
    81  		return nullIndexPointer
    82  
    83  	case reflect.Array:
    84  		if t.Elem().Kind() == reflect.Uint8 {
    85  			switch size := t.Len(); size {
    86  			case 16:
    87  				return nullIndexUint128
    88  			default:
    89  				return nullIndexFuncOfByteArray(size)
    90  			}
    91  		}
    92  
    93  	case reflect.Pointer:
    94  		return nullIndexPointer
    95  
    96  	case reflect.Struct:
    97  		return nullIndexStruct
    98  	}
    99  
   100  	panic("cannot convert Go values of type " + typeNameOf(t) + " to parquet value")
   101  }
   102  
   103  func nullIndexFuncOfByteArray(n int) nullIndexFunc {
   104  	return func(bits []uint64, rows sparse.Array) {
   105  		for i := 0; i < rows.Len(); i++ {
   106  			p := (*byte)(rows.Index(i))
   107  			b := unsafe.Slice(p, n)
   108  			if !isZero(b) {
   109  				x := uint(i) / 64
   110  				y := uint(i) % 64
   111  				bits[x] |= 1 << y
   112  			}
   113  		}
   114  	}
   115  }