github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/null.go (about)

     1  //go:build go1.18
     2  
     3  package parquet
     4  
     5  import (
     6  	"reflect"
     7  	"unsafe"
     8  
     9  	"github.com/vc42/parquet-go/deprecated"
    10  	"github.com/vc42/parquet-go/internal/bytealg"
    11  	"github.com/vc42/parquet-go/internal/unsafecast"
    12  	"github.com/vc42/parquet-go/sparse"
    13  )
    14  
    15  // nullIndexFunc is the type of functions used to detect null values in rows.
    16  //
    17  // For each row i, bit i%64 of bits[i/64] reports whether the value is null (0)
    18  // or not (1). The implementations in this file only ever set bits, they never
    19  // clear them, so callers presumably pass a zeroed bitmap (verify at call sites).
    20  //
    21  // One bit is written per input row; bits must hold at least rows.Len() bits.
    22  type nullIndexFunc func(bits []uint64, rows sparse.Array)
    23  
    24  func nullIndex[T comparable](bits []uint64, rows sparse.Array) {
    25  	var zero T
    26  	for i := 0; i < rows.Len(); i++ {
    27  		v := *(*T)(rows.Index(i))
    28  		if v != zero {
    29  			x := uint(i) / 64
    30  			y := uint(i) % 64
    31  			bits[x] |= 1 << y
    32  		}
    33  	}
    34  }
    35  
    36  func nullIndexStruct(bits []uint64, rows sparse.Array) {
    37  	bytealg.Broadcast(unsafecast.Slice[byte](bits), 0xFF)
    38  }
    39  
    40  func nullIndexFuncOf(t reflect.Type) nullIndexFunc {
    41  	switch t {
    42  	case reflect.TypeOf(deprecated.Int96{}):
    43  		return nullIndex[deprecated.Int96]
    44  	}
    45  
    46  	switch t.Kind() {
    47  	case reflect.Bool:
    48  		return nullIndexBool
    49  
    50  	case reflect.Int:
    51  		return nullIndexInt
    52  
    53  	case reflect.Int32:
    54  		return nullIndexInt32
    55  
    56  	case reflect.Int64:
    57  		return nullIndexInt64
    58  
    59  	case reflect.Uint:
    60  		return nullIndexUint
    61  
    62  	case reflect.Uint32:
    63  		return nullIndexUint32
    64  
    65  	case reflect.Uint64:
    66  		return nullIndexUint64
    67  
    68  	case reflect.Float32:
    69  		return nullIndexFloat32
    70  
    71  	case reflect.Float64:
    72  		return nullIndexFloat64
    73  
    74  	case reflect.String:
    75  		return nullIndexString
    76  
    77  	case reflect.Slice:
    78  		return nullIndexSlice
    79  
    80  	case reflect.Array:
    81  		if t.Elem().Kind() == reflect.Uint8 {
    82  			switch size := t.Len(); size {
    83  			case 16:
    84  				return nullIndexUint128
    85  			default:
    86  				return nullIndexFuncOfByteArray(size)
    87  			}
    88  		}
    89  
    90  	case reflect.Pointer:
    91  		return nullIndexPointer
    92  
    93  	case reflect.Struct:
    94  		return nullIndexStruct
    95  	}
    96  
    97  	panic("cannot convert Go values of type " + t.String() + " to parquet value")
    98  }
    99  
   100  func nullIndexFuncOfByteArray(n int) nullIndexFunc {
   101  	return func(bits []uint64, rows sparse.Array) {
   102  		for i := 0; i < rows.Len(); i++ {
   103  			p := (*byte)(rows.Index(i))
   104  			b := unsafe.Slice(p, n)
   105  			if !isZero(b) {
   106  				x := uint(i) / 64
   107  				y := uint(i) % 64
   108  				bits[x] |= 1 << y
   109  			}
   110  		}
   111  	}
   112  }
   113  
   114  func isZero(b []byte) bool {
   115  	for _, c := range b {
   116  		if c != 0 {
   117  			return false
   118  		}
   119  	}
   120  	return true
   121  }