github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/encoding/fuzz/fuzz.go (about) 1 // Package fuzz contains functions to help fuzz test parquet encodings. 2 package fuzz 3 4 import ( 5 "math/rand" 6 "testing" 7 "unsafe" 8 9 "github.com/parquet-go/parquet-go/encoding" 10 "github.com/parquet-go/parquet-go/internal/unsafecast" 11 ) 12 13 func EncodeBoolean(f *testing.F, e encoding.Encoding) { 14 encode(f, e, 15 encoding.Encoding.EncodeBoolean, 16 encoding.Encoding.DecodeBoolean, 17 generate[byte], 18 ) 19 } 20 21 func EncodeLevels(f *testing.F, e encoding.Encoding) { 22 encode(f, e, 23 encoding.Encoding.EncodeLevels, 24 encoding.Encoding.DecodeLevels, 25 generate[byte], 26 ) 27 } 28 29 func EncodeInt32(f *testing.F, e encoding.Encoding) { 30 encode(f, e, 31 encoding.Encoding.EncodeInt32, 32 encoding.Encoding.DecodeInt32, 33 generate[int32], 34 ) 35 } 36 37 func EncodeInt64(f *testing.F, e encoding.Encoding) { 38 encode(f, e, 39 encoding.Encoding.EncodeInt64, 40 encoding.Encoding.DecodeInt64, 41 generate[int64], 42 ) 43 } 44 45 func EncodeFloat(f *testing.F, e encoding.Encoding) { 46 encode(f, e, 47 encoding.Encoding.EncodeFloat, 48 encoding.Encoding.DecodeFloat, 49 generate[float32], 50 ) 51 } 52 53 func EncodeDouble(f *testing.F, e encoding.Encoding) { 54 encode(f, e, 55 encoding.Encoding.EncodeDouble, 56 encoding.Encoding.DecodeDouble, 57 generate[float64], 58 ) 59 } 60 61 func EncodeByteArray(f *testing.F, e encoding.Encoding) { 62 encode(f, e, 63 func(enc encoding.Encoding, dst []byte, src []string) ([]byte, error) { 64 size := 0 65 for _, s := range src { 66 size += len(s) 67 } 68 69 offsets := make([]uint32, 0, len(src)+1) 70 values := make([]byte, 0, size) 71 72 for _, s := range src { 73 offsets = append(offsets, uint32(len(values))) 74 values = append(values, s...) 75 } 76 77 offsets = append(offsets, uint32(len(values))) 78 return enc.EncodeByteArray(dst, values, offsets) 79 }, 80 81 func(enc encoding.Encoding, dst []string, src []byte) ([]string, error) { 82 dst = dst[:0] 83 84 values, offsets, err := enc.DecodeByteArray(nil, src, nil) 85 if err != nil { 86 return dst, err 87 } 88 89 if len(offsets) > 0 { 90 baseOffset := offsets[0] 91 92 for _, endOffset := range offsets[1:] { 93 dst = append(dst, unsafecast.BytesToString(values[baseOffset:endOffset])) 94 baseOffset = endOffset 95 } 96 } 97 98 return dst, nil 99 }, 100 101 func(dst []string, src []byte, prng *rand.Rand) []string { 102 limit := len(src)/10 + 1 103 104 for i := 0; i < len(src); { 105 n := prng.Intn(limit) + 1 106 r := len(src) - i 107 if n > r { 108 n = r 109 } 110 dst = append(dst, unsafecast.BytesToString(src[i:i+n])) 111 i += n 112 } 113 114 return dst 115 }, 116 ) 117 } 118 119 type encodingFunc[T comparable] func(encoding.Encoding, []byte, []T) ([]byte, error) 120 121 type decodingFunc[T comparable] func(encoding.Encoding, []T, []byte) ([]T, error) 122 123 type generateFunc[T comparable] func(dst []T, src []byte, prng *rand.Rand) []T 124 125 func encode[T comparable](f *testing.F, e encoding.Encoding, encode encodingFunc[T], decode decodingFunc[T], generate generateFunc[T]) { 126 const bufferSize = 64 * 1024 127 var zero T 128 var err error 129 var buf = make([]T, bufferSize/unsafe.Sizeof(zero)) 130 var src = make([]T, bufferSize/unsafe.Sizeof(zero)) 131 var dst = make([]byte, bufferSize) 132 var prng = rand.New(rand.NewSource(0)) 133 134 f.Fuzz(func(t *testing.T, input []byte, seed int64) { 135 prng.Seed(seed) 136 src = generate(src[:0], input, prng) 137 138 dst, err = encode(e, dst, src) 139 if err != nil { 140 t.Error(err) 141 return 142 } 143 144 buf, err = decode(e, buf, dst) 145 if err != nil { 146 t.Error(err) 147 return 148 } 149 150 if !equal(buf, src) { 151 t.Error("decoded output does not match the original input") 152 return 153 } 154 }) 155 } 156 157 func equal[T comparable](a, b []T) bool { 158 if len(a) != len(b) { 159 return false 160 } 161 for i := range a { 162 if a[i] != b[i] { 163 return false 164 } 165 } 166 return true 167 } 168 169 func generate[T comparable](dst []T, src []byte, prng *rand.Rand) []T { 170 return append(dst[:0], unsafecast.Slice[T](src)...) 171 }