github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/encoding/fuzz/fuzz.go (about)

     1  // Package fuzz contains functions to help fuzz test parquet encodings.
     2  package fuzz
     3  
     4  import (
     5  	"math/rand"
     6  	"testing"
     7  	"unsafe"
     8  
     9  	"github.com/parquet-go/parquet-go/encoding"
    10  	"github.com/parquet-go/parquet-go/internal/unsafecast"
    11  )
    12  
    13  func EncodeBoolean(f *testing.F, e encoding.Encoding) {
    14  	encode(f, e,
    15  		encoding.Encoding.EncodeBoolean,
    16  		encoding.Encoding.DecodeBoolean,
    17  		generate[byte],
    18  	)
    19  }
    20  
    21  func EncodeLevels(f *testing.F, e encoding.Encoding) {
    22  	encode(f, e,
    23  		encoding.Encoding.EncodeLevels,
    24  		encoding.Encoding.DecodeLevels,
    25  		generate[byte],
    26  	)
    27  }
    28  
    29  func EncodeInt32(f *testing.F, e encoding.Encoding) {
    30  	encode(f, e,
    31  		encoding.Encoding.EncodeInt32,
    32  		encoding.Encoding.DecodeInt32,
    33  		generate[int32],
    34  	)
    35  }
    36  
    37  func EncodeInt64(f *testing.F, e encoding.Encoding) {
    38  	encode(f, e,
    39  		encoding.Encoding.EncodeInt64,
    40  		encoding.Encoding.DecodeInt64,
    41  		generate[int64],
    42  	)
    43  }
    44  
    45  func EncodeFloat(f *testing.F, e encoding.Encoding) {
    46  	encode(f, e,
    47  		encoding.Encoding.EncodeFloat,
    48  		encoding.Encoding.DecodeFloat,
    49  		generate[float32],
    50  	)
    51  }
    52  
    53  func EncodeDouble(f *testing.F, e encoding.Encoding) {
    54  	encode(f, e,
    55  		encoding.Encoding.EncodeDouble,
    56  		encoding.Encoding.DecodeDouble,
    57  		generate[float64],
    58  	)
    59  }
    60  
    61  func EncodeByteArray(f *testing.F, e encoding.Encoding) {
    62  	encode(f, e,
    63  		func(enc encoding.Encoding, dst []byte, src []string) ([]byte, error) {
    64  			size := 0
    65  			for _, s := range src {
    66  				size += len(s)
    67  			}
    68  
    69  			offsets := make([]uint32, 0, len(src)+1)
    70  			values := make([]byte, 0, size)
    71  
    72  			for _, s := range src {
    73  				offsets = append(offsets, uint32(len(values)))
    74  				values = append(values, s...)
    75  			}
    76  
    77  			offsets = append(offsets, uint32(len(values)))
    78  			return enc.EncodeByteArray(dst, values, offsets)
    79  		},
    80  
    81  		func(enc encoding.Encoding, dst []string, src []byte) ([]string, error) {
    82  			dst = dst[:0]
    83  
    84  			values, offsets, err := enc.DecodeByteArray(nil, src, nil)
    85  			if err != nil {
    86  				return dst, err
    87  			}
    88  
    89  			if len(offsets) > 0 {
    90  				baseOffset := offsets[0]
    91  
    92  				for _, endOffset := range offsets[1:] {
    93  					dst = append(dst, unsafecast.BytesToString(values[baseOffset:endOffset]))
    94  					baseOffset = endOffset
    95  				}
    96  			}
    97  
    98  			return dst, nil
    99  		},
   100  
   101  		func(dst []string, src []byte, prng *rand.Rand) []string {
   102  			limit := len(src)/10 + 1
   103  
   104  			for i := 0; i < len(src); {
   105  				n := prng.Intn(limit) + 1
   106  				r := len(src) - i
   107  				if n > r {
   108  					n = r
   109  				}
   110  				dst = append(dst, unsafecast.BytesToString(src[i:i+n]))
   111  				i += n
   112  			}
   113  
   114  			return dst
   115  		},
   116  	)
   117  }
   118  
   119  type encodingFunc[T comparable] func(encoding.Encoding, []byte, []T) ([]byte, error)
   120  
   121  type decodingFunc[T comparable] func(encoding.Encoding, []T, []byte) ([]T, error)
   122  
   123  type generateFunc[T comparable] func(dst []T, src []byte, prng *rand.Rand) []T
   124  
   125  func encode[T comparable](f *testing.F, e encoding.Encoding, encode encodingFunc[T], decode decodingFunc[T], generate generateFunc[T]) {
   126  	const bufferSize = 64 * 1024
   127  	var zero T
   128  	var err error
   129  	var buf = make([]T, bufferSize/unsafe.Sizeof(zero))
   130  	var src = make([]T, bufferSize/unsafe.Sizeof(zero))
   131  	var dst = make([]byte, bufferSize)
   132  	var prng = rand.New(rand.NewSource(0))
   133  
   134  	f.Fuzz(func(t *testing.T, input []byte, seed int64) {
   135  		prng.Seed(seed)
   136  		src = generate(src[:0], input, prng)
   137  
   138  		dst, err = encode(e, dst, src)
   139  		if err != nil {
   140  			t.Error(err)
   141  			return
   142  		}
   143  
   144  		buf, err = decode(e, buf, dst)
   145  		if err != nil {
   146  			t.Error(err)
   147  			return
   148  		}
   149  
   150  		if !equal(buf, src) {
   151  			t.Error("decoded output does not match the original input")
   152  			return
   153  		}
   154  	})
   155  }
   156  
   157  func equal[T comparable](a, b []T) bool {
   158  	if len(a) != len(b) {
   159  		return false
   160  	}
   161  	for i := range a {
   162  		if a[i] != b[i] {
   163  			return false
   164  		}
   165  	}
   166  	return true
   167  }
   168  
   169  func generate[T comparable](dst []T, src []byte, prng *rand.Rand) []T {
   170  	return append(dst[:0], unsafecast.Slice[T](src)...)
   171  }