github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/page_bounds_test.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"math/rand"
     7  	"reflect"
     8  	"testing"
     9  
    10  	"github.com/segmentio/parquet-go/internal/quick"
    11  )
    12  
    13  var benchmarkBufferSizes = [...]int{
    14  	4 * 1024,
    15  	256 * 1024,
    16  	2048 * 1024,
    17  }
    18  
    19  func forEachBenchmarkBufferSize(b *testing.B, f func(*testing.B, int)) {
    20  	for _, bufferSize := range benchmarkBufferSizes {
    21  		b.Run(fmt.Sprintf("%dKiB", bufferSize/1024), func(b *testing.B) {
    22  			b.SetBytes(int64(bufferSize))
    23  			f(b, bufferSize)
    24  		})
    25  	}
    26  }
    27  
    28  func TestBoundsInt32(t *testing.T) {
    29  	err := quick.Check(func(values []int32) bool {
    30  		min := int32(0)
    31  		max := int32(0)
    32  		if len(values) > 0 {
    33  			min = values[0]
    34  			max = values[0]
    35  			for _, v := range values[1:] {
    36  				if v < min {
    37  					min = v
    38  				}
    39  				if v > max {
    40  					max = v
    41  				}
    42  			}
    43  		}
    44  		minValue, maxValue := boundsInt32(values)
    45  		return min == minValue && max == maxValue
    46  	})
    47  	if err != nil {
    48  		t.Error(err)
    49  	}
    50  }
    51  
    52  func TestBoundsInt64(t *testing.T) {
    53  	err := quick.Check(func(values []int64) bool {
    54  		min := int64(0)
    55  		max := int64(0)
    56  		if len(values) > 0 {
    57  			min = values[0]
    58  			max = values[0]
    59  			for _, v := range values[1:] {
    60  				if v < min {
    61  					min = v
    62  				}
    63  				if v > max {
    64  					max = v
    65  				}
    66  			}
    67  		}
    68  		minValue, maxValue := boundsInt64(values)
    69  		return min == minValue && max == maxValue
    70  	})
    71  	if err != nil {
    72  		t.Error(err)
    73  	}
    74  }
    75  
    76  func TestBoundsUint32(t *testing.T) {
    77  	err := quick.Check(func(values []uint32) bool {
    78  		min := uint32(0)
    79  		max := uint32(0)
    80  		if len(values) > 0 {
    81  			min = values[0]
    82  			max = values[0]
    83  			for _, v := range values[1:] {
    84  				if v < min {
    85  					min = v
    86  				}
    87  				if v > max {
    88  					max = v
    89  				}
    90  			}
    91  		}
    92  		minValue, maxValue := boundsUint32(values)
    93  		return min == minValue && max == maxValue
    94  	})
    95  	if err != nil {
    96  		t.Error(err)
    97  	}
    98  }
    99  
   100  func TestBoundsUint64(t *testing.T) {
   101  	err := quick.Check(func(values []uint64) bool {
   102  		min := uint64(0)
   103  		max := uint64(0)
   104  		if len(values) > 0 {
   105  			min = values[0]
   106  			max = values[0]
   107  			for _, v := range values[1:] {
   108  				if v < min {
   109  					min = v
   110  				}
   111  				if v > max {
   112  					max = v
   113  				}
   114  			}
   115  		}
   116  		minValue, maxValue := boundsUint64(values)
   117  		return min == minValue && max == maxValue
   118  	})
   119  	if err != nil {
   120  		t.Error(err)
   121  	}
   122  }
   123  
   124  func TestBoundsFloat32(t *testing.T) {
   125  	err := quick.Check(func(values []float32) bool {
   126  		min := float32(0)
   127  		max := float32(0)
   128  		if len(values) > 0 {
   129  			min = values[0]
   130  			max = values[0]
   131  			for _, v := range values[1:] {
   132  				if v < min {
   133  					min = v
   134  				}
   135  				if v > max {
   136  					max = v
   137  				}
   138  			}
   139  		}
   140  		minValue, maxValue := boundsFloat32(values)
   141  		return min == minValue && max == maxValue
   142  	})
   143  	if err != nil {
   144  		t.Error(err)
   145  	}
   146  }
   147  
   148  func TestBoundsFloat64(t *testing.T) {
   149  	err := quick.Check(func(values []float64) bool {
   150  		min := float64(0)
   151  		max := float64(0)
   152  		if len(values) > 0 {
   153  			min = values[0]
   154  			max = values[0]
   155  			for _, v := range values[1:] {
   156  				if v < min {
   157  					min = v
   158  				}
   159  				if v > max {
   160  					max = v
   161  				}
   162  			}
   163  		}
   164  		minValue, maxValue := boundsFloat64(values)
   165  		return min == minValue && max == maxValue
   166  	})
   167  	if err != nil {
   168  		t.Error(err)
   169  	}
   170  }
   171  
   172  func TestBE128MinMaxSimilar(t *testing.T) {
   173  	var min [16]byte
   174  
   175  	// Test values:
   176  	//   [1 1 ... 1 1]
   177  	//   [0 1 ... 1 1]
   178  	//   ...
   179  	//   [0 0 ... 0 1]
   180  	//   [0 0 ... 0 0]
   181  	for i := 0; i < 17; i++ {
   182  		var max [16]byte
   183  		for j := i; j < 16; j++ {
   184  			max[j] = 1
   185  		}
   186  		testBE182MinMaxPerm(t, min, max)
   187  	}
   188  
   189  	// Test values:
   190  	//   [0 0 ... 0 0]
   191  	//   [1 0 ... 0 0]
   192  	//   ...
   193  	//   [1 1 ... 1 0]
   194  	//   [1 1 ... 1 1]
   195  	for i := 0; i < 17; i++ {
   196  		var max [16]byte
   197  		for j := 0; j < i; j++ {
   198  			max[j] = 1
   199  		}
   200  		testBE182MinMaxPerm(t, min, max)
   201  	}
   202  }
   203  
   204  func testBE182MinMaxPerm(t *testing.T, min, max [16]byte) {
   205  	testBE128MinMax(t, min[:], max[:], [][16]byte{min, max})
   206  	testBE128MinMax(t, min[:], max[:], [][16]byte{max, min})
   207  }
   208  
   209  func testBE128MinMax(t *testing.T, min, max []byte, data [][16]byte) {
   210  	bmin := minBE128(data)
   211  	if !reflect.DeepEqual(bmin, min[:]) {
   212  		t.Errorf("unexpected min value\nexpected %v\n     got %v", min, bmin)
   213  	}
   214  
   215  	bmax := maxBE128(data)
   216  	if !reflect.DeepEqual(bmax, max[:]) {
   217  		t.Errorf("unexpected max value\nexpected %v\n     got %v", max, bmax)
   218  	}
   219  }
   220  
   221  func TestBoundsBE128(t *testing.T) {
   222  	err := quick.Check(func(values [][16]byte) bool {
   223  		min := [16]byte{}
   224  		max := [16]byte{}
   225  		if len(values) > 0 {
   226  			min = values[0]
   227  			max = values[0]
   228  			for _, v := range values[1:] {
   229  				if bytes.Compare(v[:], min[:]) < 0 {
   230  					min = v
   231  				}
   232  				if bytes.Compare(v[:], max[:]) > 0 {
   233  					max = v
   234  				}
   235  			}
   236  		}
   237  		minValue, maxValue := boundsBE128(values)
   238  		return (len(values) == 0 && minValue == nil && maxValue == nil) ||
   239  			(bytes.Equal(min[:], minValue) && bytes.Equal(max[:], maxValue))
   240  	})
   241  	if err != nil {
   242  		t.Error(err)
   243  	}
   244  }
   245  
   246  func TestBoundsFixedLenByteArray(t *testing.T) {
   247  	err := quick.Check(func(values []byte) bool {
   248  		min := [1]byte{}
   249  		max := [1]byte{}
   250  		if len(values) > 0 {
   251  			min[0] = values[0]
   252  			max[0] = values[0]
   253  			for _, v := range values[1:] {
   254  				if v < min[0] {
   255  					min[0] = v
   256  				}
   257  				if v > max[0] {
   258  					max[0] = v
   259  				}
   260  			}
   261  		}
   262  		minValue, maxValue := boundsFixedLenByteArray(values, 1)
   263  		return (len(values) == 0 && minValue == nil && maxValue == nil) ||
   264  			(bytes.Equal(min[:], minValue) && bytes.Equal(max[:], maxValue))
   265  	})
   266  	if err != nil {
   267  		t.Error(err)
   268  	}
   269  }
   270  
   271  func BenchmarkBoundsInt32(b *testing.B) {
   272  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   273  		values := make([]int32, bufferSize/4)
   274  		prng := rand.New(rand.NewSource(1))
   275  		for i := range values {
   276  			values[i] = prng.Int31()
   277  		}
   278  		for i := 0; i < b.N; i++ {
   279  			boundsInt32(values)
   280  		}
   281  	})
   282  }
   283  
   284  func BenchmarkBoundsInt64(b *testing.B) {
   285  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   286  		values := make([]int64, bufferSize/8)
   287  		prng := rand.New(rand.NewSource(1))
   288  		for i := range values {
   289  			values[i] = prng.Int63()
   290  		}
   291  		for i := 0; i < b.N; i++ {
   292  			boundsInt64(values)
   293  		}
   294  	})
   295  }
   296  
   297  func BenchmarkBoundsUint32(b *testing.B) {
   298  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   299  		values := make([]uint32, bufferSize/4)
   300  		prng := rand.New(rand.NewSource(1))
   301  		for i := range values {
   302  			values[i] = prng.Uint32()
   303  		}
   304  		for i := 0; i < b.N; i++ {
   305  			boundsUint32(values)
   306  		}
   307  	})
   308  }
   309  
   310  func BenchmarkBoundsUint64(b *testing.B) {
   311  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   312  		values := make([]uint64, bufferSize/8)
   313  		prng := rand.New(rand.NewSource(1))
   314  		for i := range values {
   315  			values[i] = prng.Uint64()
   316  		}
   317  		for i := 0; i < b.N; i++ {
   318  			boundsUint64(values)
   319  		}
   320  	})
   321  }
   322  
   323  func BenchmarkBoundsFloat32(b *testing.B) {
   324  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   325  		values := make([]float32, bufferSize/4)
   326  		prng := rand.New(rand.NewSource(1))
   327  		for i := range values {
   328  			values[i] = prng.Float32()
   329  		}
   330  		for i := 0; i < b.N; i++ {
   331  			boundsFloat32(values)
   332  		}
   333  	})
   334  }
   335  
   336  func BenchmarkBoundsFloat64(b *testing.B) {
   337  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   338  		values := make([]float64, bufferSize/8)
   339  		prng := rand.New(rand.NewSource(1))
   340  		for i := range values {
   341  			values[i] = prng.Float64()
   342  		}
   343  		for i := 0; i < b.N; i++ {
   344  			boundsFloat64(values)
   345  		}
   346  	})
   347  }
   348  
   349  func BenchmarkBoundsBE128(b *testing.B) {
   350  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   351  		values := make([][16]byte, bufferSize)
   352  		prng := rand.New(rand.NewSource(1))
   353  		for i := range values {
   354  			prng.Read(values[i][:])
   355  		}
   356  		for i := 0; i < b.N; i++ {
   357  			boundsBE128(values)
   358  		}
   359  	})
   360  }
   361  
   362  func BenchmarkBoundsFixedLenByteArray(b *testing.B) {
   363  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   364  		values := make([]byte, bufferSize)
   365  		prng := rand.New(rand.NewSource(1))
   366  		prng.Read(values)
   367  		for i := 0; i < b.N; i++ {
   368  			boundsFixedLenByteArray(values, 32)
   369  		}
   370  	})
   371  }