github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/order_test.go (about)

     1  package parquet
     2  
     3  import (
     4  	"bytes"
     5  	"sort"
     6  	"testing"
     7  
     8  	"github.com/segmentio/parquet-go/internal/quick"
     9  )
    10  
    11  type boolOrder []bool
    12  
    13  func (v boolOrder) Len() int           { return len(v) }
    14  func (v boolOrder) Less(i, j int) bool { return !v[i] && v[j] }
    15  func (v boolOrder) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
    16  
    17  type int32Order []int32
    18  
    19  func (v int32Order) Len() int           { return len(v) }
    20  func (v int32Order) Less(i, j int) bool { return v[i] < v[j] }
    21  func (v int32Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
    22  
    23  type int64Order []int64
    24  
    25  func (v int64Order) Len() int           { return len(v) }
    26  func (v int64Order) Less(i, j int) bool { return v[i] < v[j] }
    27  func (v int64Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
    28  
    29  type uint32Order []uint32
    30  
    31  func (v uint32Order) Len() int           { return len(v) }
    32  func (v uint32Order) Less(i, j int) bool { return v[i] < v[j] }
    33  func (v uint32Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
    34  
    35  type uint64Order []uint64
    36  
    37  func (v uint64Order) Len() int           { return len(v) }
    38  func (v uint64Order) Less(i, j int) bool { return v[i] < v[j] }
    39  func (v uint64Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
    40  
    41  type float32Order []float32
    42  
    43  func (v float32Order) Len() int           { return len(v) }
    44  func (v float32Order) Less(i, j int) bool { return v[i] < v[j] }
    45  func (v float32Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
    46  
    47  type float64Order []float64
    48  
    49  func (v float64Order) Len() int           { return len(v) }
    50  func (v float64Order) Less(i, j int) bool { return v[i] < v[j] }
    51  func (v float64Order) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
    52  
    53  type bytesOrder [][]byte
    54  
    55  func (v bytesOrder) Len() int           { return len(v) }
    56  func (v bytesOrder) Less(i, j int) bool { return bytes.Compare(v[i], v[j]) < 0 }
    57  func (v bytesOrder) Swap(i, j int)      { v[i], v[j] = v[j], v[i] }
    58  
    59  func orderingName(ordering int) string {
    60  	switch {
    61  	case isAscending(ordering):
    62  		return "ascending"
    63  	case isDescending(ordering):
    64  		return "descending"
    65  	default:
    66  		return "undefined"
    67  	}
    68  }
    69  
    70  func isAscending(ordering int) bool {
    71  	return ordering > 0
    72  }
    73  
    74  func isDescending(ordering int) bool {
    75  	return ordering < 0
    76  }
    77  
    78  func isUndefined(ordering int) bool {
    79  	return ordering == 0
    80  }
    81  
    82  func isOrdered(set sort.Interface) bool {
    83  	return set.Len() > 1 && sort.IsSorted(set)
    84  }
    85  
    86  func checkOrdering(t *testing.T, set sort.Interface, ordering int) bool {
    87  	t.Helper()
    88  	switch {
    89  	case isOrdered(set):
    90  		if !isAscending(ordering) {
    91  			t.Errorf("got=%s want=ascending", orderingName(ordering))
    92  			return false
    93  		}
    94  	case isOrdered(sort.Reverse(set)):
    95  		if !isDescending(ordering) {
    96  			t.Errorf("got=%s want=descending", orderingName(ordering))
    97  			return false
    98  		}
    99  	default:
   100  		if !isUndefined(ordering) {
   101  			t.Errorf("got=%s want=undefined", orderingName(ordering))
   102  			return false
   103  		}
   104  	}
   105  	return true
   106  }
   107  
   108  func TestOrderOfBool(t *testing.T) {
   109  	check := func(values []bool) bool {
   110  		return checkOrdering(t, boolOrder(values), orderOfBool(values))
   111  	}
   112  	err := quick.Check(func(values []bool) bool {
   113  		if !check(values) {
   114  			return false
   115  		}
   116  		sort.Sort(boolOrder(values))
   117  		if !check(values) {
   118  			return false
   119  		}
   120  		sort.Sort(sort.Reverse(boolOrder(values)))
   121  		if !check(values) {
   122  			return false
   123  		}
   124  		return true
   125  	})
   126  	if err != nil {
   127  		t.Error(err)
   128  	}
   129  }
   130  
   131  func TestOrderOfInt32(t *testing.T) {
   132  	check := func(values []int32) bool {
   133  		return checkOrdering(t, int32Order(values), orderOfInt32(values))
   134  	}
   135  	err := quick.Check(func(values []int32) bool {
   136  		if !check(values) {
   137  			return false
   138  		}
   139  		sort.Sort(int32Order(values))
   140  		if !check(values) {
   141  			return false
   142  		}
   143  		sort.Sort(sort.Reverse(int32Order(values)))
   144  		if !check(values) {
   145  			return false
   146  		}
   147  		return true
   148  	})
   149  	if err != nil {
   150  		t.Error(err)
   151  	}
   152  
   153  	// This extra test validates that out-of-order values at 64 byte boundaries
   154  	// are properly detected; it tests corner cases of the vectorized code path
   155  	// which works on 64 bytes per loop iteration.
   156  	values := []int32{
   157  		0, 1, 2, 3, 4, 5, 6, 7,
   158  		8, 9, 10, 11, 12, 13, 14, 15,
   159  		// 15 > 14, the algorithm must detect that the values are not ordered.
   160  		14, 17, 18, 19, 20, 21, 22, 23,
   161  		24, 25, 26, 27, 28, 29, 30, 31,
   162  	}
   163  
   164  	if !check(values) {
   165  		t.Error("failed due to not checking the connection between sequences of 16 elements")
   166  	}
   167  }
   168  
   169  func TestOrderOfInt64(t *testing.T) {
   170  	check := func(values []int64) bool {
   171  		return checkOrdering(t, int64Order(values), orderOfInt64(values))
   172  	}
   173  	err := quick.Check(func(values []int64) bool {
   174  		if !check(values) {
   175  			return false
   176  		}
   177  		sort.Sort(int64Order(values))
   178  		if !check(values) {
   179  			return false
   180  		}
   181  		sort.Sort(sort.Reverse(int64Order(values)))
   182  		if !check(values) {
   183  			return false
   184  		}
   185  		return true
   186  	})
   187  	if err != nil {
   188  		t.Error(err)
   189  	}
   190  
   191  	values := []int64{
   192  		0, 1, 2, 3, 4, 5, 6, 7,
   193  		6, 9, 10, 11, 12, 13, 14, 15,
   194  		14, 17, 18, 19, 20, 21, 22, 23,
   195  		24, 25, 26, 27, 28, 29, 30, 31,
   196  	}
   197  
   198  	if !check(values) {
   199  		t.Error("failed due to not checking the connection between sequences of 8 elements")
   200  	}
   201  }
   202  
   203  func TestOrderOfUint32(t *testing.T) {
   204  	check := func(values []uint32) bool {
   205  		return checkOrdering(t, uint32Order(values), orderOfUint32(values))
   206  	}
   207  	err := quick.Check(func(values []uint32) bool {
   208  		if !check(values) {
   209  			return false
   210  		}
   211  		sort.Sort(uint32Order(values))
   212  		if !check(values) {
   213  			return false
   214  		}
   215  		sort.Sort(sort.Reverse(uint32Order(values)))
   216  		if !check(values) {
   217  			return false
   218  		}
   219  		return true
   220  	})
   221  	if err != nil {
   222  		t.Error(err)
   223  	}
   224  
   225  	values := []uint32{
   226  		0, 1, 2, 3, 4, 5, 6, 7,
   227  		8, 9, 10, 11, 12, 13, 14, 15,
   228  		14, 17, 18, 19, 20, 21, 22, 23,
   229  		24, 25, 26, 27, 28, 29, 30, 31,
   230  	}
   231  
   232  	if !check(values) {
   233  		t.Error("failed due to not checking the connection between sequences of 16 elements")
   234  	}
   235  }
   236  
   237  func TestOrderOfUint64(t *testing.T) {
   238  	check := func(values []uint64) bool {
   239  		return checkOrdering(t, uint64Order(values), orderOfUint64(values))
   240  	}
   241  	err := quick.Check(func(values []uint64) bool {
   242  		if !check(values) {
   243  			return false
   244  		}
   245  		sort.Sort(uint64Order(values))
   246  		if !check(values) {
   247  			return false
   248  		}
   249  		sort.Sort(sort.Reverse(uint64Order(values)))
   250  		if !check(values) {
   251  			return false
   252  		}
   253  		return true
   254  	})
   255  	if err != nil {
   256  		t.Error(err)
   257  	}
   258  
   259  	values := []uint64{
   260  		0, 1, 2, 3, 4, 5, 6, 7,
   261  		6, 9, 10, 11, 12, 13, 14, 15,
   262  		14, 17, 18, 19, 20, 21, 22, 23,
   263  		24, 25, 26, 27, 28, 29, 30, 31,
   264  	}
   265  
   266  	if !check(values) {
   267  		t.Error("failed due to not checking the connection between sequences of 8 elements")
   268  	}
   269  }
   270  
   271  func TestOrderOfFloat32(t *testing.T) {
   272  	check := func(values []float32) bool {
   273  		return checkOrdering(t, float32Order(values), orderOfFloat32(values))
   274  	}
   275  	err := quick.Check(func(values []float32) bool {
   276  		if !check(values) {
   277  			return false
   278  		}
   279  		sort.Sort(float32Order(values))
   280  		if !check(values) {
   281  			return false
   282  		}
   283  		sort.Sort(sort.Reverse(float32Order(values)))
   284  		if !check(values) {
   285  			return false
   286  		}
   287  		return true
   288  	})
   289  	if err != nil {
   290  		t.Error(err)
   291  	}
   292  
   293  	values := []float32{
   294  		0, 1, 2, 3, 4, 5, 6, 7,
   295  		8, 9, 10, 11, 12, 13, 14, 15,
   296  		14, 17, 18, 19, 20, 21, 22, 23,
   297  		24, 25, 26, 27, 28, 29, 30, 31,
   298  	}
   299  
   300  	if !check(values) {
   301  		t.Error("failed due to not checking the connection between sequences of 16 elements")
   302  	}
   303  }
   304  
   305  func TestOrderOfFloat64(t *testing.T) {
   306  	check := func(values []float64) bool {
   307  		return checkOrdering(t, float64Order(values), orderOfFloat64(values))
   308  	}
   309  	err := quick.Check(func(values []float64) bool {
   310  		if !check(values) {
   311  			return false
   312  		}
   313  		sort.Sort(float64Order(values))
   314  		if !check(values) {
   315  			return false
   316  		}
   317  		sort.Sort(sort.Reverse(float64Order(values)))
   318  		if !check(values) {
   319  			return false
   320  		}
   321  		return true
   322  	})
   323  	if err != nil {
   324  		t.Error(err)
   325  	}
   326  
   327  	values := []float64{
   328  		0, 1, 2, 3, 4, 5, 6, 7,
   329  		6, 9, 10, 11, 12, 13, 14, 15,
   330  		14, 17, 18, 19, 20, 21, 22, 23,
   331  		24, 25, 26, 27, 28, 29, 30, 31,
   332  	}
   333  
   334  	if !check(values) {
   335  		t.Error("failed due to not checking the connection between sequences of 8 elements")
   336  	}
   337  }
   338  
   339  func TestOrderOfBytes(t *testing.T) {
   340  	check := func(values [][]byte) bool {
   341  		return checkOrdering(t, bytesOrder(values), orderOfBytes(values))
   342  	}
   343  	err := quick.Check(func(values [][16]byte) bool {
   344  		slices := make([][]byte, len(values))
   345  		for i := range values {
   346  			slices[i] = values[i][:]
   347  		}
   348  		if !check(slices) {
   349  			return false
   350  		}
   351  		sort.Sort(bytesOrder(slices))
   352  		if !check(slices) {
   353  			return false
   354  		}
   355  		sort.Sort(sort.Reverse(bytesOrder(slices)))
   356  		if !check(slices) {
   357  			return false
   358  		}
   359  		return true
   360  	})
   361  	if err != nil {
   362  		t.Error(err)
   363  	}
   364  }
   365  
   366  func BenchmarkOrderOfBool(b *testing.B) {
   367  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   368  		values := make([]bool, bufferSize/1)
   369  		for i := 0; i < b.N; i++ {
   370  			orderOfBool(values)
   371  		}
   372  	})
   373  }
   374  
   375  func BenchmarkOrderOfInt32(b *testing.B) {
   376  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   377  		values := make([]int32, bufferSize/4)
   378  		for i := 0; i < b.N; i++ {
   379  			orderOfInt32(values)
   380  		}
   381  	})
   382  }
   383  
   384  func BenchmarkOrderOfInt64(b *testing.B) {
   385  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   386  		values := make([]int64, bufferSize/8)
   387  		for i := 0; i < b.N; i++ {
   388  			orderOfInt64(values)
   389  		}
   390  	})
   391  }
   392  
   393  func BenchmarkOrderOfUint32(b *testing.B) {
   394  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   395  		values := make([]uint32, bufferSize/4)
   396  		for i := 0; i < b.N; i++ {
   397  			orderOfUint32(values)
   398  		}
   399  	})
   400  }
   401  
   402  func BenchmarkOrderOfUint64(b *testing.B) {
   403  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   404  		values := make([]uint64, bufferSize/8)
   405  		for i := 0; i < b.N; i++ {
   406  			orderOfUint64(values)
   407  		}
   408  	})
   409  }
   410  
   411  func BenchmarkOrderOfFloat32(b *testing.B) {
   412  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   413  		values := make([]float32, bufferSize/4)
   414  		for i := 0; i < b.N; i++ {
   415  			orderOfFloat32(values)
   416  		}
   417  	})
   418  }
   419  
   420  func BenchmarkOrderOfFloat64(b *testing.B) {
   421  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   422  		values := make([]float64, bufferSize/8)
   423  		for i := 0; i < b.N; i++ {
   424  			orderOfFloat64(values)
   425  		}
   426  	})
   427  }
   428  
   429  func BenchmarkOrderOfBytes(b *testing.B) {
   430  	forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) {
   431  		data := make([]byte, bufferSize)
   432  		values := make([][]byte, len(data)/16)
   433  		for i := range values {
   434  			values[i] = data[i*16 : (i+1)*16]
   435  		}
   436  		for i := 0; i < b.N; i++ {
   437  			orderOfBytes(values)
   438  		}
   439  	})
   440  }