github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/order_test.go (about) 1 package parquet 2 3 import ( 4 "bytes" 5 "sort" 6 "testing" 7 8 "github.com/segmentio/parquet-go/internal/quick" 9 ) 10 11 type boolOrder []bool 12 13 func (v boolOrder) Len() int { return len(v) } 14 func (v boolOrder) Less(i, j int) bool { return !v[i] && v[j] } 15 func (v boolOrder) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 16 17 type int32Order []int32 18 19 func (v int32Order) Len() int { return len(v) } 20 func (v int32Order) Less(i, j int) bool { return v[i] < v[j] } 21 func (v int32Order) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 22 23 type int64Order []int64 24 25 func (v int64Order) Len() int { return len(v) } 26 func (v int64Order) Less(i, j int) bool { return v[i] < v[j] } 27 func (v int64Order) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 28 29 type uint32Order []uint32 30 31 func (v uint32Order) Len() int { return len(v) } 32 func (v uint32Order) Less(i, j int) bool { return v[i] < v[j] } 33 func (v uint32Order) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 34 35 type uint64Order []uint64 36 37 func (v uint64Order) Len() int { return len(v) } 38 func (v uint64Order) Less(i, j int) bool { return v[i] < v[j] } 39 func (v uint64Order) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 40 41 type float32Order []float32 42 43 func (v float32Order) Len() int { return len(v) } 44 func (v float32Order) Less(i, j int) bool { return v[i] < v[j] } 45 func (v float32Order) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 46 47 type float64Order []float64 48 49 func (v float64Order) Len() int { return len(v) } 50 func (v float64Order) Less(i, j int) bool { return v[i] < v[j] } 51 func (v float64Order) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 52 53 type bytesOrder [][]byte 54 55 func (v bytesOrder) Len() int { return len(v) } 56 func (v bytesOrder) Less(i, j int) bool { return bytes.Compare(v[i], v[j]) < 0 } 57 func (v bytesOrder) Swap(i, j int) { v[i], v[j] = v[j], v[i] } 58 59 func orderingName(ordering int) string { 60 switch { 61 case isAscending(ordering): 62 return "ascending" 63 case isDescending(ordering): 64 return "descending" 65 default: 66 return "undefined" 67 } 68 } 69 70 func isAscending(ordering int) bool { 71 return ordering > 0 72 } 73 74 func isDescending(ordering int) bool { 75 return ordering < 0 76 } 77 78 func isUndefined(ordering int) bool { 79 return ordering == 0 80 } 81 82 func isOrdered(set sort.Interface) bool { 83 return set.Len() > 1 && sort.IsSorted(set) 84 } 85 86 func checkOrdering(t *testing.T, set sort.Interface, ordering int) bool { 87 t.Helper() 88 switch { 89 case isOrdered(set): 90 if !isAscending(ordering) { 91 t.Errorf("got=%s want=ascending", orderingName(ordering)) 92 return false 93 } 94 case isOrdered(sort.Reverse(set)): 95 if !isDescending(ordering) { 96 t.Errorf("got=%s want=descending", orderingName(ordering)) 97 return false 98 } 99 default: 100 if !isUndefined(ordering) { 101 t.Errorf("got=%s want=undefined", orderingName(ordering)) 102 return false 103 } 104 } 105 return true 106 } 107 108 func TestOrderOfBool(t *testing.T) { 109 check := func(values []bool) bool { 110 return checkOrdering(t, boolOrder(values), orderOfBool(values)) 111 } 112 err := quick.Check(func(values []bool) bool { 113 if !check(values) { 114 return false 115 } 116 sort.Sort(boolOrder(values)) 117 if !check(values) { 118 return false 119 } 120 sort.Sort(sort.Reverse(boolOrder(values))) 121 if !check(values) { 122 return false 123 } 124 return true 125 }) 126 if err != nil { 127 t.Error(err) 128 } 129 } 130 131 func TestOrderOfInt32(t *testing.T) { 132 check := func(values []int32) bool { 133 return checkOrdering(t, int32Order(values), orderOfInt32(values)) 134 } 135 err := quick.Check(func(values []int32) bool { 136 if !check(values) { 137 return false 138 } 139 sort.Sort(int32Order(values)) 140 if !check(values) { 141 return false 142 } 143 sort.Sort(sort.Reverse(int32Order(values))) 144 if !check(values) { 145 return false 146 } 147 return true 148 }) 149 if err != nil { 150 t.Error(err) 151 } 152 153 // This extra test validates that out-of-order values at 64 byte boundaries 154 // are properly detected; it tests corner cases of the vectorized code path 155 // which works on 64 bytes per loop iteration. 156 values := []int32{ 157 0, 1, 2, 3, 4, 5, 6, 7, 158 8, 9, 10, 11, 12, 13, 14, 15, 159 // 15 > 14, the algorithm must detect that the values are not ordered. 160 14, 17, 18, 19, 20, 21, 22, 23, 161 24, 25, 26, 27, 28, 29, 30, 31, 162 } 163 164 if !check(values) { 165 t.Error("failed due to not checking the connection between sequences of 16 elements") 166 } 167 } 168 169 func TestOrderOfInt64(t *testing.T) { 170 check := func(values []int64) bool { 171 return checkOrdering(t, int64Order(values), orderOfInt64(values)) 172 } 173 err := quick.Check(func(values []int64) bool { 174 if !check(values) { 175 return false 176 } 177 sort.Sort(int64Order(values)) 178 if !check(values) { 179 return false 180 } 181 sort.Sort(sort.Reverse(int64Order(values))) 182 if !check(values) { 183 return false 184 } 185 return true 186 }) 187 if err != nil { 188 t.Error(err) 189 } 190 191 values := []int64{ 192 0, 1, 2, 3, 4, 5, 6, 7, 193 6, 9, 10, 11, 12, 13, 14, 15, 194 14, 17, 18, 19, 20, 21, 22, 23, 195 24, 25, 26, 27, 28, 29, 30, 31, 196 } 197 198 if !check(values) { 199 t.Error("failed due to not checking the connection between sequences of 8 elements") 200 } 201 } 202 203 func TestOrderOfUint32(t *testing.T) { 204 check := func(values []uint32) bool { 205 return checkOrdering(t, uint32Order(values), orderOfUint32(values)) 206 } 207 err := quick.Check(func(values []uint32) bool { 208 if !check(values) { 209 return false 210 } 211 sort.Sort(uint32Order(values)) 212 if !check(values) { 213 return false 214 } 215 sort.Sort(sort.Reverse(uint32Order(values))) 216 if !check(values) { 217 return false 218 } 219 return true 220 }) 221 if err != nil { 222 t.Error(err) 223 } 224 225 values := []uint32{ 226 0, 1, 2, 3, 4, 5, 6, 7, 227 8, 9, 10, 11, 12, 13, 14, 15, 228 14, 17, 18, 19, 20, 21, 22, 23, 229 24, 25, 26, 27, 28, 29, 30, 31, 230 } 231 232 if !check(values) { 233 t.Error("failed due to not checking the connection between sequences of 16 elements") 234 } 235 } 236 237 func TestOrderOfUint64(t *testing.T) { 238 check := func(values []uint64) bool { 239 return checkOrdering(t, uint64Order(values), orderOfUint64(values)) 240 } 241 err := quick.Check(func(values []uint64) bool { 242 if !check(values) { 243 return false 244 } 245 sort.Sort(uint64Order(values)) 246 if !check(values) { 247 return false 248 } 249 sort.Sort(sort.Reverse(uint64Order(values))) 250 if !check(values) { 251 return false 252 } 253 return true 254 }) 255 if err != nil { 256 t.Error(err) 257 } 258 259 values := []uint64{ 260 0, 1, 2, 3, 4, 5, 6, 7, 261 6, 9, 10, 11, 12, 13, 14, 15, 262 14, 17, 18, 19, 20, 21, 22, 23, 263 24, 25, 26, 27, 28, 29, 30, 31, 264 } 265 266 if !check(values) { 267 t.Error("failed due to not checking the connection between sequences of 8 elements") 268 } 269 } 270 271 func TestOrderOfFloat32(t *testing.T) { 272 check := func(values []float32) bool { 273 return checkOrdering(t, float32Order(values), orderOfFloat32(values)) 274 } 275 err := quick.Check(func(values []float32) bool { 276 if !check(values) { 277 return false 278 } 279 sort.Sort(float32Order(values)) 280 if !check(values) { 281 return false 282 } 283 sort.Sort(sort.Reverse(float32Order(values))) 284 if !check(values) { 285 return false 286 } 287 return true 288 }) 289 if err != nil { 290 t.Error(err) 291 } 292 293 values := []float32{ 294 0, 1, 2, 3, 4, 5, 6, 7, 295 8, 9, 10, 11, 12, 13, 14, 15, 296 14, 17, 18, 19, 20, 21, 22, 23, 297 24, 25, 26, 27, 28, 29, 30, 31, 298 } 299 300 if !check(values) { 301 t.Error("failed due to not checking the connection between sequences of 16 elements") 302 } 303 } 304 305 func TestOrderOfFloat64(t *testing.T) { 306 check := func(values []float64) bool { 307 return checkOrdering(t, float64Order(values), orderOfFloat64(values)) 308 } 309 err := quick.Check(func(values []float64) bool { 310 if !check(values) { 311 return false 312 } 313 sort.Sort(float64Order(values)) 314 if !check(values) { 315 return false 316 } 317 sort.Sort(sort.Reverse(float64Order(values))) 318 if !check(values) { 319 return false 320 } 321 return true 322 }) 323 if err != nil { 324 t.Error(err) 325 } 326 327 values := []float64{ 328 0, 1, 2, 3, 4, 5, 6, 7, 329 6, 9, 10, 11, 12, 13, 14, 15, 330 14, 17, 18, 19, 20, 21, 22, 23, 331 24, 25, 26, 27, 28, 29, 30, 31, 332 } 333 334 if !check(values) { 335 t.Error("failed due to not checking the connection between sequences of 8 elements") 336 } 337 } 338 339 func TestOrderOfBytes(t *testing.T) { 340 check := func(values [][]byte) bool { 341 return checkOrdering(t, bytesOrder(values), orderOfBytes(values)) 342 } 343 err := quick.Check(func(values [][16]byte) bool { 344 slices := make([][]byte, len(values)) 345 for i := range values { 346 slices[i] = values[i][:] 347 } 348 if !check(slices) { 349 return false 350 } 351 sort.Sort(bytesOrder(slices)) 352 if !check(slices) { 353 return false 354 } 355 sort.Sort(sort.Reverse(bytesOrder(slices))) 356 if !check(slices) { 357 return false 358 } 359 return true 360 }) 361 if err != nil { 362 t.Error(err) 363 } 364 } 365 366 func BenchmarkOrderOfBool(b *testing.B) { 367 forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { 368 values := make([]bool, bufferSize/1) 369 for i := 0; i < b.N; i++ { 370 orderOfBool(values) 371 } 372 }) 373 } 374 375 func BenchmarkOrderOfInt32(b *testing.B) { 376 forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { 377 values := make([]int32, bufferSize/4) 378 for i := 0; i < b.N; i++ { 379 orderOfInt32(values) 380 } 381 }) 382 } 383 384 func BenchmarkOrderOfInt64(b *testing.B) { 385 forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { 386 values := make([]int64, bufferSize/8) 387 for i := 0; i < b.N; i++ { 388 orderOfInt64(values) 389 } 390 }) 391 } 392 393 func BenchmarkOrderOfUint32(b *testing.B) { 394 forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { 395 values := make([]uint32, bufferSize/4) 396 for i := 0; i < b.N; i++ { 397 orderOfUint32(values) 398 } 399 }) 400 } 401 402 func BenchmarkOrderOfUint64(b *testing.B) { 403 forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { 404 values := make([]uint64, bufferSize/8) 405 for i := 0; i < b.N; i++ { 406 orderOfUint64(values) 407 } 408 }) 409 } 410 411 func BenchmarkOrderOfFloat32(b *testing.B) { 412 forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { 413 values := make([]float32, bufferSize/4) 414 for i := 0; i < b.N; i++ { 415 orderOfFloat32(values) 416 } 417 }) 418 } 419 420 func BenchmarkOrderOfFloat64(b *testing.B) { 421 forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { 422 values := make([]float64, bufferSize/8) 423 for i := 0; i < b.N; i++ { 424 orderOfFloat64(values) 425 } 426 }) 427 } 428 429 func BenchmarkOrderOfBytes(b *testing.B) { 430 forEachBenchmarkBufferSize(b, func(b *testing.B, bufferSize int) { 431 data := make([]byte, bufferSize) 432 values := make([][]byte, len(data)/16) 433 for i := range values { 434 values[i] = data[i*16 : (i+1)*16] 435 } 436 for i := 0; i < b.N; i++ { 437 orderOfBytes(values) 438 } 439 }) 440 }