github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/search_test.go (about) 1 package parquet_test 2 3 import ( 4 "fmt" 5 "math" 6 "testing" 7 8 "github.com/vc42/parquet-go" 9 ) 10 11 func assertCompare(t *testing.T, a, b parquet.Value, cmp func(parquet.Value, parquet.Value) int, want int) { 12 if got := cmp(a, b); got != want { 13 t.Errorf("compare(%v, %v): got=%d want=%d", a, b, got, want) 14 } 15 } 16 17 func TestCompareNullsFirst(t *testing.T) { 18 cmp := parquet.CompareNullsFirst(parquet.Int32Type.Compare) 19 assertCompare(t, parquet.Value{}, parquet.Value{}, cmp, 0) 20 assertCompare(t, parquet.Value{}, parquet.ValueOf(int32(0)), cmp, -1) 21 assertCompare(t, parquet.ValueOf(int32(0)), parquet.Value{}, cmp, +1) 22 assertCompare(t, parquet.ValueOf(int32(0)), parquet.ValueOf(int32(1)), cmp, -1) 23 } 24 25 func TestCompareNullsLast(t *testing.T) { 26 cmp := parquet.CompareNullsLast(parquet.Int32Type.Compare) 27 assertCompare(t, parquet.Value{}, parquet.Value{}, cmp, 0) 28 assertCompare(t, parquet.Value{}, parquet.ValueOf(int32(0)), cmp, +1) 29 assertCompare(t, parquet.ValueOf(int32(0)), parquet.Value{}, cmp, -1) 30 assertCompare(t, parquet.ValueOf(int32(0)), parquet.ValueOf(int32(1)), cmp, -1) 31 } 32 33 func TestSearchBinary(t *testing.T) { 34 testSearch(t, [][]int32{ 35 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, 36 {10, 10, 10, 10}, 37 {21, 22, 23, 24, 25}, 38 {30}, 39 {31}, 40 {32}, 41 {42, 43, 44, 45, 46, 47, 48, 49}, 42 }) 43 } 44 45 func TestSearchLinear(t *testing.T) { 46 testSearch(t, [][]int32{ 47 {10, 10, 10, 10}, 48 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, 49 {21, 22, 23, 24, 25}, 50 {19, 18, 17, 16, 15, 14, 13, 12, 11}, 51 {42, 43, 44, 45, 46, 47, 48, 49}, 52 }) 53 } 54 55 func testSearch(t *testing.T, pages [][]int32) { 56 indexer := parquet.Int32Type.NewColumnIndexer(0) 57 58 for _, values := range pages { 59 min := values[0] 60 max := values[0] 61 62 for _, v := range values[1:] { 63 switch { 64 case v < min: 65 min = v 66 case v > max: 67 max = v 68 } 69 } 70 71 indexer.IndexPage(int64(len(values)), 0, 72 parquet.ValueOf(min), 73 parquet.ValueOf(max), 74 ) 75 } 76 77 formatIndex := indexer.ColumnIndex() 78 columnIndex := parquet.NewColumnIndex(parquet.Int32, &formatIndex) 79 80 for i, values := range pages { 81 t.Run(fmt.Sprintf("page#%02d", i), func(t *testing.T) { 82 for _, value := range values { 83 v := parquet.ValueOf(value) 84 j := parquet.Search(columnIndex, v, parquet.Int32Type) 85 86 if i != j { 87 t.Errorf("searching for value %v: got=%d want=%d", v, j, i) 88 } 89 } 90 91 for _, test := range []int32{math.MinInt32, math.MaxInt32} { 92 if page := parquet.Search(columnIndex, parquet.ValueOf(test), parquet.Int32Type); page != len(pages) { 93 t.Errorf("search for non-existing value %v: got=%d want=%d", test, page, len(pages)) 94 } 95 } 96 }) 97 } 98 }