github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/search_test.go (about) 1 package parquet_test 2 3 import ( 4 "testing" 5 6 "github.com/segmentio/parquet-go" 7 ) 8 9 func TestSearchBinary(t *testing.T) { 10 testSearch(t, [][]int32{ 11 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, 12 {10, 10, 10, 10}, 13 {21, 22, 24, 25, 30}, 14 {30, 30}, 15 {30, 31}, 16 {32}, 17 {42, 43, 44, 45, 46, 47, 48, 49}, 18 }, [][]int{ 19 {10, 1}, 20 {0, 0}, 21 {9, 0}, 22 // non-existant, but would be in this page 23 {23, 2}, 24 // ensure we find the first page 25 {30, 2}, 26 {31, 4}, 27 // out of bounds 28 {99, 7}, 29 // out of bounds 30 {-1, 7}, 31 }) 32 } 33 34 func TestSearchLinear(t *testing.T) { 35 testSearch(t, [][]int32{ 36 {10, 10, 10, 10}, 37 {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, 38 {21, 22, 23, 24, 25}, 39 {19, 18, 17, 16, 14, 13, 12, 11}, 40 {42, 43, 44, 45, 46, 47, 48, 49}, 41 }, [][]int{ 42 {10, 0}, 43 {0, 1}, 44 {9, 1}, 45 {48, 4}, 46 // non-existant, but could be in this page 47 {15, 3}, 48 // out of bounds 49 {99, 5}, 50 // out of bounds 51 {-1, 5}, 52 }) 53 } 54 55 func testSearch(t *testing.T, pages [][]int32, expectIndex [][]int) { 56 indexer := parquet.Int32Type.NewColumnIndexer(0) 57 58 for _, values := range pages { 59 min := values[0] 60 max := values[0] 61 62 for _, v := range values[1:] { 63 switch { 64 case v < min: 65 min = v 66 case v > max: 67 max = v 68 } 69 } 70 71 indexer.IndexPage(int64(len(values)), 0, 72 parquet.ValueOf(min), 73 parquet.ValueOf(max), 74 ) 75 } 76 77 formatIndex := indexer.ColumnIndex() 78 columnIndex := parquet.NewColumnIndex(parquet.Int32, &formatIndex) 79 80 for _, values := range expectIndex { 81 v := parquet.ValueOf(values[0]) 82 j := parquet.Search(columnIndex, v, parquet.Int32Type) 83 84 if values[1] != j { 85 t.Errorf("searching for value %v: got=%d want=%d", v, j, values[1]) 86 } 87 } 88 }