github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/column_index_test.go (about) 1 package parquet_test 2 3 import ( 4 "testing" 5 6 "github.com/parquet-go/parquet-go" 7 "github.com/stretchr/testify/require" 8 ) 9 10 func TestBinaryColumnIndexMinMax(t *testing.T) { 11 testCases := [][]interface{}{ 12 // kind, type, page min, page max, size limit, [value to search, expected result]... 13 {parquet.ByteArray, parquet.ByteArrayType, 14 []byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4, 15 []byte{0, 0, 0, 0, 0, 0}, true, 16 []byte{0, 1, 2, 3, 4, 5}, true, 17 []byte{1, 2, 3, 4}, true, 18 []byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit 19 []byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit 20 []byte{1, 2, 3, 5}, true, // false positive due to size limit 21 []byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max 22 []byte{2, 3, 4, 5}, false, // should be no hit since it definitely exceeds page max 23 }, 24 {parquet.FixedLenByteArray, parquet.FixedLenByteArrayType(6), 25 []byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4, 26 []byte{0, 0, 0, 0, 0, 0}, true, 27 []byte{0, 1, 2, 3, 4, 5}, true, 28 []byte{1, 2, 3, 4, 0, 0}, true, 29 []byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit 30 []byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit 31 []byte{1, 2, 3, 4, 0xFF, 0xFF}, true, // false positive due to size limit 32 []byte{1, 2, 3, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max 33 []byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max 34 []byte{2, 3, 4, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max 35 }, 36 } 37 for _, testCase := range testCases { 38 kind := testCase[0].(parquet.Kind) 39 typ := testCase[1].(parquet.Type) 40 min := testCase[2].([]byte) 41 max := testCase[3].([]byte) 42 sizeLimit := testCase[4].(int) 43 indexer := typ.NewColumnIndexer(sizeLimit) 44 indexer.IndexPage(100, 0, 45 parquet.ValueOf(min), 46 parquet.ValueOf(max), 47 ) 48 formatIndex := indexer.ColumnIndex() 49 columnIndex := parquet.NewColumnIndex(kind, &formatIndex) 50 for i := 5; i < len(testCase); i += 2 { 51 value := testCase[i].([]byte) 52 expected := testCase[i+1].(bool) 53 54 v := parquet.ValueOf(value) 55 actual := parquet.Search(columnIndex, v, typ) == 0 56 if actual != expected { 57 t.Errorf("checkByteArrayMinMax(%v, %v, %v, %v) = %v, want %v", min, max, value, sizeLimit, actual, expected) 58 } 59 } 60 } 61 } 62 63 func Test_ColumnIndexReuse(t *testing.T) { 64 min := "a" 65 max := "z" 66 indexer := parquet.ByteArrayType.NewColumnIndexer(16) 67 indexer.IndexPage(100, 0, 68 parquet.ValueOf(min), 69 parquet.ValueOf(max), 70 ) 71 before := indexer.ColumnIndex() 72 require.Equal(t, 1, len(before.NullPages)) 73 require.False(t, before.NullPages[0]) 74 75 // Reset the indexer. Should be safe for reuse. 76 indexer.Reset() 77 78 // Index two pages that are both nul pages, expect the previous index to not have changed. 79 indexer.IndexPage(100, 100, 80 parquet.ValueOf(min), 81 parquet.ValueOf(max), 82 ) 83 indexer.IndexPage(10, 10, 84 parquet.ValueOf(min), 85 parquet.ValueOf(max), 86 ) 87 after := indexer.ColumnIndex() 88 89 require.Equal(t, 2, len(after.NullPages)) 90 require.True(t, after.NullPages[0]) 91 require.True(t, after.NullPages[1]) 92 93 // Validate null pages of the previous index. 94 require.Equal(t, 1, len(before.NullPages)) 95 require.False(t, before.NullPages[0]) 96 }