github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/column_index_test.go (about) 1 package parquet_test 2 3 import ( 4 "testing" 5 6 "github.com/segmentio/parquet-go" 7 ) 8 9 func TestBinaryColumnIndexMinMax(t *testing.T) { 10 testCases := [][]interface{}{ 11 // kind, type, page min, page max, size limit, [value to search, expected result]... 12 {parquet.ByteArray, parquet.ByteArrayType, 13 []byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4, 14 []byte{0, 0, 0, 0, 0, 0}, true, 15 []byte{0, 1, 2, 3, 4, 5}, true, 16 []byte{1, 2, 3, 4}, true, 17 []byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit 18 []byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit 19 []byte{1, 2, 3, 5}, true, // false positive due to size limit 20 []byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max 21 []byte{2, 3, 4, 5}, false, // should be no hit since it definitely exceeds page max 22 }, 23 {parquet.FixedLenByteArray, parquet.FixedLenByteArrayType(6), 24 []byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4, 25 []byte{0, 0, 0, 0, 0, 0}, true, 26 []byte{0, 1, 2, 3, 4, 5}, true, 27 []byte{1, 2, 3, 4, 0, 0}, true, 28 []byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit 29 []byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit 30 []byte{1, 2, 3, 4, 0xFF, 0xFF}, true, // false positive due to size limit 31 []byte{1, 2, 3, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max 32 []byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max 33 []byte{2, 3, 4, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max 34 }, 35 } 36 for _, testCase := range testCases { 37 kind := testCase[0].(parquet.Kind) 38 typ := testCase[1].(parquet.Type) 39 min := testCase[2].([]byte) 40 max := testCase[3].([]byte) 41 sizeLimit := testCase[4].(int) 42 indexer := typ.NewColumnIndexer(sizeLimit) 43 indexer.IndexPage(100, 0, 44 parquet.ValueOf(min), 45 parquet.ValueOf(max), 46 ) 47 formatIndex := indexer.ColumnIndex() 48 columnIndex := parquet.NewColumnIndex(kind, &formatIndex) 49 for i := 5; i < len(testCase); i += 2 { 50 value := testCase[i].([]byte) 51 expected := testCase[i+1].(bool) 52 53 v := parquet.ValueOf(value) 54 actual := parquet.Search(columnIndex, v, typ) == 0 55 if actual != expected { 56 t.Errorf("checkByteArrayMinMax(%v, %v, %v, %v) = %v, want %v", min, max, value, sizeLimit, actual, expected) 57 } 58 } 59 } 60 }