github.com/parquet-go/parquet-go@v0.21.1-0.20240501160520-b3c3a0c3ed6f/column_index_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"testing"
     5  
     6  	"github.com/parquet-go/parquet-go"
     7  	"github.com/stretchr/testify/require"
     8  )
     9  
    10  func TestBinaryColumnIndexMinMax(t *testing.T) {
    11  	testCases := [][]interface{}{
    12  		// kind, type, page min, page max, size limit, [value to search, expected result]...
    13  		{parquet.ByteArray, parquet.ByteArrayType,
    14  			[]byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4,
    15  			[]byte{0, 0, 0, 0, 0, 0}, true,
    16  			[]byte{0, 1, 2, 3, 4, 5}, true,
    17  			[]byte{1, 2, 3, 4}, true,
    18  			[]byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit
    19  			[]byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit
    20  			[]byte{1, 2, 3, 5}, true, // false positive due to size limit
    21  			[]byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max
    22  			[]byte{2, 3, 4, 5}, false, // should be no hit since it definitely exceeds page max
    23  		},
    24  		{parquet.FixedLenByteArray, parquet.FixedLenByteArrayType(6),
    25  			[]byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4,
    26  			[]byte{0, 0, 0, 0, 0, 0}, true,
    27  			[]byte{0, 1, 2, 3, 4, 5}, true,
    28  			[]byte{1, 2, 3, 4, 0, 0}, true,
    29  			[]byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit
    30  			[]byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit
    31  			[]byte{1, 2, 3, 4, 0xFF, 0xFF}, true, // false positive due to size limit
    32  			[]byte{1, 2, 3, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max
    33  			[]byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max
    34  			[]byte{2, 3, 4, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max
    35  		},
    36  	}
    37  	for _, testCase := range testCases {
    38  		kind := testCase[0].(parquet.Kind)
    39  		typ := testCase[1].(parquet.Type)
    40  		min := testCase[2].([]byte)
    41  		max := testCase[3].([]byte)
    42  		sizeLimit := testCase[4].(int)
    43  		indexer := typ.NewColumnIndexer(sizeLimit)
    44  		indexer.IndexPage(100, 0,
    45  			parquet.ValueOf(min),
    46  			parquet.ValueOf(max),
    47  		)
    48  		formatIndex := indexer.ColumnIndex()
    49  		columnIndex := parquet.NewColumnIndex(kind, &formatIndex)
    50  		for i := 5; i < len(testCase); i += 2 {
    51  			value := testCase[i].([]byte)
    52  			expected := testCase[i+1].(bool)
    53  
    54  			v := parquet.ValueOf(value)
    55  			actual := parquet.Search(columnIndex, v, typ) == 0
    56  			if actual != expected {
    57  				t.Errorf("checkByteArrayMinMax(%v, %v, %v, %v) = %v, want %v", min, max, value, sizeLimit, actual, expected)
    58  			}
    59  		}
    60  	}
    61  }
    62  
    63  func Test_ColumnIndexReuse(t *testing.T) {
    64  	min := "a"
    65  	max := "z"
    66  	indexer := parquet.ByteArrayType.NewColumnIndexer(16)
    67  	indexer.IndexPage(100, 0,
    68  		parquet.ValueOf(min),
    69  		parquet.ValueOf(max),
    70  	)
    71  	before := indexer.ColumnIndex()
    72  	require.Equal(t, 1, len(before.NullPages))
    73  	require.False(t, before.NullPages[0])
    74  
    75  	// Reset the indexer. Should be safe for reuse.
    76  	indexer.Reset()
    77  
    78  	// Index two pages that are both nul pages, expect the previous index to not have changed.
    79  	indexer.IndexPage(100, 100,
    80  		parquet.ValueOf(min),
    81  		parquet.ValueOf(max),
    82  	)
    83  	indexer.IndexPage(10, 10,
    84  		parquet.ValueOf(min),
    85  		parquet.ValueOf(max),
    86  	)
    87  	after := indexer.ColumnIndex()
    88  
    89  	require.Equal(t, 2, len(after.NullPages))
    90  	require.True(t, after.NullPages[0])
    91  	require.True(t, after.NullPages[1])
    92  
    93  	// Validate null pages of the previous index.
    94  	require.Equal(t, 1, len(before.NullPages))
    95  	require.False(t, before.NullPages[0])
    96  }