github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/column_index_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"testing"
     5  
     6  	"github.com/segmentio/parquet-go"
     7  )
     8  
     9  func TestBinaryColumnIndexMinMax(t *testing.T) {
    10  	testCases := [][]interface{}{
    11  		// kind, type, page min, page max, size limit, [value to search, expected result]...
    12  		{parquet.ByteArray, parquet.ByteArrayType,
    13  			[]byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4,
    14  			[]byte{0, 0, 0, 0, 0, 0}, true,
    15  			[]byte{0, 1, 2, 3, 4, 5}, true,
    16  			[]byte{1, 2, 3, 4}, true,
    17  			[]byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit
    18  			[]byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit
    19  			[]byte{1, 2, 3, 5}, true, // false positive due to size limit
    20  			[]byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max
    21  			[]byte{2, 3, 4, 5}, false, // should be no hit since it definitely exceeds page max
    22  		},
    23  		{parquet.FixedLenByteArray, parquet.FixedLenByteArrayType(6),
    24  			[]byte{0, 0, 0, 0, 0, 0}, []byte{1, 2, 3, 4, 5, 6}, 4,
    25  			[]byte{0, 0, 0, 0, 0, 0}, true,
    26  			[]byte{0, 1, 2, 3, 4, 5}, true,
    27  			[]byte{1, 2, 3, 4, 0, 0}, true,
    28  			[]byte{1, 2, 3, 4, 5, 6}, true, // the page max value should be a hit
    29  			[]byte{1, 2, 3, 4, 5, 7}, true, // false positive due to size limit
    30  			[]byte{1, 2, 3, 4, 0xFF, 0xFF}, true, // false positive due to size limit
    31  			[]byte{1, 2, 3, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max
    32  			[]byte{1, 2, 3, 5, 6, 7}, false, // should be no hit since it definitely exceeds page max
    33  			[]byte{2, 3, 4, 5, 0, 0}, false, // should be no hit since it definitely exceeds page max
    34  		},
    35  	}
    36  	for _, testCase := range testCases {
    37  		kind := testCase[0].(parquet.Kind)
    38  		typ := testCase[1].(parquet.Type)
    39  		min := testCase[2].([]byte)
    40  		max := testCase[3].([]byte)
    41  		sizeLimit := testCase[4].(int)
    42  		indexer := typ.NewColumnIndexer(sizeLimit)
    43  		indexer.IndexPage(100, 0,
    44  			parquet.ValueOf(min),
    45  			parquet.ValueOf(max),
    46  		)
    47  		formatIndex := indexer.ColumnIndex()
    48  		columnIndex := parquet.NewColumnIndex(kind, &formatIndex)
    49  		for i := 5; i < len(testCase); i += 2 {
    50  			value := testCase[i].([]byte)
    51  			expected := testCase[i+1].(bool)
    52  
    53  			v := parquet.ValueOf(value)
    54  			actual := parquet.Search(columnIndex, v, typ) == 0
    55  			if actual != expected {
    56  				t.Errorf("checkByteArrayMinMax(%v, %v, %v, %v) = %v, want %v", min, max, value, sizeLimit, actual, expected)
    57  			}
    58  		}
    59  	}
    60  }