github.com/segmentio/parquet-go@v0.0.0-20230712180008-5d42db8f0d47/search_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"testing"
     5  
     6  	"github.com/segmentio/parquet-go"
     7  )
     8  
     9  func TestSearchBinary(t *testing.T) {
    10  	testSearch(t, [][]int32{
    11  		{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
    12  		{10, 10, 10, 10},
    13  		{21, 22, 24, 25, 30},
    14  		{30, 30},
    15  		{30, 31},
    16  		{32},
    17  		{42, 43, 44, 45, 46, 47, 48, 49},
    18  	}, [][]int{
    19  		{10, 1},
    20  		{0, 0},
    21  		{9, 0},
    22  		// non-existant, but would be in this page
    23  		{23, 2},
    24  		// ensure we find the first page
    25  		{30, 2},
    26  		{31, 4},
    27  		// out of bounds
    28  		{99, 7},
    29  		// out of bounds
    30  		{-1, 7},
    31  	})
    32  }
    33  
    34  func TestSearchLinear(t *testing.T) {
    35  	testSearch(t, [][]int32{
    36  		{10, 10, 10, 10},
    37  		{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
    38  		{21, 22, 23, 24, 25},
    39  		{19, 18, 17, 16, 14, 13, 12, 11},
    40  		{42, 43, 44, 45, 46, 47, 48, 49},
    41  	}, [][]int{
    42  		{10, 0},
    43  		{0, 1},
    44  		{9, 1},
    45  		{48, 4},
    46  		// non-existant, but could be in this page
    47  		{15, 3},
    48  		// out of bounds
    49  		{99, 5},
    50  		// out of bounds
    51  		{-1, 5},
    52  	})
    53  }
    54  
    55  func testSearch(t *testing.T, pages [][]int32, expectIndex [][]int) {
    56  	indexer := parquet.Int32Type.NewColumnIndexer(0)
    57  
    58  	for _, values := range pages {
    59  		min := values[0]
    60  		max := values[0]
    61  
    62  		for _, v := range values[1:] {
    63  			switch {
    64  			case v < min:
    65  				min = v
    66  			case v > max:
    67  				max = v
    68  			}
    69  		}
    70  
    71  		indexer.IndexPage(int64(len(values)), 0,
    72  			parquet.ValueOf(min),
    73  			parquet.ValueOf(max),
    74  		)
    75  	}
    76  
    77  	formatIndex := indexer.ColumnIndex()
    78  	columnIndex := parquet.NewColumnIndex(parquet.Int32, &formatIndex)
    79  
    80  	for _, values := range expectIndex {
    81  		v := parquet.ValueOf(values[0])
    82  		j := parquet.Search(columnIndex, v, parquet.Int32Type)
    83  
    84  		if values[1] != j {
    85  			t.Errorf("searching for value %v: got=%d want=%d", v, j, values[1])
    86  		}
    87  	}
    88  }