github.com/vc42/parquet-go@v0.0.0-20240320194221-1a9adb5f23f5/search_test.go (about)

     1  package parquet_test
     2  
     3  import (
     4  	"fmt"
     5  	"math"
     6  	"testing"
     7  
     8  	"github.com/vc42/parquet-go"
     9  )
    10  
    11  func assertCompare(t *testing.T, a, b parquet.Value, cmp func(parquet.Value, parquet.Value) int, want int) {
    12  	if got := cmp(a, b); got != want {
    13  		t.Errorf("compare(%v, %v): got=%d want=%d", a, b, got, want)
    14  	}
    15  }
    16  
    17  func TestCompareNullsFirst(t *testing.T) {
    18  	cmp := parquet.CompareNullsFirst(parquet.Int32Type.Compare)
    19  	assertCompare(t, parquet.Value{}, parquet.Value{}, cmp, 0)
    20  	assertCompare(t, parquet.Value{}, parquet.ValueOf(int32(0)), cmp, -1)
    21  	assertCompare(t, parquet.ValueOf(int32(0)), parquet.Value{}, cmp, +1)
    22  	assertCompare(t, parquet.ValueOf(int32(0)), parquet.ValueOf(int32(1)), cmp, -1)
    23  }
    24  
    25  func TestCompareNullsLast(t *testing.T) {
    26  	cmp := parquet.CompareNullsLast(parquet.Int32Type.Compare)
    27  	assertCompare(t, parquet.Value{}, parquet.Value{}, cmp, 0)
    28  	assertCompare(t, parquet.Value{}, parquet.ValueOf(int32(0)), cmp, +1)
    29  	assertCompare(t, parquet.ValueOf(int32(0)), parquet.Value{}, cmp, -1)
    30  	assertCompare(t, parquet.ValueOf(int32(0)), parquet.ValueOf(int32(1)), cmp, -1)
    31  }
    32  
    33  func TestSearchBinary(t *testing.T) {
    34  	testSearch(t, [][]int32{
    35  		{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
    36  		{10, 10, 10, 10},
    37  		{21, 22, 23, 24, 25},
    38  		{30},
    39  		{31},
    40  		{32},
    41  		{42, 43, 44, 45, 46, 47, 48, 49},
    42  	})
    43  }
    44  
    45  func TestSearchLinear(t *testing.T) {
    46  	testSearch(t, [][]int32{
    47  		{10, 10, 10, 10},
    48  		{0, 1, 2, 3, 4, 5, 6, 7, 8, 9},
    49  		{21, 22, 23, 24, 25},
    50  		{19, 18, 17, 16, 15, 14, 13, 12, 11},
    51  		{42, 43, 44, 45, 46, 47, 48, 49},
    52  	})
    53  }
    54  
    55  func testSearch(t *testing.T, pages [][]int32) {
    56  	indexer := parquet.Int32Type.NewColumnIndexer(0)
    57  
    58  	for _, values := range pages {
    59  		min := values[0]
    60  		max := values[0]
    61  
    62  		for _, v := range values[1:] {
    63  			switch {
    64  			case v < min:
    65  				min = v
    66  			case v > max:
    67  				max = v
    68  			}
    69  		}
    70  
    71  		indexer.IndexPage(int64(len(values)), 0,
    72  			parquet.ValueOf(min),
    73  			parquet.ValueOf(max),
    74  		)
    75  	}
    76  
    77  	formatIndex := indexer.ColumnIndex()
    78  	columnIndex := parquet.NewColumnIndex(parquet.Int32, &formatIndex)
    79  
    80  	for i, values := range pages {
    81  		t.Run(fmt.Sprintf("page#%02d", i), func(t *testing.T) {
    82  			for _, value := range values {
    83  				v := parquet.ValueOf(value)
    84  				j := parquet.Search(columnIndex, v, parquet.Int32Type)
    85  
    86  				if i != j {
    87  					t.Errorf("searching for value %v: got=%d want=%d", v, j, i)
    88  				}
    89  			}
    90  
    91  			for _, test := range []int32{math.MinInt32, math.MaxInt32} {
    92  				if page := parquet.Search(columnIndex, parquet.ValueOf(test), parquet.Int32Type); page != len(pages) {
    93  					t.Errorf("search for non-existing value %v: got=%d want=%d", test, page, len(pages))
    94  				}
    95  			}
    96  		})
    97  	}
    98  }