github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/query/repeated_test.go (about)

     1  package query
     2  
     3  import (
     4  	"context"
     5  	"fmt"
     6  	"testing"
     7  
     8  	"github.com/google/go-cmp/cmp"
     9  	"github.com/parquet-go/parquet-go"
    10  	"github.com/stretchr/testify/assert"
    11  	"github.com/stretchr/testify/require"
    12  
    13  	"github.com/grafana/pyroscope/pkg/iter"
    14  )
    15  
// repeatedTestRow is the row type the single-column tests write into parquet
// buffers. It has one repeated field, which those tests read back as column
// index 0.
type repeatedTestRow struct {
	// List holds the repeated int64 values of one row.
	// NOTE(review): the buffer schema is presumably derived from this field's
	// name and type on the first Write — confirm before renaming.
	List []int64
}
    19  
    20  type testRowGetter struct {
    21  	RowNum int64
    22  }
    23  
    24  func (t testRowGetter) RowNumber() int64 {
    25  	return t.RowNum
    26  }
    27  
    28  func Test_RepeatedRowIterator_SingleColumn(t *testing.T) {
    29  	for _, tc := range []struct {
    30  		name     string
    31  		rows     []testRowGetter
    32  		rgs      [][]repeatedTestRow
    33  		expected []RepeatedRow[testRowGetter]
    34  		readSize int
    35  	}{
    36  		{
    37  			name: "single row group no repeated and repeated",
    38  			rows: []testRowGetter{
    39  				{0},
    40  				{1},
    41  				{2},
    42  			},
    43  			rgs: [][]repeatedTestRow{
    44  				{
    45  					{[]int64{1, 1, 1, 1}},
    46  					{[]int64{2}},
    47  					{[]int64{3, 4}},
    48  				},
    49  			},
    50  			expected: []RepeatedRow[testRowGetter]{
    51  				{testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(1), parquet.ValueOf(1), parquet.ValueOf(1)}}},
    52  				{testRowGetter{1}, [][]parquet.Value{{parquet.ValueOf(2)}}},
    53  				{testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(3), parquet.ValueOf(4)}}},
    54  			},
    55  		},
    56  		{
    57  			name: "multiple row group no repeated skip group and page",
    58  			rows: []testRowGetter{
    59  				{0},
    60  				{2},
    61  				{7},
    62  			},
    63  			rgs: [][]repeatedTestRow{
    64  				{
    65  					{[]int64{1}},
    66  					{[]int64{2}},
    67  					{[]int64{3}},
    68  				},
    69  				{
    70  					{[]int64{4}},
    71  					{[]int64{5}},
    72  					{[]int64{6}},
    73  				},
    74  				{
    75  					{[]int64{7}},
    76  					{[]int64{8}},
    77  					{[]int64{9}},
    78  				},
    79  			},
    80  			expected: []RepeatedRow[testRowGetter]{
    81  				{testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1)}}},
    82  				{testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(3)}}},
    83  				{testRowGetter{7}, [][]parquet.Value{{parquet.ValueOf(8)}}},
    84  			},
    85  		},
    86  		{
    87  			name: "single row group",
    88  			rows: []testRowGetter{
    89  				{0},
    90  				{1},
    91  				{2},
    92  			},
    93  			rgs: [][]repeatedTestRow{
    94  				{
    95  					{[]int64{1, 2, 3}},
    96  					{[]int64{4, 5, 6}},
    97  					{[]int64{7, 8, 9}},
    98  				},
    99  			},
   100  			expected: []RepeatedRow[testRowGetter]{
   101  				{testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}},
   102  				{testRowGetter{1}, [][]parquet.Value{{parquet.ValueOf(4), parquet.ValueOf(5), parquet.ValueOf(6)}}},
   103  				{testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7), parquet.ValueOf(8), parquet.ValueOf(9)}}},
   104  			},
   105  		},
   106  		{
   107  			name: "skip row group",
   108  			rows: []testRowGetter{
   109  				{0}, {1}, {2}, {6}, {7}, {8},
   110  			},
   111  			rgs: [][]repeatedTestRow{
   112  				{
   113  					{[]int64{1, 2, 3}},
   114  					{[]int64{4, 5, 6}},
   115  					{[]int64{7, 8, 9}},
   116  				},
   117  				{
   118  					{[]int64{10, 11, 12}},
   119  					{[]int64{13, 14, 15}},
   120  					{[]int64{16, 17, 18}},
   121  				},
   122  				{
   123  					{[]int64{19, 20, 21}},
   124  					{[]int64{22, 23, 24}},
   125  					{[]int64{25, 26, 27}},
   126  				},
   127  			},
   128  			expected: []RepeatedRow[testRowGetter]{
   129  				{testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}},
   130  				{testRowGetter{1}, [][]parquet.Value{{parquet.ValueOf(4), parquet.ValueOf(5), parquet.ValueOf(6)}}},
   131  				{testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7), parquet.ValueOf(8), parquet.ValueOf(9)}}},
   132  				{testRowGetter{6}, [][]parquet.Value{{parquet.ValueOf(19), parquet.ValueOf(20), parquet.ValueOf(21)}}},
   133  				{testRowGetter{7}, [][]parquet.Value{{parquet.ValueOf(22), parquet.ValueOf(23), parquet.ValueOf(24)}}},
   134  				{testRowGetter{8}, [][]parquet.Value{{parquet.ValueOf(25), parquet.ValueOf(26), parquet.ValueOf(27)}}},
   135  			},
   136  		},
   137  		{
   138  			name: "single row group skip through page",
   139  			rows: []testRowGetter{
   140  				{1},
   141  			},
   142  			rgs: [][]repeatedTestRow{
   143  				{
   144  					{[]int64{1, 2, 3}},
   145  					{[]int64{4, 5, 6}},
   146  					{[]int64{7, 8, 9}},
   147  				},
   148  			},
   149  			expected: []RepeatedRow[testRowGetter]{
   150  				{testRowGetter{1}, [][]parquet.Value{{parquet.ValueOf(4), parquet.ValueOf(5), parquet.ValueOf(6)}}},
   151  			},
   152  		},
   153  		{
   154  			name: "multiple row group skip within page",
   155  			rows: []testRowGetter{
   156  				{0},
   157  				{2},
   158  				{5},
   159  				{7},
   160  			},
   161  			rgs: [][]repeatedTestRow{
   162  				{
   163  					{[]int64{1, 2, 3}}, // 0
   164  					{[]int64{4, 5, 6}},
   165  					{[]int64{7, 8, 9}}, // 2
   166  					{[]int64{0, 0, 0}},
   167  					{[]int64{0, 0, 0}},
   168  				},
   169  				{
   170  					{[]int64{10, 11, 12}}, // 5
   171  					{[]int64{0, 0, 0}},
   172  					{[]int64{13, 14, 15}}, // 7
   173  
   174  				},
   175  			},
   176  			expected: []RepeatedRow[testRowGetter]{
   177  				{testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}},
   178  				{testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7), parquet.ValueOf(8), parquet.ValueOf(9)}}},
   179  				{testRowGetter{5}, [][]parquet.Value{{parquet.ValueOf(10), parquet.ValueOf(11), parquet.ValueOf(12)}}},
   180  				{testRowGetter{7}, [][]parquet.Value{{parquet.ValueOf(13), parquet.ValueOf(14), parquet.ValueOf(15)}}},
   181  			},
   182  		},
   183  		{
   184  			name: "multiple row group skip within and through pages and row group",
   185  			rows: []testRowGetter{
   186  				{0},
   187  				{2},
   188  				{8},
   189  				{10},
   190  			},
   191  			rgs: [][]repeatedTestRow{
   192  				{
   193  					{[]int64{1, 2, 3}}, // 0
   194  					{[]int64{4, 5, 6}},
   195  					{[]int64{7, 8, 9}}, // 2
   196  					{[]int64{0, 0, 0}},
   197  					{[]int64{0, 0, 0}},
   198  				},
   199  				{
   200  					{[]int64{0, 0, 0}},
   201  					{[]int64{0, 0, 0}},
   202  					{[]int64{0, 0, 0}},
   203  				},
   204  				{
   205  					{[]int64{10, 11, 12}}, // 8
   206  					{[]int64{0, 0, 0}},
   207  					{[]int64{13, 14, 15}}, // 10
   208  
   209  				},
   210  			},
   211  			expected: []RepeatedRow[testRowGetter]{
   212  				{testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}},
   213  				{testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7), parquet.ValueOf(8), parquet.ValueOf(9)}}},
   214  				{testRowGetter{8}, [][]parquet.Value{{parquet.ValueOf(10), parquet.ValueOf(11), parquet.ValueOf(12)}}},
   215  				{testRowGetter{10}, [][]parquet.Value{{parquet.ValueOf(13), parquet.ValueOf(14), parquet.ValueOf(15)}}},
   216  			},
   217  		},
   218  		{
   219  			name: "multiple row group skip within and through pages and row group mix repeated",
   220  			rows: []testRowGetter{
   221  				{0},
   222  				{2},
   223  				{8},
   224  				{10},
   225  			},
   226  			rgs: [][]repeatedTestRow{
   227  				{
   228  					{[]int64{1, 2, 3}}, // 0
   229  					{[]int64{4, 5}},
   230  					{[]int64{7}}, // 2
   231  					{[]int64{0}},
   232  					{[]int64{0, 0, 0}},
   233  				},
   234  				{
   235  					{[]int64{0, 0, 0}},
   236  					{[]int64{0, 0, 0}},
   237  					{[]int64{0, 0, 0}},
   238  				},
   239  				{
   240  					{[]int64{10, 11, 12}}, // 8
   241  					{[]int64{0, 0, 0}},
   242  					{[]int64{13, 14}}, // 10
   243  
   244  				},
   245  			},
   246  			expected: []RepeatedRow[testRowGetter]{
   247  				{testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}},
   248  				{testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7)}}},
   249  				{testRowGetter{8}, [][]parquet.Value{{parquet.ValueOf(10), parquet.ValueOf(11), parquet.ValueOf(12)}}},
   250  				{testRowGetter{10}, [][]parquet.Value{{parquet.ValueOf(13), parquet.ValueOf(14)}}},
   251  			},
   252  		},
   253  	} {
   254  		tc := tc
   255  		for _, readSize := range []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10000} {
   256  			tc.readSize = readSize
   257  			t.Run(tc.name+fmt.Sprintf("_rs_%d", readSize), func(t *testing.T) {
   258  				var groups []parquet.RowGroup
   259  				for _, rg := range tc.rgs {
   260  					buffer := parquet.NewBuffer()
   261  					for _, row := range rg {
   262  						require.NoError(t, buffer.Write(row))
   263  					}
   264  					groups = append(groups, buffer)
   265  				}
   266  				actual := readRepeatedRowIterator(t,
   267  					NewRepeatedRowIterator(context.Background(),
   268  						iter.NewSliceIterator(tc.rows), groups, 0))
   269  				if diff := cmp.Diff(tc.expected, actual, int64ParquetComparer()); diff != "" {
   270  					t.Errorf("result mismatch (-want +got):\n%s", diff)
   271  				}
   272  			})
   273  		}
   274  
   275  	}
   276  }
   277  
   278  func Test_RepeatedRowIterator_Cancellation(t *testing.T) {
   279  	var groups []parquet.RowGroup
   280  	for _, rg := range [][]repeatedTestRow{
   281  		{
   282  			{[]int64{1, 1, 1, 1}},
   283  			{[]int64{2}},
   284  			{[]int64{3, 4}},
   285  		},
   286  	} {
   287  		buffer := parquet.NewBuffer()
   288  		for _, row := range rg {
   289  			require.NoError(t, buffer.Write(row))
   290  		}
   291  		groups = append(groups, buffer)
   292  	}
   293  
   294  	rows := iter.NewSliceIterator([]testRowGetter{{0}})
   295  	ctx, cancel := context.WithCancel(context.Background())
   296  	cancel()
   297  	it := NewRepeatedRowIterator(ctx, rows, groups, 0)
   298  	assert.False(t, it.Next())
   299  	assert.Error(t, context.Canceled, it.Err())
   300  	assert.NoError(t, it.Close())
   301  }
   302  
   303  type multiColumnItem struct {
   304  	X int64
   305  	Y int64
   306  }
   307  
// multiColumnRepeatedTestRow is the row type the multi-column test writes
// into parquet buffers: a single repeated list of multiColumnItem values.
type multiColumnRepeatedTestRow struct {
	// List holds the repeated items of one row; the test reads the items'
	// X and Y values back as columns 0 and 1.
	List []multiColumnItem
}
   311  
   312  func Test_RepeatedRowPageIterator_MultipleColumns(t *testing.T) {
   313  	for _, tc := range []struct {
   314  		name     string
   315  		rows     []testRowGetter
   316  		rgs      [][]multiColumnRepeatedTestRow
   317  		expected []RepeatedRow[testRowGetter]
   318  	}{
   319  		{
   320  			name: "single row group",
   321  			rows: []testRowGetter{
   322  				{0},
   323  			},
   324  			rgs: [][]multiColumnRepeatedTestRow{
   325  				{
   326  					{
   327  						List: []multiColumnItem{
   328  							{1, 2},
   329  							{3, 4},
   330  							{5, 6},
   331  						},
   332  					},
   333  				},
   334  			},
   335  			expected: []RepeatedRow[testRowGetter]{
   336  				{
   337  					testRowGetter{0},
   338  					[][]parquet.Value{
   339  						{parquet.ValueOf(1), parquet.ValueOf(3), parquet.ValueOf(5)},
   340  						{parquet.ValueOf(2), parquet.ValueOf(4), parquet.ValueOf(6)},
   341  					},
   342  				},
   343  			},
   344  		},
   345  		{
   346  			name: "row group and page seek",
   347  			rows: []testRowGetter{
   348  				{1},
   349  				{4},
   350  				{7},
   351  			},
   352  			rgs: [][]multiColumnRepeatedTestRow{
   353  				{
   354  					{List: []multiColumnItem{{0, 0}, {0, 0}}},
   355  					{List: []multiColumnItem{{1, 2}, {3, 4}}}, // 1
   356  					{List: []multiColumnItem{{0, 0}, {0, 0}}},
   357  				},
   358  				{
   359  					{List: []multiColumnItem{{0, 0}, {0, 0}}},
   360  					{List: []multiColumnItem{{5, 6}, {7, 8}}}, // 4
   361  					{List: []multiColumnItem{{0, 0}, {0, 0}}},
   362  					{List: []multiColumnItem{{0, 0}, {0, 0}}},
   363  					{List: []multiColumnItem{{9, 10}}}, // 7
   364  				},
   365  			},
   366  			expected: []RepeatedRow[testRowGetter]{
   367  				{
   368  					testRowGetter{1},
   369  					[][]parquet.Value{
   370  						{parquet.ValueOf(1), parquet.ValueOf(3)},
   371  						{parquet.ValueOf(2), parquet.ValueOf(4)},
   372  					},
   373  				},
   374  				{
   375  					testRowGetter{4},
   376  					[][]parquet.Value{
   377  						{parquet.ValueOf(5), parquet.ValueOf(7)},
   378  						{parquet.ValueOf(6), parquet.ValueOf(8)},
   379  					},
   380  				},
   381  				{
   382  					testRowGetter{7},
   383  					[][]parquet.Value{
   384  						{parquet.ValueOf(9)},
   385  						{parquet.ValueOf(10)},
   386  					},
   387  				},
   388  			},
   389  		},
   390  	} {
   391  		tc := tc
   392  		t.Run(tc.name, func(t *testing.T) {
   393  			var groups []parquet.RowGroup
   394  			for _, rg := range tc.rgs {
   395  				buffer := parquet.NewBuffer()
   396  				for _, row := range rg {
   397  					require.NoError(t, buffer.Write(row))
   398  				}
   399  				groups = append(groups, buffer)
   400  			}
   401  			actual := readRepeatedRowIterator(t,
   402  				NewRepeatedRowIterator(context.Background(),
   403  					iter.NewSliceIterator(tc.rows), groups, 0, 1),
   404  			)
   405  			if diff := cmp.Diff(tc.expected, actual, int64ParquetComparer()); diff != "" {
   406  				t.Errorf("result mismatch (-want +got):\n%s", diff)
   407  			}
   408  		})
   409  	}
   410  }
   411  
   412  func readRepeatedRowIterator(t *testing.T, it iter.Iterator[RepeatedRow[testRowGetter]]) []RepeatedRow[testRowGetter] {
   413  	defer func() {
   414  		require.NoError(t, it.Close())
   415  	}()
   416  	var result []RepeatedRow[testRowGetter]
   417  	for it.Next() {
   418  		current := RepeatedRow[testRowGetter]{
   419  			Row:    it.At().Row,
   420  			Values: make([][]parquet.Value, len(it.At().Values)),
   421  		}
   422  		for i, v := range it.At().Values {
   423  			current.Values[i] = make([]parquet.Value, len(v))
   424  			copy(current.Values[i], v)
   425  		}
   426  		if len(result) > 0 && current.Row.RowNumber() == result[len(result)-1].Row.RowNumber() {
   427  			for i, v := range current.Values {
   428  				result[len(result)-1].Values[i] = append(result[len(result)-1].Values[i], v...)
   429  			}
   430  			continue
   431  		}
   432  
   433  		result = append(result, current)
   434  	}
   435  	require.NoError(t, it.Err())
   436  	return result
   437  }
   438  
   439  func int64ParquetComparer() cmp.Option {
   440  	return cmp.Comparer(func(x, y parquet.Value) bool {
   441  		return x.Int64() == y.Int64()
   442  	})
   443  }