github.com/grafana/pyroscope@v1.18.0/pkg/phlaredb/query/repeated_test.go (about) 1 package query 2 3 import ( 4 "context" 5 "fmt" 6 "testing" 7 8 "github.com/google/go-cmp/cmp" 9 "github.com/parquet-go/parquet-go" 10 "github.com/stretchr/testify/assert" 11 "github.com/stretchr/testify/require" 12 13 "github.com/grafana/pyroscope/pkg/iter" 14 ) 15 16 type repeatedTestRow struct { 17 List []int64 18 } 19 20 type testRowGetter struct { 21 RowNum int64 22 } 23 24 func (t testRowGetter) RowNumber() int64 { 25 return t.RowNum 26 } 27 28 func Test_RepeatedRowIterator_SingleColumn(t *testing.T) { 29 for _, tc := range []struct { 30 name string 31 rows []testRowGetter 32 rgs [][]repeatedTestRow 33 expected []RepeatedRow[testRowGetter] 34 readSize int 35 }{ 36 { 37 name: "single row group no repeated and repeated", 38 rows: []testRowGetter{ 39 {0}, 40 {1}, 41 {2}, 42 }, 43 rgs: [][]repeatedTestRow{ 44 { 45 {[]int64{1, 1, 1, 1}}, 46 {[]int64{2}}, 47 {[]int64{3, 4}}, 48 }, 49 }, 50 expected: []RepeatedRow[testRowGetter]{ 51 {testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(1), parquet.ValueOf(1), parquet.ValueOf(1)}}}, 52 {testRowGetter{1}, [][]parquet.Value{{parquet.ValueOf(2)}}}, 53 {testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(3), parquet.ValueOf(4)}}}, 54 }, 55 }, 56 { 57 name: "multiple row group no repeated skip group and page", 58 rows: []testRowGetter{ 59 {0}, 60 {2}, 61 {7}, 62 }, 63 rgs: [][]repeatedTestRow{ 64 { 65 {[]int64{1}}, 66 {[]int64{2}}, 67 {[]int64{3}}, 68 }, 69 { 70 {[]int64{4}}, 71 {[]int64{5}}, 72 {[]int64{6}}, 73 }, 74 { 75 {[]int64{7}}, 76 {[]int64{8}}, 77 {[]int64{9}}, 78 }, 79 }, 80 expected: []RepeatedRow[testRowGetter]{ 81 {testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1)}}}, 82 {testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(3)}}}, 83 {testRowGetter{7}, [][]parquet.Value{{parquet.ValueOf(8)}}}, 84 }, 85 }, 86 { 87 name: "single row group", 88 rows: []testRowGetter{ 89 {0}, 90 {1}, 91 {2}, 92 }, 93 rgs: [][]repeatedTestRow{ 94 { 95 {[]int64{1, 2, 3}}, 96 {[]int64{4, 5, 6}}, 97 {[]int64{7, 8, 9}}, 98 }, 99 }, 100 expected: []RepeatedRow[testRowGetter]{ 101 {testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}}, 102 {testRowGetter{1}, [][]parquet.Value{{parquet.ValueOf(4), parquet.ValueOf(5), parquet.ValueOf(6)}}}, 103 {testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7), parquet.ValueOf(8), parquet.ValueOf(9)}}}, 104 }, 105 }, 106 { 107 name: "skip row group", 108 rows: []testRowGetter{ 109 {0}, {1}, {2}, {6}, {7}, {8}, 110 }, 111 rgs: [][]repeatedTestRow{ 112 { 113 {[]int64{1, 2, 3}}, 114 {[]int64{4, 5, 6}}, 115 {[]int64{7, 8, 9}}, 116 }, 117 { 118 {[]int64{10, 11, 12}}, 119 {[]int64{13, 14, 15}}, 120 {[]int64{16, 17, 18}}, 121 }, 122 { 123 {[]int64{19, 20, 21}}, 124 {[]int64{22, 23, 24}}, 125 {[]int64{25, 26, 27}}, 126 }, 127 }, 128 expected: []RepeatedRow[testRowGetter]{ 129 {testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}}, 130 {testRowGetter{1}, [][]parquet.Value{{parquet.ValueOf(4), parquet.ValueOf(5), parquet.ValueOf(6)}}}, 131 {testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7), parquet.ValueOf(8), parquet.ValueOf(9)}}}, 132 {testRowGetter{6}, [][]parquet.Value{{parquet.ValueOf(19), parquet.ValueOf(20), parquet.ValueOf(21)}}}, 133 {testRowGetter{7}, [][]parquet.Value{{parquet.ValueOf(22), parquet.ValueOf(23), parquet.ValueOf(24)}}}, 134 {testRowGetter{8}, [][]parquet.Value{{parquet.ValueOf(25), parquet.ValueOf(26), parquet.ValueOf(27)}}}, 135 }, 136 }, 137 { 138 name: "single row group skip through page", 139 rows: []testRowGetter{ 140 {1}, 141 }, 142 rgs: [][]repeatedTestRow{ 143 { 144 {[]int64{1, 2, 3}}, 145 {[]int64{4, 5, 6}}, 146 {[]int64{7, 8, 9}}, 147 }, 148 }, 149 expected: []RepeatedRow[testRowGetter]{ 150 {testRowGetter{1}, [][]parquet.Value{{parquet.ValueOf(4), parquet.ValueOf(5), parquet.ValueOf(6)}}}, 151 }, 152 }, 153 { 154 name: "multiple row group skip within page", 155 rows: []testRowGetter{ 156 {0}, 157 {2}, 158 {5}, 159 {7}, 160 }, 161 rgs: [][]repeatedTestRow{ 162 { 163 {[]int64{1, 2, 3}}, // 0 164 {[]int64{4, 5, 6}}, 165 {[]int64{7, 8, 9}}, // 2 166 {[]int64{0, 0, 0}}, 167 {[]int64{0, 0, 0}}, 168 }, 169 { 170 {[]int64{10, 11, 12}}, // 5 171 {[]int64{0, 0, 0}}, 172 {[]int64{13, 14, 15}}, // 7 173 174 }, 175 }, 176 expected: []RepeatedRow[testRowGetter]{ 177 {testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}}, 178 {testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7), parquet.ValueOf(8), parquet.ValueOf(9)}}}, 179 {testRowGetter{5}, [][]parquet.Value{{parquet.ValueOf(10), parquet.ValueOf(11), parquet.ValueOf(12)}}}, 180 {testRowGetter{7}, [][]parquet.Value{{parquet.ValueOf(13), parquet.ValueOf(14), parquet.ValueOf(15)}}}, 181 }, 182 }, 183 { 184 name: "multiple row group skip within and through pages and row group", 185 rows: []testRowGetter{ 186 {0}, 187 {2}, 188 {8}, 189 {10}, 190 }, 191 rgs: [][]repeatedTestRow{ 192 { 193 {[]int64{1, 2, 3}}, // 0 194 {[]int64{4, 5, 6}}, 195 {[]int64{7, 8, 9}}, // 2 196 {[]int64{0, 0, 0}}, 197 {[]int64{0, 0, 0}}, 198 }, 199 { 200 {[]int64{0, 0, 0}}, 201 {[]int64{0, 0, 0}}, 202 {[]int64{0, 0, 0}}, 203 }, 204 { 205 {[]int64{10, 11, 12}}, // 8 206 {[]int64{0, 0, 0}}, 207 {[]int64{13, 14, 15}}, // 10 208 209 }, 210 }, 211 expected: []RepeatedRow[testRowGetter]{ 212 {testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}}, 213 {testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7), parquet.ValueOf(8), parquet.ValueOf(9)}}}, 214 {testRowGetter{8}, [][]parquet.Value{{parquet.ValueOf(10), parquet.ValueOf(11), parquet.ValueOf(12)}}}, 215 {testRowGetter{10}, [][]parquet.Value{{parquet.ValueOf(13), parquet.ValueOf(14), parquet.ValueOf(15)}}}, 216 }, 217 }, 218 { 219 name: "multiple row group skip within and through pages and row group mix repeated", 220 rows: []testRowGetter{ 221 {0}, 222 {2}, 223 {8}, 224 {10}, 225 }, 226 rgs: [][]repeatedTestRow{ 227 { 228 {[]int64{1, 2, 3}}, // 0 229 {[]int64{4, 5}}, 230 {[]int64{7}}, // 2 231 {[]int64{0}}, 232 {[]int64{0, 0, 0}}, 233 }, 234 { 235 {[]int64{0, 0, 0}}, 236 {[]int64{0, 0, 0}}, 237 {[]int64{0, 0, 0}}, 238 }, 239 { 240 {[]int64{10, 11, 12}}, // 8 241 {[]int64{0, 0, 0}}, 242 {[]int64{13, 14}}, // 10 243 244 }, 245 }, 246 expected: []RepeatedRow[testRowGetter]{ 247 {testRowGetter{0}, [][]parquet.Value{{parquet.ValueOf(1), parquet.ValueOf(2), parquet.ValueOf(3)}}}, 248 {testRowGetter{2}, [][]parquet.Value{{parquet.ValueOf(7)}}}, 249 {testRowGetter{8}, [][]parquet.Value{{parquet.ValueOf(10), parquet.ValueOf(11), parquet.ValueOf(12)}}}, 250 {testRowGetter{10}, [][]parquet.Value{{parquet.ValueOf(13), parquet.ValueOf(14)}}}, 251 }, 252 }, 253 } { 254 tc := tc 255 for _, readSize := range []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10000} { 256 tc.readSize = readSize 257 t.Run(tc.name+fmt.Sprintf("_rs_%d", readSize), func(t *testing.T) { 258 var groups []parquet.RowGroup 259 for _, rg := range tc.rgs { 260 buffer := parquet.NewBuffer() 261 for _, row := range rg { 262 require.NoError(t, buffer.Write(row)) 263 } 264 groups = append(groups, buffer) 265 } 266 actual := readRepeatedRowIterator(t, 267 NewRepeatedRowIterator(context.Background(), 268 iter.NewSliceIterator(tc.rows), groups, 0)) 269 if diff := cmp.Diff(tc.expected, actual, int64ParquetComparer()); diff != "" { 270 t.Errorf("result mismatch (-want +got):\n%s", diff) 271 } 272 }) 273 } 274 275 } 276 } 277 278 func Test_RepeatedRowIterator_Cancellation(t *testing.T) { 279 var groups []parquet.RowGroup 280 for _, rg := range [][]repeatedTestRow{ 281 { 282 {[]int64{1, 1, 1, 1}}, 283 {[]int64{2}}, 284 {[]int64{3, 4}}, 285 }, 286 } { 287 buffer := parquet.NewBuffer() 288 for _, row := range rg { 289 require.NoError(t, buffer.Write(row)) 290 } 291 groups = append(groups, buffer) 292 } 293 294 rows := iter.NewSliceIterator([]testRowGetter{{0}}) 295 ctx, cancel := context.WithCancel(context.Background()) 296 cancel() 297 it := NewRepeatedRowIterator(ctx, rows, groups, 0) 298 assert.False(t, it.Next()) 299 assert.Error(t, context.Canceled, it.Err()) 300 assert.NoError(t, it.Close()) 301 } 302 303 type multiColumnItem struct { 304 X int64 305 Y int64 306 } 307 308 type multiColumnRepeatedTestRow struct { 309 List []multiColumnItem 310 } 311 312 func Test_RepeatedRowPageIterator_MultipleColumns(t *testing.T) { 313 for _, tc := range []struct { 314 name string 315 rows []testRowGetter 316 rgs [][]multiColumnRepeatedTestRow 317 expected []RepeatedRow[testRowGetter] 318 }{ 319 { 320 name: "single row group", 321 rows: []testRowGetter{ 322 {0}, 323 }, 324 rgs: [][]multiColumnRepeatedTestRow{ 325 { 326 { 327 List: []multiColumnItem{ 328 {1, 2}, 329 {3, 4}, 330 {5, 6}, 331 }, 332 }, 333 }, 334 }, 335 expected: []RepeatedRow[testRowGetter]{ 336 { 337 testRowGetter{0}, 338 [][]parquet.Value{ 339 {parquet.ValueOf(1), parquet.ValueOf(3), parquet.ValueOf(5)}, 340 {parquet.ValueOf(2), parquet.ValueOf(4), parquet.ValueOf(6)}, 341 }, 342 }, 343 }, 344 }, 345 { 346 name: "row group and page seek", 347 rows: []testRowGetter{ 348 {1}, 349 {4}, 350 {7}, 351 }, 352 rgs: [][]multiColumnRepeatedTestRow{ 353 { 354 {List: []multiColumnItem{{0, 0}, {0, 0}}}, 355 {List: []multiColumnItem{{1, 2}, {3, 4}}}, // 1 356 {List: []multiColumnItem{{0, 0}, {0, 0}}}, 357 }, 358 { 359 {List: []multiColumnItem{{0, 0}, {0, 0}}}, 360 {List: []multiColumnItem{{5, 6}, {7, 8}}}, // 4 361 {List: []multiColumnItem{{0, 0}, {0, 0}}}, 362 {List: []multiColumnItem{{0, 0}, {0, 0}}}, 363 {List: []multiColumnItem{{9, 10}}}, // 7 364 }, 365 }, 366 expected: []RepeatedRow[testRowGetter]{ 367 { 368 testRowGetter{1}, 369 [][]parquet.Value{ 370 {parquet.ValueOf(1), parquet.ValueOf(3)}, 371 {parquet.ValueOf(2), parquet.ValueOf(4)}, 372 }, 373 }, 374 { 375 testRowGetter{4}, 376 [][]parquet.Value{ 377 {parquet.ValueOf(5), parquet.ValueOf(7)}, 378 {parquet.ValueOf(6), parquet.ValueOf(8)}, 379 }, 380 }, 381 { 382 testRowGetter{7}, 383 [][]parquet.Value{ 384 {parquet.ValueOf(9)}, 385 {parquet.ValueOf(10)}, 386 }, 387 }, 388 }, 389 }, 390 } { 391 tc := tc 392 t.Run(tc.name, func(t *testing.T) { 393 var groups []parquet.RowGroup 394 for _, rg := range tc.rgs { 395 buffer := parquet.NewBuffer() 396 for _, row := range rg { 397 require.NoError(t, buffer.Write(row)) 398 } 399 groups = append(groups, buffer) 400 } 401 actual := readRepeatedRowIterator(t, 402 NewRepeatedRowIterator(context.Background(), 403 iter.NewSliceIterator(tc.rows), groups, 0, 1), 404 ) 405 if diff := cmp.Diff(tc.expected, actual, int64ParquetComparer()); diff != "" { 406 t.Errorf("result mismatch (-want +got):\n%s", diff) 407 } 408 }) 409 } 410 } 411 412 func readRepeatedRowIterator(t *testing.T, it iter.Iterator[RepeatedRow[testRowGetter]]) []RepeatedRow[testRowGetter] { 413 defer func() { 414 require.NoError(t, it.Close()) 415 }() 416 var result []RepeatedRow[testRowGetter] 417 for it.Next() { 418 current := RepeatedRow[testRowGetter]{ 419 Row: it.At().Row, 420 Values: make([][]parquet.Value, len(it.At().Values)), 421 } 422 for i, v := range it.At().Values { 423 current.Values[i] = make([]parquet.Value, len(v)) 424 copy(current.Values[i], v) 425 } 426 if len(result) > 0 && current.Row.RowNumber() == result[len(result)-1].Row.RowNumber() { 427 for i, v := range current.Values { 428 result[len(result)-1].Values[i] = append(result[len(result)-1].Values[i], v...) 429 } 430 continue 431 } 432 433 result = append(result, current) 434 } 435 require.NoError(t, it.Err()) 436 return result 437 } 438 439 func int64ParquetComparer() cmp.Option { 440 return cmp.Comparer(func(x, y parquet.Value) bool { 441 return x.Int64() == y.Int64() 442 }) 443 }