github.com/tobgu/qframe@v0.4.0/qframe_test.go (about) 1 package qframe_test 2 3 import ( 4 "bytes" 5 "fmt" 6 "math" 7 "reflect" 8 "regexp" 9 "strconv" 10 "strings" 11 "testing" 12 13 "github.com/tobgu/qframe/config/rolling" 14 15 "io" 16 "log" 17 18 "github.com/tobgu/qframe" 19 "github.com/tobgu/qframe/aggregation" 20 "github.com/tobgu/qframe/config/csv" 21 "github.com/tobgu/qframe/config/eval" 22 "github.com/tobgu/qframe/config/groupby" 23 "github.com/tobgu/qframe/config/newqf" 24 "github.com/tobgu/qframe/types" 25 ) 26 27 func assertEquals(t *testing.T, expected, actual qframe.QFrame) { 28 t.Helper() 29 equal, reason := expected.Equals(actual) 30 if !equal { 31 t.Errorf("QFrames not equal, %s.\nexpected=\n%s\nactual=\n%s", reason, expected, actual) 32 } 33 } 34 35 func assertNotErr(t *testing.T, err error) { 36 t.Helper() 37 if err != nil { 38 t.Errorf("Unexpected error: %s", err) 39 } 40 } 41 42 func assertErr(t *testing.T, err error, expectedErr string) { 43 t.Helper() 44 if err == nil { 45 t.Errorf("Expected error, was nil") 46 return 47 } 48 49 if !strings.Contains(strings.ToLower(err.Error()), strings.ToLower(expectedErr)) { 50 t.Errorf("Expected error to contain: %s, was: %s", expectedErr, err.Error()) 51 } 52 } 53 54 func assertTrue(t *testing.T, b bool) { 55 t.Helper() 56 if !b { 57 t.Error("Expected true") 58 } 59 } 60 61 func TestQFrame_FilterAgainstConstant(t *testing.T) { 62 table := []struct { 63 name string 64 clause qframe.FilterClause 65 input interface{} 66 configs []newqf.ConfigFunc 67 expected interface{} 68 }{ 69 { 70 name: "built in greater than", 71 clause: qframe.Filter{Column: "COL1", Comparator: ">", Arg: 3}, 72 input: []int{1, 2, 3, 4, 5}, 73 expected: []int{4, 5}}, 74 { 75 name: "built in 'in' with int", 76 clause: qframe.Filter{Column: "COL1", Comparator: "in", Arg: []int{3, 5}}, 77 input: []int{1, 2, 3, 4, 5}, 78 expected: []int{3, 5}}, 79 { 80 name: "built in 'in' with float (truncated to int)", 81 clause: qframe.Filter{Column: "COL1", Comparator: "in", Arg: []float64{3.4, 5.1}}, 82 input: []int{1, 2, 3, 4, 5}, 83 expected: []int{3, 5}}, 84 { 85 name: "combined with OR", 86 clause: qframe.Or(qframe.Filter{Column: "COL1", Comparator: ">", Arg: 4}, qframe.Filter{Column: "COL1", Comparator: "<", Arg: 2}), 87 input: []int{1, 2, 3, 4, 5}, 88 expected: []int{1, 5}}, 89 { 90 name: "inverse", 91 clause: qframe.Filter{Column: "COL1", Comparator: ">", Arg: 4, Inverse: true}, 92 input: []int{1, 2, 3, 4, 5}, 93 expected: []int{1, 2, 3, 4}}, 94 { 95 name: "all_bits", 96 clause: qframe.Filter{Column: "COL1", Comparator: "all_bits", Arg: 6}, 97 input: []int{7, 2, 4, 1, 6}, 98 expected: []int{7, 6}}, 99 { 100 name: "all_bits inverse", 101 clause: qframe.Filter{Column: "COL1", Comparator: "all_bits", Arg: 6, Inverse: true}, 102 input: []int{7, 2, 4, 1, 6}, 103 expected: []int{2, 4, 1}}, 104 { 105 name: "any_bits", 106 clause: qframe.Filter{Column: "COL1", Comparator: "any_bits", Arg: 6}, 107 input: []int{7, 2, 4, 1, 6}, 108 expected: []int{7, 2, 4, 6}}, 109 { 110 name: "boolean equals", 111 clause: qframe.Filter{Column: "COL1", Comparator: "=", Arg: true}, 112 input: []bool{true, false, true}, 113 expected: []bool{true, true}}, 114 { 115 name: "enum custom function", 116 clause: qframe.Filter{ 117 Column: "COL1", 118 Comparator: func(s *string) bool { return *s == "a" }}, 119 input: []string{"a", "b", "c"}, 120 expected: []string{"a"}, 121 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": {"a", "b", "c"}})}}, 122 { 123 name: "float custom function", 124 clause: qframe.Filter{ 125 Column: "COL1", 126 Comparator: func(f float64) bool { return f > 1.0 }}, 127 input: []float64{1.0, 1.25}, 128 expected: []float64{1.25}}, 129 { 130 name: "int column against float arg (float will be truncated)", 131 clause: qframe.Filter{Column: "COL1", Comparator: ">=", Arg: 1.5}, 132 input: []int{0, 1, 2}, 133 expected: []int{1, 2}}, 134 } 135 136 for i, tc := range table { 137 t.Run(fmt.Sprintf("Filter %d", i), func(t *testing.T) { 138 input := qframe.New(map[string]interface{}{"COL1": tc.input}) 139 output := input.Filter(tc.clause) 140 assertNotErr(t, output.Err) 141 expected := qframe.New(map[string]interface{}{"COL1": tc.expected}) 142 assertEquals(t, expected, output) 143 }) 144 } 145 } 146 147 func TestQFrame_FilterColConstNull(t *testing.T) { 148 // For null columns all comparisons will always produce false except for != which will always produce true 149 comparisons := []struct { 150 operation string 151 expectCount int 152 }{ 153 {operation: "<", expectCount: 1}, 154 {operation: ">=", expectCount: 1}, 155 {operation: "=", expectCount: 1}, 156 {operation: "!=", expectCount: 2}, 157 } 158 159 a, b := "a", "b" 160 table := []struct { 161 name string 162 input interface{} 163 isEnum bool 164 arg interface{} 165 }{ 166 {name: "string", input: []*string{&a, &b, nil}, arg: "b"}, 167 {name: "enum", input: []*string{&a, &b, nil}, arg: "b", isEnum: true}, 168 {name: "float", input: []float64{1.0, 2.0, math.NaN()}, arg: 2.0}, 169 } 170 171 for _, comp := range comparisons { 172 for _, tc := range table { 173 t.Run(fmt.Sprintf("%s, %s", tc.name, comp.operation), func(t *testing.T) { 174 enums := map[string][]string{} 175 if tc.isEnum { 176 enums["COL1"] = nil 177 } 178 input := qframe.New(map[string]interface{}{"COL1": tc.input}, newqf.Enums(enums)) 179 output := input.Filter(qframe.Filter{Column: "COL1", Comparator: comp.operation, Arg: tc.arg}) 180 assertNotErr(t, output.Err) 181 if output.Len() != comp.expectCount { 182 fmt.Println(output.String()) 183 t.Errorf("Unexpected frame length: %d", output.Len()) 184 } 185 }) 186 } 187 } 188 } 189 190 func TestQFrame_FilterColColNull(t *testing.T) { 191 // For null columns all comparisons will always produce false except for != which will always produce true 192 comparisons := []struct { 193 operation string 194 expectCount int 195 }{ 196 {operation: "<", expectCount: 0}, 197 {operation: ">=", expectCount: 1}, 198 {operation: "=", expectCount: 1}, 199 {operation: "!=", expectCount: 3}, 200 } 201 202 a, b := "a", "b" 203 table := []struct { 204 name string 205 inputCol1 interface{} 206 inputCol2 interface{} 207 isEnum bool 208 }{ 209 {name: "string", inputCol1: []*string{&a, &b, nil, nil}, inputCol2: []*string{&a, nil, nil, &b}}, 210 {name: "enum", inputCol1: []*string{&a, &b, nil, nil}, inputCol2: []*string{&a, nil, nil, &b}, isEnum: true}, 211 {name: "enum", inputCol1: []float64{1.0, 2.0, math.NaN(), math.NaN()}, inputCol2: []float64{1.0, math.NaN(), math.NaN(), 2.0}}, 212 } 213 214 for _, comp := range comparisons { 215 for _, tc := range table { 216 t.Run(fmt.Sprintf("%s, %s", tc.name, comp.operation), func(t *testing.T) { 217 enums := map[string][]string{} 218 if tc.isEnum { 219 enums["COL1"] = nil 220 enums["COL2"] = nil 221 } 222 input := qframe.New(map[string]interface{}{"COL1": tc.inputCol1, "COL2": tc.inputCol2}, newqf.Enums(enums)) 223 output := input.Filter(qframe.Filter{Column: "COL1", Comparator: comp.operation, Arg: col("COL2")}) 224 assertNotErr(t, output.Err) 225 if output.Len() != comp.expectCount { 226 fmt.Println(output.String()) 227 t.Errorf("Unexpected frame length: %d", output.Len()) 228 } 229 }) 230 } 231 } 232 } 233 234 func TestQFrame_FilterIsNull(t *testing.T) { 235 a, b := "a", "b" 236 table := []struct { 237 input interface{} 238 expected interface{} 239 isEnum bool 240 inverse bool 241 operation string 242 }{ 243 {operation: "isnull", input: []*string{&a, nil, nil, &b}, expected: []*string{nil, nil}}, 244 {operation: "isnotnull", input: []*string{&a, nil, nil, &b}, expected: []*string{nil, nil}, inverse: true}, 245 {operation: "isnotnull", input: []*string{&a, nil, nil, &b}, expected: []*string{&a, &b}}, 246 {operation: "isnull", input: []*string{&a, nil, nil, &b}, expected: []*string{&a, &b}, inverse: true}, 247 {operation: "isnull", input: []*string{&a, nil, nil, &b}, expected: []*string{nil, nil}, isEnum: true}, 248 {operation: "isnotnull", input: []*string{&a, nil, nil, &b}, expected: []*string{&a, &b}, isEnum: true}, 249 {operation: "isnull", input: []float64{1, math.NaN(), 2}, expected: []float64{math.NaN()}}, 250 {operation: "isnotnull", input: []float64{1, math.NaN(), 2}, expected: []float64{1, 2}}, 251 {operation: "isnull", input: []int{1, 2, 3}, expected: []int{}}, 252 {operation: "isnotnull", input: []int{1, 2, 3}, expected: []int{1, 2, 3}}, 253 } 254 255 for _, tc := range table { 256 t.Run(fmt.Sprintf("%v, %s", reflect.TypeOf(tc.input), tc.operation), func(t *testing.T) { 257 enums := map[string][]string{} 258 if tc.isEnum { 259 enums["COL1"] = nil 260 } 261 input := qframe.New(map[string]interface{}{"COL1": tc.input}, newqf.Enums(enums)) 262 expected := qframe.New(map[string]interface{}{"COL1": tc.expected}, newqf.Enums(enums)) 263 output := input.Filter(qframe.Filter{Column: "COL1", Comparator: tc.operation, Inverse: tc.inverse}) 264 assertNotErr(t, output.Err) 265 assertEquals(t, expected, output) 266 }) 267 } 268 } 269 270 func TestQFrame_FilterNullArg(t *testing.T) { 271 // This should result in an error 272 table := []struct { 273 name string 274 input interface{} 275 isEnum bool 276 arg interface{} 277 }{ 278 {name: "string", input: []string{"a"}, arg: nil}, 279 {name: "enum", input: []string{"a"}, arg: nil, isEnum: true}, 280 {name: "float", input: []float64{1.0}, arg: math.NaN()}, 281 } 282 283 for _, tc := range table { 284 t.Run(tc.name, func(t *testing.T) { 285 enums := map[string][]string{} 286 if tc.isEnum { 287 enums["COL1"] = nil 288 } 289 290 input := qframe.New(map[string]interface{}{"COL1": tc.input}, newqf.Enums(enums)) 291 output := input.Filter(qframe.Filter{Column: "COL1", Comparator: "<", Arg: tc.arg}) 292 assertErr(t, output.Err, "filter") 293 }) 294 } 295 } 296 297 func TestQFrame_FilterAgainstColumn(t *testing.T) { 298 table := []struct { 299 name string 300 comparator interface{} 301 input map[string]interface{} 302 expected map[string]interface{} 303 configs []newqf.ConfigFunc 304 }{ 305 { 306 name: "built in int compare", 307 comparator: ">", 308 input: map[string]interface{}{"COL1": []int{1, 2, 3}, "COL2": []int{10, 1, 10}}, 309 expected: map[string]interface{}{"COL1": []int{1, 3}, "COL2": []int{10, 10}}}, 310 { 311 name: "int with float compare possible", 312 comparator: "=", 313 input: map[string]interface{}{"COL1": []int{1, 2, 3}, "COL2": []float64{1.0, 2.5, 3.0}}, 314 expected: map[string]interface{}{"COL1": []int{1, 3}, "COL2": []float64{1.0, 3.0}}}, 315 { 316 name: "float with int compare possible", 317 comparator: "=", 318 input: map[string]interface{}{"COL1": []float64{1.0, 2.5, 3.0}, "COL2": []int{1, 2, 3}}, 319 expected: map[string]interface{}{"COL1": []float64{1.0, 3.0}, "COL2": []int{1, 3}}}, 320 { 321 name: "int with float neq NaN compare possible", 322 comparator: "!=", 323 input: map[string]interface{}{"COL1": []int{1, 2, 3}, "COL2": []float64{1.0, math.NaN(), 3.0}}, 324 expected: map[string]interface{}{"COL1": []int{2}, "COL2": []float64{math.NaN()}}}, 325 { 326 name: "float with int neq NaN compare possible", 327 comparator: "!=", 328 input: map[string]interface{}{"COL1": []float64{1.0, math.NaN(), 3.0}, "COL2": []int{1, 2, 3}}, 329 expected: map[string]interface{}{"COL1": []float64{math.NaN()}, "COL2": []int{2}}}, 330 { 331 name: "custom int compare", 332 comparator: func(a, b int) bool { return a > b }, 333 input: map[string]interface{}{"COL1": []int{1, 2, 3}, "COL2": []int{10, 1, 10}}, 334 expected: map[string]interface{}{"COL1": []int{1, 3}, "COL2": []int{10, 10}}}, 335 { 336 name: "built in bool compare", 337 comparator: "=", 338 input: map[string]interface{}{"COL1": []bool{true, false, false}, "COL2": []bool{true, true, false}}, 339 expected: map[string]interface{}{"COL1": []bool{true, false}, "COL2": []bool{true, false}}}, 340 { 341 name: "custom bool compare", 342 comparator: func(a, b bool) bool { return a == b }, 343 input: map[string]interface{}{"COL1": []bool{true, false, false}, "COL2": []bool{true, true, false}}, 344 expected: map[string]interface{}{"COL1": []bool{true, false}, "COL2": []bool{true, false}}}, 345 { 346 name: "built in float compare", 347 comparator: "<", 348 input: map[string]interface{}{"COL1": []float64{1, 2, 3}, "COL2": []float64{10, 1, 10}}, 349 expected: map[string]interface{}{"COL1": []float64{2}, "COL2": []float64{1}}}, 350 { 351 name: "custom float compare", 352 comparator: func(a, b float64) bool { return a < b }, 353 input: map[string]interface{}{"COL1": []float64{1, 2, 3}, "COL2": []float64{10, 1, 10}}, 354 expected: map[string]interface{}{"COL1": []float64{2}, "COL2": []float64{1}}}, 355 { 356 name: "built in string compare", 357 comparator: "<", 358 input: map[string]interface{}{"COL1": []string{"a", "b", "c"}, "COL2": []string{"o", "a", "q"}}, 359 expected: map[string]interface{}{"COL1": []string{"b"}, "COL2": []string{"a"}}}, 360 { 361 name: "custom string compare", 362 comparator: func(a, b *string) bool { return *a < *b }, 363 input: map[string]interface{}{"COL1": []string{"a", "b", "c"}, "COL2": []string{"o", "a", "q"}}, 364 expected: map[string]interface{}{"COL1": []string{"b"}, "COL2": []string{"a"}}}, 365 { 366 name: "built in enum compare", 367 comparator: "<", 368 input: map[string]interface{}{"COL1": []string{"a", "b", "c"}, "COL2": []string{"o", "a", "q"}}, 369 expected: map[string]interface{}{"COL1": []string{"b"}, "COL2": []string{"a"}}}, 370 { 371 name: "custom enum compare", 372 comparator: func(a, b *string) bool { return *a < *b }, 373 input: map[string]interface{}{"COL1": []string{"a", "b", "c"}, "COL2": []string{"o", "a", "q"}}, 374 expected: map[string]interface{}{"COL1": []string{"b"}, "COL2": []string{"a"}}, 375 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{ 376 "COL1": {"a", "b", "c", "o", "q"}, 377 "COL2": {"a", "b", "c", "o", "q"}, 378 })}}, 379 } 380 381 for _, tc := range table { 382 t.Run(fmt.Sprintf("Filter %s", tc.name), func(t *testing.T) { 383 input := qframe.New(tc.input, tc.configs...) 384 output := input.Filter(qframe.Filter{Comparator: tc.comparator, Column: "COL2", Arg: col("COL1")}) 385 expected := qframe.New(tc.expected, tc.configs...) 386 assertEquals(t, expected, output) 387 }) 388 } 389 } 390 391 func TestQFrame_Sort(t *testing.T) { 392 a, b := "a", "b" 393 table := []struct { 394 orders []qframe.Order 395 expected qframe.QFrame 396 input map[string]interface{} 397 configs []newqf.ConfigFunc 398 }{ 399 { 400 orders: []qframe.Order{{Column: "COL1"}}, 401 expected: qframe.New(map[string]interface{}{ 402 "COL1": []int{0, 1, 2, 3}, 403 "COL2": []int{3, 2, 1, 1}})}, 404 { 405 orders: []qframe.Order{{Column: "COL1", Reverse: true}}, 406 expected: qframe.New(map[string]interface{}{ 407 "COL1": []int{3, 2, 1, 0}, 408 "COL2": []int{1, 1, 2, 3}})}, 409 { 410 orders: []qframe.Order{{Column: "COL2"}, {Column: "COL1"}}, 411 expected: qframe.New(map[string]interface{}{ 412 "COL1": []int{2, 3, 1, 0}, 413 "COL2": []int{1, 1, 2, 3}})}, 414 { 415 orders: []qframe.Order{{Column: "COL1"}}, 416 expected: qframe.New(map[string]interface{}{ 417 "COL1": []bool{false, true, true}}), 418 input: map[string]interface{}{ 419 "COL1": []bool{true, false, true}}}, 420 { 421 orders: []qframe.Order{{Column: "COL1"}}, 422 expected: qframe.New(map[string]interface{}{ 423 "COL1": []*string{nil, &b, &a}}, 424 newqf.Enums(map[string][]string{"COL1": {"b", "a"}})), 425 input: map[string]interface{}{ 426 "COL1": []*string{&b, nil, &a}}, 427 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": {"b", "a"}})}}, 428 { 429 orders: []qframe.Order{{Column: "COL1", NullLast: true}}, 430 expected: qframe.New(map[string]interface{}{ 431 "COL1": []*string{&b, &a, nil}}, 432 newqf.Enums(map[string][]string{"COL1": {"b", "a"}})), 433 input: map[string]interface{}{ 434 "COL1": []*string{&b, nil, &a}}, 435 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": {"b", "a"}})}}, 436 } 437 438 for i, tc := range table { 439 t.Run(fmt.Sprintf("Sort %d", i), func(t *testing.T) { 440 if tc.input == nil { 441 tc.input = map[string]interface{}{ 442 "COL1": []int{0, 1, 3, 2}, 443 "COL2": []int{3, 2, 1, 1}} 444 } 445 a := qframe.New(tc.input, tc.configs...) 446 b := a.Sort(tc.orders...) 447 assertEquals(t, tc.expected, b) 448 }) 449 } 450 } 451 452 func TestQFrame_SortNull(t *testing.T) { 453 a, b, c := "a", "b", "c" 454 stringIn := map[string]interface{}{ 455 "COL1": []*string{&b, nil, &a, nil, &c, &a, nil}, 456 } 457 458 floatIn := map[string]interface{}{ 459 "COL1": []float64{1.0, math.NaN(), -1.0, math.NaN()}, 460 } 461 462 table := []struct { 463 in map[string]interface{} 464 orders []qframe.Order 465 expected map[string]interface{} 466 }{ 467 { 468 stringIn, 469 []qframe.Order{{Column: "COL1"}}, 470 map[string]interface{}{ 471 "COL1": []*string{nil, nil, nil, &a, &a, &b, &c}, 472 }, 473 }, 474 { 475 stringIn, 476 []qframe.Order{{Column: "COL1", NullLast: true}}, 477 map[string]interface{}{ 478 "COL1": []*string{&a, &a, &b, &c, nil, nil, nil}, 479 }, 480 }, 481 { 482 stringIn, 483 []qframe.Order{{Column: "COL1", Reverse: true}}, 484 map[string]interface{}{ 485 "COL1": []*string{&c, &b, &a, &a, nil, nil, nil}, 486 }, 487 }, 488 { 489 floatIn, 490 []qframe.Order{{Column: "COL1"}}, 491 map[string]interface{}{ 492 "COL1": []float64{math.NaN(), math.NaN(), -1.0, 1.0}, 493 }, 494 }, 495 { 496 floatIn, 497 []qframe.Order{{Column: "COL1", Reverse: true}}, 498 map[string]interface{}{ 499 "COL1": []float64{1.0, -1.0, math.NaN(), math.NaN()}, 500 }, 501 }, 502 { 503 floatIn, 504 []qframe.Order{{Column: "COL1", NullLast: true}}, 505 map[string]interface{}{ 506 "COL1": []float64{-1.0, 1.0, math.NaN(), math.NaN()}, 507 }, 508 }, 509 } 510 511 for i, tc := range table { 512 t.Run(fmt.Sprintf("Sort %d", i), func(t *testing.T) { 513 in := qframe.New(tc.in) 514 out := in.Sort(tc.orders...) 515 assertNotErr(t, out.Err) 516 assertEquals(t, qframe.New(tc.expected), out) 517 }) 518 } 519 } 520 521 func TestQFrame_SortStability(t *testing.T) { 522 a := qframe.New(map[string]interface{}{ 523 "COL1": []int{0, 1, 3, 2}, 524 "COL2": []int{1, 1, 1, 1}, 525 }) 526 527 table := []struct { 528 orders []qframe.Order 529 expected qframe.QFrame 530 }{ 531 { 532 []qframe.Order{{Column: "COL2", Reverse: true}, {Column: "COL1"}}, 533 qframe.New(map[string]interface{}{ 534 "COL1": []int{0, 1, 2, 3}, 535 "COL2": []int{1, 1, 1, 1}}), 536 }, 537 } 538 539 for i, tc := range table { 540 t.Run(fmt.Sprintf("Sort %d", i), func(t *testing.T) { 541 b := a.Sort(tc.orders...) 542 assertEquals(t, tc.expected, b) 543 }) 544 } 545 } 546 547 func TestQFrame_Distinct(t *testing.T) { 548 table := []struct { 549 input map[string]interface{} 550 expected map[string]interface{} 551 columns []string 552 }{ 553 { 554 input: map[string]interface{}{ 555 "COL1": []int{0, 1, 0, 1}, 556 "COL2": []int{0, 1, 0, 1}}, 557 expected: map[string]interface{}{ 558 "COL1": []int{0, 1}, 559 "COL2": []int{0, 1}}, 560 columns: []string{"COL1", "COL2"}, 561 }, 562 { 563 input: map[string]interface{}{ 564 "COL1": []int{}, 565 "COL2": []int{}}, 566 expected: map[string]interface{}{ 567 "COL1": []int{}, 568 "COL2": []int{}}, 569 columns: []string{"COL1", "COL2"}, 570 }, 571 } 572 573 for i, tc := range table { 574 t.Run(fmt.Sprintf("Distinct %d", i), func(t *testing.T) { 575 in := qframe.New(tc.input) 576 out := in.Distinct() 577 assertEquals(t, qframe.New(tc.expected), out.Sort(colNamesToOrders("COL1", "COL2")...)) 578 }) 579 } 580 } 581 582 func incSlice(size, step int) []int { 583 result := make([]int, size) 584 for i := range result { 585 result[i] = step * i 586 } 587 return result 588 } 589 590 func TestQFrame_GroupByAggregate(t *testing.T) { 591 ownSum := func(col []int) int { 592 result := 0 593 for _, x := range col { 594 result += x 595 } 596 return result 597 } 598 599 table := []struct { 600 name string 601 input map[string]interface{} 602 expected map[string]interface{} 603 groupColumns []string 604 aggregations []qframe.Aggregation 605 }{ 606 { 607 name: "built in aggregation function", 608 input: map[string]interface{}{ 609 "COL1": []int{0, 0, 1, 2}, 610 "COL2": []int{0, 0, 1, 1}, 611 "COL3": []int{1, 2, 5, 7}}, 612 expected: map[string]interface{}{ 613 "COL1": []int{0, 1, 2}, 614 "COL2": []int{0, 1, 1}, 615 "COL3": []int{3, 5, 7}}, 616 groupColumns: []string{"COL1", "COL2"}, 617 aggregations: []qframe.Aggregation{{Fn: "sum", Column: "COL3"}}, 618 }, 619 { 620 name: "built in max aggregation function", 621 input: map[string]interface{}{ 622 "COL1": []int{0, 0, 1, 1, 2}, 623 "COL2": []int{1, 2, 3, 5, 7}}, 624 expected: map[string]interface{}{ 625 "COL1": []int{0, 1, 2}, 626 "COL2": []int{2, 5, 7}}, 627 groupColumns: []string{"COL1"}, 628 aggregations: []qframe.Aggregation{{Fn: "max", Column: "COL2"}}, 629 }, 630 { 631 name: "built in min aggregation function", 632 input: map[string]interface{}{ 633 "COL1": []int{0, 0, 1, 1, 2}, 634 "COL2": []int{1, 2, 3, 5, 7}}, 635 expected: map[string]interface{}{ 636 "COL1": []int{0, 1, 2}, 637 "COL2": []int{1, 3, 7}}, 638 groupColumns: []string{"COL1"}, 639 aggregations: []qframe.Aggregation{{Fn: "min", Column: "COL2"}}, 640 }, 641 { 642 name: "combined max and min aggregation", 643 input: map[string]interface{}{ 644 "COL1": []int{0, 0, 1, 1, 2}, 645 "COL2": []int{1, 2, 3, 5, 7}}, 646 expected: map[string]interface{}{ 647 "COL1": []int{0, 1, 2}, 648 "min_COL2": []int{1, 3, 7}, 649 "max_COL2": []int{2, 5, 7}}, 650 groupColumns: []string{"COL1"}, 651 aggregations: []qframe.Aggregation{ 652 {Fn: "max", Column: "COL2", As: "max_COL2"}, 653 {Fn: "min", Column: "COL2", As: "min_COL2"}, 654 }, 655 }, 656 { 657 name: "user defined aggregation function", 658 input: map[string]interface{}{ 659 "COL1": []int{0, 0, 1, 1}, 660 "COL2": []int{1, 2, 5, 7}}, 661 expected: map[string]interface{}{ 662 "COL1": []int{0, 1}, 663 "COL2": []int{3, 12}}, 664 groupColumns: []string{"COL1"}, 665 aggregations: []qframe.Aggregation{{Fn: ownSum, Column: "COL2"}}, 666 }, 667 { 668 name: "empty qframe", 669 input: map[string]interface{}{ 670 "COL1": []int{}, 671 "COL2": []int{}}, 672 expected: map[string]interface{}{ 673 "COL1": []int{}, 674 "COL2": []int{}}, 675 groupColumns: []string{"COL1"}, 676 aggregations: []qframe.Aggregation{{Fn: "sum", Column: "COL2"}}, 677 }, 678 { 679 name: "empty max qframe", 680 input: map[string]interface{}{ 681 "COL1": []int{}, 682 "COL2": []int{}}, 683 expected: map[string]interface{}{ 684 "COL1": []int{}, 685 "COL2": []int{}}, 686 groupColumns: []string{"COL1"}, 687 aggregations: []qframe.Aggregation{{Fn: "max", Column: "COL2"}}, 688 }, 689 { 690 name: "empty min qframe", 691 input: map[string]interface{}{ 692 "COL1": []int{}, 693 "COL2": []int{}}, 694 expected: map[string]interface{}{ 695 "COL1": []int{}, 696 "COL2": []int{}}, 697 groupColumns: []string{"COL1"}, 698 aggregations: []qframe.Aggregation{{Fn: "min", Column: "COL2"}}, 699 }, 700 { 701 // This will trigger hash table relocations 702 name: "high cardinality grouping column", 703 input: map[string]interface{}{ 704 "COL1": incSlice(1000, 1), 705 "COL2": incSlice(1000, 2)}, 706 expected: map[string]interface{}{ 707 "COL1": incSlice(1000, 1), 708 "COL2": incSlice(1000, 2)}, 709 groupColumns: []string{"COL1"}, 710 aggregations: []qframe.Aggregation{{Fn: "sum", Column: "COL2"}}, 711 }, 712 { 713 name: "aggregate booleans over all rows", 714 input: map[string]interface{}{"COL1": []bool{true, false, true}}, 715 expected: map[string]interface{}{"COL1": []bool{true}}, 716 groupColumns: []string{}, 717 aggregations: []qframe.Aggregation{{Fn: "majority", Column: "COL1"}}, 718 }, 719 { 720 name: "group by booleans", 721 input: map[string]interface{}{"COL1": []bool{true, false, true}, "COL2": []int{1, 2, 3}}, 722 expected: map[string]interface{}{"COL1": []bool{false, true}, "COL2": []int{2, 4}}, 723 groupColumns: []string{"COL1"}, 724 aggregations: []qframe.Aggregation{{Fn: "sum", Column: "COL2"}}, 725 }, 726 } 727 728 for _, tc := range table { 729 t.Run(fmt.Sprintf("GroupByAggregate %s", tc.name), func(t *testing.T) { 730 in := qframe.New(tc.input) 731 out := in.GroupBy(groupby.Columns(tc.groupColumns...)).Aggregate(tc.aggregations...) 732 733 assertEquals(t, qframe.New(tc.expected), out.Sort(colNamesToOrders(tc.groupColumns...)...)) 734 }) 735 } 736 } 737 738 func TestQFrame_GroupByAggregateFloats(t *testing.T) { 739 ownSum := func(col []float64) float64 { 740 result := 0.0 741 for _, x := range col { 742 result += x 743 } 744 return result 745 } 746 747 table := []struct { 748 name string 749 input map[string]interface{} 750 expected map[string]interface{} 751 groupColumns []string 752 aggregations []qframe.Aggregation 753 }{ 754 { 755 name: "built in aggregation function", 756 input: map[string]interface{}{ 757 "COL1": []int{0, 0, 1, 2}, 758 "COL2": []int{0, 0, 1, 1}, 759 "COL3": []float64{1.0, 2.0, 5.0, 7.0}}, 760 expected: map[string]interface{}{ 761 "COL1": []int{0, 1, 2}, 762 "COL2": []int{0, 1, 1}, 763 "COL3": []float64{3.0, 5.0, 7.0}}, 764 groupColumns: []string{"COL1", "COL2"}, 765 aggregations: []qframe.Aggregation{{Fn: "sum", Column: "COL3"}}, 766 }, 767 { 768 name: "built in count aggregation function", 769 input: map[string]interface{}{ 770 "COL1": []int{0, 0, 1, 1, 2}, 771 "COL2": []float64{1.0, 2.0, 3.0, 5.0, 7.0}}, 772 expected: map[string]interface{}{ 773 "COL1": []int{0, 1, 2}, 774 "COL2": []int{2, 2, 1}}, 775 groupColumns: []string{"COL1"}, 776 aggregations: []qframe.Aggregation{{Fn: "count", Column: "COL2"}}, 777 }, 778 { 779 name: "built in max aggregation function", 780 input: map[string]interface{}{ 781 "COL1": []int{0, 0, 1, 1, 2}, 782 "COL2": []float64{1.0, 2.0, 3.0, 5.0, 7.0}}, 783 expected: map[string]interface{}{ 784 "COL1": []int{0, 1, 2}, 785 "COL2": []float64{2.0, 5.0, 7.0}}, 786 groupColumns: []string{"COL1"}, 787 aggregations: []qframe.Aggregation{{Fn: "max", Column: "COL2"}}, 788 }, 789 { 790 name: "built in min aggregation function", 791 input: map[string]interface{}{ 792 "COL1": []int{0, 0, 1, 1, 2}, 793 "COL2": []float64{1.0, 2.0, 3.0, 5.0, 7.0}}, 794 expected: map[string]interface{}{ 795 "COL1": []int{0, 1, 2}, 796 "COL2": []float64{1.0, 3.0, 7.0}}, 797 groupColumns: []string{"COL1"}, 798 aggregations: []qframe.Aggregation{{Fn: "min", Column: "COL2"}}, 799 }, 800 { 801 name: "user defined aggregation function", 802 input: map[string]interface{}{ 803 "COL1": []int{0, 0, 1, 1}, 804 "COL2": []float64{1.0, 2.0, 5.0, 7.0}}, 805 expected: map[string]interface{}{ 806 "COL1": []int{0, 1}, 807 "COL2": []float64{3.0, 12.0}}, 808 groupColumns: []string{"COL1"}, 809 aggregations: []qframe.Aggregation{{Fn: ownSum, Column: "COL2"}}, 810 }, 811 { 812 name: "empty qframe", 813 input: map[string]interface{}{ 814 "COL1": []int{}, 815 "COL2": []float64{}}, 816 expected: map[string]interface{}{ 817 "COL1": []int{}, 818 "COL2": []float64{}}, 819 groupColumns: []string{"COL1"}, 820 aggregations: []qframe.Aggregation{{Fn: "sum", Column: "COL2"}}, 821 }, 822 { 823 name: "empty max qframe", 824 input: map[string]interface{}{ 825 "COL1": []int{}, 826 "COL2": []float64{}}, 827 expected: map[string]interface{}{ 828 "COL1": []int{}, 829 "COL2": []float64{}}, 830 groupColumns: []string{"COL1"}, 831 aggregations: []qframe.Aggregation{{Fn: "max", Column: "COL2"}}, 832 }, 833 { 834 name: "empty min qframe", 835 input: map[string]interface{}{ 836 "COL1": []int{}, 837 "COL2": []float64{}}, 838 expected: map[string]interface{}{ 839 "COL1": []int{}, 840 "COL2": []float64{}}, 841 groupColumns: []string{"COL1"}, 842 aggregations: []qframe.Aggregation{{Fn: "min", Column: "COL2"}}, 843 }, 844 } 845 846 for _, tc := range table { 847 t.Run(fmt.Sprintf("GroupByAggregate %s", tc.name), func(t *testing.T) { 848 in := qframe.New(tc.input) 849 out := in.GroupBy(groupby.Columns(tc.groupColumns...)).Aggregate(tc.aggregations...) 850 851 assertEquals(t, qframe.New(tc.expected), out.Sort(colNamesToOrders(tc.groupColumns...)...)) 852 }) 853 } 854 } 855 856 func TestQFrame_RollingWindow(t *testing.T) { 857 sum := func(col []int) int { 858 result := 0 859 for _, x := range col { 860 result += x 861 } 862 return result 863 } 864 865 table := []struct { 866 name string 867 input map[string]interface{} 868 expected map[string]interface{} 869 fn interface{} 870 configs []rolling.ConfigFunc 871 }{ 872 { 873 name: "default one element window", 874 input: map[string]interface{}{"source": []int{1, 2, 3}}, 875 expected: map[string]interface{}{"destination": []int{1, 2, 3}}, 876 fn: sum, 877 }, 878 } 879 880 for _, tc := range table { 881 t.Run(fmt.Sprintf("Rolling %s", tc.name), func(t *testing.T) { 882 in := qframe.New(tc.input) 883 884 out := in.Rolling(tc.fn, "destination", "source") 885 886 assertEquals(t, qframe.New(tc.expected), out.Select("destination")) 887 }) 888 } 889 } 890 891 func colNamesToOrders(colNames ...string) []qframe.Order { 892 result := make([]qframe.Order, len(colNames)) 893 for i, name := range colNames { 894 result[i] = qframe.Order{Column: name} 895 } 896 return result 897 } 898 899 func TestQFrame_Select(t *testing.T) { 900 table := []struct { 901 input map[string]interface{} 902 expected map[string]interface{} 903 selectCols []string 904 }{ 905 { 906 input: map[string]interface{}{ 907 "COL1": []int{0, 1}, 908 "COL2": []int{1, 2}}, 909 expected: map[string]interface{}{ 910 "COL1": []int{0, 1}}, 911 selectCols: []string{"COL1"}, 912 }, 913 { 914 input: map[string]interface{}{ 915 "COL1": []int{0, 1}, 916 "COL2": []int{1, 2}}, 917 expected: map[string]interface{}{}, 918 selectCols: []string{}, 919 }, 920 } 921 922 for i, tc := range table { 923 t.Run(fmt.Sprintf("Select %d", i), func(t *testing.T) { 924 in := qframe.New(tc.input) 925 out := in.Select(tc.selectCols...) 926 assertEquals(t, qframe.New(tc.expected), out) 927 }) 928 } 929 } 930 931 func TestQFrame_Slice(t *testing.T) { 932 table := []struct { 933 input map[string]interface{} 934 expected map[string]interface{} 935 start int 936 end int 937 err string 938 }{ 939 { 940 input: map[string]interface{}{ 941 "COL1": []float64{0.0, 1.5, 2.5, 3.5}, 942 "COL2": []int{1, 2, 3, 4}}, 943 expected: map[string]interface{}{ 944 "COL1": []float64{1.5, 2.5}, 945 "COL2": []int{2, 3}}, 946 start: 1, 947 end: 3}, 948 { 949 input: map[string]interface{}{ 950 "COL1": []int{}, 951 "COL2": []int{}}, 952 expected: map[string]interface{}{ 953 "COL1": []int{}, 954 "COL2": []int{}}, 955 start: 0, 956 end: 0}, 957 { 958 input: map[string]interface{}{"COL1": []int{1, 2}}, 959 start: -1, 960 end: 0, 961 err: "start must be non negative"}, 962 { 963 input: map[string]interface{}{"COL1": []int{1, 2}}, 964 start: 0, 965 end: 3, 966 err: "end must not be greater than"}, 967 { 968 input: map[string]interface{}{"COL1": []int{1, 2}}, 969 start: 2, 970 end: 1, 971 err: "start must not be greater than end"}, 972 } 973 974 for i, tc := range table { 975 t.Run(fmt.Sprintf("Slice %d", i), func(t *testing.T) { 976 in := qframe.New(tc.input) 977 out := in.Slice(tc.start, tc.end) 978 if tc.err != "" { 979 assertErr(t, out.Err, tc.err) 980 } else { 981 assertEquals(t, qframe.New(tc.expected), out) 982 } 983 }) 984 } 985 } 986 987 func TestQFrame_ReadCSV(t *testing.T) { 988 /* 989 Pandas reference 990 >>> data = """ 991 ... foo,bar,baz,qux 992 ... ccc,,,www 993 ... aaa,3.25,7,""" 994 >>> pd.read_csv(StringIO(data)) 995 foo bar baz qux 996 0 ccc NaN NaN www 997 1 aaa 3.25 7.0 NaN 998 */ 999 a, b, c, empty := "a", "b", "c", "" 1000 table := []struct { 1001 name string 1002 inputHeaders []string 1003 inputData string 1004 emptyNull bool 1005 ignoreEmptyLines bool 1006 expected map[string]interface{} 1007 types map[string]string 1008 expectedErr string 1009 delimiter byte 1010 rowDelimiter string 1011 }{ 1012 { 1013 name: "base", 1014 inputHeaders: []string{"foo", "bar"}, 1015 inputData: "1,2\n3,4\n", 1016 expected: map[string]interface{}{ 1017 "foo": []int{1, 3}, 1018 "bar": []int{2, 4}}, 1019 }, 1020 { 1021 name: "tab delimiter", 1022 inputHeaders: []string{"foo", "bar"}, 1023 inputData: "1\t2\n3\t4\n", 1024 expected: map[string]interface{}{ 1025 "foo": []int{1, 3}, 1026 "bar": []int{2, 4}}, 1027 delimiter: '\t', 1028 }, 1029 { 1030 name: "empty lines ignored, multiple columns", 1031 inputHeaders: []string{"foo", "bar"}, 1032 inputData: "1,2\n\n3,4\n", 1033 ignoreEmptyLines: true, 1034 expected: map[string]interface{}{ 1035 "foo": []int{1, 3}, 1036 "bar": []int{2, 4}}, 1037 }, 1038 { 1039 name: "column count mismatch results in error", 1040 inputHeaders: []string{"foo", "bar"}, 1041 inputData: "1,2\n33\n3,4\n", 1042 expectedErr: "Wrong number of columns", 1043 }, 1044 { 1045 name: "empty lines kept, single column", 1046 inputHeaders: []string{"foo"}, 1047 inputData: "1\n\n3\n", 1048 ignoreEmptyLines: false, 1049 expected: map[string]interface{}{ 1050 "foo": []float64{1, math.NaN(), 3}}, 1051 }, 1052 { 1053 name: "mixed", 1054 inputHeaders: []string{"int", "float", "bool", "string"}, 1055 inputData: "1,2.5,true,hello\n10,20.5,false,\"bye,\n bye\"", 1056 expected: map[string]interface{}{ 1057 "int": []int{1, 10}, 1058 "float": []float64{2.5, 20.5}, 1059 "bool": []bool{true, false}, 1060 "string": []string{"hello", "bye,\n bye"}}, 1061 }, 1062 { 1063 name: "null string", 1064 inputHeaders: []string{"foo", "bar"}, 1065 inputData: "a,b\n,c", 1066 emptyNull: true, 1067 expected: map[string]interface{}{ 1068 "foo": []*string{&a, nil}, 1069 "bar": []*string{&b, &c}}, 1070 }, 1071 { 1072 name: "empty string", 1073 inputHeaders: []string{"foo", "bar"}, 1074 inputData: "a,b\n,c", 1075 emptyNull: false, 1076 expected: map[string]interface{}{ 1077 "foo": []*string{&a, &empty}, 1078 "bar": []*string{&b, &c}}, 1079 }, 1080 { 1081 name: "NaN float", 1082 inputHeaders: []string{"foo", "bar"}, 1083 inputData: "1.5,3.0\n,2.0", 1084 expected: map[string]interface{}{ 1085 "foo": []float64{1.5, math.NaN()}, 1086 "bar": []float64{3.0, 2.0}}, 1087 }, 1088 { 1089 name: "Int to float type success", 1090 inputHeaders: []string{"foo"}, 1091 inputData: "3\n2", 1092 expected: map[string]interface{}{"foo": []float64{3.0, 2.0}}, 1093 types: map[string]string{"foo": "float"}, 1094 }, 1095 { 1096 name: "Bool to string success", 1097 inputHeaders: []string{"foo"}, 1098 inputData: "true\nfalse", 1099 expected: map[string]interface{}{"foo": []string{"true", "false"}}, 1100 types: map[string]string{"foo": "string"}, 1101 }, 1102 { 1103 name: "Int to string success", 1104 inputHeaders: []string{"foo"}, 1105 inputData: "123\n456", 1106 expected: map[string]interface{}{"foo": []string{"123", "456"}}, 1107 types: map[string]string{"foo": "string"}, 1108 }, 1109 { 1110 name: "Float to int failure", 1111 inputHeaders: []string{"foo"}, 1112 inputData: "1.23\n4.56", 1113 expectedErr: "int", 1114 types: map[string]string{"foo": "int"}, 1115 }, 1116 { 1117 name: "String to bool failure", 1118 inputHeaders: []string{"foo"}, 1119 inputData: "abc\ndef", 1120 expectedErr: "bool", 1121 types: map[string]string{"foo": "bool"}, 1122 }, 1123 { 1124 name: "String to float failure", 1125 inputHeaders: []string{"foo"}, 1126 inputData: "abc\ndef", 1127 expectedErr: "float", 1128 types: map[string]string{"foo": "float"}, 1129 }, 1130 { 1131 name: "Enum with null value", 1132 inputHeaders: []string{"foo"}, 1133 inputData: "a\n\nc", 1134 types: map[string]string{"foo": "enum"}, 1135 emptyNull: true, 1136 expected: map[string]interface{}{"foo": []*string{&a, nil, &c}}, 1137 }, 1138 { 1139 name: "CRLF", 1140 rowDelimiter: "\r\n", 1141 inputHeaders: []string{"a_string", "b_number", "c_string"}, 1142 inputData: "abc,1,cde\r\n,1,cde\r\nabc,1,\r\n", 1143 emptyNull: false, 1144 expected: map[string]interface{}{ 1145 "a_string": []string{"abc", "", "abc"}, 1146 "b_number": []int{1, 1, 1}, 1147 "c_string": []string{"cde", "cde", ""}}, 1148 }, 1149 { 1150 name: "Duplicate column error", 1151 inputHeaders: []string{"foo", "bar", "foo"}, 1152 inputData: "a,b,c", 1153 expectedErr: "Duplicate columns", 1154 }, 1155 { 1156 name: "CRLF combined with quotes", 1157 inputHeaders: []string{"foo"}, 1158 inputData: "\"a\"\r\n\"b\"\r\n", 1159 expected: map[string]interface{}{ 1160 "foo": []string{"a", "b"}, 1161 }, 1162 }, 1163 } 1164 1165 for _, tc := range table { 1166 t.Run(fmt.Sprintf("ReadCSV %s", tc.name), func(t *testing.T) { 1167 if tc.delimiter == 0 { 1168 tc.delimiter = ',' 1169 } 1170 1171 if tc.rowDelimiter == "" { 1172 tc.rowDelimiter = "\n" 1173 } 1174 1175 input := strings.Join(tc.inputHeaders, string([]byte{tc.delimiter})) + tc.rowDelimiter + tc.inputData 1176 out := qframe.ReadCSV(strings.NewReader(input), 1177 csv.EmptyNull(tc.emptyNull), 1178 csv.Types(tc.types), 1179 csv.IgnoreEmptyLines(tc.ignoreEmptyLines), 1180 csv.Delimiter(tc.delimiter)) 1181 if tc.expectedErr != "" { 1182 assertErr(t, out.Err, tc.expectedErr) 1183 } else { 1184 assertNotErr(t, out.Err) 1185 1186 enums := make(map[string][]string) 1187 for k, v := range tc.types { 1188 if v == "enum" { 1189 enums[k] = nil 1190 } 1191 } 1192 1193 assertEquals(t, qframe.New(tc.expected, newqf.ColumnOrder(tc.inputHeaders...), newqf.Enums(enums)), out) 1194 } 1195 }) 1196 } 1197 } 1198 1199 // EOFReader is a mock to simulate io.Reader implementation that returns data together with err == io.EOF. 1200 type EOFReader struct { 1201 s string 1202 isRead bool 1203 } 1204 1205 func (r *EOFReader) Read(b []byte) (int, error) { 1206 if r.isRead { 1207 return 0, io.EOF 1208 } 1209 1210 if len(b) < len(r.s) { 1211 // This is just a mock, don't bother supporting more complicated cases 1212 log.Fatalf("Buffer len too short for string: %d < %d", len(b), len(r.s)) 1213 } 1214 1215 count := copy(b, []byte(r.s)) 1216 r.isRead = true 1217 return count, io.EOF 1218 } 1219 1220 func TestQFrame_ReadCSVCombinedReadAndEOF(t *testing.T) { 1221 input := `abc,def 1222 1,2 1223 3,4 1224 ` 1225 out := qframe.ReadCSV(&EOFReader{s: input}) 1226 expected := qframe.New(map[string]interface{}{"abc": []int{1, 3}, "def": []int{2, 4}}, newqf.ColumnOrder("abc", "def")) 1227 assertEquals(t, expected, out) 1228 } 1229 1230 func TestQFrame_ReadCSVNoRowsNoTypes(t *testing.T) { 1231 // Should be possible to test an empty, non typed column against anything. 1232 input := `abc,def` 1233 1234 t.Run("Empty column comparable to anything when not typed", func(t *testing.T) { 1235 out := qframe.ReadCSV(strings.NewReader(input)) 1236 assertNotErr(t, out.Err) 1237 1238 // Filtering 1239 out = out.Filter(qframe.Filter{Column: "abc", Comparator: ">", Arg: "b"}) 1240 assertNotErr(t, out.Err) 1241 1242 // Aggregation 1243 e := qframe.Expr("abs", types.ColumnName("abc")) 1244 assertNotErr(t, e.Err()) 1245 out = out.Eval("abc", e) 1246 assertNotErr(t, out.Err) 1247 }) 1248 1249 t.Run("Empty column not comparable to anything when typed", func(t *testing.T) { 1250 out := qframe.ReadCSV(strings.NewReader(input), csv.Types(map[string]string{"abc": "int"})) 1251 out = out.Filter(qframe.Filter{Column: "abc", Comparator: ">", Arg: "b"}) 1252 assertErr(t, out.Err, "type") 1253 }) 1254 } 1255 1256 func TestQFrame_ReadCSVNoHeader(t *testing.T) { 1257 input := `1,2` 1258 1259 out := qframe.ReadCSV(strings.NewReader(input), csv.Headers([]string{"abc", "def"})) 1260 assertNotErr(t, out.Err) 1261 1262 expected := qframe.New(map[string]interface{}{"abc": []int{1}, "def": []int{2}}) 1263 assertNotErr(t, out.Err) 1264 assertEquals(t, expected, out) 1265 } 1266 1267 func TestQFrame_Enum(t *testing.T) { 1268 mon, tue, wed, thu, fri, sat, sun := "mon", "tue", "wed", "thu", "fri", "sat", "sun" 1269 t.Run("Applies specified order", func(t *testing.T) { 1270 input := `day 1271 tue 1272 mon 1273 sat 1274 wed 1275 sun 1276 thu 1277 mon 1278 thu 1279 1280 ` 1281 out := qframe.ReadCSV( 1282 strings.NewReader(input), 1283 csv.EmptyNull(true), 1284 csv.Types(map[string]string{"day": "enum"}), 1285 csv.EnumValues(map[string][]string{"day": {mon, tue, wed, thu, fri, sat, sun}})) 1286 out = out.Sort(qframe.Order{Column: "day"}) 1287 expected := qframe.New( 1288 map[string]interface{}{"day": []*string{nil, &mon, &mon, &tue, &wed, &thu, &thu, &sat, &sun}}, 1289 newqf.Enums(map[string][]string{"day": {mon, tue, wed, thu, fri, sat, sun}})) 1290 1291 assertNotErr(t, out.Err) 1292 assertEquals(t, expected, out) 1293 }) 1294 1295 t.Run("Orders given for non-enum columns results in error", func(t *testing.T) { 1296 input := `day 1297 tue 1298 ` 1299 out := qframe.ReadCSV( 1300 strings.NewReader(input), 1301 csv.Types(map[string]string{"day": "enum"}), 1302 csv.EnumValues(map[string][]string{"week": {"foo", "bar"}})) 1303 assertErr(t, out.Err, "Enum values specified for non enum column") 1304 }) 1305 1306 t.Run("Wont accept unknown values in strict mode", func(t *testing.T) { 1307 input := `day 1308 tue 1309 mon 1310 foo 1311 ` 1312 out := qframe.ReadCSV( 1313 strings.NewReader(input), 1314 csv.Types(map[string]string{"day": "enum"}), 1315 csv.EnumValues(map[string][]string{"day": {mon, tue, wed, thu, fri, sat, sun}})) 1316 1317 assertErr(t, out.Err, "unknown enum value") 1318 }) 1319 1320 t.Run("Fails with too high cardinality column", func(t *testing.T) { 1321 input := make([]string, 0) 1322 for i := 0; i < 256; i++ { 1323 input = append(input, strconv.Itoa(i)) 1324 } 1325 1326 out := qframe.New( 1327 map[string]interface{}{"foo": input}, 1328 newqf.Enums(map[string][]string{"foo": nil})) 1329 1330 assertErr(t, out.Err, "max cardinality") 1331 }) 1332 1333 t.Run("Fails when enum values specified for non enum column", func(t *testing.T) { 1334 input := `day 1335 tue 1336 ` 1337 1338 out := qframe.ReadCSV( 1339 strings.NewReader(input), 1340 csv.EnumValues(map[string][]string{"day": {mon, tue, wed, thu, fri, sat, sun}})) 1341 1342 assertErr(t, out.Err, "specified for non enum column") 1343 }) 1344 1345 t.Run("Wont accept unknown filter values in strict mode", func(t *testing.T) { 1346 input := `day 1347 tue 1348 mon 1349 ` 1350 out := qframe.ReadCSV( 1351 strings.NewReader(input), 1352 csv.Types(map[string]string{"day": "enum"}), 1353 csv.EnumValues(map[string][]string{"day": {mon, tue, wed, thu, fri, sat, sun}})) 1354 out = out.Filter(qframe.Filter{Column: "day", Comparator: ">", Arg: "foo"}) 1355 assertErr(t, out.Err, "unknown enum value") 1356 }) 1357 1358 t.Run("Will accept unknown filter values in non-strict mode", func(t *testing.T) { 1359 input := `day 1360 tue 1361 mon 1362 ` 1363 out := qframe.ReadCSV( 1364 strings.NewReader(input), 1365 csv.Types(map[string]string{"day": "enum"})) 1366 out = out.Filter(qframe.Filter{Column: "day", Comparator: ">", Arg: "foo"}) 1367 assertNotErr(t, out.Err) 1368 }) 1369 1370 t.Run("Will accept and eval to true for neq and unknown filter value in non-strict mode", func(t *testing.T) { 1371 input := `day 1372 tue 1373 mon 1374 ` 1375 out := qframe.ReadCSV( 1376 strings.NewReader(input), 1377 csv.Types(map[string]string{"day": "enum"})) 1378 out = out.Filter(qframe.Filter{Column: "day", Comparator: "!=", Arg: "foo"}) 1379 assertNotErr(t, out.Err) 1380 assertTrue(t, out.Len() == 2) 1381 }) 1382 } 1383 1384 func TestQFrame_ReadCSVMissingColumnName(t *testing.T) { 1385 input := `,COL2 1386 a,1.5` 1387 expectedIn := `COL,COL2 1388 a,1.5` 1389 1390 out := qframe.ReadCSV(strings.NewReader(input), csv.MissingColumnNameAlias("COL")) 1391 expected := qframe.ReadCSV(strings.NewReader(expectedIn)) 1392 assertNotErr(t, out.Err) 1393 assertEquals(t, expected, out) 1394 } 1395 1396 func TestQFrame_ReadCSVDuplicateColumnName(t *testing.T) { 1397 input := `COL,COL,COL,COL,COL,KOL,KOL 1398 a,1.5,1.6,1.7,1.8,1.9,2.0` 1399 1400 expectedIn := `COL,COL0,COL1,COL2,COL3,KOL,KOL0 1401 a,1.5,1.6,1.7,1.8,1.9,2.0` 1402 1403 out := qframe.ReadCSV(strings.NewReader(input), csv.RenameDuplicateColumns(true)) 1404 expected := qframe.ReadCSV(strings.NewReader(expectedIn)) 1405 assertNotErr(t, out.Err) 1406 assertEquals(t, expected, out) 1407 } 1408 1409 func TestQFrame_ReadCSVDuplicateAndEmptyColumnName(t *testing.T) { 1410 input := `, 1411 a,1.5` 1412 1413 expectedIn := `COL,COL0 1414 a,1.5` 1415 1416 out := qframe.ReadCSV(strings.NewReader(input), csv.RenameDuplicateColumns(true), csv.MissingColumnNameAlias("COL")) 1417 expected := qframe.ReadCSV(strings.NewReader(expectedIn)) 1418 assertNotErr(t, out.Err) 1419 assertEquals(t, expected, out) 1420 } 1421 1422 func TestQFrame_ReadJSON(t *testing.T) { 1423 /* 1424 >>> pd.DataFrame.from_records([dict(a=1.5), dict(a=None)]) 1425 a 1426 0 1.5 1427 1 NaN 1428 >>> pd.DataFrame.from_records([dict(a=1), dict(a=None)]) 1429 a 1430 0 1.0 1431 1 NaN 1432 >>> pd.DataFrame.from_records([dict(a=1), dict(a=2)]) 1433 a 1434 0 1 1435 1 2 1436 >>> pd.DataFrame.from_records([dict(a='foo'), dict(a=None)]) 1437 a 1438 0 foo 1439 1 None 1440 >>> pd.DataFrame.from_records([dict(a=1.5), dict(a='N/A')]) 1441 a 1442 0 1.5 1443 1 N/A 1444 >>> x = pd.DataFrame.from_records([dict(a=1.5), dict(a='N/A')]) 1445 >>> x.ix[0] 1446 a 1.5 1447 Name: 0, dtype: object 1448 */ 1449 testString := "FOO" 1450 table := []struct { 1451 input string 1452 expected map[string]interface{} 1453 }{ 1454 { 1455 input: `[ 1456 {"STRING1": "a", "INT1": 1, "FLOAT1": 1.5, "BOOL1": true}, 1457 {"STRING1": "b", "INT1": 2, "FLOAT1": 2.5, "BOOL1": false}]`, 1458 expected: map[string]interface{}{ 1459 // NOTE: The integers become floats if not explicitly typed 1460 "STRING1": []string{"a", "b"}, "INT1": []float64{1, 2}, "FLOAT1": []float64{1.5, 2.5}, "BOOL1": []bool{true, false}}, 1461 }, 1462 { 1463 input: `[{"STRING1": "FOO"}, {"STRING1": null}]`, 1464 expected: map[string]interface{}{ 1465 "STRING1": []*string{&testString, nil}}, 1466 }, 1467 } 1468 1469 for i, tc := range table { 1470 t.Run(fmt.Sprintf("FromJSON %d", i), func(t *testing.T) { 1471 out := qframe.ReadJSON(strings.NewReader(tc.input)) 1472 assertNotErr(t, out.Err) 1473 assertEquals(t, qframe.New(tc.expected), out) 1474 }) 1475 } 1476 } 1477 1478 func TestQFrame_ToCSV(t *testing.T) { 1479 table := []struct { 1480 input map[string]interface{} 1481 expected string 1482 header bool 1483 }{ 1484 { 1485 input: map[string]interface{}{ 1486 "STRING1": []string{"a", "b,c"}, "INT1": []int{1, 2}, "FLOAT1": []float64{1.5, 2.5}, "BOOL1": []bool{true, false}}, 1487 expected: `BOOL1,FLOAT1,INT1,STRING1 1488 true,1.5,1,a 1489 false,2.5,2,"b,c" 1490 `, 1491 header: true, 1492 }, 1493 { 1494 input: map[string]interface{}{ 1495 "STRING1": []string{"a", "b,c"}, "INT1": []int{1, 2}, "FLOAT1": []float64{1.5, 2.5}, "BOOL1": []bool{true, false}}, 1496 expected: `true,1.5,1,a 1497 false,2.5,2,"b,c" 1498 `, 1499 header: false, 1500 }, 1501 } 1502 1503 for i, tc := range table { 1504 t.Run(fmt.Sprintf("ToCSV %d", i), func(t *testing.T) { 1505 in := qframe.New(tc.input) 1506 assertNotErr(t, in.Err) 1507 1508 buf := new(bytes.Buffer) 1509 err := in.ToCSV(buf, 1510 csv.Header(tc.header), 1511 ) 1512 assertNotErr(t, err) 1513 1514 result := buf.String() 1515 if result != tc.expected { 1516 t.Errorf("QFrames not equal. \nGot:\n%s\nExpected:\n%s", result, tc.expected) 1517 } 1518 }) 1519 } 1520 } 1521 1522 func TestQFrame_ToFromJSON(t *testing.T) { 1523 config := []newqf.ConfigFunc{newqf.Enums(map[string][]string{"ENUM": {"aa", "bb"}})} 1524 data := map[string]interface{}{ 1525 "STRING1": []string{"añ", "bö☺ "}, "FLOAT1": []float64{1.5, 2.5}, "BOOL1": []bool{true, false}, "ENUM": []string{"aa", "bb"}} 1526 originalDf := qframe.New(data, config...) 1527 assertNotErr(t, originalDf.Err) 1528 1529 buf := new(bytes.Buffer) 1530 err := originalDf.ToJSON(buf) 1531 assertNotErr(t, err) 1532 1533 // Map order should be consistent across calls 1534 for i := 0; i < 10; i++ { 1535 buf2 := new(bytes.Buffer) 1536 err := originalDf.ToJSON(buf2) 1537 assertNotErr(t, err) 1538 if buf.String() != buf2.String() { 1539 t.Errorf("%s != %s", buf.String(), buf.String()) 1540 } 1541 } 1542 1543 jsonDf := qframe.ReadJSON(buf, config...) 1544 assertNotErr(t, jsonDf.Err) 1545 assertEquals(t, originalDf, jsonDf) 1546 } 1547 1548 func TestQFrame_ToJSONNaN(t *testing.T) { 1549 buf := new(bytes.Buffer) 1550 1551 // Test the special case NaN, this can currently be encoded but not 1552 // decoded by the JSON parsers. 1553 data := map[string]interface{}{"FLOAT1": []float64{1.5, math.NaN()}} 1554 originalDf := qframe.New(data) 1555 assertNotErr(t, originalDf.Err) 1556 1557 err := originalDf.ToJSON(buf) 1558 assertNotErr(t, err) 1559 expected := `[{"FLOAT1":1.5},{"FLOAT1":null}]` 1560 if buf.String() != expected { 1561 t.Errorf("Not equal: %s ||| %s", buf.String(), expected) 1562 } 1563 } 1564 1565 func TestQFrame_ToJSONInt(t *testing.T) { 1566 // The ints should not have decimals when turned into JSON 1567 data := map[string]interface{}{"INT": []int{1, 2}} 1568 originalDf := qframe.New(data) 1569 assertNotErr(t, originalDf.Err) 1570 1571 buf := new(bytes.Buffer) 1572 err := originalDf.ToJSON(buf) 1573 assertNotErr(t, err) 1574 if buf.String() != `[{"INT":1},{"INT":2}]` { 1575 t.Errorf("Unexpected JSON string: %s", buf.String()) 1576 } 1577 } 1578 1579 func TestQFrame_FilterEnum(t *testing.T) { 1580 a, b, c, d, e := "a", "b", "c", "d", "e" 1581 enums := newqf.Enums(map[string][]string{"COL1": {"a", "b", "c", "d", "e"}}) 1582 in := qframe.New(map[string]interface{}{ 1583 "COL1": []*string{&b, &c, &a, nil, &e, &d, nil}}, enums) 1584 1585 table := []struct { 1586 clause qframe.FilterClause 1587 expected map[string]interface{} 1588 }{ 1589 { 1590 qframe.Filter{Column: "COL1", Comparator: ">", Arg: "b"}, 1591 map[string]interface{}{"COL1": []*string{&c, &e, &d}}, 1592 }, 1593 { 1594 qframe.Filter{Column: "COL1", Comparator: "in", Arg: []string{"a", "b"}}, 1595 map[string]interface{}{"COL1": []*string{&b, &a}}, 1596 }, 1597 } 1598 1599 for i, tc := range table { 1600 t.Run(fmt.Sprintf("Filter enum %d", i), func(t *testing.T) { 1601 expected := qframe.New(tc.expected, enums) 1602 out := in.Filter(tc.clause) 1603 assertEquals(t, expected, out) 1604 }) 1605 } 1606 } 1607 1608 func TestQFrame_FilterString(t *testing.T) { 1609 a, b, c, d, e := "a", "b", "c", "d", "e" 1610 withNil := map[string]interface{}{"COL1": []*string{&b, &c, &a, nil, &e, &d, nil}} 1611 1612 table := []struct { 1613 input map[string]interface{} 1614 clause qframe.FilterClause 1615 expected map[string]interface{} 1616 }{ 1617 { 1618 withNil, 1619 qframe.Filter{Column: "COL1", Comparator: ">", Arg: "b"}, 1620 map[string]interface{}{"COL1": []*string{&c, &e, &d}}, 1621 }, 1622 { 1623 withNil, 1624 qframe.Filter{Column: "COL1", Comparator: "<", Arg: "b"}, 1625 map[string]interface{}{"COL1": []*string{&a}}, 1626 }, 1627 { 1628 withNil, 1629 qframe.Filter{Column: "COL1", Comparator: "!=", Arg: "a"}, 1630 map[string]interface{}{"COL1": []*string{&b, &c, nil, &e, &d, nil}}, 1631 }, 1632 { 1633 withNil, 1634 qframe.Filter{Column: "COL1", Comparator: "like", Arg: "b"}, 1635 map[string]interface{}{"COL1": []*string{&b}}, 1636 }, 1637 { 1638 withNil, 1639 qframe.Filter{Column: "COL1", Comparator: "in", Arg: []string{"a", "b"}}, 1640 map[string]interface{}{"COL1": []*string{&b, &a}}, 1641 }, 1642 } 1643 1644 for i, tc := range table { 1645 t.Run(fmt.Sprintf("Filter string %d", i), func(t *testing.T) { 1646 in := qframe.New(tc.input) 1647 expected := qframe.New(tc.expected) 1648 out := in.Filter(tc.clause) 1649 assertEquals(t, expected, out) 1650 }) 1651 } 1652 } 1653 1654 func TestQFrame_LikeFilterString(t *testing.T) { 1655 col1 := []string{"ABC", "AbC", "DEF", "ABCDEF", "abcdef", "FFF", "abc$def", "défåäöΦ"} 1656 1657 // Add a couple of fields to be able to verify functionality for high cardinality enums 1658 for i := 0; i < 200; i++ { 1659 col1 = append(col1, fmt.Sprintf("foo%dbar", i)) 1660 } 1661 1662 data := map[string]interface{}{"COL1": col1} 1663 for _, enums := range []map[string][]string{{}, {"COL1": nil}} { 1664 table := []struct { 1665 comparator string 1666 arg string 1667 expected []string 1668 }{ 1669 // like 1670 {"like", ".*EF.*", []string{"DEF", "ABCDEF"}}, 1671 {"like", "%EF%", []string{"DEF", "ABCDEF"}}, 1672 {"like", "AB%", []string{"ABC", "ABCDEF"}}, 1673 {"like", "%F", []string{"DEF", "ABCDEF", "FFF"}}, 1674 {"like", "ABC", []string{"ABC"}}, 1675 {"like", "défåäöΦ", []string{"défåäöΦ"}}, 1676 {"like", "%éfåäöΦ", []string{"défåäöΦ"}}, 1677 {"like", "défå%", []string{"défåäöΦ"}}, 1678 {"like", "%éfåäö%", []string{"défåäöΦ"}}, 1679 {"like", "abc$def", []string{}}, 1680 {"like", regexp.QuoteMeta("abc$def"), []string{"abc$def"}}, 1681 {"like", "%180%", []string{"foo180bar"}}, 1682 1683 // ilike 1684 {"ilike", ".*ef.*", []string{"DEF", "ABCDEF", "abcdef", "abc$def"}}, 1685 {"ilike", "ab%", []string{"ABC", "AbC", "ABCDEF", "abcdef", "abc$def"}}, 1686 {"ilike", "%f", []string{"DEF", "ABCDEF", "abcdef", "FFF", "abc$def"}}, 1687 {"ilike", "%ef%", []string{"DEF", "ABCDEF", "abcdef", "abc$def"}}, 1688 {"ilike", "défÅäöΦ", []string{"défåäöΦ"}}, 1689 {"ilike", "%éFåäöΦ", []string{"défåäöΦ"}}, 1690 {"ilike", "défå%", []string{"défåäöΦ"}}, 1691 {"ilike", "%éfåäÖ%", []string{"défåäöΦ"}}, 1692 {"ilike", "ABC$def", []string{}}, 1693 {"ilike", regexp.QuoteMeta("abc$DEF"), []string{"abc$def"}}, 1694 {"ilike", "%180%", []string{"foo180bar"}}, 1695 } 1696 1697 for _, tc := range table { 1698 t.Run(fmt.Sprintf("Enum %t, %s %s", len(enums) > 0, tc.comparator, tc.arg), func(t *testing.T) { 1699 in := qframe.New(data, newqf.Enums(enums)) 1700 expected := qframe.New(map[string]interface{}{"COL1": tc.expected}, newqf.Enums(enums)) 1701 out := in.Filter(qframe.Filter{Column: "COL1", Comparator: tc.comparator, Arg: tc.arg}) 1702 assertEquals(t, expected, out) 1703 }) 1704 } 1705 } 1706 } 1707 1708 func TestQFrame_String(t *testing.T) { 1709 a := qframe.New(map[string]interface{}{ 1710 "COLUMN1": []string{"Long content", "a", "b", "c"}, 1711 "COL2": []int{3, 2, 1, 12345678910}, 1712 }, newqf.ColumnOrder("COL2", "COLUMN1")) 1713 1714 expected := `COL2(i) COLUMN1(s) 1715 ------- ---------- 1716 3 Long co... 1717 2 a 1718 1 b 1719 1234... c 1720 1721 Dims = 2 x 4` 1722 1723 if expected != a.String() { 1724 if len(expected) != len(a.String()) { 1725 t.Errorf("Different lengths: %d != %d", len(expected), len(a.String())) 1726 } 1727 t.Errorf("\n%s\n != \n%s\n", expected, a.String()) 1728 } 1729 } 1730 1731 func TestQFrame_ByteSize(t *testing.T) { 1732 a := qframe.New(map[string]interface{}{ 1733 "COL1": []string{"a", "b"}, 1734 "COL2": []int{3, 2}, 1735 "COL3": []float64{3.5, 2.0}, 1736 "COL4": []bool{true, false}, 1737 "COL5": []string{"1", "2"}, 1738 }, newqf.Enums(map[string][]string{"COL5": nil})) 1739 totalSize := a.ByteSize() 1740 1741 // This is not so much of as a test as lock down on behavior to detect changes 1742 expectedSize := 740 1743 if totalSize != expectedSize { 1744 t.Errorf("Unexpected byte size: %d != %d", totalSize, expectedSize) 1745 } 1746 1747 assertTrue(t, a.Select("COL1", "COL2", "COL3", "COL4").ByteSize() < totalSize) 1748 assertTrue(t, a.Select("COL2", "COL3", "COL4", "COL5").ByteSize() < totalSize) 1749 assertTrue(t, a.Select("COL1", "COL3", "COL4", "COL5").ByteSize() < totalSize) 1750 assertTrue(t, a.Select("COL1", "COL2", "COL4", "COL5").ByteSize() < totalSize) 1751 assertTrue(t, a.Select("COL1", "COL2", "COL3", "COL5").ByteSize() < totalSize) 1752 } 1753 1754 func TestQFrame_CopyColumn(t *testing.T) { 1755 input := qframe.New(map[string]interface{}{ 1756 "COL1": []string{"a", "b"}, 1757 "COL2": []int{3, 2}, 1758 }) 1759 1760 expectedNew := qframe.New(map[string]interface{}{ 1761 "COL1": []string{"a", "b"}, 1762 "COL2": []int{3, 2}, 1763 "COL3": []int{3, 2}, 1764 }) 1765 1766 expectedReplace := qframe.New(map[string]interface{}{ 1767 "COL1": []int{3, 2}, 1768 "COL2": []int{3, 2}, 1769 }) 1770 1771 assertEquals(t, expectedNew, input.Copy("COL3", "COL2")) 1772 assertEquals(t, expectedReplace, input.Copy("COL1", "COL2")) 1773 } 1774 1775 func TestQFrame_ApplyZeroArg(t *testing.T) { 1776 a, b := "a", "b" 1777 table := []struct { 1778 name string 1779 expected interface{} 1780 fn interface{} 1781 }{ 1782 {name: "int fn", expected: []int{2, 2}, fn: func() int { return 2 }}, 1783 {name: "int const", expected: []int{3, 3}, fn: 3}, 1784 {name: "float fn", expected: []float64{2.5, 2.5}, fn: func() float64 { return 2.5 }}, 1785 {name: "float const", expected: []float64{3.5, 3.5}, fn: 3.5}, 1786 {name: "bool fn", expected: []bool{true, true}, fn: func() bool { return true }}, 1787 {name: "bool const", expected: []bool{false, false}, fn: false}, 1788 {name: "string fn", expected: []*string{&a, &a}, fn: func() *string { return &a }}, 1789 {name: "string const", expected: []*string{&b, &b}, fn: &b}, 1790 } 1791 1792 for _, tc := range table { 1793 t.Run(tc.name, func(t *testing.T) { 1794 input := map[string]interface{}{"COL1": []int{3, 2}} 1795 in := qframe.New(input) 1796 input["COL2"] = tc.expected 1797 expected := qframe.New(input) 1798 out := in.Apply(qframe.Instruction{Fn: tc.fn, DstCol: "COL2"}) 1799 assertEquals(t, expected, out) 1800 }) 1801 } 1802 } 1803 1804 func TestQFrame_ApplySingleArgIntToInt(t *testing.T) { 1805 input := qframe.New(map[string]interface{}{ 1806 "COL1": []int{3, 2}, 1807 }) 1808 1809 expectedNew := qframe.New(map[string]interface{}{ 1810 "COL1": []int{6, 4}, 1811 }) 1812 1813 assertEquals(t, expectedNew, input.Apply(qframe.Instruction{Fn: func(a int) int { return 2 * a }, DstCol: "COL1", SrcCol1: "COL1"})) 1814 } 1815 1816 func TestQFrame_ApplySingleArgStringToBool(t *testing.T) { 1817 input := qframe.New(map[string]interface{}{ 1818 "COL1": []string{"a", "aa", "aaa"}, 1819 }) 1820 1821 expectedNew := qframe.New(map[string]interface{}{ 1822 "COL1": []string{"a", "aa", "aaa"}, 1823 "IS_LONG": []bool{false, false, true}, 1824 }) 1825 1826 assertEquals(t, expectedNew, input.Apply(qframe.Instruction{Fn: func(x *string) bool { return len(*x) > 2 }, DstCol: "IS_LONG", SrcCol1: "COL1"})) 1827 } 1828 1829 func toUpper(x *string) *string { 1830 if x == nil { 1831 return x 1832 } 1833 result := strings.ToUpper(*x) 1834 return &result 1835 } 1836 1837 func TestQFrame_ApplySingleArgString(t *testing.T) { 1838 a, b := "a", "b" 1839 A, B := "A", "B" 1840 input := qframe.New(map[string]interface{}{ 1841 "COL1": []*string{&a, &b, nil}, 1842 }) 1843 1844 expectedNew := qframe.New(map[string]interface{}{ 1845 "COL1": []*string{&A, &B, nil}, 1846 }) 1847 1848 // General function 1849 assertEquals(t, expectedNew, input.Apply(qframe.Instruction{Fn: toUpper, DstCol: "COL1", SrcCol1: "COL1"})) 1850 1851 // Built in function 1852 assertEquals(t, expectedNew, input.Apply(qframe.Instruction{Fn: "ToUpper", DstCol: "COL1", SrcCol1: "COL1"})) 1853 } 1854 1855 func TestQFrame_ApplySingleArgEnum(t *testing.T) { 1856 a, b := "a", "b" 1857 A, B := "A", "B" 1858 input := qframe.New( 1859 map[string]interface{}{"COL1": []*string{&a, &b, nil, &a}}, 1860 newqf.Enums(map[string][]string{"COL1": nil})) 1861 1862 expectedData := map[string]interface{}{"COL1": []*string{&A, &B, nil, &A}} 1863 expectedNewGeneral := qframe.New(expectedData) 1864 expectedNewBuiltIn := qframe.New(expectedData, newqf.Enums(map[string][]string{"COL1": nil})) 1865 1866 // General function 1867 assertEquals(t, expectedNewGeneral, input.Apply(qframe.Instruction{Fn: toUpper, DstCol: "COL1", SrcCol1: "COL1"})) 1868 1869 // Builtin function 1870 assertEquals(t, expectedNewBuiltIn, input.Apply(qframe.Instruction{Fn: "ToUpper", DstCol: "COL1", SrcCol1: "COL1"})) 1871 } 1872 1873 func TestQFrame_ApplyToCopyColumn(t *testing.T) { 1874 a, b := "a", "b" 1875 input := qframe.New(map[string]interface{}{ 1876 "COL1": []string{a, b}}) 1877 1878 expectedNew := qframe.New(map[string]interface{}{ 1879 "COL1": []string{a, b}, 1880 "COL2": []string{a, b}}) 1881 1882 assertEquals(t, expectedNew, input.Apply(qframe.Instruction{Fn: types.ColumnName("COL1"), DstCol: "COL2"})) 1883 } 1884 1885 func TestQFrame_ApplyDoubleArg(t *testing.T) { 1886 table := []struct { 1887 name string 1888 input map[string]interface{} 1889 expected interface{} 1890 fn interface{} 1891 enums map[string][]string 1892 }{ 1893 { 1894 name: "int", 1895 input: map[string]interface{}{"COL1": []int{3, 2}, "COL2": []int{30, 20}}, 1896 expected: []int{33, 22}, 1897 fn: func(a, b int) int { return a + b }}, 1898 { 1899 name: "string", 1900 input: map[string]interface{}{"COL1": []string{"a", "b"}, "COL2": []string{"x", "y"}}, 1901 expected: []string{"ax", "by"}, 1902 fn: func(a, b *string) *string { result := *a + *b; return &result }}, 1903 { 1904 name: "enum", 1905 input: map[string]interface{}{"COL1": []string{"a", "b"}, "COL2": []string{"x", "y"}}, 1906 expected: []string{"ax", "by"}, 1907 fn: func(a, b *string) *string { result := *a + *b; return &result }, 1908 enums: map[string][]string{"COL1": nil, "COL2": nil}}, 1909 } 1910 1911 for _, tc := range table { 1912 t.Run(tc.name, func(t *testing.T) { 1913 in := qframe.New(tc.input, newqf.Enums(tc.enums)) 1914 tc.input["COL3"] = tc.expected 1915 expected := qframe.New(tc.input, newqf.Enums(tc.enums)) 1916 out := in.Apply(qframe.Instruction{Fn: tc.fn, DstCol: "COL3", SrcCol1: "COL1", SrcCol2: "COL2"}) 1917 assertEquals(t, expected, out) 1918 }) 1919 } 1920 } 1921 1922 func TestQFrame_FilteredApply(t *testing.T) { 1923 plus1 := func(a int) int { return a + 1 } 1924 table := []struct { 1925 name string 1926 input map[string]interface{} 1927 expected map[string]interface{} 1928 instructions []qframe.Instruction 1929 clauses qframe.FilterClause 1930 }{ 1931 { 1932 name: "null fills for rows that dont match filter when destination column is new", 1933 input: map[string]interface{}{"COL1": []int{3, 2, 1}}, 1934 instructions: []qframe.Instruction{{Fn: plus1, DstCol: "COL3", SrcCol1: "COL1"}, {Fn: plus1, DstCol: "COL3", SrcCol1: "COL3"}}, 1935 expected: map[string]interface{}{"COL1": []int{3, 2, 1}, "COL3": []int{5, 4, 0}}, 1936 clauses: qframe.Filter{Comparator: ">", Column: "COL1", Arg: 1}}, 1937 { 1938 // One could question whether this is the desired behaviour or not. The alternative 1939 // would be to preserve the existing values but that would cause a lot of inconsistencies 1940 // when the result column type differs from the source column type for example. What would 1941 // the preserved value be in that case? Preserving the existing behaviour could be achieved 1942 // by using a temporary column that indexes which columns to modify and not. Perhaps this 1943 // should be built in at some point. 1944 name: "null fills rows that dont match filter when destination column is existing", 1945 input: map[string]interface{}{"COL1": []int{3, 2, 1}}, 1946 instructions: []qframe.Instruction{{Fn: plus1, DstCol: "COL1", SrcCol1: "COL1"}}, 1947 expected: map[string]interface{}{"COL1": []int{4, 3, 0}}, 1948 clauses: qframe.Filter{Comparator: ">", Column: "COL1", Arg: 1}}, 1949 } 1950 1951 for _, tc := range table { 1952 t.Run(tc.name, func(t *testing.T) { 1953 in := qframe.New(tc.input) 1954 expected := qframe.New(tc.expected) 1955 out := in.FilteredApply(tc.clauses, tc.instructions...) 1956 assertEquals(t, expected, out) 1957 }) 1958 } 1959 } 1960 1961 func TestQFrame_AggregateStrings(t *testing.T) { 1962 table := []struct { 1963 enums map[string][]string 1964 }{ 1965 {map[string][]string{"COL2": nil}}, 1966 {map[string][]string{}}, 1967 } 1968 1969 for _, tc := range table { 1970 t.Run(fmt.Sprintf("Enum %t", len(tc.enums) > 0), func(t *testing.T) { 1971 input := qframe.New(map[string]interface{}{ 1972 "COL1": []string{"a", "b", "a", "b", "a"}, 1973 "COL2": []string{"x", "p", "y", "q", "z"}, 1974 }, newqf.Enums(tc.enums)) 1975 expected := qframe.New(map[string]interface{}{"COL1": []string{"a", "b"}, "COL2": []string{"x,y,z", "p,q"}}) 1976 out := input.GroupBy(groupby.Columns("COL1")).Aggregate(qframe.Aggregation{Fn: aggregation.StrJoin(","), Column: "COL2"}) 1977 assertEquals(t, expected, out.Sort(qframe.Order{Column: "COL1"})) 1978 }) 1979 } 1980 } 1981 1982 func sum(c []int) int { 1983 result := 0 1984 for _, v := range c { 1985 result += v 1986 } 1987 return result 1988 } 1989 1990 func TestQFrame_AggregateGroupByNull(t *testing.T) { 1991 a, b := "a", "b" 1992 for _, groupByNull := range []bool{false, true} { 1993 for _, column := range []string{"COL1", "COL2", "COL3"} { 1994 t.Run(fmt.Sprintf("%s %v", column, groupByNull), func(t *testing.T) { 1995 input := qframe.New(map[string]interface{}{ 1996 "COL1": []*string{&a, &b, nil, &a, &b, nil}, 1997 "COL2": []*string{&a, &b, nil, &a, &b, nil}, 1998 "COL3": []float64{1, 2, math.NaN(), 1, 2, math.NaN()}, 1999 "COL4": []int{1, 2, 3, 10, 20, 30}, 2000 }, newqf.Enums(map[string][]string{"COL2": nil})) 2001 2002 col4 := []int{3, 30, 11, 22} 2003 if groupByNull { 2004 // Here we expect the nil/NaN columns to have been aggregated together 2005 col4 = []int{33, 11, 22} 2006 } 2007 expected := qframe.New(map[string]interface{}{"COL4": col4}) 2008 2009 out := input.GroupBy(groupby.Columns(column), groupby.Null(groupByNull)).Aggregate(qframe.Aggregation{Fn: sum, Column: "COL4"}) 2010 assertEquals(t, expected, out.Sort(colNamesToOrders(column, "COL4")...).Select("COL4")) 2011 }) 2012 } 2013 } 2014 } 2015 2016 func TestQFrame_NewWithConstantVal(t *testing.T) { 2017 a := "a" 2018 table := []struct { 2019 name string 2020 input interface{} 2021 expected interface{} 2022 enums map[string][]string 2023 }{ 2024 { 2025 name: "int", 2026 input: qframe.ConstInt{Val: 33, Count: 2}, 2027 expected: []int{33, 33}}, 2028 { 2029 name: "float", 2030 input: qframe.ConstFloat{Val: 33.5, Count: 2}, 2031 expected: []float64{33.5, 33.5}}, 2032 { 2033 name: "bool", 2034 input: qframe.ConstBool{Val: true, Count: 2}, 2035 expected: []bool{true, true}}, 2036 { 2037 name: "string", 2038 input: qframe.ConstString{Val: &a, Count: 2}, 2039 expected: []string{"a", "a"}}, 2040 { 2041 name: "string null", 2042 input: qframe.ConstString{Val: nil, Count: 2}, 2043 expected: []*string{nil, nil}}, 2044 { 2045 name: "enum", 2046 input: qframe.ConstString{Val: &a, Count: 2}, 2047 expected: []string{"a", "a"}, 2048 enums: map[string][]string{"COL1": nil}}, 2049 { 2050 name: "enum null", 2051 input: qframe.ConstString{Val: nil, Count: 2}, 2052 expected: []*string{nil, nil}, 2053 enums: map[string][]string{"COL1": nil}}, 2054 } 2055 2056 for _, tc := range table { 2057 t.Run(tc.name, func(t *testing.T) { 2058 in := qframe.New(map[string]interface{}{"COL1": tc.input}, newqf.Enums(tc.enums)) 2059 expected := qframe.New(map[string]interface{}{"COL1": tc.expected}, newqf.Enums(tc.enums)) 2060 assertEquals(t, expected, in) 2061 }) 2062 } 2063 } 2064 2065 func TestQFrame_NewErrors(t *testing.T) { 2066 longCol := make([]string, 256) 2067 for i := range longCol { 2068 longCol[i] = fmt.Sprintf("%d", i) 2069 } 2070 2071 table := []struct { 2072 input map[string]interface{} 2073 configs []newqf.ConfigFunc 2074 err string 2075 }{ 2076 { 2077 input: map[string]interface{}{"": []int{1}}, 2078 err: "must not be empty"}, 2079 { 2080 input: map[string]interface{}{"'foo'": []int{1}}, 2081 err: `must not be quoted: 'foo'`}, 2082 { 2083 input: map[string]interface{}{`"foo"`: []int{1}}, 2084 err: `must not be quoted: "foo"`}, 2085 { 2086 input: map[string]interface{}{"$foo": []int{1}}, 2087 err: "must not start with $"}, 2088 { 2089 input: map[string]interface{}{"COL1": longCol}, 2090 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": nil})}, 2091 err: "enum max cardinality"}, 2092 { 2093 input: map[string]interface{}{"COL1": longCol}, 2094 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": nil})}, 2095 err: "enum max cardinality"}, 2096 { 2097 input: map[string]interface{}{"COL1": longCol}, 2098 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL2": nil})}, 2099 err: "unknown enum columns"}, 2100 { 2101 input: map[string]interface{}{"COL1": []int{1}, "COL2": []int{2}}, 2102 configs: []newqf.ConfigFunc{newqf.ColumnOrder("COL1")}, 2103 err: "number of columns and columns order length do not match"}, 2104 { 2105 input: map[string]interface{}{"COL1": []int{1}, "COL2": []int{2}}, 2106 configs: []newqf.ConfigFunc{newqf.ColumnOrder("COL1", "COL3")}, 2107 err: `column "COL3" in column order does not exist`}, 2108 { 2109 input: map[string]interface{}{"COL1": []int8{1}}, 2110 err: `unknown column data type`}, 2111 { 2112 input: map[string]interface{}{"COL1": []int{1}, "COL2": []int{2, 3}}, 2113 err: `different lengths on columns not allowed`}, 2114 } 2115 2116 for _, tc := range table { 2117 t.Run(tc.err, func(t *testing.T) { 2118 f := qframe.New(tc.input, tc.configs...) 2119 assertErr(t, f.Err, tc.err) 2120 }) 2121 } 2122 } 2123 2124 func TestQFrame_OperationErrors(t *testing.T) { 2125 // Catch all test case for various errors caused by invalid input parameters 2126 // to various functions on the QFrame. 2127 table := []struct { 2128 name string 2129 fn func(f qframe.QFrame) error 2130 err string 2131 configs []newqf.ConfigFunc 2132 input map[string]interface{} 2133 }{ 2134 { 2135 name: "Copy with invalid destination column name", 2136 fn: func(f qframe.QFrame) error { return f.Copy("$A", "COL1").Err }, 2137 err: "must not start with $"}, 2138 { 2139 name: "Apply with invalid destination column name", 2140 fn: func(f qframe.QFrame) error { return f.Apply(qframe.Instruction{Fn: 1, DstCol: "$A"}).Err }, 2141 err: "must not start with $"}, 2142 { 2143 name: "Set eval func with invalid name", 2144 fn: func(f qframe.QFrame) error { 2145 ctx := eval.NewDefaultCtx() 2146 return ctx.SetFunc("$foo", func(i int) int { return i }) 2147 }, 2148 err: "must not start with $"}, 2149 { 2150 name: "Missing function in eval", 2151 fn: func(f qframe.QFrame) error { 2152 expr := qframe.Expr("foo", types.ColumnName("COL1")) 2153 return f.Eval("COL3", expr).Err 2154 }, 2155 err: "Could not find Int function"}, 2156 { 2157 name: "Error in lhs of composed expression", 2158 fn: func(f qframe.QFrame) error { 2159 expr := qframe.Expr("+", 2160 qframe.Expr("foo", types.ColumnName("COL1")), 2161 qframe.Expr("abs", types.ColumnName("COL2"))) 2162 return f.Eval("COL3", expr).Err 2163 }, 2164 err: "Could not find Int function"}, 2165 { 2166 name: "Error in rhs of composed expression", 2167 fn: func(f qframe.QFrame) error { 2168 expr := qframe.Expr("+", 2169 qframe.Expr("abs", types.ColumnName("COL2")), 2170 qframe.Expr("foo", types.ColumnName("COL1"))) 2171 return f.Eval("COL3", expr).Err 2172 }, 2173 err: "Could not find Int function"}, 2174 { 2175 name: "Zero clause OR filter not allowed", 2176 fn: func(f qframe.QFrame) error { return f.Filter(qframe.Or()).Err }, 2177 err: "zero subclauses not allowed"}, 2178 { 2179 name: "Zero clause AND filter not allowed", 2180 fn: func(f qframe.QFrame) error { return f.Filter(qframe.And()).Err }, 2181 err: "zero subclauses not allowed"}, 2182 { 2183 name: "Group by missing column", 2184 fn: func(f qframe.QFrame) error { return f.GroupBy(groupby.Columns("FOO")).Err }, 2185 err: "unknown column"}, 2186 { 2187 name: "Aggregate on missing column", 2188 fn: func(f qframe.QFrame) error { 2189 return f.GroupBy(groupby.Columns("COL1")).Aggregate(qframe.Aggregation{Fn: "sum", Column: "FOO"}).Err 2190 }, 2191 err: "unknown column"}, 2192 { 2193 name: "Aggregate on column part of the group by expression is not allowed", 2194 fn: func(f qframe.QFrame) error { 2195 return f.GroupBy(groupby.Columns("COL1")).Aggregate(qframe.Aggregation{Fn: "sum", Column: "COL1"}).Err 2196 }, 2197 err: "cannot aggregate on column that is part of group by"}, 2198 { 2199 name: "Filter using unknown operation, enum", 2200 input: map[string]interface{}{"COL1": []string{"a", "b"}}, 2201 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": {"a", "b"}})}, 2202 fn: func(f qframe.QFrame) error { 2203 return f.Filter(qframe.Filter{Comparator: ">>>", Column: "COL1", Arg: "c"}).Err 2204 }, 2205 err: "unknown comparison operator"}, 2206 { 2207 name: "Filter against unknown value, enum", 2208 input: map[string]interface{}{"COL1": []string{"a", "b"}}, 2209 configs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": {"a", "b"}})}, 2210 fn: func(f qframe.QFrame) error { 2211 return f.Filter(qframe.Filter{Comparator: ">", Column: "COL1", Arg: "c"}).Err 2212 }, 2213 err: "unknown enum value"}, 2214 { 2215 name: "Filter using unknown operator, float", 2216 input: map[string]interface{}{"COL1": []float64{1.0}}, 2217 fn: func(f qframe.QFrame) error { 2218 return f.Filter(qframe.Filter{Comparator: ">>>", Column: "COL1", Arg: 1.0}).Err 2219 }, 2220 err: "invalid comparison operator"}, 2221 { 2222 name: "Filter against wrong type, float", 2223 input: map[string]interface{}{"COL1": []float64{1.0}}, 2224 fn: func(f qframe.QFrame) error { 2225 return f.Filter(qframe.Filter{Comparator: ">", Column: "COL1", Arg: "foo"}).Err 2226 }, 2227 err: "invalid comparison value type"}, 2228 { 2229 name: "Filter against wrong type, string", 2230 input: map[string]interface{}{"COL1": []string{"a"}}, 2231 fn: func(f qframe.QFrame) error { 2232 return f.Filter(qframe.Filter{Comparator: ">", Column: "COL1", Arg: 1.0}).Err 2233 }, 2234 err: "invalid comparison value type"}, 2235 { 2236 name: "Filter on missing column", 2237 input: map[string]interface{}{"COL1": []string{"a"}}, 2238 fn: func(f qframe.QFrame) error { 2239 return f.Filter(qframe.Filter{Comparator: "=", Column: "FOO", Arg: "a"}).Err 2240 }, 2241 err: "unknown column"}, 2242 { 2243 name: "Filter against missing argument column", 2244 input: map[string]interface{}{"COL1": []string{"a"}}, 2245 fn: func(f qframe.QFrame) error { 2246 return f.Filter(qframe.Filter{Comparator: "=", Column: "COL1", Arg: types.ColumnName("COL2")}).Err 2247 }, 2248 err: "unknown argument column"}, 2249 { 2250 name: "Distinct on missing column", 2251 input: map[string]interface{}{"COL1": []string{"a"}}, 2252 fn: func(f qframe.QFrame) error { return f.Distinct(groupby.Columns("COL2")).Err }, 2253 err: "unknown column"}, 2254 { 2255 name: "Select on missing column", 2256 input: map[string]interface{}{"COL1": []string{"a"}}, 2257 fn: func(f qframe.QFrame) error { return f.Select("COL2").Err }, 2258 err: "unknown column"}, 2259 { 2260 name: "Copy with missing column", 2261 input: map[string]interface{}{"COL1": []string{"a"}}, 2262 fn: func(f qframe.QFrame) error { return f.Copy("COL3", "COL2").Err }, 2263 err: "unknown column"}, 2264 { 2265 name: "Unknown sort column", 2266 input: map[string]interface{}{"COL1": []string{"a"}}, 2267 fn: func(f qframe.QFrame) error { return f.Sort(qframe.Order{Column: "COL2"}).Err }, 2268 err: "unknown column"}, 2269 { 2270 name: "Get view for wrong type", 2271 input: map[string]interface{}{"COL1": []string{"a"}}, 2272 fn: func(f qframe.QFrame) error { 2273 _, err := f.FloatView("COL1") 2274 return err 2275 }, 2276 err: "invalid column type"}, 2277 } 2278 2279 for _, tc := range table { 2280 t.Run(tc.name, func(t *testing.T) { 2281 if tc.input == nil { 2282 tc.input = map[string]interface{}{"COL1": []int{1, 2, 3}, "COL2": []int{11, 12, 13}} 2283 } 2284 f := qframe.New(tc.input, tc.configs...) 2285 err := tc.fn(f) 2286 assertErr(t, err, tc.err) 2287 }) 2288 } 2289 } 2290 2291 func TestQFrame_Equals(t *testing.T) { 2292 table := []struct { 2293 name string 2294 input map[string]interface{} 2295 comparatee map[string]interface{} 2296 inputConfigs []newqf.ConfigFunc 2297 comparateeConfigs []newqf.ConfigFunc 2298 expected bool 2299 }{ 2300 { 2301 name: "Equality basic", 2302 input: map[string]interface{}{"COL1": []int{1}, "COL2": []int{1}}, 2303 comparatee: map[string]interface{}{"COL1": []int{1}, "COL2": []int{1}}, 2304 expected: true}, 2305 { 2306 name: "Equality of zero column", 2307 input: map[string]interface{}{}, 2308 comparatee: map[string]interface{}{}, 2309 expected: true}, 2310 { 2311 name: "Equality of empty column", 2312 input: map[string]interface{}{"COL1": []int{}}, 2313 comparatee: map[string]interface{}{"COL1": []int{}}, 2314 expected: true}, 2315 { 2316 name: "Inequality empty vs non-empty column", 2317 input: map[string]interface{}{"COL1": []int{}}, 2318 comparatee: map[string]interface{}{"COL1": []int{1}}, 2319 expected: false}, 2320 { 2321 name: "Inequality different columns", 2322 input: map[string]interface{}{"COL1": []int{}}, 2323 comparatee: map[string]interface{}{"COL2": []int{}}, 2324 expected: false}, 2325 { 2326 name: "Inequality different number of columns", 2327 input: map[string]interface{}{"COL1": []int{1}}, 2328 comparatee: map[string]interface{}{"COL1": []int{1}, "COL2": []int{1}}, 2329 expected: false}, 2330 { 2331 name: "Inequality different column content", 2332 input: map[string]interface{}{"COL1": []int{1, 2}}, 2333 comparatee: map[string]interface{}{"COL1": []int{2, 1}}, 2334 expected: false}, 2335 { 2336 name: "Equality between different enum types as long as elements are the same", 2337 input: map[string]interface{}{"COL1": []string{"a", "b"}}, 2338 inputConfigs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": {"a", "b"}})}, 2339 comparatee: map[string]interface{}{"COL1": []string{"a", "b"}}, 2340 comparateeConfigs: []newqf.ConfigFunc{newqf.Enums(map[string][]string{"COL1": {"c", "b", "a"}})}, 2341 expected: true}, 2342 { 2343 // Not sure if this is the way it should work, just documenting the current behaviour 2344 name: "Inequality with same content but different column order", 2345 input: map[string]interface{}{"COL1": []string{"a", "b"}, "COL2": []string{"aa", "bb"}}, 2346 inputConfigs: []newqf.ConfigFunc{newqf.ColumnOrder("COL1", "COL2")}, 2347 comparatee: map[string]interface{}{"COL1": []string{"a", "b"}, "COL2": []string{"aa", "bb"}}, 2348 comparateeConfigs: []newqf.ConfigFunc{newqf.ColumnOrder("COL2", "COL1")}, 2349 expected: false}, 2350 } 2351 2352 for _, tc := range table { 2353 t.Run(tc.name, func(t *testing.T) { 2354 in := qframe.New(tc.input, tc.inputConfigs...) 2355 comp := qframe.New(tc.comparatee, tc.comparateeConfigs...) 2356 eq, reason := in.Equals(comp) 2357 if eq != tc.expected { 2358 t.Errorf("Actual: %v, expected: %v, reason: %s", eq, tc.expected, reason) 2359 } 2360 }) 2361 } 2362 } 2363 2364 func TestQFrame_FloatView(t *testing.T) { 2365 input := qframe.New(map[string]interface{}{"COL1": []float64{1.5, 0.5, 3.0}}) 2366 input = input.Sort(qframe.Order{Column: "COL1"}) 2367 expected := []float64{0.5, 1.5, 3.0} 2368 2369 v, err := input.FloatView("COL1") 2370 assertNotErr(t, err) 2371 2372 s := v.Slice() 2373 assertTrue(t, v.Len() == len(expected)) 2374 assertTrue(t, len(s) == len(expected)) 2375 assertTrue(t, (v.ItemAt(0) == s[0]) && (s[0] == expected[0])) 2376 assertTrue(t, (v.ItemAt(1) == s[1]) && (s[1] == expected[1])) 2377 assertTrue(t, (v.ItemAt(2) == s[2]) && (s[2] == expected[2])) 2378 } 2379 2380 func TestQFrame_StringView(t *testing.T) { 2381 a, b := "a", "b" 2382 input := qframe.New(map[string]interface{}{"COL1": []*string{&a, nil, &b}}) 2383 input = input.Sort(qframe.Order{Column: "COL1"}) 2384 expected := []*string{nil, &a, &b} 2385 2386 v, err := input.StringView("COL1") 2387 assertNotErr(t, err) 2388 2389 s := v.Slice() 2390 assertTrue(t, v.Len() == len(expected)) 2391 assertTrue(t, len(s) == len(expected)) 2392 2393 // Nil, check pointers 2394 assertTrue(t, (v.ItemAt(0) == s[0]) && (s[0] == expected[0])) 2395 2396 // !Nil, check values 2397 assertTrue(t, (*v.ItemAt(1) == *s[1]) && (*s[1] == *expected[1])) 2398 assertTrue(t, (*v.ItemAt(2) == *s[2]) && (*s[2] == *expected[2])) 2399 } 2400 2401 func TestQFrame_EnumView(t *testing.T) { 2402 a, b := "a", "b" 2403 input := qframe.New(map[string]interface{}{"COL1": []*string{&a, nil, &b}}, newqf.Enums(map[string][]string{"COL1": {"a", "b"}})) 2404 input = input.Sort(qframe.Order{Column: "COL1"}) 2405 expected := []*string{nil, &a, &b} 2406 2407 v, err := input.EnumView("COL1") 2408 assertNotErr(t, err) 2409 2410 s := v.Slice() 2411 assertTrue(t, v.Len() == len(expected)) 2412 assertTrue(t, len(s) == len(expected)) 2413 2414 // Nil, check pointers 2415 assertTrue(t, (v.ItemAt(0) == s[0]) && (s[0] == expected[0])) 2416 2417 // !Nil, check values 2418 assertTrue(t, (*v.ItemAt(1) == *s[1]) && (*s[1] == *expected[1])) 2419 assertTrue(t, (*v.ItemAt(2) == *s[2]) && (*s[2] == *expected[2])) 2420 } 2421 2422 func col(c string) types.ColumnName { 2423 return types.ColumnName(c) 2424 } 2425 2426 func TestQFrame_EvalSuccess(t *testing.T) { 2427 table := []struct { 2428 name string 2429 expr qframe.Expression 2430 dstCol string 2431 input map[string]interface{} 2432 expected interface{} 2433 customFn interface{} 2434 customFnName string 2435 enums map[string][]string 2436 }{ 2437 { 2438 name: "column copying", 2439 expr: qframe.Val(col("COL1")), 2440 input: map[string]interface{}{"COL1": []int{1, 2}}, 2441 dstCol: "COL2", 2442 expected: []int{1, 2}}, 2443 { 2444 name: "column constant fill", 2445 expr: qframe.Val(3), 2446 input: map[string]interface{}{"COL1": []int{1, 2}}, 2447 dstCol: "COL2", 2448 expected: []int{3, 3}}, 2449 { 2450 name: "column nil fill", 2451 expr: qframe.Val(nil), 2452 input: map[string]interface{}{"COL1": []int{1, 2}}, 2453 dstCol: "COL2", 2454 expected: []*string{nil, nil}}, 2455 { 2456 name: "int col plus col", 2457 expr: qframe.Expr("+", col("COL1"), col("COL2")), 2458 input: map[string]interface{}{"COL1": []int{1, 2}, "COL2": []int{3, 4}}, 2459 expected: []int{4, 6}}, 2460 { 2461 name: "int col plus const minus const", 2462 expr: qframe.Expr("-", qframe.Expr("+", col("COL1"), 10), qframe.Val(1)), 2463 input: map[string]interface{}{"COL1": []int{1, 2}}, 2464 expected: []int{10, 11}}, 2465 { 2466 name: "string plus itoa int", 2467 expr: qframe.Expr("+", col("COL1"), qframe.Expr("str", col("COL2"))), 2468 input: map[string]interface{}{"COL1": []string{"a", "b"}, "COL2": []int{1, 2}}, 2469 expected: []string{"a1", "b2"}}, 2470 { 2471 name: "string plus string literal", 2472 expr: qframe.Expr("+", col("COL1"), qframe.Val("A")), 2473 input: map[string]interface{}{"COL1": []string{"a", "b"}}, 2474 expected: []string{"aA", "bA"}}, 2475 { 2476 name: "float custom func", 2477 expr: qframe.Expr("pythagoras", col("COL1"), col("COL2")), 2478 input: map[string]interface{}{"COL1": []float64{1, 2}, "COL2": []float64{1, 3}}, 2479 expected: []float64{math.Sqrt(2), math.Sqrt(4 + 9)}, 2480 customFn: func(x, y float64) float64 { return math.Sqrt(x*x + y*y) }, 2481 customFnName: "pythagoras"}, 2482 { 2483 name: "bool col and col", 2484 expr: qframe.Expr("&", col("COL1"), col("COL2")), 2485 input: map[string]interface{}{"COL1": []bool{true, false}, "COL2": []bool{true, true}}, 2486 expected: []bool{true, false}}, 2487 { 2488 name: "enum col plus col", 2489 expr: qframe.Expr("+", col("COL1"), col("COL2")), 2490 input: map[string]interface{}{"COL1": []string{"a", "b"}, "COL2": []string{"A", "B"}}, 2491 expected: []string{"aA", "bB"}, 2492 enums: map[string][]string{"COL1": nil, "COL2": nil}}, 2493 { 2494 name: "enum col plus string col, cast string to enum needed", 2495 expr: qframe.Expr("+", qframe.Expr("str", col("COL1")), col("COL2")), 2496 input: map[string]interface{}{"COL1": []string{"a", "b"}, "COL2": []string{"A", "B"}}, 2497 expected: []string{"aA", "bB"}, 2498 enums: map[string][]string{"COL1": nil}}, 2499 { 2500 name: "abs of float sum", 2501 expr: qframe.Expr("abs", qframe.Expr("+", col("COL1"), col("COL2"))), 2502 input: map[string]interface{}{"COL1": []float64{1, 2}, "COL2": []float64{-3, -2}}, 2503 expected: []float64{2, 0}}, 2504 { 2505 name: "chained multi argument evaluation - three arguments", 2506 expr: qframe.Expr("/", col("COL1"), col("COL2"), col("COL3")), 2507 input: map[string]interface{}{"COL1": []float64{18}, "COL2": []float64{2}, "COL3": []float64{3}}, 2508 dstCol: "COL4", 2509 expected: []float64{3}}, 2510 { 2511 name: "chained multi argument evaluation - four arguments including constant", 2512 expr: qframe.Expr("/", col("COL1"), col("COL2"), col("COL3"), 3.0), 2513 input: map[string]interface{}{"COL1": []float64{18}, "COL2": []float64{2}, "COL3": []float64{3}}, 2514 dstCol: "COL4", 2515 expected: []float64{1}}, 2516 } 2517 2518 for _, tc := range table { 2519 t.Run(tc.name, func(t *testing.T) { 2520 conf := make([]eval.ConfigFunc, 0) 2521 if tc.customFn != nil { 2522 ctx := eval.NewDefaultCtx() 2523 err := ctx.SetFunc(tc.customFnName, tc.customFn) 2524 assertNotErr(t, err) 2525 conf = append(conf, eval.EvalContext(ctx)) 2526 } 2527 2528 if tc.dstCol == "" { 2529 tc.dstCol = "COL3" 2530 } 2531 in := qframe.New(tc.input, newqf.Enums(tc.enums)) 2532 tc.input[tc.dstCol] = tc.expected 2533 expected := qframe.New(tc.input, newqf.Enums(tc.enums)) 2534 2535 assertNotErr(t, tc.expr.Err()) 2536 out := in.Eval(tc.dstCol, tc.expr, conf...) 2537 2538 assertEquals(t, expected, out) 2539 }) 2540 } 2541 } 2542 2543 func TestQFrame_Typing(t *testing.T) { 2544 qf := qframe.New(map[string]interface{}{ 2545 "ints": []int{1, 2}, 2546 "bools": []bool{true, false}, 2547 "floats": []float64{1, 0}, 2548 "strings": []string{"a", "b"}, 2549 "enums": []string{"a", "b"}, 2550 }, 2551 newqf.Enums(map[string][]string{"enums": {"a", "b"}}), 2552 newqf.ColumnOrder("ints", "bools", "floats", "strings", "enums"), 2553 ) 2554 assertTrue(t, qf.ColumnTypeMap()["ints"] == types.Int) 2555 assertTrue(t, qf.ColumnTypes()[0] == types.Int) 2556 assertTrue(t, qf.ColumnTypeMap()["bools"] == types.Bool) 2557 assertTrue(t, qf.ColumnTypes()[1] == types.Bool) 2558 assertTrue(t, qf.ColumnTypeMap()["floats"] == types.Float) 2559 assertTrue(t, qf.ColumnTypes()[2] == types.Float) 2560 assertTrue(t, qf.ColumnTypeMap()["strings"] == types.String) 2561 assertTrue(t, qf.ColumnTypes()[3] == types.String) 2562 assertTrue(t, qf.ColumnTypeMap()["enums"] == types.Enum) 2563 assertTrue(t, qf.ColumnTypes()[4] == types.Enum) 2564 } 2565 2566 func TestQFrame_WithRows(t *testing.T) { 2567 input := qframe.New(map[string]interface{}{"COL1": []int{11, 22, 33}}) 2568 expected := qframe.New(map[string]interface{}{ 2569 "ROWNUMS": []int{0, 1, 2}, 2570 "COL1": []int{11, 22, 33}}) 2571 assertEquals(t, expected, input.WithRowNums("ROWNUMS")) 2572 } 2573 2574 func assertContains(t *testing.T, actual, expected string) { 2575 t.Helper() 2576 if !strings.Contains(actual, expected) { 2577 t.Errorf("Could not find: %s, in: %s", expected, actual) 2578 } 2579 } 2580 2581 func TestDoc(t *testing.T) { 2582 // This is just a verification that something is printed rather than a proper test. 2583 doc := qframe.Doc() 2584 assertContains(t, doc, "context") 2585 assertContains(t, doc, "Single arg") 2586 assertContains(t, doc, "Double arg") 2587 assertContains(t, doc, "bool") 2588 assertContains(t, doc, "enum") 2589 assertContains(t, doc, "float") 2590 assertContains(t, doc, "int") 2591 assertContains(t, doc, "string") 2592 assertContains(t, doc, "filters") 2593 assertContains(t, doc, "aggregations") 2594 } 2595 2596 func TestQFrame_AppendSuccess(t *testing.T) { 2597 f1 := qframe.New(map[string]interface{}{"COL1": []int{11, 22}}) 2598 f2 := qframe.New(map[string]interface{}{"COL1": []int{33}}) 2599 f3 := qframe.New(map[string]interface{}{"COL1": []int{44, 55}}) 2600 expected := qframe.New(map[string]interface{}{"COL1": []int{11, 22, 33, 44, 55}}) 2601 assertEquals(t, expected, f1.Append(f2, f3)) 2602 } 2603 2604 func assertContainsQFrame(t *testing.T, frames []qframe.QFrame, frame qframe.QFrame) { 2605 t.Helper() 2606 for _, f := range frames { 2607 if ok, _ := frame.Equals(f); ok { 2608 return 2609 } 2610 } 2611 t.Errorf("%v does not contain %v", frames, frame) 2612 } 2613 2614 func TestQFrame_GroupByQFrames(t *testing.T) { 2615 f := qframe.New(map[string]interface{}{ 2616 "COL1": []int{1, 1, 2, 3, 3}, 2617 "COL2": []int{10, 11, 20, 30, 31}, 2618 }) 2619 2620 ff, err := f.GroupBy(groupby.Columns("COL1")).QFrames() 2621 assertNotErr(t, err) 2622 assertTrue(t, len(ff) == 3) 2623 assertContainsQFrame(t, ff, qframe.New(map[string]interface{}{"COL1": []int{1, 1}, "COL2": []int{10, 11}})) 2624 assertContainsQFrame(t, ff, qframe.New(map[string]interface{}{"COL1": []int{2}, "COL2": []int{20}})) 2625 assertContainsQFrame(t, ff, qframe.New(map[string]interface{}{"COL1": []int{3, 3}, "COL2": []int{30, 31}})) 2626 }