github.com/tobgu/qframe@v0.4.0/internal/ecolumn/column.go (about) 1 package ecolumn 2 3 import ( 4 "fmt" 5 "github.com/tobgu/qframe/config/rolling" 6 "reflect" 7 "strings" 8 9 "github.com/tobgu/qframe/filter" 10 "github.com/tobgu/qframe/internal/column" 11 "github.com/tobgu/qframe/internal/hash" 12 "github.com/tobgu/qframe/internal/index" 13 "github.com/tobgu/qframe/internal/scolumn" 14 qfstrings "github.com/tobgu/qframe/internal/strings" 15 "github.com/tobgu/qframe/qerrors" 16 "github.com/tobgu/qframe/types" 17 ) 18 19 type enumVal uint8 20 21 const maxCardinality = 255 22 const nullValue = maxCardinality 23 24 func (v enumVal) isNull() bool { 25 return v == nullValue 26 } 27 28 func (v enumVal) compVal() int { 29 // Convenience function to be able to compare null and non null values 30 // in a straight forward way. Null is considered smaller than all other values. 31 if v == nullValue { 32 return -1 33 } 34 35 return int(v) 36 } 37 38 type Column struct { 39 data []enumVal 40 values []string 41 42 // strict is set to true if the set of values has been defined rather than derived from the data. 43 strict bool 44 } 45 46 // Factory is a helper used during construction of the enum column 47 type Factory struct { 48 column Column 49 valToEnum map[string]enumVal 50 } 51 52 func New(data []*string, values []string) (Column, error) { 53 f, err := NewFactory(values, len(data)) 54 if err != nil { 55 return Column{}, err 56 } 57 58 for _, d := range data { 59 if d != nil { 60 if err := f.AppendString(*d); err != nil { 61 return Column{}, err 62 } 63 } else { 64 f.AppendNil() 65 } 66 } 67 68 return f.ToColumn(), nil 69 } 70 71 func NewConst(val *string, count int, values []string) (Column, error) { 72 f, err := NewFactory(values, count) 73 if err != nil { 74 return Column{}, err 75 } 76 77 eV, err := f.enumVal(val) 78 if err != nil { 79 return Column{}, err 80 } 81 82 for i := 0; i < count; i++ { 83 f.AppendEnum(eV) 84 } 85 86 return f.ToColumn(), nil 87 } 88 89 func NewFactory(values []string, sizeHint int) (*Factory, error) { 90 if len(values) > maxCardinality { 91 return nil, qerrors.New("New enum", "too many unique values, max cardinality is %d", maxCardinality) 92 } 93 94 if values == nil { 95 values = make([]string, 0) 96 } 97 98 valToEnum := make(map[string]enumVal, len(values)) 99 for i, v := range values { 100 valToEnum[v] = enumVal(i) 101 } 102 103 return &Factory{column: Column{ 104 data: make([]enumVal, 0, sizeHint), values: values, strict: len(values) > 0}, 105 valToEnum: valToEnum}, nil 106 } 107 108 func (f *Factory) AppendNil() { 109 f.AppendEnum(nullValue) 110 } 111 112 func (f *Factory) AppendEnum(val enumVal) { 113 f.column.data = append(f.column.data, val) 114 } 115 116 func (f *Factory) AppendByteString(str []byte) error { 117 if e, ok := f.valToEnum[string(str)]; ok { 118 f.AppendEnum(e) 119 return nil 120 } 121 122 v := string(str) 123 return f.appendString(v) 124 } 125 126 func (f *Factory) AppendString(str string) error { 127 if e, ok := f.valToEnum[str]; ok { 128 f.column.data = append(f.column.data, e) 129 return nil 130 } 131 132 return f.appendString(str) 133 } 134 135 func (f *Factory) newEnumVal(s string) enumVal { 136 ev := enumVal(len(f.column.values)) 137 f.column.values = append(f.column.values, s) 138 f.valToEnum[s] = ev 139 return ev 140 } 141 142 func (f *Factory) enumVal(s *string) (enumVal, error) { 143 if s == nil { 144 return nullValue, nil 145 } 146 147 if e, ok := f.valToEnum[*s]; ok { 148 return e, nil 149 } 150 151 if f.column.strict { 152 return 0, qerrors.New("enum val", `unknown enum value "%s" using strict enum`, *s) 153 } 154 155 if len(f.column.values) >= maxCardinality { 156 return 0, qerrors.New("enum val", `enum max cardinality (%d) exceeded`, maxCardinality) 157 } 158 159 return f.newEnumVal(*s), nil 160 } 161 162 func (f *Factory) appendString(str string) error { 163 if f.column.strict { 164 return qerrors.New("append enum val", `unknown enum value "%s" using strict enum`, str) 165 } 166 167 if len(f.column.values) >= maxCardinality { 168 return qerrors.New("append enum val", `enum max cardinality (%d) exceeded`, maxCardinality) 169 } 170 171 ev := f.newEnumVal(str) 172 f.column.data = append(f.column.data, ev) 173 return nil 174 } 175 176 func (f *Factory) ToColumn() Column { 177 // Using the factory after this method has been called and the column exposed 178 // is not recommended. 179 return f.column 180 } 181 182 var enumApplyFuncs = map[string]func(index.Int, Column) interface{}{ 183 "ToUpper": toUpper, 184 } 185 186 func toUpper(_ index.Int, s Column) interface{} { 187 // This demonstrates how built in functions can be made a lot more 188 // efficient than the current general functions. 189 // In this example the upper function only has to be applied once to 190 // every enum value instead of once to every element. The data field 191 // can be kept as is. 192 newValues := make([]string, len(s.values)) 193 for i, s := range s.values { 194 newValues[i] = strings.ToUpper(s) 195 } 196 197 return Column{data: s.data, values: newValues} 198 } 199 200 func (c Column) Len() int { 201 return len(c.data) 202 } 203 204 func (c Column) StringAt(i uint32, naRep string) string { 205 v := c.data[i] 206 if v.isNull() { 207 return naRep 208 } 209 210 return c.values[v] 211 } 212 213 func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte { 214 enum := c.data[i] 215 if enum.isNull() { 216 return append(buf, "null"...) 217 } 218 219 return qfstrings.AppendQuotedString(buf, c.values[enum]) 220 } 221 222 func (c Column) ByteSize() int { 223 totalSize := 2 * 2 * 8 // Slice headers 224 for _, s := range c.values { 225 totalSize += len(s) 226 } 227 totalSize += cap(c.data) 228 return totalSize 229 } 230 231 func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool { 232 otherE, ok := other.(Column) 233 if !ok { 234 return false 235 } 236 237 for ix, x := range index { 238 enumVal := c.data[x] 239 oEnumVal := otherE.data[otherIndex[ix]] 240 if enumVal.isNull() || oEnumVal.isNull() { 241 if enumVal == oEnumVal { 242 continue 243 } 244 return false 245 } 246 247 if c.values[enumVal] != otherE.values[oEnumVal] { 248 return false 249 } 250 } 251 252 return true 253 } 254 255 func (c Comparable) Compare(i, j uint32) column.CompareResult { 256 x, y := c.column.data[i], c.column.data[j] 257 if x.isNull() || y.isNull() { 258 if !x.isNull() { 259 return c.nullGtValue 260 } 261 262 if !y.isNull() { 263 return c.nullLtValue 264 } 265 266 return c.equalNullValue 267 } 268 269 if x < y { 270 return c.ltValue 271 } 272 273 if x > y { 274 return c.gtValue 275 } 276 277 return column.Equal 278 } 279 280 func (c Comparable) Hash(i uint32, seed uint64) uint64 { 281 b := [1]byte{byte(c.column.data[i])} 282 return hash.HashBytes(b[:], seed) 283 } 284 285 func equalTypes(s1, s2 Column) bool { 286 if len(s1.values) != len(s2.values) || len(s1.data) != len(s2.data) { 287 return false 288 } 289 290 for i, val := range s1.values { 291 if val != s2.values[i] { 292 return false 293 } 294 } 295 296 return true 297 } 298 299 func (c Column) filterWithBitset(index index.Int, bset *bitset, bIndex index.Bool) { 300 for i, x := range bIndex { 301 if !x { 302 enum := c.data[index[i]] 303 bIndex[i] = bset.isSet(enum) 304 } 305 } 306 } 307 308 func (c Column) filterBuiltIn(index index.Int, comparator string, comparatee interface{}, bIndex index.Bool) error { 309 comparatee = qfstrings.InterfaceSliceToStringSlice(comparatee) 310 switch comp := comparatee.(type) { 311 case string: 312 if compFunc, ok := filterFuncs1[comparator]; ok { 313 for i, value := range c.values { 314 if value == comp { 315 compFunc(index, c.data, enumVal(i), bIndex) 316 return nil 317 } 318 } 319 320 if c.strict { 321 return qerrors.New("filter enum", "Unknown enum value in filter argument: %s", comp) 322 } 323 324 // If no enum values have been explicitly defined we quietly accept the comparator 325 326 // In case comparator is != we can tell that it's true for all values since the comparatee is not present 327 if comparator == filter.Neq { 328 for i := range bIndex { 329 bIndex[i] = true 330 } 331 } 332 333 // Otherwise it's false for all values 334 return nil 335 } 336 337 if multiFunc, ok := multiFilterFuncs[comparator]; ok { 338 bset, err := multiFunc(comp, c.values) 339 if err != nil { 340 return qerrors.Propagate("filter enum", err) 341 } 342 343 c.filterWithBitset(index, bset, bIndex) 344 return nil 345 } 346 347 return qerrors.New("filter enum", "unknown comparison operator for single argument comparison, %v", comparator) 348 case []string: 349 if multiFunc, ok := multiInputFilterFuncs[comparator]; ok { 350 bset := multiFunc(qfstrings.NewStringSet(comp), c.values) 351 c.filterWithBitset(index, bset, bIndex) 352 return nil 353 } 354 355 return qerrors.New("filter enum", "unknown comparison operator for multi argument comparison, %v", comparator) 356 case Column: 357 if ok := equalTypes(c, comp); !ok { 358 return qerrors.New("filter enum", "cannot compare enums of different types") 359 } 360 361 compFunc, ok := filterFuncs2[comparator] 362 if !ok { 363 return qerrors.New("filter enum", "unknown comparison operator for column - column comparison, %v", comparator) 364 } 365 366 compFunc(index, c.data, comp.data, bIndex) 367 return nil 368 case nil: 369 compFunc, ok := filterFuncs0[comparator] 370 if !ok { 371 return qerrors.New("filter enum", "unknown comparison operator for zero argument comparison, %v", comparator) 372 } 373 compFunc(index, c.data, bIndex) 374 return nil 375 default: 376 return qerrors.New("filter enum", "invalid comparison type, %v, expected string or other enum column", reflect.TypeOf(comparatee)) 377 } 378 } 379 380 func (c Column) filterCustom1(index index.Int, fn func(*string) bool, bIndex index.Bool) { 381 for i, x := range bIndex { 382 if !x { 383 bIndex[i] = fn(c.stringPtrAt(index[i])) 384 } 385 } 386 } 387 388 func (c Column) filterCustom2(index index.Int, fn func(*string, *string) bool, comparatee interface{}, bIndex index.Bool) error { 389 otherC, ok := comparatee.(Column) 390 if !ok { 391 return qerrors.New("filter string", "expected comparatee to be string column, was %v", reflect.TypeOf(comparatee)) 392 } 393 394 for i, x := range bIndex { 395 if !x { 396 bIndex[i] = fn(c.stringPtrAt(index[i]), otherC.stringPtrAt(index[i])) 397 } 398 } 399 400 return nil 401 } 402 403 func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error { 404 var err error 405 switch t := comparator.(type) { 406 case string: 407 err = c.filterBuiltIn(index, t, comparatee, bIndex) 408 case func(*string) bool: 409 c.filterCustom1(index, t, bIndex) 410 case func(*string, *string) bool: 411 err = c.filterCustom2(index, t, comparatee, bIndex) 412 default: 413 err = qerrors.New("filter string", "invalid filter type %v", reflect.TypeOf(comparator)) 414 } 415 return err 416 } 417 418 func (c Column) subset(index index.Int) Column { 419 data := make([]enumVal, 0, len(index)) 420 for _, ix := range index { 421 data = append(data, c.data[ix]) 422 } 423 424 return Column{data: data, values: c.values} 425 } 426 427 func (c Column) Subset(index index.Int) column.Column { 428 return c.subset(index) 429 } 430 431 func (c Column) stringSlice(index index.Int) []*string { 432 result := make([]*string, 0, len(index)) 433 for _, ix := range index { 434 v := c.data[ix] 435 if v.isNull() { 436 result = append(result, nil) 437 } else { 438 result = append(result, &c.values[v]) 439 } 440 } 441 return result 442 } 443 444 func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable { 445 result := Comparable{column: c, ltValue: column.LessThan, gtValue: column.GreaterThan, nullLtValue: column.LessThan, nullGtValue: column.GreaterThan, equalNullValue: column.NotEqual} 446 if reverse { 447 result.ltValue, result.nullLtValue, result.gtValue, result.nullGtValue = 448 result.gtValue, result.nullGtValue, result.ltValue, result.nullLtValue 449 } 450 451 if nullLast { 452 result.nullLtValue, result.nullGtValue = result.nullGtValue, result.nullLtValue 453 } 454 455 if equalNull { 456 result.equalNullValue = column.Equal 457 } 458 459 return result 460 } 461 462 func (c Column) String() string { 463 strs := make([]string, len(c.data)) 464 for i, v := range c.data { 465 if v.isNull() { 466 // For now 467 strs[i] = "null" 468 } else { 469 strs[i] = c.values[v] 470 } 471 } 472 473 return fmt.Sprintf("%v", strs) 474 } 475 476 func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) { 477 // NB! The result of aggregating over an enum column is a string column 478 switch t := fn.(type) { 479 case string: 480 // There are currently no build in aggregations for enums 481 return nil, qerrors.New("enum aggregate", "aggregation function %v is not defined for enum column", fn) 482 case func([]*string) *string: 483 data := make([]*string, 0, len(indices)) 484 for _, ix := range indices { 485 data = append(data, t(c.stringSlice(ix))) 486 } 487 return scolumn.New(data), nil 488 default: 489 return nil, qerrors.New("enum aggregate", "invalid aggregation function type: %v", t) 490 } 491 } 492 493 func (c Column) stringPtrAt(i uint32) *string { 494 if c.data[i].isNull() { 495 return nil 496 } 497 return &c.values[c.data[i]] 498 } 499 500 func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) { 501 /* 502 Interesting optimisations could be applied here given that: 503 - The passed in function always returns the same value given the same input 504 - Or, for enums a given restriction is that the functions will only be called once for each value 505 In that case a mapping between the enum value and the result could be set up to avoid having to 506 call the function multiple times for the same input. 507 */ 508 switch t := fn.(type) { 509 case func(*string) int: 510 result := make([]int, len(c.data)) 511 for _, i := range ix { 512 result[i] = t(c.stringPtrAt(i)) 513 } 514 return result, nil 515 case func(*string) float64: 516 result := make([]float64, len(c.data)) 517 for _, i := range ix { 518 result[i] = t(c.stringPtrAt(i)) 519 } 520 return result, nil 521 case func(*string) bool: 522 result := make([]bool, len(c.data)) 523 for _, i := range ix { 524 result[i] = t(c.stringPtrAt(i)) 525 } 526 return result, nil 527 case func(*string) *string: 528 result := make([]*string, len(c.data)) 529 for _, i := range ix { 530 result[i] = t(c.stringPtrAt(i)) 531 } 532 return result, nil 533 case string: 534 if f, ok := enumApplyFuncs[t]; ok { 535 return f(ix, c), nil 536 } 537 return nil, qerrors.New("string.apply1", "unknown built in function %s", t) 538 default: 539 return nil, qerrors.New("enum.apply1", "cannot apply type %#v to column", fn) 540 } 541 } 542 543 func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) { 544 s2S, ok := s2.(Column) 545 if !ok { 546 return nil, qerrors.New("enum.apply2", "invalid column type %s", s2.DataType()) 547 } 548 549 switch t := fn.(type) { 550 case func(*string, *string) *string: 551 result := make([]*string, len(c.data)) 552 for _, i := range ix { 553 result[i] = t(c.stringPtrAt(i), s2S.stringPtrAt(i)) 554 } 555 556 // NB! String column returned here, not enum. Returning enum could result 557 // in unforeseen results (eg. it would not always fit in an enum, the order 558 // is not given, etc.). 559 return scolumn.New(result), nil 560 case string: 561 // No built in functions for enums at this stage 562 return nil, qerrors.New("enum.apply2", "unknown built in function %s", t) 563 default: 564 return nil, qerrors.New("enum.apply2", "cannot apply type %#v to column", fn) 565 } 566 } 567 568 func (c Column) View(ix index.Int) View { 569 return View{column: c, index: ix} 570 } 571 572 func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) { 573 return c, nil 574 } 575 576 func (c Column) FunctionType() types.FunctionType { 577 return types.FunctionTypeString 578 } 579 580 func (c Column) DataType() types.DataType { 581 return types.Enum 582 } 583 584 func (c Column) Append(cols ...column.Column) (column.Column, error) { 585 // TODO Append 586 return nil, qerrors.New("Append", "Not implemented yet") 587 } 588 589 type Comparable struct { 590 column Column 591 ltValue column.CompareResult 592 nullLtValue column.CompareResult 593 gtValue column.CompareResult 594 nullGtValue column.CompareResult 595 equalNullValue column.CompareResult 596 }