github.com/tobgu/qframe@v0.4.0/internal/scolumn/column.go (about) 1 package scolumn 2 3 import ( 4 "bytes" 5 "fmt" 6 "github.com/tobgu/qframe/config/rolling" 7 "github.com/tobgu/qframe/internal/column" 8 "github.com/tobgu/qframe/internal/hash" 9 "github.com/tobgu/qframe/internal/index" 10 qfstrings "github.com/tobgu/qframe/internal/strings" 11 "github.com/tobgu/qframe/qerrors" 12 "github.com/tobgu/qframe/types" 13 "math/rand" 14 "reflect" 15 ) 16 17 var stringApplyFuncs = map[string]func(index.Int, Column) interface{}{ 18 "ToUpper": toUpper, 19 } 20 21 // This is an example of how a more efficient built in function 22 // could be implemented that makes use of the underlying representation 23 // to make the operation faster than what could be done using the 24 // generic function based API. 25 // This function is roughly 3 - 4 times faster than applying the corresponding 26 // general function (depending on the input size, etc. of course). 27 func toUpper(ix index.Int, source Column) interface{} { 28 if len(source.pointers) == 0 { 29 return source 30 } 31 32 pointers := make([]qfstrings.Pointer, len(source.pointers)) 33 sizeEstimate := int(float64(len(source.data)) * (float64(len(ix)) / float64(len(source.pointers)))) 34 data := make([]byte, 0, sizeEstimate) 35 strBuf := make([]byte, 1024) 36 for _, i := range ix { 37 str, isNull := source.stringAt(i) 38 pointers[i] = qfstrings.NewPointer(len(data), len(str), isNull) 39 data = append(data, qfstrings.ToUpper(&strBuf, str)...) 40 } 41 42 return NewBytes(pointers, data) 43 } 44 45 func (c Column) StringAt(i uint32, naRep string) string { 46 if s, isNull := c.stringAt(i); !isNull { 47 return s 48 } 49 50 return naRep 51 } 52 53 func (c Column) stringSlice(index index.Int) []*string { 54 result := make([]*string, len(index)) 55 for i, ix := range index { 56 s, isNull := c.stringAt(ix) 57 if isNull { 58 result[i] = nil 59 } else { 60 result[i] = &s 61 } 62 } 63 64 return result 65 } 66 67 func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte { 68 p := c.pointers[i] 69 if p.IsNull() { 70 return append(buf, "null"...) 71 } 72 str := qfstrings.UnsafeBytesToString(c.data[p.Offset() : p.Offset()+p.Len()]) 73 return qfstrings.AppendQuotedString(buf, str) 74 } 75 76 func (c Column) ByteSize() int { 77 return 8*cap(c.pointers) + cap(c.data) 78 } 79 80 func (c Column) Len() int { 81 return len(c.pointers) 82 } 83 84 func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool { 85 otherC, ok := other.(Column) 86 if !ok { 87 return false 88 } 89 90 for ix, x := range index { 91 s, sNull := c.stringAt(x) 92 os, osNull := otherC.stringAt(otherIndex[ix]) 93 if sNull || osNull { 94 if sNull && osNull { 95 continue 96 } 97 98 return false 99 } 100 101 if s != os { 102 return false 103 } 104 } 105 106 return true 107 } 108 109 func (c Comparable) Compare(i, j uint32) column.CompareResult { 110 x, xNull := c.column.bytesAt(i) 111 y, yNull := c.column.bytesAt(j) 112 if xNull || yNull { 113 if !xNull { 114 return c.nullGtValue 115 } 116 117 if !yNull { 118 return c.nullLtValue 119 } 120 121 return c.equalNullValue 122 } 123 124 r := bytes.Compare(x, y) 125 switch r { 126 case -1: 127 return c.ltValue 128 case 1: 129 return c.gtValue 130 default: 131 return column.Equal 132 } 133 } 134 135 func (c Comparable) Hash(i uint32, seed uint64) uint64 { 136 x, isNull := c.column.bytesAt(i) 137 if isNull { 138 if c.equalNullValue == column.NotEqual { 139 // Use a random value here to avoid hash collisions when 140 // we don't consider null to equal null. 141 // Use a random value here to avoid hash collisions when 142 // we don't consider null to equal null. 143 return rand.Uint64() 144 } 145 146 b := [1]byte{0} 147 return hash.HashBytes(b[:], seed) 148 } 149 150 return hash.HashBytes(x, seed) 151 } 152 153 func (c Column) filterBuiltIn(index index.Int, comparator string, comparatee interface{}, bIndex index.Bool) error { 154 comparatee = qfstrings.InterfaceSliceToStringSlice(comparatee) 155 switch t := comparatee.(type) { 156 case string: 157 filterFn, ok := filterFuncs1[comparator] 158 if !ok { 159 return qerrors.New("filter string", "unknown filter operator %v for single value argument", comparator) 160 } 161 return filterFn(index, c, t, bIndex) 162 case []string: 163 filterFn, ok := multiInputFilterFuncs[comparator] 164 if !ok { 165 return qerrors.New("filter string", "unknown filter operator %v for multi value argument", comparator) 166 } 167 168 return filterFn(index, c, qfstrings.NewStringSet(t), bIndex) 169 case Column: 170 filterFn, ok := filterFuncs2[comparator] 171 if !ok { 172 return qerrors.New("filter string", "unknown filter operator %v for column - column comparison", comparator) 173 } 174 return filterFn(index, c, t, bIndex) 175 case nil: 176 filterFn, ok := filterFuncs0[comparator] 177 if !ok { 178 return qerrors.New("filter string", "unknown filter operator %v for zero argument", comparator) 179 } 180 return filterFn(index, c, bIndex) 181 default: 182 return qerrors.New("filter string", "invalid comparison value type %v", reflect.TypeOf(comparatee)) 183 } 184 } 185 186 func (c Column) filterCustom1(index index.Int, fn func(*string) bool, bIndex index.Bool) { 187 for i, x := range bIndex { 188 if !x { 189 bIndex[i] = fn(stringToPtr(c.stringAt(index[i]))) 190 } 191 } 192 } 193 194 func (c Column) filterCustom2(index index.Int, fn func(*string, *string) bool, comparatee interface{}, bIndex index.Bool) error { 195 otherC, ok := comparatee.(Column) 196 if !ok { 197 return qerrors.New("filter string", "expected comparatee to be string column, was %v", reflect.TypeOf(comparatee)) 198 } 199 200 for i, x := range bIndex { 201 if !x { 202 bIndex[i] = fn(stringToPtr(c.stringAt(index[i])), stringToPtr(otherC.stringAt(index[i]))) 203 } 204 } 205 206 return nil 207 } 208 209 func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error { 210 var err error 211 switch t := comparator.(type) { 212 case string: 213 err = c.filterBuiltIn(index, t, comparatee, bIndex) 214 case func(*string) bool: 215 c.filterCustom1(index, t, bIndex) 216 case func(*string, *string) bool: 217 err = c.filterCustom2(index, t, comparatee, bIndex) 218 default: 219 err = qerrors.New("filter string", "invalid filter type %v", reflect.TypeOf(comparator)) 220 } 221 return err 222 } 223 224 type Column struct { 225 pointers []qfstrings.Pointer 226 data []byte 227 } 228 229 func NewBytes(pointers []qfstrings.Pointer, bytes []byte) Column { 230 return Column{pointers: pointers, data: bytes} 231 } 232 233 func NewStrings(strings []string) Column { 234 data := make([]byte, 0, len(strings)) 235 pointers := make([]qfstrings.Pointer, len(strings)) 236 offset := 0 237 for i, s := range strings { 238 pointers[i] = qfstrings.NewPointer(offset, len(s), false) 239 offset += len(s) 240 data = append(data, s...) 241 } 242 243 return NewBytes(pointers, data) 244 } 245 246 func New(strings []*string) Column { 247 data := make([]byte, 0, len(strings)) 248 pointers := make([]qfstrings.Pointer, len(strings)) 249 offset := 0 250 for i, s := range strings { 251 if s == nil { 252 pointers[i] = qfstrings.NewPointer(offset, 0, true) 253 } else { 254 sLen := len(*s) 255 pointers[i] = qfstrings.NewPointer(offset, sLen, false) 256 offset += sLen 257 data = append(data, *s...) 258 } 259 } 260 261 return NewBytes(pointers, data) 262 } 263 264 func NewConst(val *string, count int) Column { 265 var data []byte 266 pointers := make([]qfstrings.Pointer, count) 267 if val == nil { 268 data = make([]byte, 0) 269 for i := range pointers { 270 pointers[i] = qfstrings.NewPointer(0, 0, true) 271 } 272 } else { 273 sLen := len(*val) 274 data = make([]byte, 0, sLen) 275 data = append(data, *val...) 276 for i := range pointers { 277 pointers[i] = qfstrings.NewPointer(0, sLen, false) 278 } 279 } 280 281 return NewBytes(pointers, data) 282 } 283 284 func (c Column) stringAt(i uint32) (string, bool) { 285 p := c.pointers[i] 286 if p.IsNull() { 287 return "", true 288 } 289 return qfstrings.UnsafeBytesToString(c.data[p.Offset() : p.Offset()+p.Len()]), false 290 } 291 292 func (c Column) bytesAt(i uint32) ([]byte, bool) { 293 p := c.pointers[i] 294 if p.IsNull() { 295 return nil, true 296 } 297 return c.data[p.Offset() : p.Offset()+p.Len()], false 298 } 299 300 func (c Column) stringCopyAt(i uint32) (string, bool) { 301 // Similar to stringAt but will allocate a new string and copy the content into it. 302 p := c.pointers[i] 303 if p.IsNull() { 304 return "", true 305 } 306 return string(c.data[p.Offset() : p.Offset()+p.Len()]), false 307 } 308 309 func (c Column) subset(index index.Int) Column { 310 data := make([]byte, 0, len(index)) 311 pointers := make([]qfstrings.Pointer, len(index)) 312 offset := 0 313 for i, ix := range index { 314 p := c.pointers[ix] 315 pointers[i] = qfstrings.NewPointer(offset, p.Len(), p.IsNull()) 316 if !p.IsNull() { 317 data = append(data, c.data[p.Offset():p.Offset()+p.Len()]...) 318 offset += p.Len() 319 } 320 } 321 322 return Column{data: data, pointers: pointers} 323 } 324 325 func (c Column) Subset(index index.Int) column.Column { 326 return c.subset(index) 327 } 328 329 func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable { 330 result := Comparable{column: c, ltValue: column.LessThan, gtValue: column.GreaterThan, nullLtValue: column.LessThan, nullGtValue: column.GreaterThan, equalNullValue: column.NotEqual} 331 if reverse { 332 result.ltValue, result.nullLtValue, result.gtValue, result.nullGtValue = 333 result.gtValue, result.nullGtValue, result.ltValue, result.nullLtValue 334 } 335 336 if nullLast { 337 result.nullLtValue, result.nullGtValue = result.nullGtValue, result.nullLtValue 338 } 339 340 if equalNull { 341 result.equalNullValue = column.Equal 342 } 343 344 return result 345 } 346 347 func (c Column) String() string { 348 return fmt.Sprintf("%v", c.data) 349 } 350 351 func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) { 352 switch t := fn.(type) { 353 case string: 354 // There are currently no built in aggregations for strings 355 return nil, qerrors.New("string aggregate", "aggregation function %c is not defined for string column", fn) 356 case func([]*string) *string: 357 data := make([]*string, 0, len(indices)) 358 for _, ix := range indices { 359 data = append(data, t(c.stringSlice(ix))) 360 } 361 return New(data), nil 362 default: 363 return nil, qerrors.New("string aggregate", "invalid aggregation function type: %v", t) 364 } 365 } 366 367 func stringToPtr(s string, isNull bool) *string { 368 if isNull { 369 return nil 370 } 371 return &s 372 } 373 374 func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) { 375 switch t := fn.(type) { 376 case func(*string) int: 377 result := make([]int, len(c.pointers)) 378 for _, i := range ix { 379 result[i] = t(stringToPtr(c.stringAt(i))) 380 } 381 return result, nil 382 case func(*string) float64: 383 result := make([]float64, len(c.pointers)) 384 for _, i := range ix { 385 result[i] = t(stringToPtr(c.stringAt(i))) 386 } 387 return result, nil 388 case func(*string) bool: 389 result := make([]bool, len(c.pointers)) 390 for _, i := range ix { 391 result[i] = t(stringToPtr(c.stringAt(i))) 392 } 393 return result, nil 394 case func(*string) *string: 395 result := make([]*string, len(c.pointers)) 396 for _, i := range ix { 397 result[i] = t(stringToPtr(c.stringAt(i))) 398 } 399 return result, nil 400 case string: 401 if f, ok := stringApplyFuncs[t]; ok { 402 return f(ix, c), nil 403 } 404 return nil, qerrors.New("string.apply1", "unknown built in function %v", t) 405 default: 406 return nil, qerrors.New("string.apply1", "cannot apply type %#v to column", fn) 407 } 408 } 409 410 func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) { 411 s2S, ok := s2.(Column) 412 if !ok { 413 return nil, qerrors.New("string.apply2", "invalid column type %v", reflect.TypeOf(s2)) 414 } 415 416 switch t := fn.(type) { 417 case func(*string, *string) *string: 418 result := make([]*string, len(c.pointers)) 419 for _, i := range ix { 420 result[i] = t(stringToPtr(c.stringAt(i)), stringToPtr(s2S.stringAt(i))) 421 } 422 return New(result), nil 423 case string: 424 // No built in functions for strings at this stage 425 return nil, qerrors.New("string.apply2", "unknown built in function %s", t) 426 default: 427 return nil, qerrors.New("string.apply2", "cannot apply type %#v to column", fn) 428 } 429 } 430 431 func (c Column) View(ix index.Int) View { 432 return View{column: c, index: ix} 433 } 434 435 func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) { 436 return c, nil 437 } 438 439 func (c Column) FunctionType() types.FunctionType { 440 return types.FunctionTypeString 441 } 442 443 func (c Column) DataType() types.DataType { 444 return types.String 445 } 446 447 func (c Column) Append(cols ...column.Column) (column.Column, error) { 448 // TODO Append 449 return nil, qerrors.New("Append", "Not implemented yet") 450 } 451 452 type Comparable struct { 453 column Column 454 ltValue column.CompareResult 455 gtValue column.CompareResult 456 nullLtValue column.CompareResult 457 nullGtValue column.CompareResult 458 equalNullValue column.CompareResult 459 }