github.com/tobgu/qframe@v0.4.0/internal/scolumn/column.go (about)

     1  package scolumn
     2  
     3  import (
     4  	"bytes"
     5  	"fmt"
     6  	"github.com/tobgu/qframe/config/rolling"
     7  	"github.com/tobgu/qframe/internal/column"
     8  	"github.com/tobgu/qframe/internal/hash"
     9  	"github.com/tobgu/qframe/internal/index"
    10  	qfstrings "github.com/tobgu/qframe/internal/strings"
    11  	"github.com/tobgu/qframe/qerrors"
    12  	"github.com/tobgu/qframe/types"
    13  	"math/rand"
    14  	"reflect"
    15  )
    16  
// stringApplyFuncs maps the name of a built in string apply function to its
// implementation. Apply1 consults this table when it is handed a function
// name (a string) instead of a Go function.
var stringApplyFuncs = map[string]func(index.Int, Column) interface{}{
	"ToUpper": toUpper,
}
    20  
    21  // This is an example of how a more efficient built in function
    22  // could be implemented that makes use of the underlying representation
    23  // to make the operation faster than what could be done using the
    24  // generic function based API.
    25  // This function is roughly 3 - 4 times faster than applying the corresponding
    26  // general function (depending on the input size, etc. of course).
    27  func toUpper(ix index.Int, source Column) interface{} {
    28  	if len(source.pointers) == 0 {
    29  		return source
    30  	}
    31  
    32  	pointers := make([]qfstrings.Pointer, len(source.pointers))
    33  	sizeEstimate := int(float64(len(source.data)) * (float64(len(ix)) / float64(len(source.pointers))))
    34  	data := make([]byte, 0, sizeEstimate)
    35  	strBuf := make([]byte, 1024)
    36  	for _, i := range ix {
    37  		str, isNull := source.stringAt(i)
    38  		pointers[i] = qfstrings.NewPointer(len(data), len(str), isNull)
    39  		data = append(data, qfstrings.ToUpper(&strBuf, str)...)
    40  	}
    41  
    42  	return NewBytes(pointers, data)
    43  }
    44  
    45  func (c Column) StringAt(i uint32, naRep string) string {
    46  	if s, isNull := c.stringAt(i); !isNull {
    47  		return s
    48  	}
    49  
    50  	return naRep
    51  }
    52  
    53  func (c Column) stringSlice(index index.Int) []*string {
    54  	result := make([]*string, len(index))
    55  	for i, ix := range index {
    56  		s, isNull := c.stringAt(ix)
    57  		if isNull {
    58  			result[i] = nil
    59  		} else {
    60  			result[i] = &s
    61  		}
    62  	}
    63  
    64  	return result
    65  }
    66  
    67  func (c Column) AppendByteStringAt(buf []byte, i uint32) []byte {
    68  	p := c.pointers[i]
    69  	if p.IsNull() {
    70  		return append(buf, "null"...)
    71  	}
    72  	str := qfstrings.UnsafeBytesToString(c.data[p.Offset() : p.Offset()+p.Len()])
    73  	return qfstrings.AppendQuotedString(buf, str)
    74  }
    75  
    76  func (c Column) ByteSize() int {
    77  	return 8*cap(c.pointers) + cap(c.data)
    78  }
    79  
// Len returns the number of elements in the column, which is the number of
// entries in the pointer slice (null values included).
func (c Column) Len() int {
	return len(c.pointers)
}
    83  
    84  func (c Column) Equals(index index.Int, other column.Column, otherIndex index.Int) bool {
    85  	otherC, ok := other.(Column)
    86  	if !ok {
    87  		return false
    88  	}
    89  
    90  	for ix, x := range index {
    91  		s, sNull := c.stringAt(x)
    92  		os, osNull := otherC.stringAt(otherIndex[ix])
    93  		if sNull || osNull {
    94  			if sNull && osNull {
    95  				continue
    96  			}
    97  
    98  			return false
    99  		}
   100  
   101  		if s != os {
   102  			return false
   103  		}
   104  	}
   105  
   106  	return true
   107  }
   108  
   109  func (c Comparable) Compare(i, j uint32) column.CompareResult {
   110  	x, xNull := c.column.bytesAt(i)
   111  	y, yNull := c.column.bytesAt(j)
   112  	if xNull || yNull {
   113  		if !xNull {
   114  			return c.nullGtValue
   115  		}
   116  
   117  		if !yNull {
   118  			return c.nullLtValue
   119  		}
   120  
   121  		return c.equalNullValue
   122  	}
   123  
   124  	r := bytes.Compare(x, y)
   125  	switch r {
   126  	case -1:
   127  		return c.ltValue
   128  	case 1:
   129  		return c.gtValue
   130  	default:
   131  		return column.Equal
   132  	}
   133  }
   134  
   135  func (c Comparable) Hash(i uint32, seed uint64) uint64 {
   136  	x, isNull := c.column.bytesAt(i)
   137  	if isNull {
   138  		if c.equalNullValue == column.NotEqual {
   139  			// Use a random value here to avoid hash collisions when
   140  			// we don't consider null to equal null.
   141  			// Use a random value here to avoid hash collisions when
   142  			// we don't consider null to equal null.
   143  			return rand.Uint64()
   144  		}
   145  
   146  		b := [1]byte{0}
   147  		return hash.HashBytes(b[:], seed)
   148  	}
   149  
   150  	return hash.HashBytes(x, seed)
   151  }
   152  
   153  func (c Column) filterBuiltIn(index index.Int, comparator string, comparatee interface{}, bIndex index.Bool) error {
   154  	comparatee = qfstrings.InterfaceSliceToStringSlice(comparatee)
   155  	switch t := comparatee.(type) {
   156  	case string:
   157  		filterFn, ok := filterFuncs1[comparator]
   158  		if !ok {
   159  			return qerrors.New("filter string", "unknown filter operator %v for single value argument", comparator)
   160  		}
   161  		return filterFn(index, c, t, bIndex)
   162  	case []string:
   163  		filterFn, ok := multiInputFilterFuncs[comparator]
   164  		if !ok {
   165  			return qerrors.New("filter string", "unknown filter operator %v for multi value argument", comparator)
   166  		}
   167  
   168  		return filterFn(index, c, qfstrings.NewStringSet(t), bIndex)
   169  	case Column:
   170  		filterFn, ok := filterFuncs2[comparator]
   171  		if !ok {
   172  			return qerrors.New("filter string", "unknown filter operator %v for column - column comparison", comparator)
   173  		}
   174  		return filterFn(index, c, t, bIndex)
   175  	case nil:
   176  		filterFn, ok := filterFuncs0[comparator]
   177  		if !ok {
   178  			return qerrors.New("filter string", "unknown filter operator %v for zero argument", comparator)
   179  		}
   180  		return filterFn(index, c, bIndex)
   181  	default:
   182  		return qerrors.New("filter string", "invalid comparison value type %v", reflect.TypeOf(comparatee))
   183  	}
   184  }
   185  
   186  func (c Column) filterCustom1(index index.Int, fn func(*string) bool, bIndex index.Bool) {
   187  	for i, x := range bIndex {
   188  		if !x {
   189  			bIndex[i] = fn(stringToPtr(c.stringAt(index[i])))
   190  		}
   191  	}
   192  }
   193  
   194  func (c Column) filterCustom2(index index.Int, fn func(*string, *string) bool, comparatee interface{}, bIndex index.Bool) error {
   195  	otherC, ok := comparatee.(Column)
   196  	if !ok {
   197  		return qerrors.New("filter string", "expected comparatee to be string column, was %v", reflect.TypeOf(comparatee))
   198  	}
   199  
   200  	for i, x := range bIndex {
   201  		if !x {
   202  			bIndex[i] = fn(stringToPtr(c.stringAt(index[i])), stringToPtr(otherC.stringAt(index[i])))
   203  		}
   204  	}
   205  
   206  	return nil
   207  }
   208  
   209  func (c Column) Filter(index index.Int, comparator interface{}, comparatee interface{}, bIndex index.Bool) error {
   210  	var err error
   211  	switch t := comparator.(type) {
   212  	case string:
   213  		err = c.filterBuiltIn(index, t, comparatee, bIndex)
   214  	case func(*string) bool:
   215  		c.filterCustom1(index, t, bIndex)
   216  	case func(*string, *string) bool:
   217  		err = c.filterCustom2(index, t, comparatee, bIndex)
   218  	default:
   219  		err = qerrors.New("filter string", "invalid filter type %v", reflect.TypeOf(comparator))
   220  	}
   221  	return err
   222  }
   223  
// Column is a column of nullable strings. The bytes of all strings are stored
// back to back in data and every element is described by an entry in pointers.
type Column struct {
	// pointers holds one entry per element with the offset and length of the
	// element's bytes in data together with a null flag.
	pointers []qfstrings.Pointer
	// data holds the raw bytes that the pointers refer to.
	data []byte
}
   228  
// NewBytes creates a Column directly from a pointer slice and the byte slice
// that the pointers refer to. The slices are stored as is, they are not copied.
func NewBytes(pointers []qfstrings.Pointer, bytes []byte) Column {
	return Column{pointers: pointers, data: bytes}
}
   232  
   233  func NewStrings(strings []string) Column {
   234  	data := make([]byte, 0, len(strings))
   235  	pointers := make([]qfstrings.Pointer, len(strings))
   236  	offset := 0
   237  	for i, s := range strings {
   238  		pointers[i] = qfstrings.NewPointer(offset, len(s), false)
   239  		offset += len(s)
   240  		data = append(data, s...)
   241  	}
   242  
   243  	return NewBytes(pointers, data)
   244  }
   245  
   246  func New(strings []*string) Column {
   247  	data := make([]byte, 0, len(strings))
   248  	pointers := make([]qfstrings.Pointer, len(strings))
   249  	offset := 0
   250  	for i, s := range strings {
   251  		if s == nil {
   252  			pointers[i] = qfstrings.NewPointer(offset, 0, true)
   253  		} else {
   254  			sLen := len(*s)
   255  			pointers[i] = qfstrings.NewPointer(offset, sLen, false)
   256  			offset += sLen
   257  			data = append(data, *s...)
   258  		}
   259  	}
   260  
   261  	return NewBytes(pointers, data)
   262  }
   263  
   264  func NewConst(val *string, count int) Column {
   265  	var data []byte
   266  	pointers := make([]qfstrings.Pointer, count)
   267  	if val == nil {
   268  		data = make([]byte, 0)
   269  		for i := range pointers {
   270  			pointers[i] = qfstrings.NewPointer(0, 0, true)
   271  		}
   272  	} else {
   273  		sLen := len(*val)
   274  		data = make([]byte, 0, sLen)
   275  		data = append(data, *val...)
   276  		for i := range pointers {
   277  			pointers[i] = qfstrings.NewPointer(0, sLen, false)
   278  		}
   279  	}
   280  
   281  	return NewBytes(pointers, data)
   282  }
   283  
   284  func (c Column) stringAt(i uint32) (string, bool) {
   285  	p := c.pointers[i]
   286  	if p.IsNull() {
   287  		return "", true
   288  	}
   289  	return qfstrings.UnsafeBytesToString(c.data[p.Offset() : p.Offset()+p.Len()]), false
   290  }
   291  
   292  func (c Column) bytesAt(i uint32) ([]byte, bool) {
   293  	p := c.pointers[i]
   294  	if p.IsNull() {
   295  		return nil, true
   296  	}
   297  	return c.data[p.Offset() : p.Offset()+p.Len()], false
   298  }
   299  
   300  func (c Column) stringCopyAt(i uint32) (string, bool) {
   301  	// Similar to stringAt but will allocate a new string and copy the content into it.
   302  	p := c.pointers[i]
   303  	if p.IsNull() {
   304  		return "", true
   305  	}
   306  	return string(c.data[p.Offset() : p.Offset()+p.Len()]), false
   307  }
   308  
   309  func (c Column) subset(index index.Int) Column {
   310  	data := make([]byte, 0, len(index))
   311  	pointers := make([]qfstrings.Pointer, len(index))
   312  	offset := 0
   313  	for i, ix := range index {
   314  		p := c.pointers[ix]
   315  		pointers[i] = qfstrings.NewPointer(offset, p.Len(), p.IsNull())
   316  		if !p.IsNull() {
   317  			data = append(data, c.data[p.Offset():p.Offset()+p.Len()]...)
   318  			offset += p.Len()
   319  		}
   320  	}
   321  
   322  	return Column{data: data, pointers: pointers}
   323  }
   324  
// Subset returns a new column that contains the elements at the positions
// listed in index. It wraps subset and returns the result as a column.Column.
func (c Column) Subset(index index.Int) column.Column {
	return c.subset(index)
}
   328  
   329  func (c Column) Comparable(reverse, equalNull, nullLast bool) column.Comparable {
   330  	result := Comparable{column: c, ltValue: column.LessThan, gtValue: column.GreaterThan, nullLtValue: column.LessThan, nullGtValue: column.GreaterThan, equalNullValue: column.NotEqual}
   331  	if reverse {
   332  		result.ltValue, result.nullLtValue, result.gtValue, result.nullGtValue =
   333  			result.gtValue, result.nullGtValue, result.ltValue, result.nullLtValue
   334  	}
   335  
   336  	if nullLast {
   337  		result.nullLtValue, result.nullGtValue = result.nullGtValue, result.nullLtValue
   338  	}
   339  
   340  	if equalNull {
   341  		result.equalNullValue = column.Equal
   342  	}
   343  
   344  	return result
   345  }
   346  
   347  func (c Column) String() string {
   348  	return fmt.Sprintf("%v", c.data)
   349  }
   350  
   351  func (c Column) Aggregate(indices []index.Int, fn interface{}) (column.Column, error) {
   352  	switch t := fn.(type) {
   353  	case string:
   354  		// There are currently no built in aggregations for strings
   355  		return nil, qerrors.New("string aggregate", "aggregation function %c is not defined for string column", fn)
   356  	case func([]*string) *string:
   357  		data := make([]*string, 0, len(indices))
   358  		for _, ix := range indices {
   359  			data = append(data, t(c.stringSlice(ix)))
   360  		}
   361  		return New(data), nil
   362  	default:
   363  		return nil, qerrors.New("string aggregate", "invalid aggregation function type: %v", t)
   364  	}
   365  }
   366  
   367  func stringToPtr(s string, isNull bool) *string {
   368  	if isNull {
   369  		return nil
   370  	}
   371  	return &s
   372  }
   373  
   374  func (c Column) Apply1(fn interface{}, ix index.Int) (interface{}, error) {
   375  	switch t := fn.(type) {
   376  	case func(*string) int:
   377  		result := make([]int, len(c.pointers))
   378  		for _, i := range ix {
   379  			result[i] = t(stringToPtr(c.stringAt(i)))
   380  		}
   381  		return result, nil
   382  	case func(*string) float64:
   383  		result := make([]float64, len(c.pointers))
   384  		for _, i := range ix {
   385  			result[i] = t(stringToPtr(c.stringAt(i)))
   386  		}
   387  		return result, nil
   388  	case func(*string) bool:
   389  		result := make([]bool, len(c.pointers))
   390  		for _, i := range ix {
   391  			result[i] = t(stringToPtr(c.stringAt(i)))
   392  		}
   393  		return result, nil
   394  	case func(*string) *string:
   395  		result := make([]*string, len(c.pointers))
   396  		for _, i := range ix {
   397  			result[i] = t(stringToPtr(c.stringAt(i)))
   398  		}
   399  		return result, nil
   400  	case string:
   401  		if f, ok := stringApplyFuncs[t]; ok {
   402  			return f(ix, c), nil
   403  		}
   404  		return nil, qerrors.New("string.apply1", "unknown built in function %v", t)
   405  	default:
   406  		return nil, qerrors.New("string.apply1", "cannot apply type %#v to column", fn)
   407  	}
   408  }
   409  
   410  func (c Column) Apply2(fn interface{}, s2 column.Column, ix index.Int) (column.Column, error) {
   411  	s2S, ok := s2.(Column)
   412  	if !ok {
   413  		return nil, qerrors.New("string.apply2", "invalid column type %v", reflect.TypeOf(s2))
   414  	}
   415  
   416  	switch t := fn.(type) {
   417  	case func(*string, *string) *string:
   418  		result := make([]*string, len(c.pointers))
   419  		for _, i := range ix {
   420  			result[i] = t(stringToPtr(c.stringAt(i)), stringToPtr(s2S.stringAt(i)))
   421  		}
   422  		return New(result), nil
   423  	case string:
   424  		// No built in functions for strings at this stage
   425  		return nil, qerrors.New("string.apply2", "unknown built in function %s", t)
   426  	default:
   427  		return nil, qerrors.New("string.apply2", "cannot apply type %#v to column", fn)
   428  	}
   429  }
   430  
// View returns a View that pairs the column with the index ix.
func (c Column) View(ix index.Int) View {
	return View{column: c, index: ix}
}
   434  
// Rolling currently performs no computation for string columns: fn, ix and
// config are ignored and the column itself is returned together with a nil
// error.
func (c Column) Rolling(fn interface{}, ix index.Int, config rolling.Config) (column.Column, error) {
	return c, nil
}
   438  
// FunctionType returns the function type of the column, which is always
// types.FunctionTypeString.
func (c Column) FunctionType() types.FunctionType {
	return types.FunctionTypeString
}
   442  
// DataType returns the data type of the column, which is always types.String.
func (c Column) DataType() types.DataType {
	return types.String
}
   446  
// Append is not implemented for string columns yet. It ignores cols and
// always returns a nil column together with an error.
func (c Column) Append(cols ...column.Column) (column.Column, error) {
	// TODO Append
	return nil, qerrors.New("Append", "Not implemented yet")
}
   451  
   452  type Comparable struct {
   453  	column         Column
   454  	ltValue        column.CompareResult
   455  	gtValue        column.CompareResult
   456  	nullLtValue    column.CompareResult
   457  	nullGtValue    column.CompareResult
   458  	equalNullValue column.CompareResult
   459  }