github.com/tobgu/qframe@v0.4.0/expression.go (about)

     1  package qframe
     2  
     3  import (
     4  	"fmt"
     5  	"strconv"
     6  
     7  	"github.com/tobgu/qframe/config/eval"
     8  	"github.com/tobgu/qframe/qerrors"
     9  	"github.com/tobgu/qframe/types"
    10  )
    11  
    12  func getFunc(ctx *eval.Context, ac eval.ArgCount, qf QFrame, colName types.ColumnName, funcName string) (QFrame, interface{}) {
    13  	if qf.Err != nil {
    14  		return qf, nil
    15  	}
    16  
    17  	typ, err := qf.functionType(string(colName))
    18  	if err != nil {
    19  		return qf.withErr(qerrors.Propagate("getFunc", err)), nil
    20  	}
    21  
    22  	fn, ok := ctx.GetFunc(typ, ac, funcName)
    23  	if !ok {
    24  		return qf.withErr(qerrors.New("getFunc", "Could not find %s %s function with name '%s'", typ, ac, funcName)), nil
    25  	}
    26  
    27  	return qf, fn
    28  }
    29  
// Expression is an internal interface representing an expression that can be executed on a QFrame.
type Expression interface {
	// execute evaluates the expression against f using ctx and returns the
	// resulting QFrame together with the name of the column that holds the
	// result. The implementations in this file return an empty column name
	// when the returned QFrame carries an error.
	execute(f QFrame, ctx *eval.Context) (QFrame, types.ColumnName)

	// Err returns an error if the expression could not be constructed for some reason.
	Err() error
}
    37  
    38  func newExpr(expr interface{}) Expression {
    39  	// Try, in turn, to decode expr into a valid expression type.
    40  	if e, ok := expr.(Expression); ok {
    41  		return e
    42  	}
    43  
    44  	if e, ok := newColExpr(expr); ok {
    45  		return e
    46  	}
    47  
    48  	if e, ok := newConstExpr(expr); ok {
    49  		return e
    50  	}
    51  
    52  	if e, ok := newUnaryExpr(expr); ok {
    53  		return e
    54  	}
    55  
    56  	if e, ok := newColConstExpr(expr); ok {
    57  		return e
    58  	}
    59  
    60  	if e, ok := newColColExpr(expr); ok {
    61  		return e
    62  	}
    63  
    64  	return newExprExpr(expr)
    65  }
    66  
    67  // Either an operation or a column identifier
    68  func opIdentifier(x interface{}) (string, bool) {
    69  	s, ok := x.(string)
    70  	return s, ok
    71  }
    72  
// colExpr is an expression that refers to an existing column. Executing it
// leaves the QFrame untouched and just passes the source column name on.
type colExpr struct {
	srcCol types.ColumnName
}
    77  
    78  func colIdentifier(x interface{}) (types.ColumnName, bool) {
    79  	srcCol, cOk := x.(types.ColumnName)
    80  	return srcCol, cOk
    81  }
    82  
    83  func newColExpr(x interface{}) (colExpr, bool) {
    84  	srcCol, cOk := colIdentifier(x)
    85  	return colExpr{srcCol: srcCol}, cOk
    86  }
    87  
// execute returns qf unchanged together with the name of the source column.
// The evaluation context is not used.
func (e colExpr) execute(qf QFrame, _ *eval.Context) (QFrame, types.ColumnName) {
	return qf, e.srcCol
}
    91  
// Err always returns nil, a colExpr does not carry a construction error.
func (e colExpr) Err() error {
	return nil
}
    95  
    96  func tempColName(qf QFrame, prefix string) types.ColumnName {
    97  	for i := 0; i < 10000; i++ {
    98  		colName := prefix + "-temp-" + strconv.Itoa(i)
    99  		if !qf.Contains(colName) {
   100  			return types.ColumnName(colName)
   101  		}
   102  	}
   103  
   104  	// This is really strange, somehow there are more than 10000 columns
   105  	// in the sequence we're trying from. This should never happen, Panic...
   106  	panic(fmt.Sprintf("Could not find temp column name for prefix %s", prefix))
   107  }
   108  
// constExpr is an expression that generates a new column filled with a
// given constant value (eg. 42).
type constExpr struct {
	value interface{}
}
   113  
   114  func newConstExpr(x interface{}) (constExpr, bool) {
   115  	// TODO: Support const functions somehow? Or perhaps add some kind of
   116  	//       "variable" (accessed by $...?) to the context?
   117  	value := x
   118  	if value == nil {
   119  		// Nil is implicitly typed to string
   120  		value = (*string)(nil)
   121  	}
   122  
   123  	var isConst bool
   124  	switch value.(type) {
   125  	case int, float64, bool, string, *string:
   126  		isConst = true
   127  	default:
   128  		isConst = false
   129  	}
   130  
   131  	return constExpr{value: value}, isConst
   132  }
   133  
   134  func (e constExpr) execute(qf QFrame, _ *eval.Context) (QFrame, types.ColumnName) {
   135  	if qf.Err != nil {
   136  		return qf, ""
   137  	}
   138  
   139  	colName := tempColName(qf, "const")
   140  	return qf.Apply(Instruction{Fn: e.value, DstCol: string(colName)}), colName
   141  }
   142  
// Err always returns nil, a constExpr does not carry a construction error.
func (e constExpr) Err() error {
	return nil
}
   146  
// unaryExpr is an expression that uses the content of a single column and
// nothing else as input (eg. abs(x)).
type unaryExpr struct {
	operation string           // Name of the single argument function to apply
	srcCol    types.ColumnName // Column that the function is applied to
}
   152  
   153  func newUnaryExpr(x interface{}) (unaryExpr, bool) {
   154  	// TODO: Might want to accept slice of strings here as well?
   155  	l, ok := x.([]interface{})
   156  	if ok && len(l) == 2 {
   157  		operation, oOk := opIdentifier(l[0])
   158  		srcCol, cOk := colIdentifier(l[1])
   159  		return unaryExpr{operation: operation, srcCol: srcCol}, oOk && cOk
   160  	}
   161  
   162  	return unaryExpr{}, false
   163  }
   164  
   165  func (e unaryExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
   166  	qf, fn := getFunc(ctx, eval.ArgCountOne, qf, e.srcCol, e.operation)
   167  	if qf.Err != nil {
   168  		return qf, ""
   169  	}
   170  
   171  	colName := tempColName(qf, "unary")
   172  	return qf.Apply(Instruction{Fn: fn, DstCol: string(colName), SrcCol1: string(e.srcCol)}), colName
   173  }
   174  
// Err always returns nil, a unaryExpr does not carry a construction error.
func (e unaryExpr) Err() error {
	return nil
}
   178  
   179  // Use the content of a single column and a constant as input (eg. age + 1)
   180  type colConstExpr struct {
   181  	operation string
   182  	srcCol    types.ColumnName
   183  	value     interface{}
   184  }
   185  
   186  func newColConstExpr(x interface{}) (colConstExpr, bool) {
   187  	l, ok := x.([]interface{})
   188  	if ok && len(l) == 3 {
   189  		operation, oOk := opIdentifier(l[0])
   190  
   191  		srcCol, colOk := colIdentifier(l[1])
   192  		constE, constOk := newConstExpr(l[2])
   193  		if !colOk || !constOk {
   194  			// Test flipping order
   195  			srcCol, colOk = colIdentifier(l[2])
   196  			constE, constOk = newConstExpr(l[1])
   197  		}
   198  
   199  		return colConstExpr{operation: operation, srcCol: srcCol, value: constE.value}, colOk && constOk && oOk
   200  	}
   201  
   202  	return colConstExpr{}, false
   203  }
   204  
   205  func (e colConstExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
   206  	if qf.Err != nil {
   207  		return qf, ""
   208  	}
   209  
   210  	// Fill temp column with the constant part and then apply col col expression.
   211  	// There are other ways to do this that would avoid the temp column but it would
   212  	// require more special case logic.
   213  	cE, _ := newConstExpr(e.value)
   214  	result, constColName := cE.execute(qf, ctx)
   215  	ccE, _ := newColColExpr([]interface{}{e.operation, e.srcCol, constColName})
   216  	result, colName := ccE.execute(result, ctx)
   217  	result = result.Drop(string(constColName))
   218  	return result, colName
   219  }
   220  
   221  func (e colConstExpr) Err() error {
   222  	return nil
   223  }
   224  
// colColExpr is an expression that uses the content of two columns as
// input (eg. weight / length).
type colColExpr struct {
	operation string           // Name of the two argument function to apply
	srcCol1   types.ColumnName // First (left hand side) argument column
	srcCol2   types.ColumnName // Second (right hand side) argument column
}
   231  
   232  func newColColExpr(x interface{}) (colColExpr, bool) {
   233  	l, ok := x.([]interface{})
   234  	if ok && len(l) == 3 {
   235  		op, oOk := opIdentifier(l[0])
   236  		srcCol1, col1Ok := colIdentifier(l[1])
   237  		srcCol2, col2Ok := colIdentifier(l[2])
   238  		return colColExpr{operation: op, srcCol1: srcCol1, srcCol2: srcCol2}, oOk && col1Ok && col2Ok
   239  	}
   240  
   241  	return colColExpr{}, false
   242  }
   243  
   244  func (e colColExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
   245  	qf, fn := getFunc(ctx, eval.ArgCountTwo, qf, e.srcCol1, e.operation)
   246  	if qf.Err != nil {
   247  		return qf, ""
   248  	}
   249  
   250  	// Fill temp column with the constant part and then apply col col expression.
   251  	// There are other ways to do this that would avoid the temp column but it would
   252  	// require more special case logic.
   253  	colName := tempColName(qf, "colcol")
   254  	result := qf.Apply(Instruction{Fn: fn, DstCol: string(colName), SrcCol1: string(e.srcCol1), SrcCol2: string(e.srcCol2)})
   255  	return result, colName
   256  }
   257  
// Err always returns nil, a colColExpr does not carry a construction error.
func (e colColExpr) Err() error {
	return nil
}
   261  
// Nested expressions

// exprExpr1 is a single argument operation applied to the result of
// another expression (eg. abs(a - b)).
type exprExpr1 struct {
	operation string
	expr      Expression
}
   267  
// exprExpr2 is a two argument operation applied to the results of two
// other expressions, lhs being the first argument and rhs the second.
type exprExpr2 struct {
	operation string
	lhs       Expression
	rhs       Expression
}
   273  
   274  func newExprExpr(x interface{}) Expression {
   275  	// In contrast to other expression constructors this one returns an error instead
   276  	// of a bool to denote success or failure. This is to be able to pinpoint the
   277  	// subexpression where the error occurred.
   278  
   279  	l, ok := x.([]interface{})
   280  	if ok {
   281  		if len(l) == 2 || len(l) == 3 {
   282  			operation, oOk := opIdentifier(l[0])
   283  			if !oOk {
   284  				return errorExpr{err: qerrors.New("newExprExpr", "invalid operation: %v", l[0])}
   285  			}
   286  
   287  			lhs := newExpr(l[1])
   288  			if lhs.Err() != nil {
   289  				return errorExpr{err: qerrors.Propagate("newExprExpr", lhs.Err())}
   290  			}
   291  
   292  			if len(l) == 2 {
   293  				// Single argument functions such as "abs"
   294  				return exprExpr1{operation: operation, expr: lhs}
   295  			}
   296  
   297  			rhs := newExpr(l[2])
   298  			if rhs.Err() != nil {
   299  				return errorExpr{err: qerrors.Propagate("newExprExpr", rhs.Err())}
   300  			}
   301  
   302  			return exprExpr2{operation: operation, lhs: lhs, rhs: rhs}
   303  		}
   304  		return errorExpr{err: qerrors.New("newExprExpr", "Expected a list with two or three elements, was: %v", x)}
   305  	}
   306  
   307  	return errorExpr{err: qerrors.New("newExprExpr", "Expected a list of elements, was: %v", x)}
   308  }
   309  
   310  func (e exprExpr1) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
   311  	result, tempColName := e.expr.execute(qf, ctx)
   312  	ccE, _ := newUnaryExpr([]interface{}{e.operation, types.ColumnName(tempColName)})
   313  	result, colName := ccE.execute(result, ctx)
   314  
   315  	// Drop intermediate result if not present in original frame
   316  	if !qf.Contains(string(tempColName)) {
   317  		result = result.Drop(string(tempColName))
   318  	}
   319  
   320  	return result, colName
   321  }
   322  
// Err always returns nil. Errors in the sub expression are caught by
// newExprExpr, which returns an errorExpr instead of an exprExpr1.
func (e exprExpr1) Err() error {
	return nil
}
   326  
   327  func (e exprExpr2) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
   328  	result, lColName := e.lhs.execute(qf, ctx)
   329  	result, rColName := e.rhs.execute(result, ctx)
   330  	ccE, _ := newColColExpr([]interface{}{e.operation, lColName, rColName})
   331  	result, colName := ccE.execute(result, ctx)
   332  
   333  	// Drop intermediate results if not present in original frame
   334  	dropCols := make([]string, 0)
   335  	for _, c := range []types.ColumnName{lColName, rColName} {
   336  		s := string(c)
   337  		if !qf.Contains(s) {
   338  			dropCols = append(dropCols, s)
   339  		}
   340  	}
   341  	result = result.Drop(dropCols...)
   342  
   343  	return result, colName
   344  }
   345  
// Err always returns nil. Errors in the sub expressions are caught by
// newExprExpr, which returns an errorExpr instead of an exprExpr2.
func (e exprExpr2) Err() error {
	return nil
}
   349  
// errorExpr is an expression that could not be constructed. It carries the
// error describing what went wrong.
type errorExpr struct {
	err error
}
   353  
   354  func (e errorExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) {
   355  	if qf.Err != nil {
   356  		return qf, ""
   357  	}
   358  
   359  	return qf.withErr(e.err), ""
   360  }
   361  
// Err returns the error that prevented the expression from being constructed.
func (e errorExpr) Err() error {
	return e.err
}
   365  
// Val represents a constant or column.
//
// The value is decoded by newExpr. If it cannot be decoded the returned
// Expression reports the reason through Err.
func Val(value interface{}) Expression {
	return newExpr(value)
}
   370  
   371  // Expr represents an expression with one or more arguments.
   372  // The arguments may be values, columns or the result of other expressions.
   373  //
   374  // If more arguments than two are passed, the expression will be evaluated by
   375  // repeatedly applying the function to pairwise elements from the left.
   376  // Temporary columns will be created as necessary to hold intermediate results.
   377  //
   378  // Pseudo example:
   379  //     ["/", 18, 2, 3] is evaluated as ["/", ["/", 18, 2], 3] (= 3)
   380  func Expr(name string, args ...interface{}) Expression {
   381  	if len(args) == 0 {
   382  		// This is currently the case. It may change if introducing variables for example.
   383  		return errorExpr{err: qerrors.New("Expr", "Expressions require at least one argument")}
   384  
   385  	}
   386  
   387  	if len(args) == 1 {
   388  		return newExpr([]interface{}{name, args[0]})
   389  	}
   390  
   391  	if len(args) == 2 {
   392  		return newExpr([]interface{}{name, args[0], args[1]})
   393  	}
   394  
   395  	newArgs := make([]interface{}, len(args)-1)
   396  	newArgs[0] = newExpr([]interface{}{name, args[0], args[1]})
   397  	copy(newArgs[1:], args[2:])
   398  	return Expr(name, newArgs...)
   399  }