github.com/tobgu/qframe@v0.4.0/expression.go (about) 1 package qframe 2 3 import ( 4 "fmt" 5 "strconv" 6 7 "github.com/tobgu/qframe/config/eval" 8 "github.com/tobgu/qframe/qerrors" 9 "github.com/tobgu/qframe/types" 10 ) 11 12 func getFunc(ctx *eval.Context, ac eval.ArgCount, qf QFrame, colName types.ColumnName, funcName string) (QFrame, interface{}) { 13 if qf.Err != nil { 14 return qf, nil 15 } 16 17 typ, err := qf.functionType(string(colName)) 18 if err != nil { 19 return qf.withErr(qerrors.Propagate("getFunc", err)), nil 20 } 21 22 fn, ok := ctx.GetFunc(typ, ac, funcName) 23 if !ok { 24 return qf.withErr(qerrors.New("getFunc", "Could not find %s %s function with name '%s'", typ, ac, funcName)), nil 25 } 26 27 return qf, fn 28 } 29 30 // Expression is an internal interface representing an expression that can be executed on a QFrame. 31 type Expression interface { 32 execute(f QFrame, ctx *eval.Context) (QFrame, types.ColumnName) 33 34 // Err returns an error if the expression could not be constructed for some reason. 35 Err() error 36 } 37 38 func newExpr(expr interface{}) Expression { 39 // Try, in turn, to decode expr into a valid expression type. 40 if e, ok := expr.(Expression); ok { 41 return e 42 } 43 44 if e, ok := newColExpr(expr); ok { 45 return e 46 } 47 48 if e, ok := newConstExpr(expr); ok { 49 return e 50 } 51 52 if e, ok := newUnaryExpr(expr); ok { 53 return e 54 } 55 56 if e, ok := newColConstExpr(expr); ok { 57 return e 58 } 59 60 if e, ok := newColColExpr(expr); ok { 61 return e 62 } 63 64 return newExprExpr(expr) 65 } 66 67 // Either an operation or a column identifier 68 func opIdentifier(x interface{}) (string, bool) { 69 s, ok := x.(string) 70 return s, ok 71 } 72 73 // This will just pass the src column on 74 type colExpr struct { 75 srcCol types.ColumnName 76 } 77 78 func colIdentifier(x interface{}) (types.ColumnName, bool) { 79 srcCol, cOk := x.(types.ColumnName) 80 return srcCol, cOk 81 } 82 83 func newColExpr(x interface{}) (colExpr, bool) { 84 srcCol, cOk := colIdentifier(x) 85 return colExpr{srcCol: srcCol}, cOk 86 } 87 88 func (e colExpr) execute(qf QFrame, _ *eval.Context) (QFrame, types.ColumnName) { 89 return qf, e.srcCol 90 } 91 92 func (e colExpr) Err() error { 93 return nil 94 } 95 96 func tempColName(qf QFrame, prefix string) types.ColumnName { 97 for i := 0; i < 10000; i++ { 98 colName := prefix + "-temp-" + strconv.Itoa(i) 99 if !qf.Contains(colName) { 100 return types.ColumnName(colName) 101 } 102 } 103 104 // This is really strange, somehow there are more than 10000 columns 105 // in the sequence we're trying from. This should never happen, Panic... 106 panic(fmt.Sprintf("Could not find temp column name for prefix %s", prefix)) 107 } 108 109 // Generating a new column with a given content (eg. 42) 110 type constExpr struct { 111 value interface{} 112 } 113 114 func newConstExpr(x interface{}) (constExpr, bool) { 115 // TODO: Support const functions somehow? Or perhaps add some kind of 116 // "variable" (accessed by $...?) to the context? 117 value := x 118 if value == nil { 119 // Nil is implicitly typed to string 120 value = (*string)(nil) 121 } 122 123 var isConst bool 124 switch value.(type) { 125 case int, float64, bool, string, *string: 126 isConst = true 127 default: 128 isConst = false 129 } 130 131 return constExpr{value: value}, isConst 132 } 133 134 func (e constExpr) execute(qf QFrame, _ *eval.Context) (QFrame, types.ColumnName) { 135 if qf.Err != nil { 136 return qf, "" 137 } 138 139 colName := tempColName(qf, "const") 140 return qf.Apply(Instruction{Fn: e.value, DstCol: string(colName)}), colName 141 } 142 143 func (e constExpr) Err() error { 144 return nil 145 } 146 147 // Use the content of a single column and nothing else as input (eg. abs(x)) 148 type unaryExpr struct { 149 operation string 150 srcCol types.ColumnName 151 } 152 153 func newUnaryExpr(x interface{}) (unaryExpr, bool) { 154 // TODO: Might want to accept slice of strings here as well? 155 l, ok := x.([]interface{}) 156 if ok && len(l) == 2 { 157 operation, oOk := opIdentifier(l[0]) 158 srcCol, cOk := colIdentifier(l[1]) 159 return unaryExpr{operation: operation, srcCol: srcCol}, oOk && cOk 160 } 161 162 return unaryExpr{}, false 163 } 164 165 func (e unaryExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) { 166 qf, fn := getFunc(ctx, eval.ArgCountOne, qf, e.srcCol, e.operation) 167 if qf.Err != nil { 168 return qf, "" 169 } 170 171 colName := tempColName(qf, "unary") 172 return qf.Apply(Instruction{Fn: fn, DstCol: string(colName), SrcCol1: string(e.srcCol)}), colName 173 } 174 175 func (e unaryExpr) Err() error { 176 return nil 177 } 178 179 // Use the content of a single column and a constant as input (eg. age + 1) 180 type colConstExpr struct { 181 operation string 182 srcCol types.ColumnName 183 value interface{} 184 } 185 186 func newColConstExpr(x interface{}) (colConstExpr, bool) { 187 l, ok := x.([]interface{}) 188 if ok && len(l) == 3 { 189 operation, oOk := opIdentifier(l[0]) 190 191 srcCol, colOk := colIdentifier(l[1]) 192 constE, constOk := newConstExpr(l[2]) 193 if !colOk || !constOk { 194 // Test flipping order 195 srcCol, colOk = colIdentifier(l[2]) 196 constE, constOk = newConstExpr(l[1]) 197 } 198 199 return colConstExpr{operation: operation, srcCol: srcCol, value: constE.value}, colOk && constOk && oOk 200 } 201 202 return colConstExpr{}, false 203 } 204 205 func (e colConstExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) { 206 if qf.Err != nil { 207 return qf, "" 208 } 209 210 // Fill temp column with the constant part and then apply col col expression. 211 // There are other ways to do this that would avoid the temp column but it would 212 // require more special case logic. 213 cE, _ := newConstExpr(e.value) 214 result, constColName := cE.execute(qf, ctx) 215 ccE, _ := newColColExpr([]interface{}{e.operation, e.srcCol, constColName}) 216 result, colName := ccE.execute(result, ctx) 217 result = result.Drop(string(constColName)) 218 return result, colName 219 } 220 221 func (e colConstExpr) Err() error { 222 return nil 223 } 224 225 // Use the content of two columns as input (eg. weight / length) 226 type colColExpr struct { 227 operation string 228 srcCol1 types.ColumnName 229 srcCol2 types.ColumnName 230 } 231 232 func newColColExpr(x interface{}) (colColExpr, bool) { 233 l, ok := x.([]interface{}) 234 if ok && len(l) == 3 { 235 op, oOk := opIdentifier(l[0]) 236 srcCol1, col1Ok := colIdentifier(l[1]) 237 srcCol2, col2Ok := colIdentifier(l[2]) 238 return colColExpr{operation: op, srcCol1: srcCol1, srcCol2: srcCol2}, oOk && col1Ok && col2Ok 239 } 240 241 return colColExpr{}, false 242 } 243 244 func (e colColExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) { 245 qf, fn := getFunc(ctx, eval.ArgCountTwo, qf, e.srcCol1, e.operation) 246 if qf.Err != nil { 247 return qf, "" 248 } 249 250 // Fill temp column with the constant part and then apply col col expression. 251 // There are other ways to do this that would avoid the temp column but it would 252 // require more special case logic. 253 colName := tempColName(qf, "colcol") 254 result := qf.Apply(Instruction{Fn: fn, DstCol: string(colName), SrcCol1: string(e.srcCol1), SrcCol2: string(e.srcCol2)}) 255 return result, colName 256 } 257 258 func (e colColExpr) Err() error { 259 return nil 260 } 261 262 // Nested expressions 263 type exprExpr1 struct { 264 operation string 265 expr Expression 266 } 267 268 type exprExpr2 struct { 269 operation string 270 lhs Expression 271 rhs Expression 272 } 273 274 func newExprExpr(x interface{}) Expression { 275 // In contrast to other expression constructors this one returns an error instead 276 // of a bool to denote success or failure. This is to be able to pinpoint the 277 // subexpression where the error occurred. 278 279 l, ok := x.([]interface{}) 280 if ok { 281 if len(l) == 2 || len(l) == 3 { 282 operation, oOk := opIdentifier(l[0]) 283 if !oOk { 284 return errorExpr{err: qerrors.New("newExprExpr", "invalid operation: %v", l[0])} 285 } 286 287 lhs := newExpr(l[1]) 288 if lhs.Err() != nil { 289 return errorExpr{err: qerrors.Propagate("newExprExpr", lhs.Err())} 290 } 291 292 if len(l) == 2 { 293 // Single argument functions such as "abs" 294 return exprExpr1{operation: operation, expr: lhs} 295 } 296 297 rhs := newExpr(l[2]) 298 if rhs.Err() != nil { 299 return errorExpr{err: qerrors.Propagate("newExprExpr", rhs.Err())} 300 } 301 302 return exprExpr2{operation: operation, lhs: lhs, rhs: rhs} 303 } 304 return errorExpr{err: qerrors.New("newExprExpr", "Expected a list with two or three elements, was: %v", x)} 305 } 306 307 return errorExpr{err: qerrors.New("newExprExpr", "Expected a list of elements, was: %v", x)} 308 } 309 310 func (e exprExpr1) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) { 311 result, tempColName := e.expr.execute(qf, ctx) 312 ccE, _ := newUnaryExpr([]interface{}{e.operation, types.ColumnName(tempColName)}) 313 result, colName := ccE.execute(result, ctx) 314 315 // Drop intermediate result if not present in original frame 316 if !qf.Contains(string(tempColName)) { 317 result = result.Drop(string(tempColName)) 318 } 319 320 return result, colName 321 } 322 323 func (e exprExpr1) Err() error { 324 return nil 325 } 326 327 func (e exprExpr2) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) { 328 result, lColName := e.lhs.execute(qf, ctx) 329 result, rColName := e.rhs.execute(result, ctx) 330 ccE, _ := newColColExpr([]interface{}{e.operation, lColName, rColName}) 331 result, colName := ccE.execute(result, ctx) 332 333 // Drop intermediate results if not present in original frame 334 dropCols := make([]string, 0) 335 for _, c := range []types.ColumnName{lColName, rColName} { 336 s := string(c) 337 if !qf.Contains(s) { 338 dropCols = append(dropCols, s) 339 } 340 } 341 result = result.Drop(dropCols...) 342 343 return result, colName 344 } 345 346 func (e exprExpr2) Err() error { 347 return nil 348 } 349 350 type errorExpr struct { 351 err error 352 } 353 354 func (e errorExpr) execute(qf QFrame, ctx *eval.Context) (QFrame, types.ColumnName) { 355 if qf.Err != nil { 356 return qf, "" 357 } 358 359 return qf.withErr(e.err), "" 360 } 361 362 func (e errorExpr) Err() error { 363 return e.err 364 } 365 366 // Val represents a constant or column. 367 func Val(value interface{}) Expression { 368 return newExpr(value) 369 } 370 371 // Expr represents an expression with one or more arguments. 372 // The arguments may be values, columns or the result of other expressions. 373 // 374 // If more arguments than two are passed, the expression will be evaluated by 375 // repeatedly applying the function to pairwise elements from the left. 376 // Temporary columns will be created as necessary to hold intermediate results. 377 // 378 // Pseudo example: 379 // ["/", 18, 2, 3] is evaluated as ["/", ["/", 18, 2], 3] (= 3) 380 func Expr(name string, args ...interface{}) Expression { 381 if len(args) == 0 { 382 // This is currently the case. It may change if introducing variables for example. 383 return errorExpr{err: qerrors.New("Expr", "Expressions require at least one argument")} 384 385 } 386 387 if len(args) == 1 { 388 return newExpr([]interface{}{name, args[0]}) 389 } 390 391 if len(args) == 2 { 392 return newExpr([]interface{}{name, args[0], args[1]}) 393 } 394 395 newArgs := make([]interface{}, len(args)-1) 396 newArgs[0] = newExpr([]interface{}{name, args[0], args[1]}) 397 copy(newArgs[1:], args[2:]) 398 return Expr(name, newArgs...) 399 }