github.com/whtcorpsinc/milevadb-prod@v0.0.0-20211104133533-f57f4be3b597/dbs/memristed/memex/expression.go (about) 1 // Copyright 2020 WHTCORPS INC, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // See the License for the specific language governing permissions and 12 // limitations under the License. 13 14 package memex 15 16 import ( 17 goJSON "encoding/json" 18 "fmt" 19 "strings" 20 "sync" 21 "sync/atomic" 22 23 "github.com/gogo/protobuf/proto" 24 "github.com/whtcorpsinc/BerolinaSQL/allegrosql" 25 "github.com/whtcorpsinc/BerolinaSQL/ast" 26 "github.com/whtcorpsinc/BerolinaSQL/opcode" 27 "github.com/whtcorpsinc/BerolinaSQL/perceptron" 28 "github.com/whtcorpsinc/BerolinaSQL/terror" 29 "github.com/whtcorpsinc/errors" 30 "github.com/whtcorpsinc/failpoint" 31 "github.com/whtcorpsinc/fidelpb/go-fidelpb" 32 "github.com/whtcorpsinc/milevadb/ekv" 33 "github.com/whtcorpsinc/milevadb/soliton/chunk" 34 "github.com/whtcorpsinc/milevadb/soliton/generatedexpr" 35 "github.com/whtcorpsinc/milevadb/soliton/logutil" 36 "github.com/whtcorpsinc/milevadb/stochastikctx" 37 "github.com/whtcorpsinc/milevadb/stochastikctx/stmtctx" 38 "github.com/whtcorpsinc/milevadb/types" 39 "github.com/whtcorpsinc/milevadb/types/json" 40 "go.uber.org/zap" 41 ) 42 43 // These are byte flags used for `HashCode()`. 44 const ( 45 constantFlag byte = 0 46 defCausumnFlag byte = 1 47 scalarFunctionFlag byte = 3 48 ) 49 50 // EvalAstExpr evaluates ast memex directly. 51 var EvalAstExpr func(sctx stochastikctx.Context, expr ast.ExprNode) (types.Causet, error) 52 53 // RewriteAstExpr rewrites ast memex directly. 54 var RewriteAstExpr func(sctx stochastikctx.Context, expr ast.ExprNode, schemaReplicant *Schema, names types.NameSlice) (Expression, error) 55 56 // VecExpr contains all vectorized evaluation methods. 57 type VecExpr interface { 58 // Vectorized returns if this memex supports vectorized evaluation. 59 Vectorized() bool 60 61 // VecEvalInt evaluates this memex in a vectorized manner. 62 VecEvalInt(ctx stochastikctx.Context, input *chunk.Chunk, result *chunk.DeferredCauset) error 63 64 // VecEvalReal evaluates this memex in a vectorized manner. 65 VecEvalReal(ctx stochastikctx.Context, input *chunk.Chunk, result *chunk.DeferredCauset) error 66 67 // VecEvalString evaluates this memex in a vectorized manner. 68 VecEvalString(ctx stochastikctx.Context, input *chunk.Chunk, result *chunk.DeferredCauset) error 69 70 // VecEvalDecimal evaluates this memex in a vectorized manner. 71 VecEvalDecimal(ctx stochastikctx.Context, input *chunk.Chunk, result *chunk.DeferredCauset) error 72 73 // VecEvalTime evaluates this memex in a vectorized manner. 74 VecEvalTime(ctx stochastikctx.Context, input *chunk.Chunk, result *chunk.DeferredCauset) error 75 76 // VecEvalDuration evaluates this memex in a vectorized manner. 77 VecEvalDuration(ctx stochastikctx.Context, input *chunk.Chunk, result *chunk.DeferredCauset) error 78 79 // VecEvalJSON evaluates this memex in a vectorized manner. 80 VecEvalJSON(ctx stochastikctx.Context, input *chunk.Chunk, result *chunk.DeferredCauset) error 81 } 82 83 // ReverseExpr contains all resersed evaluation methods. 84 type ReverseExpr interface { 85 // SupportReverseEval checks whether the builtinFunc support reverse evaluation. 86 SupportReverseEval() bool 87 88 // ReverseEval evaluates the only one defCausumn value with given function result. 89 ReverseEval(sc *stmtctx.StatementContext, res types.Causet, rType types.RoundingType) (val types.Causet, err error) 90 } 91 92 // Expression represents all scalar memex in ALLEGROALLEGROSQL. 93 type Expression interface { 94 fmt.Stringer 95 goJSON.Marshaler 96 VecExpr 97 ReverseExpr 98 DefCauslationInfo 99 100 // Eval evaluates an memex through a event. 101 Eval(event chunk.Event) (types.Causet, error) 102 103 // EvalInt returns the int64 representation of memex. 104 EvalInt(ctx stochastikctx.Context, event chunk.Event) (val int64, isNull bool, err error) 105 106 // EvalReal returns the float64 representation of memex. 107 EvalReal(ctx stochastikctx.Context, event chunk.Event) (val float64, isNull bool, err error) 108 109 // EvalString returns the string representation of memex. 110 EvalString(ctx stochastikctx.Context, event chunk.Event) (val string, isNull bool, err error) 111 112 // EvalDecimal returns the decimal representation of memex. 113 EvalDecimal(ctx stochastikctx.Context, event chunk.Event) (val *types.MyDecimal, isNull bool, err error) 114 115 // EvalTime returns the DATE/DATETIME/TIMESTAMP representation of memex. 116 EvalTime(ctx stochastikctx.Context, event chunk.Event) (val types.Time, isNull bool, err error) 117 118 // EvalDuration returns the duration representation of memex. 119 EvalDuration(ctx stochastikctx.Context, event chunk.Event) (val types.Duration, isNull bool, err error) 120 121 // EvalJSON returns the JSON representation of memex. 122 EvalJSON(ctx stochastikctx.Context, event chunk.Event) (val json.BinaryJSON, isNull bool, err error) 123 124 // GetType gets the type that the memex returns. 125 GetType() *types.FieldType 126 127 // Clone copies an memex totally. 128 Clone() Expression 129 130 // Equal checks whether two memexs are equal. 131 Equal(ctx stochastikctx.Context, e Expression) bool 132 133 // IsCorrelated checks if this memex has correlated key. 134 IsCorrelated() bool 135 136 // ConstItem checks if this memex is constant item, regardless of query evaluation state. 137 // An memex is constant item if it: 138 // refers no blocks. 139 // refers no correlated defCausumn. 140 // refers no subqueries that refers any blocks. 141 // refers no non-deterministic functions. 142 // refers no memex parameters. 143 // refers no param markers when prepare plan cache is enabled. 144 ConstItem(sc *stmtctx.StatementContext) bool 145 146 // Decorrelate try to decorrelate the memex by schemaReplicant. 147 Decorrelate(schemaReplicant *Schema) Expression 148 149 // ResolveIndices resolves indices by the given schemaReplicant. It will copy the original memex and return the copied one. 150 ResolveIndices(schemaReplicant *Schema) (Expression, error) 151 152 // resolveIndices is called inside the `ResolveIndices` It will perform on the memex itself. 153 resolveIndices(schemaReplicant *Schema) error 154 155 // ExplainInfo returns operator information to be explained. 156 ExplainInfo() string 157 158 // ExplainNormalizedInfo returns operator normalized information for generating digest. 159 ExplainNormalizedInfo() string 160 161 // HashCode creates the hashcode for memex which can be used to identify itself from other memex. 162 // It generated as the following: 163 // Constant: ConstantFlag+encoded value 164 // DeferredCauset: DeferredCausetFlag+encoded value 165 // ScalarFunction: SFFlag+encoded function name + encoded arg_1 + encoded arg_2 + ... 166 HashCode(sc *stmtctx.StatementContext) []byte 167 } 168 169 // CNFExprs stands for a CNF memex. 170 type CNFExprs []Expression 171 172 // Clone clones itself. 173 func (e CNFExprs) Clone() CNFExprs { 174 cnf := make(CNFExprs, 0, len(e)) 175 for _, expr := range e { 176 cnf = append(cnf, expr.Clone()) 177 } 178 return cnf 179 } 180 181 // Shallow makes a shallow copy of itself. 182 func (e CNFExprs) Shallow() CNFExprs { 183 cnf := make(CNFExprs, 0, len(e)) 184 cnf = append(cnf, e...) 185 return cnf 186 } 187 188 func isDeferredCausetInOperand(c *DeferredCauset) bool { 189 return c.InOperand 190 } 191 192 // IsEQCondFromIn checks if an memex is equal condition converted from `[not] in (subq)`. 193 func IsEQCondFromIn(expr Expression) bool { 194 sf, ok := expr.(*ScalarFunction) 195 if !ok || sf.FuncName.L != ast.EQ { 196 return false 197 } 198 defcaus := make([]*DeferredCauset, 0, 1) 199 defcaus = ExtractDeferredCausetsFromExpressions(defcaus, sf.GetArgs(), isDeferredCausetInOperand) 200 return len(defcaus) > 0 201 } 202 203 // HandleOverflowOnSelection handles Overflow errors when evaluating selection filters. 204 // We should ignore overflow errors when evaluating selection conditions: 205 // INSERT INTO t VALUES ("999999999999999999"); 206 // SELECT * FROM t WHERE v; 207 func HandleOverflowOnSelection(sc *stmtctx.StatementContext, val int64, err error) (int64, error) { 208 if sc.InSelectStmt && err != nil && types.ErrOverflow.Equal(err) { 209 return -1, nil 210 } 211 return val, err 212 } 213 214 // EvalBool evaluates memex list to a boolean value. The first returned value 215 // indicates bool result of the memex list, the second returned value indicates 216 // whether the result of the memex list is null, it can only be true when the 217 // first returned values is false. 218 func EvalBool(ctx stochastikctx.Context, exprList CNFExprs, event chunk.Event) (bool, bool, error) { 219 hasNull := false 220 for _, expr := range exprList { 221 data, err := expr.Eval(event) 222 if err != nil { 223 return false, false, err 224 } 225 if data.IsNull() { 226 // For queries like `select a in (select a from s where t.b = s.b) from t`, 227 // if result of `t.a = s.a` is null, we cannot return immediately until 228 // we have checked if `t.b = s.b` is null or false, because it means 229 // subquery is empty, and we should return false as the result of the whole 230 // exprList in that case, instead of null. 231 if !IsEQCondFromIn(expr) { 232 return false, false, nil 233 } 234 hasNull = true 235 continue 236 } 237 238 i, err := data.ToBool(ctx.GetStochastikVars().StmtCtx) 239 if err != nil { 240 i, err = HandleOverflowOnSelection(ctx.GetStochastikVars().StmtCtx, i, err) 241 if err != nil { 242 return false, false, err 243 244 } 245 } 246 if i == 0 { 247 return false, false, nil 248 } 249 } 250 if hasNull { 251 return false, true, nil 252 } 253 return true, false, nil 254 } 255 256 var ( 257 defaultChunkSize = 1024 258 selPool = sync.Pool{ 259 New: func() interface{} { 260 return make([]int, defaultChunkSize) 261 }, 262 } 263 zeroPool = sync.Pool{ 264 New: func() interface{} { 265 return make([]int8, defaultChunkSize) 266 }, 267 } 268 ) 269 270 func allocSelSlice(n int) []int { 271 if n > defaultChunkSize { 272 return make([]int, n) 273 } 274 return selPool.Get().([]int) 275 } 276 277 func deallocateSelSlice(sel []int) { 278 if cap(sel) <= defaultChunkSize { 279 selPool.Put(sel) 280 } 281 } 282 283 func allocZeroSlice(n int) []int8 { 284 if n > defaultChunkSize { 285 return make([]int8, n) 286 } 287 return zeroPool.Get().([]int8) 288 } 289 290 func deallocateZeroSlice(isZero []int8) { 291 if cap(isZero) <= defaultChunkSize { 292 zeroPool.Put(isZero) 293 } 294 } 295 296 // VecEvalBool does the same thing as EvalBool but it works in a vectorized manner. 297 func VecEvalBool(ctx stochastikctx.Context, exprList CNFExprs, input *chunk.Chunk, selected, nulls []bool) ([]bool, []bool, error) { 298 // If input.Sel() != nil, we will call input.SetSel(nil) to clear the sel slice in input chunk. 299 // After the function finished, then we reset the input.Sel(). 300 // The caller will handle the input.Sel() and selected slices. 301 defer input.SetSel(input.Sel()) 302 input.SetSel(nil) 303 304 n := input.NumEvents() 305 selected = selected[:0] 306 nulls = nulls[:0] 307 for i := 0; i < n; i++ { 308 selected = append(selected, false) 309 nulls = append(nulls, false) 310 } 311 312 sel := allocSelSlice(n) 313 defer deallocateSelSlice(sel) 314 sel = sel[:0] 315 for i := 0; i < n; i++ { 316 sel = append(sel, i) 317 } 318 input.SetSel(sel) 319 320 // In isZero slice, -1 means Null, 0 means zero, 1 means not zero 321 isZero := allocZeroSlice(n) 322 defer deallocateZeroSlice(isZero) 323 for _, expr := range exprList { 324 eType := expr.GetType().EvalType() 325 buf, err := globalDeferredCausetSlabPredictor.get(eType, n) 326 if err != nil { 327 return nil, nil, err 328 } 329 330 if err := EvalExpr(ctx, expr, input, buf); err != nil { 331 return nil, nil, err 332 } 333 334 err = toBool(ctx.GetStochastikVars().StmtCtx, eType, buf, sel, isZero) 335 if err != nil { 336 return nil, nil, err 337 } 338 339 j := 0 340 isEQCondFromIn := IsEQCondFromIn(expr) 341 for i := range sel { 342 if isZero[i] == -1 { 343 if eType != types.ETInt && !isEQCondFromIn { 344 continue 345 } 346 // In this case, we set this event to null and let it pass this filter. 347 // The null flag may be set to false later by other memexs in some cases. 348 nulls[sel[i]] = true 349 sel[j] = sel[i] 350 j++ 351 continue 352 } 353 354 if isZero[i] == 0 { 355 continue 356 } 357 sel[j] = sel[i] // this event passes this filter 358 j++ 359 } 360 sel = sel[:j] 361 input.SetSel(sel) 362 globalDeferredCausetSlabPredictor.put(buf) 363 } 364 365 for _, i := range sel { 366 if !nulls[i] { 367 selected[i] = true 368 } 369 } 370 371 return selected, nulls, nil 372 } 373 374 func toBool(sc *stmtctx.StatementContext, eType types.EvalType, buf *chunk.DeferredCauset, sel []int, isZero []int8) error { 375 switch eType { 376 case types.ETInt: 377 i64s := buf.Int64s() 378 for i := range sel { 379 if buf.IsNull(i) { 380 isZero[i] = -1 381 } else { 382 if i64s[i] == 0 { 383 isZero[i] = 0 384 } else { 385 isZero[i] = 1 386 } 387 } 388 } 389 case types.ETReal: 390 f64s := buf.Float64s() 391 for i := range sel { 392 if buf.IsNull(i) { 393 isZero[i] = -1 394 } else { 395 if f64s[i] == 0 { 396 isZero[i] = 0 397 } else { 398 isZero[i] = 1 399 } 400 } 401 } 402 case types.ETDuration: 403 d64s := buf.GoDurations() 404 for i := range sel { 405 if buf.IsNull(i) { 406 isZero[i] = -1 407 } else { 408 if d64s[i] == 0 { 409 isZero[i] = 0 410 } else { 411 isZero[i] = 1 412 } 413 } 414 } 415 case types.ETDatetime, types.ETTimestamp: 416 t64s := buf.Times() 417 for i := range sel { 418 if buf.IsNull(i) { 419 isZero[i] = -1 420 } else { 421 if t64s[i].IsZero() { 422 isZero[i] = 0 423 } else { 424 isZero[i] = 1 425 } 426 } 427 } 428 case types.ETString: 429 for i := range sel { 430 if buf.IsNull(i) { 431 isZero[i] = -1 432 } else { 433 iVal, err := types.StrToFloat(sc, buf.GetString(i), false) 434 if err != nil { 435 return err 436 } 437 if iVal == 0 { 438 isZero[i] = 0 439 } else { 440 isZero[i] = 1 441 } 442 } 443 } 444 case types.ETDecimal: 445 d64s := buf.Decimals() 446 for i := range sel { 447 if buf.IsNull(i) { 448 isZero[i] = -1 449 } else { 450 if d64s[i].IsZero() { 451 isZero[i] = 0 452 } else { 453 isZero[i] = 1 454 } 455 } 456 } 457 case types.ETJson: 458 for i := range sel { 459 if buf.IsNull(i) { 460 isZero[i] = -1 461 } else { 462 if buf.GetJSON(i).IsZero() { 463 isZero[i] = 0 464 } else { 465 isZero[i] = 1 466 } 467 } 468 } 469 } 470 return nil 471 } 472 473 // EvalExpr evaluates this expr according to its type. 474 // And it selects the method for evaluating memex based on 475 // the environment variables and whether the memex can be vectorized. 476 func EvalExpr(ctx stochastikctx.Context, expr Expression, input *chunk.Chunk, result *chunk.DeferredCauset) (err error) { 477 evalType := expr.GetType().EvalType() 478 if expr.Vectorized() && ctx.GetStochastikVars().EnableVectorizedExpression { 479 switch evalType { 480 case types.ETInt: 481 err = expr.VecEvalInt(ctx, input, result) 482 case types.ETReal: 483 err = expr.VecEvalReal(ctx, input, result) 484 case types.ETDuration: 485 err = expr.VecEvalDuration(ctx, input, result) 486 case types.ETDatetime, types.ETTimestamp: 487 err = expr.VecEvalTime(ctx, input, result) 488 case types.ETString: 489 err = expr.VecEvalString(ctx, input, result) 490 case types.ETJson: 491 err = expr.VecEvalJSON(ctx, input, result) 492 case types.ETDecimal: 493 err = expr.VecEvalDecimal(ctx, input, result) 494 default: 495 err = errors.New(fmt.Sprintf("invalid eval type %v", expr.GetType().EvalType())) 496 } 497 } else { 498 ind, n := 0, input.NumEvents() 499 iter := chunk.NewIterator4Chunk(input) 500 switch evalType { 501 case types.ETInt: 502 result.ResizeInt64(n, false) 503 i64s := result.Int64s() 504 for it := iter.Begin(); it != iter.End(); it = iter.Next() { 505 value, isNull, err := expr.EvalInt(ctx, it) 506 if err != nil { 507 return err 508 } 509 if isNull { 510 result.SetNull(ind, isNull) 511 } else { 512 i64s[ind] = value 513 } 514 ind++ 515 } 516 case types.ETReal: 517 result.ResizeFloat64(n, false) 518 f64s := result.Float64s() 519 for it := iter.Begin(); it != iter.End(); it = iter.Next() { 520 value, isNull, err := expr.EvalReal(ctx, it) 521 if err != nil { 522 return err 523 } 524 if isNull { 525 result.SetNull(ind, isNull) 526 } else { 527 f64s[ind] = value 528 } 529 ind++ 530 } 531 case types.ETDuration: 532 result.ResizeGoDuration(n, false) 533 d64s := result.GoDurations() 534 for it := iter.Begin(); it != iter.End(); it = iter.Next() { 535 value, isNull, err := expr.EvalDuration(ctx, it) 536 if err != nil { 537 return err 538 } 539 if isNull { 540 result.SetNull(ind, isNull) 541 } else { 542 d64s[ind] = value.Duration 543 } 544 ind++ 545 } 546 case types.ETDatetime, types.ETTimestamp: 547 result.ResizeTime(n, false) 548 t64s := result.Times() 549 for it := iter.Begin(); it != iter.End(); it = iter.Next() { 550 value, isNull, err := expr.EvalTime(ctx, it) 551 if err != nil { 552 return err 553 } 554 if isNull { 555 result.SetNull(ind, isNull) 556 } else { 557 t64s[ind] = value 558 } 559 ind++ 560 } 561 case types.ETString: 562 result.ReserveString(n) 563 for it := iter.Begin(); it != iter.End(); it = iter.Next() { 564 value, isNull, err := expr.EvalString(ctx, it) 565 if err != nil { 566 return err 567 } 568 if isNull { 569 result.AppendNull() 570 } else { 571 result.AppendString(value) 572 } 573 } 574 case types.ETJson: 575 result.ReserveJSON(n) 576 for it := iter.Begin(); it != iter.End(); it = iter.Next() { 577 value, isNull, err := expr.EvalJSON(ctx, it) 578 if err != nil { 579 return err 580 } 581 if isNull { 582 result.AppendNull() 583 } else { 584 result.AppendJSON(value) 585 } 586 } 587 case types.ETDecimal: 588 result.ResizeDecimal(n, false) 589 d64s := result.Decimals() 590 for it := iter.Begin(); it != iter.End(); it = iter.Next() { 591 value, isNull, err := expr.EvalDecimal(ctx, it) 592 if err != nil { 593 return err 594 } 595 if isNull { 596 result.SetNull(ind, isNull) 597 } else { 598 d64s[ind] = *value 599 } 600 ind++ 601 } 602 default: 603 err = errors.New(fmt.Sprintf("invalid eval type %v", expr.GetType().EvalType())) 604 } 605 } 606 return 607 } 608 609 // composeConditionWithBinaryOp composes condition with binary operator into a balance deep tree, which benefits a lot for pb causetDecoder/causetCausetEncoder. 610 func composeConditionWithBinaryOp(ctx stochastikctx.Context, conditions []Expression, funcName string) Expression { 611 length := len(conditions) 612 if length == 0 { 613 return nil 614 } 615 if length == 1 { 616 return conditions[0] 617 } 618 expr := NewFunctionInternal(ctx, funcName, 619 types.NewFieldType(allegrosql.TypeTiny), 620 composeConditionWithBinaryOp(ctx, conditions[:length/2], funcName), 621 composeConditionWithBinaryOp(ctx, conditions[length/2:], funcName)) 622 return expr 623 } 624 625 // ComposeCNFCondition composes CNF items into a balance deep CNF tree, which benefits a lot for pb causetDecoder/causetCausetEncoder. 626 func ComposeCNFCondition(ctx stochastikctx.Context, conditions ...Expression) Expression { 627 return composeConditionWithBinaryOp(ctx, conditions, ast.LogicAnd) 628 } 629 630 // ComposeDNFCondition composes DNF items into a balance deep DNF tree. 631 func ComposeDNFCondition(ctx stochastikctx.Context, conditions ...Expression) Expression { 632 return composeConditionWithBinaryOp(ctx, conditions, ast.LogicOr) 633 } 634 635 func extractBinaryOpItems(conditions *ScalarFunction, funcName string) []Expression { 636 var ret []Expression 637 for _, arg := range conditions.GetArgs() { 638 if sf, ok := arg.(*ScalarFunction); ok && sf.FuncName.L == funcName { 639 ret = append(ret, extractBinaryOpItems(sf, funcName)...) 640 } else { 641 ret = append(ret, arg) 642 } 643 } 644 return ret 645 } 646 647 // FlattenDNFConditions extracts DNF memex's leaf item. 648 // e.g. or(or(a=1, a=2), or(a=3, a=4)), we'll get [a=1, a=2, a=3, a=4]. 649 func FlattenDNFConditions(DNFCondition *ScalarFunction) []Expression { 650 return extractBinaryOpItems(DNFCondition, ast.LogicOr) 651 } 652 653 // FlattenCNFConditions extracts CNF memex's leaf item. 654 // e.g. and(and(a>1, a>2), and(a>3, a>4)), we'll get [a>1, a>2, a>3, a>4]. 655 func FlattenCNFConditions(CNFCondition *ScalarFunction) []Expression { 656 return extractBinaryOpItems(CNFCondition, ast.LogicAnd) 657 } 658 659 // Assignment represents a set assignment in UFIDelate, such as 660 // UFIDelate t set c1 = hex(12), c2 = c3 where c2 = 1 661 type Assignment struct { 662 DefCaus *DeferredCauset 663 // DefCausName indicates its original defCausumn name in causet schemaReplicant. It's used for outputting helping message when executing meets some errors. 664 DefCausName perceptron.CIStr 665 Expr Expression 666 } 667 668 // VarAssignment represents a variable assignment in Set, such as set global a = 1. 669 type VarAssignment struct { 670 Name string 671 Expr Expression 672 IsDefault bool 673 IsGlobal bool 674 IsSystem bool 675 ExtendValue *Constant 676 } 677 678 // splitNormalFormItems split CNF(conjunctive normal form) like "a and b and c", or DNF(disjunctive normal form) like "a or b or c" 679 func splitNormalFormItems(onExpr Expression, funcName string) []Expression { 680 switch v := onExpr.(type) { 681 case *ScalarFunction: 682 if v.FuncName.L == funcName { 683 var ret []Expression 684 for _, arg := range v.GetArgs() { 685 ret = append(ret, splitNormalFormItems(arg, funcName)...) 686 } 687 return ret 688 } 689 } 690 return []Expression{onExpr} 691 } 692 693 // SplitCNFItems splits CNF items. 694 // CNF means conjunctive normal form, e.g. "a and b and c". 695 func SplitCNFItems(onExpr Expression) []Expression { 696 return splitNormalFormItems(onExpr, ast.LogicAnd) 697 } 698 699 // SplitDNFItems splits DNF items. 700 // DNF means disjunctive normal form, e.g. "a or b or c". 701 func SplitDNFItems(onExpr Expression) []Expression { 702 return splitNormalFormItems(onExpr, ast.LogicOr) 703 } 704 705 // EvaluateExprWithNull sets defCausumns in schemaReplicant as null and calculate the final result of the scalar function. 706 // If the Expression is a non-constant value, it means the result is unknown. 707 func EvaluateExprWithNull(ctx stochastikctx.Context, schemaReplicant *Schema, expr Expression) Expression { 708 switch x := expr.(type) { 709 case *ScalarFunction: 710 args := make([]Expression, len(x.GetArgs())) 711 for i, arg := range x.GetArgs() { 712 args[i] = EvaluateExprWithNull(ctx, schemaReplicant, arg) 713 } 714 return NewFunctionInternal(ctx, x.FuncName.L, x.RetType, args...) 715 case *DeferredCauset: 716 if !schemaReplicant.Contains(x) { 717 return x 718 } 719 return &Constant{Value: types.Causet{}, RetType: types.NewFieldType(allegrosql.TypeNull)} 720 case *Constant: 721 if x.DeferredExpr != nil { 722 return FoldConstant(x) 723 } 724 } 725 return expr 726 } 727 728 // BlockInfo2SchemaAndNames converts the BlockInfo to the schemaReplicant and name slice. 729 func BlockInfo2SchemaAndNames(ctx stochastikctx.Context, dbName perceptron.CIStr, tbl *perceptron.BlockInfo) (*Schema, []*types.FieldName, error) { 730 defcaus, names, err := DeferredCausetInfos2DeferredCausetsAndNames(ctx, dbName, tbl.Name, tbl.DefCauss(), tbl) 731 if err != nil { 732 return nil, nil, err 733 } 734 keys := make([]KeyInfo, 0, len(tbl.Indices)+1) 735 for _, idx := range tbl.Indices { 736 if !idx.Unique || idx.State != perceptron.StatePublic { 737 continue 738 } 739 ok := true 740 newKey := make([]*DeferredCauset, 0, len(idx.DeferredCausets)) 741 for _, idxDefCaus := range idx.DeferredCausets { 742 find := false 743 for i, defCaus := range tbl.DeferredCausets { 744 if idxDefCaus.Name.L == defCaus.Name.L { 745 if !allegrosql.HasNotNullFlag(defCaus.Flag) { 746 break 747 } 748 newKey = append(newKey, defcaus[i]) 749 find = true 750 break 751 } 752 } 753 if !find { 754 ok = false 755 break 756 } 757 } 758 if ok { 759 keys = append(keys, newKey) 760 } 761 } 762 if tbl.PKIsHandle { 763 for i, defCaus := range tbl.DeferredCausets { 764 if allegrosql.HasPriKeyFlag(defCaus.Flag) { 765 keys = append(keys, KeyInfo{defcaus[i]}) 766 break 767 } 768 } 769 } 770 schemaReplicant := NewSchema(defcaus...) 771 schemaReplicant.SetUniqueKeys(keys) 772 return schemaReplicant, names, nil 773 } 774 775 // DeferredCausetInfos2DeferredCausetsAndNames converts the DeferredCausetInfo to the *DeferredCauset and NameSlice. 776 func DeferredCausetInfos2DeferredCausetsAndNames(ctx stochastikctx.Context, dbName, tblName perceptron.CIStr, defCausInfos []*perceptron.DeferredCausetInfo, tblInfo *perceptron.BlockInfo) ([]*DeferredCauset, types.NameSlice, error) { 777 defCausumns := make([]*DeferredCauset, 0, len(defCausInfos)) 778 names := make([]*types.FieldName, 0, len(defCausInfos)) 779 for i, defCaus := range defCausInfos { 780 names = append(names, &types.FieldName{ 781 OrigTblName: tblName, 782 OrigDefCausName: defCaus.Name, 783 DBName: dbName, 784 TblName: tblName, 785 DefCausName: defCaus.Name, 786 }) 787 newDefCaus := &DeferredCauset{ 788 RetType: &defCaus.FieldType, 789 ID: defCaus.ID, 790 UniqueID: ctx.GetStochastikVars().AllocCausetDeferredCausetID(), 791 Index: defCaus.Offset, 792 OrigName: names[i].String(), 793 IsHidden: defCaus.Hidden, 794 } 795 defCausumns = append(defCausumns, newDefCaus) 796 } 797 // Resolve virtual generated defCausumn. 798 mockSchema := NewSchema(defCausumns...) 799 // Ignore redundant warning here. 800 save := ctx.GetStochastikVars().StmtCtx.IgnoreTruncate 801 defer func() { 802 ctx.GetStochastikVars().StmtCtx.IgnoreTruncate = save 803 }() 804 ctx.GetStochastikVars().StmtCtx.IgnoreTruncate = true 805 for i, defCaus := range defCausInfos { 806 if defCaus.IsGenerated() && !defCaus.GeneratedStored { 807 expr, err := generatedexpr.ParseExpression(defCaus.GeneratedExprString) 808 if err != nil { 809 return nil, nil, errors.Trace(err) 810 } 811 expr, err = generatedexpr.SimpleResolveName(expr, tblInfo) 812 if err != nil { 813 return nil, nil, errors.Trace(err) 814 } 815 e, err := RewriteAstExpr(ctx, expr, mockSchema, names) 816 if err != nil { 817 return nil, nil, errors.Trace(err) 818 } 819 if e != nil { 820 defCausumns[i].VirtualExpr = e.Clone() 821 } 822 defCausumns[i].VirtualExpr, err = defCausumns[i].VirtualExpr.ResolveIndices(mockSchema) 823 if err != nil { 824 return nil, nil, errors.Trace(err) 825 } 826 } 827 } 828 return defCausumns, names, nil 829 } 830 831 // NewValuesFunc creates a new values function. 832 func NewValuesFunc(ctx stochastikctx.Context, offset int, retTp *types.FieldType) *ScalarFunction { 833 fc := &valuesFunctionClass{baseFunctionClass{ast.Values, 0, 0}, offset, retTp} 834 bt, err := fc.getFunction(ctx, nil) 835 terror.Log(err) 836 return &ScalarFunction{ 837 FuncName: perceptron.NewCIStr(ast.Values), 838 RetType: retTp, 839 Function: bt, 840 } 841 } 842 843 // IsBinaryLiteral checks whether an memex is a binary literal 844 func IsBinaryLiteral(expr Expression) bool { 845 con, ok := expr.(*Constant) 846 return ok && con.Value.HoTT() == types.HoTTBinaryLiteral 847 } 848 849 func canFuncBePushed(sf *ScalarFunction, storeType ekv.StoreType) bool { 850 // Use the failpoint to control whether to push down an memex in the integration test. 851 // Push down all memex if the `failpoint memex` is `all`, otherwise, check 852 // whether scalar function's name is contained in the enabled memex list (e.g.`ne,eq,lt`). 853 // If neither of the above is true, switch to original logic. 854 failpoint.Inject("PushDownTestSwitcher", func(val failpoint.Value) { 855 enabled := val.(string) 856 if enabled == "all" { 857 failpoint.Return(true) 858 } 859 exprs := strings.Split(enabled, ",") 860 for _, expr := range exprs { 861 if strings.ToLower(strings.TrimSpace(expr)) == sf.FuncName.L { 862 failpoint.Return(true) 863 } 864 } 865 }) 866 867 ret := false 868 switch sf.FuncName.L { 869 case 870 // op functions. 871 ast.LogicAnd, 872 ast.LogicOr, 873 ast.LogicXor, 874 ast.UnaryNot, 875 ast.And, 876 ast.Or, 877 ast.Xor, 878 ast.BitNeg, 879 ast.LeftShift, 880 ast.RightShift, 881 ast.UnaryMinus, 882 883 // compare functions. 884 ast.LT, 885 ast.LE, 886 ast.EQ, 887 ast.NE, 888 ast.GE, 889 ast.GT, 890 ast.NullEQ, 891 ast.In, 892 ast.IsNull, 893 ast.Like, 894 ast.IsTruthWithoutNull, 895 ast.IsTruthWithNull, 896 ast.IsFalsity, 897 898 // arithmetical functions. 899 ast.Plus, 900 ast.Minus, 901 ast.Mul, 902 ast.Div, 903 ast.Abs, 904 905 // math functions. 906 ast.Ceil, 907 ast.Ceiling, 908 ast.Floor, 909 ast.Sqrt, 910 ast.Sign, 911 ast.Ln, 912 ast.Log, 913 ast.Log2, 914 ast.Log10, 915 ast.Exp, 916 ast.Pow, 917 // Rust use the llvm math functions, which have different precision with Golang/MyALLEGROSQL(cmath) 918 // open the following switchers if we implement them in interlock via `cmath` 919 // ast.Sin, 920 // ast.Asin, 921 // ast.Cos, 922 // ast.Acos, 923 // ast.Tan, 924 // ast.Atan, 925 // ast.Atan2, 926 // ast.Cot, 927 ast.Radians, 928 ast.Degrees, 929 ast.Conv, 930 ast.CRC32, 931 932 // control flow functions. 933 ast.Case, 934 ast.If, 935 ast.Ifnull, 936 ast.Coalesce, 937 938 // string functions. 939 ast.Length, 940 ast.BitLength, 941 ast.Concat, 942 ast.ConcatWS, 943 // ast.Locate, 944 ast.Replace, 945 ast.ASCII, 946 ast.Hex, 947 ast.Reverse, 948 ast.LTrim, 949 ast.RTrim, 950 // ast.Left, 951 ast.Strcmp, 952 ast.Space, 953 ast.Elt, 954 ast.Field, 955 956 // json functions. 957 ast.JSONType, 958 ast.JSONExtract, 959 // FIXME: JSONUnquote is incompatible with Coprocessor 960 // ast.JSONUnquote, 961 ast.JSONObject, 962 ast.JSONArray, 963 ast.JSONMerge, 964 ast.JSONSet, 965 ast.JSONInsert, 966 // ast.JSONReplace, 967 ast.JSONRemove, 968 ast.JSONLength, 969 970 // date functions. 971 ast.DateFormat, 972 ast.FromDays, 973 // ast.ToDays, 974 ast.DayOfYear, 975 ast.DayOfMonth, 976 ast.Year, 977 ast.Month, 978 // FIXME: the interlock cannot keep the same behavior with MilevaDB in current compute framework 979 // ast.Hour, 980 // ast.Minute, 981 // ast.Second, 982 // ast.MicroSecond, 983 // ast.DayName, 984 ast.PeriodAdd, 985 ast.PeriodDiff, 986 ast.TimestamFIDeliff, 987 ast.DateAdd, 988 ast.FromUnixTime, 989 990 // encryption functions. 991 ast.MD5, 992 ast.SHA1, 993 ast.UncompressedLength, 994 995 ast.Cast, 996 997 // misc functions. 998 ast.InetNtoa, 999 ast.InetAton, 1000 ast.Inet6Ntoa, 1001 ast.Inet6Aton, 1002 ast.IsIPv4, 1003 ast.IsIPv4Compat, 1004 ast.IsIPv4Mapped, 1005 ast.IsIPv6: 1006 ret = true 1007 1008 // A special case: Only push down Round by signature 1009 case ast.Round: 1010 switch sf.Function.PbCode() { 1011 case 1012 fidelpb.ScalarFuncSig_RoundReal, 1013 fidelpb.ScalarFuncSig_RoundInt, 1014 fidelpb.ScalarFuncSig_RoundDec: 1015 ret = true 1016 } 1017 case 1018 ast.Substring, 1019 ast.Substr: 1020 switch sf.Function.PbCode() { 1021 case 1022 fidelpb.ScalarFuncSig_Substring2ArgsUTF8, 1023 fidelpb.ScalarFuncSig_Substring3ArgsUTF8: 1024 ret = true 1025 } 1026 case ast.Rand: 1027 switch sf.Function.PbCode() { 1028 case 1029 fidelpb.ScalarFuncSig_RandWithSeedFirstGen: 1030 ret = true 1031 } 1032 } 1033 if ret { 1034 switch storeType { 1035 case ekv.TiFlash: 1036 ret = scalarExprSupportedByFlash(sf) 1037 case ekv.EinsteinDB: 1038 ret = scalarExprSupportedByEinsteinDB(sf) 1039 case ekv.MilevaDB: 1040 ret = scalarExprSupportedByMilevaDB(sf) 1041 } 1042 } 1043 if ret { 1044 ret = IsPushDownEnabled(sf.FuncName.L, storeType) 1045 } 1046 return ret 1047 } 1048 1049 func storeTypeMask(storeType ekv.StoreType) uint32 { 1050 if storeType == ekv.UnSpecified { 1051 return 1<<ekv.EinsteinDB | 1<<ekv.TiFlash | 1<<ekv.MilevaDB 1052 } 1053 return 1 << storeType 1054 } 1055 1056 // IsPushDownEnabled returns true if the input expr is not in the expr_pushdown_blacklist 1057 func IsPushDownEnabled(name string, storeType ekv.StoreType) bool { 1058 value, exists := DefaultExprPushDownBlacklist.Load().(map[string]uint32)[name] 1059 if exists { 1060 mask := storeTypeMask(storeType) 1061 return !(value&mask == mask) 1062 } 1063 1064 if storeType != ekv.TiFlash && name == ast.AggFuncApproxCountDistinct { 1065 // Can not push down approx_count_distinct to other causetstore except tiflash by now. 1066 return false 1067 } 1068 1069 return true 1070 } 1071 1072 // DefaultExprPushDownBlacklist indicates the memexs which can not be pushed down to EinsteinDB. 1073 var DefaultExprPushDownBlacklist *atomic.Value 1074 1075 func init() { 1076 DefaultExprPushDownBlacklist = new(atomic.Value) 1077 DefaultExprPushDownBlacklist.CausetStore(make(map[string]uint32)) 1078 } 1079 1080 func canScalarFuncPushDown(scalarFunc *ScalarFunction, pc PbConverter, storeType ekv.StoreType) bool { 1081 pbCode := scalarFunc.Function.PbCode() 1082 if pbCode <= fidelpb.ScalarFuncSig_Unspecified { 1083 failpoint.Inject("PanicIfPbCodeUnspecified", func() { 1084 panic(errors.Errorf("unspecified PbCode: %T", scalarFunc.Function)) 1085 }) 1086 return false 1087 } 1088 1089 // Check whether this function can be pushed. 1090 if !canFuncBePushed(scalarFunc, storeType) { 1091 return false 1092 } 1093 1094 // Check whether all of its parameters can be pushed. 1095 for _, arg := range scalarFunc.GetArgs() { 1096 if !canExprPushDown(arg, pc, storeType) { 1097 return false 1098 } 1099 } 1100 1101 if spacetimedata := scalarFunc.Function.spacetimedata(); spacetimedata != nil { 1102 var err error 1103 _, err = proto.Marshal(spacetimedata) 1104 if err != nil { 1105 logutil.BgLogger().Error("encode spacetimedata", zap.Any("spacetimedata", spacetimedata), zap.Error(err)) 1106 return false 1107 } 1108 } 1109 return true 1110 } 1111 1112 func canExprPushDown(expr Expression, pc PbConverter, storeType ekv.StoreType) bool { 1113 if storeType == ekv.TiFlash && expr.GetType().Tp == allegrosql.TypeDuration { 1114 return false 1115 } 1116 switch x := expr.(type) { 1117 case *CorrelatedDeferredCauset: 1118 return pc.conOrCorDefCausToPBExpr(expr) != nil && pc.defCausumnToPBExpr(&x.DeferredCauset) != nil 1119 case *Constant: 1120 return pc.conOrCorDefCausToPBExpr(expr) != nil 1121 case *DeferredCauset: 1122 return pc.defCausumnToPBExpr(x) != nil 1123 case *ScalarFunction: 1124 return canScalarFuncPushDown(x, pc, storeType) 1125 } 1126 return false 1127 } 1128 1129 // PushDownExprs split the input exprs into pushed and remained, pushed include all the exprs that can be pushed down 1130 func PushDownExprs(sc *stmtctx.StatementContext, exprs []Expression, client ekv.Client, storeType ekv.StoreType) (pushed []Expression, remained []Expression) { 1131 pc := PbConverter{sc: sc, client: client} 1132 for _, expr := range exprs { 1133 if canExprPushDown(expr, pc, storeType) { 1134 pushed = append(pushed, expr) 1135 } else { 1136 remained = append(remained, expr) 1137 } 1138 } 1139 return 1140 } 1141 1142 // CanExprsPushDown return true if all the expr in exprs can be pushed down 1143 func CanExprsPushDown(sc *stmtctx.StatementContext, exprs []Expression, client ekv.Client, storeType ekv.StoreType) bool { 1144 _, remained := PushDownExprs(sc, exprs, client, storeType) 1145 return len(remained) == 0 1146 } 1147 1148 func scalarExprSupportedByEinsteinDB(function *ScalarFunction) bool { 1149 switch function.FuncName.L { 1150 case ast.Substr, ast.Substring, ast.DateAdd, ast.TimestamFIDeliff, 1151 ast.FromUnixTime: 1152 return false 1153 default: 1154 return true 1155 } 1156 } 1157 1158 func scalarExprSupportedByMilevaDB(function *ScalarFunction) bool { 1159 switch function.FuncName.L { 1160 case ast.Substr, ast.Substring, ast.DateAdd, ast.TimestamFIDeliff, 1161 ast.FromUnixTime: 1162 return false 1163 default: 1164 return true 1165 } 1166 } 1167 1168 func scalarExprSupportedByFlash(function *ScalarFunction) bool { 1169 switch function.FuncName.L { 1170 case ast.Plus, ast.Minus, ast.Div, ast.Mul, ast.GE, ast.LE, 1171 ast.EQ, ast.NE, ast.LT, ast.GT, ast.Ifnull, ast.IsNull, 1172 ast.Or, ast.In, ast.Mod, ast.And, ast.LogicOr, ast.LogicAnd, 1173 ast.Like, ast.UnaryNot, ast.Case, ast.Month, ast.Substr, 1174 ast.Substring, ast.TimestamFIDeliff, ast.DateFormat, ast.FromUnixTime, 1175 ast.JSONLength, ast.If, ast.BitNeg, ast.Xor: 1176 return true 1177 case ast.Cast: 1178 switch function.Function.PbCode() { 1179 case fidelpb.ScalarFuncSig_CastIntAsDecimal: 1180 return true 1181 default: 1182 return false 1183 } 1184 case ast.DateAdd: 1185 switch function.Function.PbCode() { 1186 case fidelpb.ScalarFuncSig_AddDateDatetimeInt, fidelpb.ScalarFuncSig_AddDateStringInt: 1187 return true 1188 default: 1189 return false 1190 } 1191 case ast.Round: 1192 switch function.Function.PbCode() { 1193 case fidelpb.ScalarFuncSig_RoundInt, fidelpb.ScalarFuncSig_RoundReal, 1194 fidelpb.ScalarFuncSig_RoundDec: 1195 return true 1196 default: 1197 return false 1198 } 1199 default: 1200 return false 1201 } 1202 } 1203 1204 // wrapWithIsTrue wraps `arg` with istrue function if the return type of expr is not 1205 // type int, otherwise, returns `arg` directly. 1206 // The `keepNull` controls what the istrue function will return when `arg` is null: 1207 // 1. keepNull is true and arg is null, the istrue function returns null. 1208 // 2. keepNull is false and arg is null, the istrue function returns 0. 1209 // The `wrapForInt` indicates whether we need to wrapIsTrue for non-logical Expression with int type. 1210 // TODO: remove this function. ScalarFunction should be newed in one place. 1211 func wrapWithIsTrue(ctx stochastikctx.Context, keepNull bool, arg Expression, wrapForInt bool) (Expression, error) { 1212 if arg.GetType().EvalType() == types.ETInt { 1213 if !wrapForInt { 1214 return arg, nil 1215 } 1216 if child, ok := arg.(*ScalarFunction); ok { 1217 if _, isLogicalOp := logicalOps[child.FuncName.L]; isLogicalOp { 1218 return arg, nil 1219 } 1220 } 1221 } 1222 var fc *isTrueOrFalseFunctionClass 1223 if keepNull { 1224 fc = &isTrueOrFalseFunctionClass{baseFunctionClass{ast.IsTruthWithNull, 1, 1}, opcode.IsTruth, keepNull} 1225 } else { 1226 fc = &isTrueOrFalseFunctionClass{baseFunctionClass{ast.IsTruthWithoutNull, 1, 1}, opcode.IsTruth, keepNull} 1227 } 1228 f, err := fc.getFunction(ctx, []Expression{arg}) 1229 if err != nil { 1230 return nil, err 1231 } 1232 sf := &ScalarFunction{ 1233 FuncName: perceptron.NewCIStr(ast.IsTruthWithoutNull), 1234 Function: f, 1235 RetType: f.getRetTp(), 1236 } 1237 if keepNull { 1238 sf.FuncName = perceptron.NewCIStr(ast.IsTruthWithNull) 1239 } 1240 return FoldConstant(sf), nil 1241 }