github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/compile/compile.go (about) 1 // Copyright 2021 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package compile 16 17 import ( 18 "context" 19 "encoding/hex" 20 "encoding/json" 21 "fmt" 22 "math" 23 "net" 24 "runtime" 25 gotrace "runtime/trace" 26 "sort" 27 "strings" 28 "sync" 29 "sync/atomic" 30 "time" 31 32 "github.com/google/uuid" 33 "github.com/matrixorigin/matrixone/pkg/catalog" 34 "github.com/matrixorigin/matrixone/pkg/cnservice/cnclient" 35 "github.com/matrixorigin/matrixone/pkg/common/moerr" 36 "github.com/matrixorigin/matrixone/pkg/common/morpc" 37 "github.com/matrixorigin/matrixone/pkg/common/mpool" 38 "github.com/matrixorigin/matrixone/pkg/common/reuse" 39 moruntime "github.com/matrixorigin/matrixone/pkg/common/runtime" 40 "github.com/matrixorigin/matrixone/pkg/container/batch" 41 "github.com/matrixorigin/matrixone/pkg/container/types" 42 "github.com/matrixorigin/matrixone/pkg/container/vector" 43 "github.com/matrixorigin/matrixone/pkg/defines" 44 "github.com/matrixorigin/matrixone/pkg/fileservice" 45 "github.com/matrixorigin/matrixone/pkg/logutil" 46 "github.com/matrixorigin/matrixone/pkg/objectio" 47 "github.com/matrixorigin/matrixone/pkg/pb/lock" 48 "github.com/matrixorigin/matrixone/pkg/pb/pipeline" 49 "github.com/matrixorigin/matrixone/pkg/pb/plan" 50 "github.com/matrixorigin/matrixone/pkg/pb/timestamp" 51 "github.com/matrixorigin/matrixone/pkg/perfcounter" 
52 "github.com/matrixorigin/matrixone/pkg/sql/colexec" 53 "github.com/matrixorigin/matrixone/pkg/sql/colexec/connector" 54 "github.com/matrixorigin/matrixone/pkg/sql/colexec/deletion" 55 "github.com/matrixorigin/matrixone/pkg/sql/colexec/dispatch" 56 "github.com/matrixorigin/matrixone/pkg/sql/colexec/external" 57 "github.com/matrixorigin/matrixone/pkg/sql/colexec/insert" 58 "github.com/matrixorigin/matrixone/pkg/sql/colexec/lockop" 59 "github.com/matrixorigin/matrixone/pkg/sql/colexec/merge" 60 "github.com/matrixorigin/matrixone/pkg/sql/colexec/mergeblock" 61 "github.com/matrixorigin/matrixone/pkg/sql/colexec/mergecte" 62 "github.com/matrixorigin/matrixone/pkg/sql/colexec/mergedelete" 63 "github.com/matrixorigin/matrixone/pkg/sql/colexec/mergerecursive" 64 "github.com/matrixorigin/matrixone/pkg/sql/colexec/output" 65 "github.com/matrixorigin/matrixone/pkg/sql/colexec/preinsert" 66 "github.com/matrixorigin/matrixone/pkg/sql/colexec/preinsertsecondaryindex" 67 "github.com/matrixorigin/matrixone/pkg/sql/colexec/preinsertunique" 68 "github.com/matrixorigin/matrixone/pkg/sql/colexec/sample" 69 "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" 70 plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan" 71 "github.com/matrixorigin/matrixone/pkg/sql/plan/function" 72 "github.com/matrixorigin/matrixone/pkg/sql/util" 73 mokafka "github.com/matrixorigin/matrixone/pkg/stream/adapter/kafka" 74 "github.com/matrixorigin/matrixone/pkg/txn/client" 75 "github.com/matrixorigin/matrixone/pkg/txn/storage/memorystorage" 76 txnTrace "github.com/matrixorigin/matrixone/pkg/txn/trace" 77 util2 "github.com/matrixorigin/matrixone/pkg/util" 78 "github.com/matrixorigin/matrixone/pkg/util/executor" 79 v2 "github.com/matrixorigin/matrixone/pkg/util/metric/v2" 80 "github.com/matrixorigin/matrixone/pkg/util/trace" 81 "github.com/matrixorigin/matrixone/pkg/util/trace/impl/motrace/statistic" 82 "github.com/matrixorigin/matrixone/pkg/vm" 83 "github.com/matrixorigin/matrixone/pkg/vm/engine" 84 
"github.com/matrixorigin/matrixone/pkg/vm/process" 85 "github.com/panjf2000/ants/v2" 86 _ "go.uber.org/automaxprocs" 87 "go.uber.org/zap" 88 ) 89 90 // Note: Now the cost going from stat is actually the number of rows, so we can only estimate a number for the size of each row. 91 // The current insertion of around 200,000 rows triggers cn to write s3 directly 92 const ( 93 DistributedThreshold uint64 = 10 * mpool.MB 94 SingleLineSizeEstimate uint64 = 300 * mpool.B 95 shuffleChannelBufferSize = 16 96 ) 97 98 var ( 99 ncpu = runtime.GOMAXPROCS(0) 100 ctxCancelError = context.Canceled.Error() 101 ) 102 103 // NewCompile is used to new an object of compile 104 func NewCompile( 105 addr, db, sql, tenant, uid string, 106 ctx context.Context, 107 e engine.Engine, 108 proc *process.Process, 109 stmt tree.Statement, 110 isInternal bool, 111 cnLabel map[string]string, 112 startAt time.Time, 113 ) *Compile { 114 c := reuse.Alloc[Compile](nil) 115 c.e = e 116 c.db = db 117 c.ctx = ctx 118 c.tenant = tenant 119 c.uid = uid 120 c.sql = sql 121 c.proc = proc 122 c.proc.MessageBoard = c.MessageBoard 123 c.stmt = stmt 124 c.addr = addr 125 c.isInternal = isInternal 126 c.cnLabel = cnLabel 127 c.startAt = startAt 128 c.disableRetry = false 129 if c.proc.TxnOperator != nil { 130 c.proc.TxnOperator.GetWorkspace().UpdateSnapshotWriteOffset() 131 } 132 return c 133 } 134 135 func (c *Compile) Release() { 136 if c == nil { 137 return 138 } 139 reuse.Free[Compile](c, nil) 140 } 141 142 func (c Compile) TypeName() string { 143 return "compile.Compile" 144 } 145 146 func (c *Compile) reset() { 147 if c.anal != nil { 148 c.anal.release() 149 } 150 for i := range c.scope { 151 c.scope[i].release() 152 } 153 for i := range c.fuzzys { 154 c.fuzzys[i].release() 155 } 156 157 c.MessageBoard.Messages = c.MessageBoard.Messages[:0] 158 c.fuzzys = c.fuzzys[:0] 159 c.scope = c.scope[:0] 160 c.pn = nil 161 c.fill = nil 162 c.affectRows.Store(0) 163 c.addr = "" 164 c.db = "" 165 c.tenant = "" 166 c.uid 
= "" 167 c.sql = "" 168 c.originSQL = "" 169 c.anal = nil 170 c.e = nil 171 c.ctx = nil 172 c.proc = nil 173 c.cnList = c.cnList[:0] 174 c.stmt = nil 175 c.startAt = time.Time{} 176 c.needLockMeta = false 177 c.isInternal = false 178 c.lastAllocID = 0 179 180 for k := range c.metaTables { 181 delete(c.metaTables, k) 182 } 183 for k := range c.nodeRegs { 184 delete(c.nodeRegs, k) 185 } 186 for k := range c.stepRegs { 187 delete(c.stepRegs, k) 188 } 189 for k := range c.cnLabel { 190 delete(c.cnLabel, k) 191 } 192 } 193 194 // helper function to judge if init temporary engine is needed 195 func (c *Compile) NeedInitTempEngine() bool { 196 for _, s := range c.scope { 197 ddl := s.Plan.GetDdl() 198 if ddl == nil { 199 continue 200 } 201 if qry := ddl.GetCreateTable(); qry != nil && qry.Temporary { 202 if c.e.(*engine.EntireEngine).TempEngine == nil { 203 return true 204 } 205 } 206 } 207 return false 208 } 209 210 func (c *Compile) SetTempEngine(tempEngine engine.Engine, tempStorage *memorystorage.Storage) { 211 e := c.e.(*engine.EntireEngine) 212 e.TempEngine = tempEngine 213 if c.ctx != nil && c.ctx.Value(defines.TemporaryTN{}) == nil { 214 c.ctx = context.WithValue(c.ctx, defines.TemporaryTN{}, tempStorage) 215 } 216 } 217 218 // Compile is the entrance of the compute-execute-layer. 219 // It generates a scope (logic pipeline) for a query plan. 
220 func (c *Compile) Compile(ctx context.Context, pn *plan.Plan, fill func(*batch.Batch) error) (err error) { 221 start := time.Now() 222 defer func() { 223 v2.TxnStatementCompileDurationHistogram.Observe(time.Since(start).Seconds()) 224 }() 225 226 _, task := gotrace.NewTask(context.TODO(), "pipeline.Compile") 227 defer task.End() 228 defer func() { 229 if e := recover(); e != nil { 230 err = moerr.ConvertPanicError(ctx, e) 231 getLogger().Error("panic in compile", 232 zap.String("sql", c.sql), 233 zap.String("error", err.Error())) 234 } 235 }() 236 237 if c.proc.TxnOperator != nil && c.proc.TxnOperator.Txn().IsPessimistic() { 238 txnOp := c.proc.TxnOperator 239 seq := txnOp.NextSequence() 240 txnTrace.GetService().AddTxnDurationAction( 241 txnOp, 242 client.CompileEvent, 243 seq, 244 0, 245 0, 246 err) 247 defer func() { 248 txnTrace.GetService().AddTxnDurationAction( 249 txnOp, 250 client.CompileEvent, 251 seq, 252 0, 253 time.Since(start), 254 err) 255 }() 256 257 if qry, ok := pn.Plan.(*plan.Plan_Query); ok { 258 if qry.Query.StmtType == plan.Query_SELECT { 259 for _, n := range qry.Query.Nodes { 260 if n.NodeType == plan.Node_LOCK_OP { 261 c.needLockMeta = true 262 break 263 } 264 } 265 } else { 266 c.needLockMeta = true 267 } 268 } 269 } 270 271 // with values 272 c.proc.Ctx = perfcounter.WithCounterSet(c.proc.Ctx, c.counterSet) 273 c.ctx = c.proc.Ctx 274 275 // session info and callback function to write back query result. 276 // XXX u is really a bad name, I'm not sure if `session` or `user` will be more suitable. 277 c.fill = fill 278 279 c.pn = pn 280 // get execute related information 281 // about ap or tp, what and how many compute resource we can use. 282 c.info = plan2.GetExecTypeFromPlan(pn) 283 if pn.IsPrepare { 284 c.info.Typ = plan2.ExecTypeTP 285 } 286 287 // Compile may exec some function that need engine.Engine. 288 c.proc.Ctx = context.WithValue(c.proc.Ctx, defines.EngineKey{}, c.e) 289 // generate logic pipeline for query. 
290 c.scope, err = c.compileScope(ctx, pn) 291 292 if err != nil { 293 return err 294 } 295 for _, s := range c.scope { 296 if len(s.NodeInfo.Addr) == 0 { 297 s.NodeInfo.Addr = c.addr 298 } 299 } 300 if c.shouldReturnCtxErr() { 301 return c.proc.Ctx.Err() 302 } 303 return nil 304 } 305 306 func (c *Compile) addAffectedRows(n uint64) { 307 c.affectRows.Add(n) 308 } 309 310 func (c *Compile) setAffectedRows(n uint64) { 311 c.affectRows.Store(n) 312 } 313 314 func (c *Compile) getAffectedRows() uint64 { 315 affectRows := c.affectRows.Load() 316 return affectRows 317 } 318 319 func (c *Compile) run(s *Scope) error { 320 if s == nil { 321 return nil 322 } 323 324 switch s.Magic { 325 case Normal: 326 defer c.fillAnalyzeInfo() 327 err := s.Run(c) 328 if err != nil { 329 return err 330 } 331 332 c.addAffectedRows(s.affectedRows()) 333 return nil 334 case Merge, MergeInsert: 335 defer c.fillAnalyzeInfo() 336 err := s.MergeRun(c) 337 if err != nil { 338 return err 339 } 340 341 c.addAffectedRows(s.affectedRows()) 342 return nil 343 case MergeDelete: 344 defer c.fillAnalyzeInfo() 345 err := s.MergeRun(c) 346 if err != nil { 347 return err 348 } 349 c.setAffectedRows(s.Instructions[len(s.Instructions)-1].Arg.(*mergedelete.Argument).AffectedRows) 350 return nil 351 case Remote: 352 defer c.fillAnalyzeInfo() 353 err := s.RemoteRun(c) 354 c.addAffectedRows(s.affectedRows()) 355 return err 356 case CreateDatabase: 357 err := s.CreateDatabase(c) 358 if err != nil { 359 return err 360 } 361 c.setAffectedRows(1) 362 return nil 363 case DropDatabase: 364 err := s.DropDatabase(c) 365 if err != nil { 366 return err 367 } 368 c.setAffectedRows(1) 369 return nil 370 case CreateTable: 371 qry := s.Plan.GetDdl().GetCreateTable() 372 if qry.Temporary { 373 return s.CreateTempTable(c) 374 } else { 375 return s.CreateTable(c) 376 } 377 case AlterView: 378 return s.AlterView(c) 379 case AlterTable: 380 return s.AlterTable(c) 381 case DropTable: 382 return s.DropTable(c) 383 case DropSequence: 
384 return s.DropSequence(c) 385 case CreateSequence: 386 return s.CreateSequence(c) 387 case AlterSequence: 388 return s.AlterSequence(c) 389 case CreateIndex: 390 return s.CreateIndex(c) 391 case DropIndex: 392 return s.DropIndex(c) 393 case TruncateTable: 394 return s.TruncateTable(c) 395 case Replace: 396 return s.replace(c) 397 } 398 return nil 399 } 400 401 func (c *Compile) allocOperatorID() int32 { 402 c.lock.Lock() 403 defer func() { 404 c.lastAllocID++ 405 c.lock.Unlock() 406 }() 407 408 return c.lastAllocID 409 } 410 411 // Run is an important function of the compute-layer, it executes a single sql according to its scope 412 // Need call Release() after call this function. 413 func (c *Compile) Run(_ uint64) (result *util2.RunResult, err error) { 414 sql := c.originSQL 415 if sql == "" { 416 sql = c.sql 417 } 418 419 txnOp := c.proc.TxnOperator 420 seq := uint64(0) 421 if txnOp != nil { 422 seq = txnOp.NextSequence() 423 txnOp.EnterRunSql() 424 } 425 426 defer func() { 427 if txnOp != nil { 428 txnOp.ExitRunSql() 429 } 430 c.proc.CleanValueScanBatchs() 431 c.proc.SetPrepareBatch(nil) 432 c.proc.SetPrepareExprList(nil) 433 }() 434 435 var writeOffset uint64 436 437 start := time.Now() 438 v2.TxnStatementExecuteLatencyDurationHistogram.Observe(start.Sub(c.startAt).Seconds()) 439 440 stats := statistic.StatsInfoFromContext(c.proc.Ctx) 441 stats.ExecutionStart() 442 443 txnTrace.GetService().TxnStatementStart(txnOp, sql, seq) 444 defer func() { 445 stats.ExecutionEnd() 446 447 cost := time.Since(start) 448 row := 0 449 if result != nil { 450 row = int(result.AffectRows) 451 } 452 txnTrace.GetService().TxnStatementCompleted( 453 txnOp, 454 sql, 455 cost, 456 seq, 457 row, 458 err, 459 ) 460 v2.TxnStatementExecuteDurationHistogram.Observe(cost.Seconds()) 461 }() 462 463 for _, s := range c.scope { 464 s.SetOperatorInfoRecursively(c.allocOperatorID) 465 } 466 467 if c.proc.TxnOperator != nil { 468 writeOffset = 
uint64(c.proc.TxnOperator.GetWorkspace().GetSnapshotWriteOffset()) 469 } 470 result = &util2.RunResult{} 471 var span trace.Span 472 var runC *Compile // compile structure for rerun. 473 // var result = &util2.RunResult{} 474 // var err error 475 var retryTimes int 476 releaseRunC := func() { 477 if runC != c { 478 runC.Release() 479 } 480 } 481 482 sp := c.proc.GetStmtProfile() 483 c.ctx, span = trace.Start(c.ctx, "Compile.Run", trace.WithKind(trace.SpanKindStatement)) 484 _, task := gotrace.NewTask(context.TODO(), "pipeline.Run") 485 defer func() { 486 releaseRunC() 487 488 task.End() 489 span.End(trace.WithStatementExtra(sp.GetTxnId(), sp.GetStmtId(), sp.GetSqlOfStmt())) 490 }() 491 492 if c.proc.TxnOperator != nil { 493 c.proc.TxnOperator.GetWorkspace().IncrSQLCount() 494 c.proc.TxnOperator.ResetRetry(false) 495 } 496 497 v2.TxnStatementTotalCounter.Inc() 498 runC = c 499 for { 500 if err = runC.runOnce(); err == nil { 501 break 502 } 503 504 c.fatalLog(retryTimes, err) 505 if !c.canRetry(err) { 506 return nil, err 507 } 508 509 retryTimes++ 510 releaseRunC() 511 defChanged := moerr.IsMoErrCode( 512 err, 513 moerr.ErrTxnNeedRetryWithDefChanged) 514 if runC, err = c.prepareRetry(defChanged); err != nil { 515 return nil, err 516 } 517 } 518 519 if c.shouldReturnCtxErr() { 520 return nil, c.proc.Ctx.Err() 521 } 522 result.AffectRows = runC.getAffectedRows() 523 524 if c.proc.TxnOperator != nil { 525 return result, c.proc.TxnOperator.GetWorkspace().Adjust(writeOffset) 526 } 527 return result, nil 528 } 529 530 func (c *Compile) prepareRetry(defChanged bool) (*Compile, error) { 531 v2.TxnStatementRetryCounter.Inc() 532 c.proc.TxnOperator.ResetRetry(true) 533 c.proc.TxnOperator.GetWorkspace().IncrSQLCount() 534 535 // clear the workspace of the failed statement 536 if e := c.proc.TxnOperator.GetWorkspace().RollbackLastStatement(c.ctx); e != nil { 537 return nil, e 538 } 539 540 // increase the statement id 541 if e := 
c.proc.TxnOperator.GetWorkspace().IncrStatementID(c.ctx, false); e != nil { 542 return nil, e 543 } 544 545 // FIXME: the current retry method is quite bad, the overhead is relatively large, and needs to be 546 // improved to refresh expression in the future. 547 548 var e error 549 runC := NewCompile(c.addr, c.db, c.sql, c.tenant, c.uid, c.proc.Ctx, c.e, c.proc, c.stmt, c.isInternal, c.cnLabel, c.startAt) 550 defer func() { 551 if e != nil { 552 runC.Release() 553 } 554 }() 555 if defChanged { 556 var pn *plan2.Plan 557 pn, e = c.buildPlanFunc() 558 if e != nil { 559 return nil, e 560 } 561 c.pn = pn 562 } 563 if e = runC.Compile(c.proc.Ctx, c.pn, c.fill); e != nil { 564 return nil, e 565 } 566 567 return runC, nil 568 } 569 570 // isRetryErr if the error is ErrTxnNeedRetry and the transaction is RC isolation, we need to retry t 571 // he statement 572 func (c *Compile) isRetryErr(err error) bool { 573 return (moerr.IsMoErrCode(err, moerr.ErrTxnNeedRetry) || 574 moerr.IsMoErrCode(err, moerr.ErrTxnNeedRetryWithDefChanged)) && 575 c.proc.TxnOperator.Txn().IsRCIsolation() 576 } 577 578 func (c *Compile) canRetry(err error) bool { 579 return !c.disableRetry && c.isRetryErr(err) 580 } 581 582 // run once 583 func (c *Compile) runOnce() error { 584 var wg sync.WaitGroup 585 c.MessageBoard.Reset() 586 err := c.lockMetaTables() 587 if err != nil { 588 return err 589 } 590 errC := make(chan error, len(c.scope)) 591 for _, s := range c.scope { 592 s.SetContextRecursively(c.proc.Ctx) 593 err = s.InitAllDataSource(c) 594 if err != nil { 595 return err 596 } 597 } 598 599 for i := range c.scope { 600 wg.Add(1) 601 scope := c.scope[i] 602 errSubmit := ants.Submit(func() { 603 defer func() { 604 if e := recover(); e != nil { 605 err := moerr.ConvertPanicError(c.ctx, e) 606 getLogger().Error("panic in run", 607 zap.String("sql", c.sql), 608 zap.String("error", err.Error())) 609 errC <- err 610 } 611 wg.Done() 612 }() 613 errC <- c.run(scope) 614 }) 615 if errSubmit != nil { 616 
errC <- errSubmit 617 wg.Done() 618 } 619 } 620 wg.Wait() 621 close(errC) 622 623 errList := make([]error, 0, len(c.scope)) 624 for e := range errC { 625 if e != nil { 626 errList = append(errList, e) 627 if c.isRetryErr(e) { 628 return e 629 } 630 } 631 } 632 633 if len(errList) > 0 { 634 err = errList[0] 635 } 636 if err != nil { 637 return err 638 } 639 640 // fuzzy filter not sure whether this insert / load obey duplicate constraints, need double check 641 if len(c.fuzzys) > 0 { 642 for _, f := range c.fuzzys { 643 if f != nil && f.cnt > 0 { 644 if f.cnt > 10 { 645 logutil.Warnf("fuzzy filter cnt is %d, may be too high", f.cnt) 646 } 647 err = f.backgroundSQLCheck(c) 648 if err != nil { 649 return err 650 } 651 } 652 } 653 } 654 655 //detect fk self refer 656 //update, insert 657 query := c.pn.GetQuery() 658 if query != nil && (query.StmtType == plan.Query_INSERT || 659 query.StmtType == plan.Query_UPDATE) && len(query.GetDetectSqls()) != 0 { 660 err = detectFkSelfRefer(c, query.DetectSqls) 661 } 662 //alter table ... add/drop foreign key 663 if err == nil && c.pn.GetDdl() != nil { 664 alterTable := c.pn.GetDdl().GetAlterTable() 665 if alterTable != nil && len(alterTable.GetDetectSqls()) != 0 { 666 err = detectFkSelfRefer(c, alterTable.GetDetectSqls()) 667 } 668 } 669 return err 670 } 671 672 // shouldReturnCtxErr return true only if the ctx has error and the error is not canceled. 673 // maybe deadlined or other error. 
674 func (c *Compile) shouldReturnCtxErr() bool { 675 if e := c.proc.Ctx.Err(); e != nil && e.Error() != ctxCancelError { 676 return true 677 } 678 return false 679 } 680 681 func (c *Compile) compileScope(ctx context.Context, pn *plan.Plan) ([]*Scope, error) { 682 start := time.Now() 683 defer func() { 684 v2.TxnStatementCompileScopeHistogram.Observe(time.Since(start).Seconds()) 685 }() 686 switch qry := pn.Plan.(type) { 687 case *plan.Plan_Query: 688 switch qry.Query.StmtType { 689 case plan.Query_REPLACE: 690 return []*Scope{ 691 newScope(Replace). 692 withPlan(pn), 693 }, nil 694 } 695 scopes, err := c.compileQuery(ctx, qry.Query) 696 if err != nil { 697 return nil, err 698 } 699 for _, s := range scopes { 700 if s.Plan == nil { 701 s.Plan = pn 702 } 703 } 704 return scopes, nil 705 case *plan.Plan_Ddl: 706 switch qry.Ddl.DdlType { 707 case plan.DataDefinition_CREATE_DATABASE: 708 return []*Scope{ 709 newScope(CreateDatabase). 710 withPlan(pn), 711 }, nil 712 case plan.DataDefinition_DROP_DATABASE: 713 return []*Scope{ 714 newScope(DropDatabase). 715 withPlan(pn), 716 }, nil 717 case plan.DataDefinition_CREATE_TABLE: 718 return []*Scope{ 719 newScope(CreateTable). 720 withPlan(pn), 721 }, nil 722 case plan.DataDefinition_ALTER_VIEW: 723 return []*Scope{ 724 newScope(AlterView). 725 withPlan(pn), 726 }, nil 727 case plan.DataDefinition_ALTER_TABLE: 728 return []*Scope{ 729 newScope(AlterTable). 730 withPlan(pn), 731 }, nil 732 case plan.DataDefinition_DROP_TABLE: 733 return []*Scope{ 734 newScope(DropTable). 735 withPlan(pn), 736 }, nil 737 case plan.DataDefinition_DROP_SEQUENCE: 738 return []*Scope{ 739 newScope(DropSequence). 740 withPlan(pn), 741 }, nil 742 case plan.DataDefinition_ALTER_SEQUENCE: 743 return []*Scope{ 744 newScope(AlterSequence). 745 withPlan(pn), 746 }, nil 747 case plan.DataDefinition_TRUNCATE_TABLE: 748 return []*Scope{ 749 newScope(TruncateTable). 
750 withPlan(pn), 751 }, nil 752 case plan.DataDefinition_CREATE_SEQUENCE: 753 return []*Scope{ 754 newScope(CreateSequence). 755 withPlan(pn), 756 }, nil 757 case plan.DataDefinition_CREATE_INDEX: 758 return []*Scope{ 759 newScope(CreateIndex). 760 withPlan(pn), 761 }, nil 762 case plan.DataDefinition_DROP_INDEX: 763 return []*Scope{ 764 newScope(DropIndex). 765 withPlan(pn), 766 }, nil 767 case plan.DataDefinition_SHOW_DATABASES, 768 plan.DataDefinition_SHOW_TABLES, 769 plan.DataDefinition_SHOW_COLUMNS, 770 plan.DataDefinition_SHOW_CREATETABLE: 771 return c.compileQuery(ctx, pn.GetDdl().GetQuery()) 772 // 1、not supported: show arnings/errors/status/processlist 773 // 2、show variables will not return query 774 // 3、show create database/table need rewrite to create sql 775 } 776 } 777 return nil, moerr.NewNYI(ctx, fmt.Sprintf("query '%s'", pn)) 778 } 779 780 func (c *Compile) appendMetaTables(objRes *plan.ObjectRef) { 781 if !c.needLockMeta { 782 return 783 } 784 785 if objRes.SchemaName == catalog.MO_CATALOG && (objRes.ObjName == catalog.MO_DATABASE || objRes.ObjName == catalog.MO_TABLES || objRes.ObjName == catalog.MO_COLUMNS) { 786 // do not lock meta table for meta table 787 } else { 788 key := fmt.Sprintf("%s %s", objRes.SchemaName, objRes.ObjName) 789 c.metaTables[key] = struct{}{} 790 } 791 } 792 793 func (c *Compile) lockMetaTables() error { 794 lockLen := len(c.metaTables) 795 if lockLen == 0 { 796 return nil 797 } 798 799 tables := make([]string, 0, lockLen) 800 for table := range c.metaTables { 801 tables = append(tables, table) 802 } 803 sort.Strings(tables) 804 805 for _, table := range tables { 806 names := strings.SplitN(table, " ", 2) 807 808 err := lockMoTable(c, names[0], names[1], lock.LockMode_Shared) 809 if err != nil { 810 // if get error in locking mocatalog.mo_tables by it's dbName & tblName 811 // that means the origin table's schema was changed. 
then return NeedRetryWithDefChanged err 812 if moerr.IsMoErrCode(err, moerr.ErrTxnNeedRetry) || 813 moerr.IsMoErrCode(err, moerr.ErrTxnNeedRetryWithDefChanged) { 814 return moerr.NewTxnNeedRetryWithDefChangedNoCtx() 815 } 816 817 // other errors, just throw out 818 return err 819 } 820 } 821 return nil 822 } 823 824 func (c *Compile) cnListStrategy() { 825 if len(c.cnList) == 0 { 826 c.cnList = append(c.cnList, engine.Node{ 827 Addr: c.addr, 828 Mcpu: ncpu, 829 }) 830 } else if len(c.cnList) > c.info.CnNumbers { 831 c.cnList = c.cnList[:c.info.CnNumbers] 832 } 833 } 834 835 // func (c *Compile) compileAttachedScope(ctx context.Context, attachedPlan *plan.Plan) ([]*Scope, error) { 836 // query := attachedPlan.Plan.(*plan.Plan_Query) 837 // attachedScope, err := c.compileQuery(ctx, query.Query) 838 // if err != nil { 839 // return nil, err 840 // } 841 // for _, s := range attachedScope { 842 // s.Plan = attachedPlan 843 // } 844 // return attachedScope, nil 845 // } 846 847 func isAvailable(client morpc.RPCClient, addr string) bool { 848 _, _, err := net.SplitHostPort(addr) 849 if err != nil { 850 logutil.Warnf("compileScope received a malformed cn address '%s', expected 'ip:port'", addr) 851 return false 852 } 853 logutil.Debugf("ping %s start", addr) 854 ctx, cancel := context.WithTimeout(context.Background(), 500*time.Millisecond) 855 defer cancel() 856 err = client.Ping(ctx, addr) 857 if err != nil { 858 // ping failed 859 logutil.Debugf("ping %s err %+v\n", addr, err) 860 return false 861 } 862 return true 863 } 864 865 func (c *Compile) removeUnavailableCN() { 866 client := cnclient.GetRPCClient() 867 if client == nil { 868 return 869 } 870 i := 0 871 for _, cn := range c.cnList { 872 if isSameCN(c.addr, cn.Addr) || isAvailable(client, cn.Addr) { 873 c.cnList[i] = cn 874 i++ 875 } 876 } 877 c.cnList = c.cnList[:i] 878 } 879 880 // getCNList gets the CN list from engine.Nodes() method. It will 881 // ensure the current CN is included in the result. 
882 func (c *Compile) getCNList() (engine.Nodes, error) { 883 cnList, err := c.e.Nodes(c.isInternal, c.tenant, c.uid, c.cnLabel) 884 if err != nil { 885 return nil, err 886 } 887 888 // We should always make sure the current CN is contained in the cn list. 889 if c.proc == nil || c.proc.QueryClient == nil { 890 return cnList, nil 891 } 892 cnID := c.proc.QueryClient.ServiceID() 893 for _, node := range cnList { 894 if node.Id == cnID { 895 return cnList, nil 896 } 897 } 898 cnList = append(cnList, engine.Node{ 899 Id: cnID, 900 Addr: c.addr, 901 Mcpu: ncpu, 902 }) 903 return cnList, nil 904 } 905 906 func (c *Compile) compileQuery(ctx context.Context, qry *plan.Query) ([]*Scope, error) { 907 var err error 908 909 start := time.Now() 910 defer func() { 911 v2.TxnStatementCompileQueryHistogram.Observe(time.Since(start).Seconds()) 912 }() 913 c.cnList, err = c.getCNList() 914 if err != nil { 915 return nil, err 916 } 917 // sort by addr to get fixed order of CN list 918 sort.Slice(c.cnList, func(i, j int) bool { return c.cnList[i].Addr < c.cnList[j].Addr }) 919 920 if c.info.Typ == plan2.ExecTypeAP { 921 c.removeUnavailableCN() 922 } 923 924 c.info.CnNumbers = len(c.cnList) 925 blkNum := 0 926 cost := float64(0.0) 927 for _, n := range qry.Nodes { 928 if n.Stats == nil { 929 continue 930 } 931 if n.NodeType == plan.Node_TABLE_SCAN { 932 blkNum += int(n.Stats.BlockNum) 933 } 934 if n.NodeType == plan.Node_INSERT { 935 cost += n.Stats.GetCost() 936 } 937 } 938 switch qry.StmtType { 939 case plan.Query_INSERT: 940 if cost*float64(SingleLineSizeEstimate) > float64(DistributedThreshold) || qry.LoadTag || blkNum >= plan2.BlockNumForceOneCN { 941 c.cnListStrategy() 942 } else { 943 c.cnList = engine.Nodes{ 944 engine.Node{ 945 Addr: c.addr, 946 Mcpu: c.generateCPUNumber(ncpu, blkNum), 947 }, 948 } 949 } 950 // insertNode := qry.Nodes[qry.Steps[0]] 951 // nodeStats := qry.Nodes[insertNode.Children[0]].Stats 952 // if nodeStats.GetCost()*float64(SingleLineSizeEstimate) > 
float64(DistributedThreshold) || qry.LoadTag || blkNum >= MinBlockNum { 953 // if len(insertNode.InsertCtx.OnDuplicateIdx) > 0 { 954 // c.cnList = engine.Nodes{ 955 // engine.Node{ 956 // Addr: c.addr, 957 // Mcpu: c.generateCPUNumber(1, blkNum)}, 958 // } 959 // } else { 960 // c.cnListStrategy() 961 // } 962 // } else { 963 // if len(insertNode.InsertCtx.OnDuplicateIdx) > 0 { 964 // c.cnList = engine.Nodes{ 965 // engine.Node{ 966 // Addr: c.addr, 967 // Mcpu: c.generateCPUNumber(1, blkNum)}, 968 // } 969 // } else { 970 // c.cnList = engine.Nodes{engine.Node{ 971 // Addr: c.addr, 972 // Mcpu: c.generateCPUNumber(c.NumCPU(), blkNum)}, 973 // } 974 // } 975 // } 976 default: 977 if blkNum < plan2.BlockNumForceOneCN { 978 c.cnList = engine.Nodes{ 979 engine.Node{ 980 Addr: c.addr, 981 Mcpu: c.generateCPUNumber(ncpu, blkNum), 982 }, 983 } 984 } else { 985 c.cnListStrategy() 986 } 987 } 988 if c.info.Typ == plan2.ExecTypeTP && len(c.cnList) > 1 { 989 c.cnList = engine.Nodes{ 990 engine.Node{ 991 Addr: c.addr, 992 Mcpu: c.generateCPUNumber(ncpu, blkNum), 993 }, 994 } 995 } 996 997 c.initAnalyze(qry) 998 999 // deal with sink scan first. 
1000 for i := len(qry.Steps) - 1; i >= 0; i-- { 1001 err := c.compileSinkScan(qry, qry.Steps[i]) 1002 if err != nil { 1003 return nil, err 1004 } 1005 } 1006 1007 steps := make([]*Scope, 0, len(qry.Steps)) 1008 defer func() { 1009 if err != nil { 1010 ReleaseScopes(steps) 1011 } 1012 }() 1013 for i := len(qry.Steps) - 1; i >= 0; i-- { 1014 var scopes []*Scope 1015 var scope *Scope 1016 scopes, err = c.compilePlanScope(ctx, int32(i), qry.Steps[i], qry.Nodes) 1017 if err != nil { 1018 return nil, err 1019 } 1020 scope, err = c.compileApQuery(qry, scopes, qry.Steps[i]) 1021 if err != nil { 1022 return nil, err 1023 } 1024 steps = append(steps, scope) 1025 } 1026 1027 return steps, err 1028 } 1029 1030 func (c *Compile) compileSinkScan(qry *plan.Query, nodeId int32) error { 1031 n := qry.Nodes[nodeId] 1032 for _, childId := range n.Children { 1033 err := c.compileSinkScan(qry, childId) 1034 if err != nil { 1035 return err 1036 } 1037 } 1038 1039 if n.NodeType == plan.Node_SINK_SCAN || n.NodeType == plan.Node_RECURSIVE_SCAN || n.NodeType == plan.Node_RECURSIVE_CTE { 1040 for _, s := range n.SourceStep { 1041 var wr *process.WaitRegister 1042 if c.anal.qry.LoadTag { 1043 wr = &process.WaitRegister{ 1044 Ctx: c.ctx, 1045 Ch: make(chan *batch.Batch, ncpu), 1046 } 1047 } else { 1048 wr = &process.WaitRegister{ 1049 Ctx: c.ctx, 1050 Ch: make(chan *batch.Batch, 1), 1051 } 1052 } 1053 c.appendStepRegs(s, nodeId, wr) 1054 } 1055 } 1056 return nil 1057 } 1058 1059 func (c *Compile) compileApQuery(qry *plan.Query, ss []*Scope, step int32) (*Scope, error) { 1060 if qry.Nodes[step].NodeType == plan.Node_SINK { 1061 return ss[0], nil 1062 } 1063 var rs *Scope 1064 switch qry.StmtType { 1065 case plan.Query_DELETE: 1066 return ss[0], nil 1067 case plan.Query_INSERT: 1068 return ss[0], nil 1069 case plan.Query_UPDATE: 1070 return ss[0], nil 1071 default: 1072 rs = c.newMergeScope(ss) 1073 updateScopesLastFlag([]*Scope{rs}) 1074 c.setAnalyzeCurrent([]*Scope{rs}, c.anal.curr) 1075 
rs.Instructions = append(rs.Instructions, vm.Instruction{ 1076 Op: vm.Output, 1077 Arg: output.NewArgument(). 1078 WithFunc(c.fill), 1079 }) 1080 } 1081 return rs, nil 1082 } 1083 1084 func constructValueScanBatch(ctx context.Context, proc *process.Process, node *plan.Node) (*batch.Batch, error) { 1085 var nodeId uuid.UUID 1086 var exprList []colexec.ExpressionExecutor 1087 1088 if node == nil || node.TableDef == nil { // like : select 1, 2 1089 bat := batch.NewWithSize(1) 1090 bat.Vecs[0] = vector.NewConstNull(types.T_int64.ToType(), 1, proc.Mp()) 1091 bat.SetRowCount(1) 1092 return bat, nil 1093 } 1094 // select * from (values row(1,1), row(2,2), row(3,3)) a; 1095 tableDef := node.TableDef 1096 colCount := len(tableDef.Cols) 1097 colsData := node.RowsetData.Cols 1098 copy(nodeId[:], node.Uuid) 1099 bat := proc.GetPrepareBatch() 1100 if bat == nil { 1101 bat = proc.GetValueScanBatch(nodeId) 1102 if bat == nil { 1103 return nil, moerr.NewInfo(ctx, fmt.Sprintf("constructValueScanBatch failed, node id: %s", nodeId.String())) 1104 } 1105 } 1106 params := proc.GetPrepareParams() 1107 if len(colsData) > 0 { 1108 exprs := proc.GetPrepareExprList() 1109 for i := 0; i < colCount; i++ { 1110 if exprs != nil { 1111 exprList = exprs.([][]colexec.ExpressionExecutor)[i] 1112 } 1113 if params != nil { 1114 vs := vector.MustFixedCol[types.Varlena](params) 1115 for _, row := range colsData[i].Data { 1116 if row.Pos >= 0 { 1117 isNull := params.GetNulls().Contains(uint64(row.Pos - 1)) 1118 str := vs[row.Pos-1].GetString(params.GetArea()) 1119 if err := util.SetBytesToAnyVector(ctx, str, int(row.RowPos), isNull, bat.Vecs[i], 1120 proc); err != nil { 1121 return nil, err 1122 } 1123 } 1124 } 1125 } 1126 if err := evalRowsetData(proc, colsData[i].Data, bat.Vecs[i], exprList); err != nil { 1127 bat.Clean(proc.Mp()) 1128 return nil, err 1129 } 1130 } 1131 } 1132 return bat, nil 1133 } 1134 1135 func (c *Compile) compilePlanScope(ctx context.Context, step int32, curNodeIdx int32, ns 
[]*plan.Node) ([]*Scope, error) { 1136 start := time.Now() 1137 defer func() { 1138 v2.TxnStatementCompilePlanScopeHistogram.Observe(time.Since(start).Seconds()) 1139 }() 1140 var ss []*Scope 1141 var left []*Scope 1142 var right []*Scope 1143 var err error 1144 defer func() { 1145 if err != nil { 1146 ReleaseScopes(ss) 1147 ReleaseScopes(left) 1148 ReleaseScopes(right) 1149 } 1150 }() 1151 n := ns[curNodeIdx] 1152 switch n.NodeType { 1153 case plan.Node_VALUE_SCAN: 1154 ds := newScope(Normal) 1155 ds.DataSource = &Source{isConst: true, node: n} 1156 ds.NodeInfo = engine.Node{Addr: c.addr, Mcpu: 1} 1157 ds.Proc = process.NewWithAnalyze(c.proc, c.ctx, 0, c.anal.Nodes()) 1158 ss = c.compileSort(n, c.compileProjection(n, []*Scope{ds})) 1159 return ss, nil 1160 case plan.Node_EXTERNAL_SCAN: 1161 node := plan2.DeepCopyNode(n) 1162 ss, err = c.compileExternScan(ctx, node) 1163 if err != nil { 1164 return nil, err 1165 } 1166 ss = c.compileSort(n, c.compileProjection(n, c.compileRestrict(node, ss))) 1167 return ss, nil 1168 case plan.Node_TABLE_SCAN: 1169 c.appendMetaTables(n.ObjRef) 1170 ss, err = c.compileTableScan(n) 1171 if err != nil { 1172 return nil, err 1173 } 1174 ss = c.compileProjection(n, c.compileRestrict(n, ss)) 1175 if n.Offset != nil { 1176 ss = c.compileOffset(n, ss) 1177 } 1178 if n.Limit != nil { 1179 ss = c.compileLimit(n, ss) 1180 } 1181 return ss, nil 1182 case plan.Node_SOURCE_SCAN: 1183 ss, err = c.compileSourceScan(ctx, n) 1184 if err != nil { 1185 return nil, err 1186 } 1187 ss = c.compileSort(n, c.compileProjection(n, c.compileRestrict(n, ss))) 1188 return ss, nil 1189 case plan.Node_FILTER, plan.Node_PROJECT, plan.Node_PRE_DELETE: 1190 curr := c.anal.curr 1191 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1192 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1193 if err != nil { 1194 return nil, err 1195 } 1196 c.setAnalyzeCurrent(ss, curr) 1197 ss = c.compileSort(n, c.compileProjection(n, c.compileRestrict(n, ss))) 1198 return ss, nil 
1199 case plan.Node_AGG: 1200 curr := c.anal.curr 1201 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1202 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1203 if err != nil { 1204 return nil, err 1205 } 1206 c.setAnalyzeCurrent(ss, curr) 1207 1208 groupInfo := constructGroup(c.ctx, n, ns[n.Children[0]], 0, 0, false, 0, c.proc) 1209 defer groupInfo.Release() 1210 anyDistinctAgg := groupInfo.AnyDistinctAgg() 1211 1212 if !anyDistinctAgg && n.Stats.HashmapStats != nil && n.Stats.HashmapStats.Shuffle { 1213 ss = c.compileSort(n, c.compileShuffleGroup(n, ss, ns)) 1214 return ss, nil 1215 } else { 1216 ss = c.compileSort(n, c.compileProjection(n, c.compileRestrict(n, c.compileMergeGroup(n, ss, ns, anyDistinctAgg)))) 1217 return ss, nil 1218 } 1219 case plan.Node_SAMPLE: 1220 curr := c.anal.curr 1221 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1222 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1223 if err != nil { 1224 return nil, err 1225 } 1226 c.setAnalyzeCurrent(ss, curr) 1227 1228 ss = c.compileSort(n, c.compileProjection(n, c.compileRestrict(n, c.compileSample(n, ss)))) 1229 return ss, nil 1230 case plan.Node_WINDOW: 1231 curr := c.anal.curr 1232 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1233 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1234 if err != nil { 1235 return nil, err 1236 } 1237 c.setAnalyzeCurrent(ss, curr) 1238 ss = c.compileSort(n, c.compileProjection(n, c.compileRestrict(n, c.compileWin(n, ss)))) 1239 return ss, nil 1240 case plan.Node_TIME_WINDOW: 1241 curr := c.anal.curr 1242 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1243 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1244 if err != nil { 1245 return nil, err 1246 } 1247 c.setAnalyzeCurrent(ss, curr) 1248 ss = c.compileProjection(n, c.compileRestrict(n, c.compileTimeWin(n, c.compileSort(n, ss)))) 1249 return ss, nil 1250 case plan.Node_FILL: 1251 curr := c.anal.curr 1252 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1253 ss, err = 
c.compilePlanScope(ctx, step, n.Children[0], ns) 1254 if err != nil { 1255 return nil, err 1256 } 1257 c.setAnalyzeCurrent(ss, curr) 1258 ss = c.compileProjection(n, c.compileRestrict(n, c.compileFill(n, ss))) 1259 return ss, nil 1260 case plan.Node_JOIN: 1261 curr := c.anal.curr 1262 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1263 left, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1264 if err != nil { 1265 return nil, err 1266 } 1267 c.setAnalyzeCurrent(left, int(n.Children[1])) 1268 right, err = c.compilePlanScope(ctx, step, n.Children[1], ns) 1269 if err != nil { 1270 return nil, err 1271 } 1272 c.setAnalyzeCurrent(right, curr) 1273 ss = c.compileSort(n, c.compileJoin(ctx, n, ns[n.Children[0]], ns[n.Children[1]], left, right)) 1274 return ss, nil 1275 case plan.Node_SORT: 1276 curr := c.anal.curr 1277 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1278 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1279 if err != nil { 1280 return nil, err 1281 } 1282 c.setAnalyzeCurrent(ss, curr) 1283 ss = c.compileProjection(n, c.compileRestrict(n, c.compileSort(n, ss))) 1284 return ss, nil 1285 case plan.Node_PARTITION: 1286 curr := c.anal.curr 1287 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1288 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1289 if err != nil { 1290 return nil, err 1291 } 1292 c.setAnalyzeCurrent(ss, curr) 1293 ss = c.compileProjection(n, c.compileRestrict(n, c.compilePartition(n, ss))) 1294 return ss, nil 1295 case plan.Node_UNION: 1296 curr := c.anal.curr 1297 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1298 left, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1299 if err != nil { 1300 return nil, err 1301 } 1302 c.setAnalyzeCurrent(left, int(n.Children[1])) 1303 right, err = c.compilePlanScope(ctx, step, n.Children[1], ns) 1304 if err != nil { 1305 return nil, err 1306 } 1307 c.setAnalyzeCurrent(right, curr) 1308 ss = c.compileSort(n, c.compileUnion(n, left, right)) 1309 return ss, nil 1310 case 
plan.Node_MINUS, plan.Node_INTERSECT, plan.Node_INTERSECT_ALL: 1311 curr := c.anal.curr 1312 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1313 left, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1314 if err != nil { 1315 return nil, err 1316 } 1317 c.setAnalyzeCurrent(left, int(n.Children[1])) 1318 right, err = c.compilePlanScope(ctx, step, n.Children[1], ns) 1319 if err != nil { 1320 return nil, err 1321 } 1322 c.setAnalyzeCurrent(right, curr) 1323 ss = c.compileSort(n, c.compileMinusAndIntersect(n, left, right, n.NodeType)) 1324 return ss, nil 1325 case plan.Node_UNION_ALL: 1326 curr := c.anal.curr 1327 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1328 1329 left, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1330 if err != nil { 1331 return nil, err 1332 } 1333 c.setAnalyzeCurrent(left, int(n.Children[1])) 1334 right, err = c.compilePlanScope(ctx, step, n.Children[1], ns) 1335 if err != nil { 1336 return nil, err 1337 } 1338 c.setAnalyzeCurrent(right, curr) 1339 ss = c.compileSort(n, c.compileUnionAll(left, right)) 1340 return ss, nil 1341 case plan.Node_DELETE: 1342 if n.DeleteCtx.CanTruncate { 1343 s := newScope(TruncateTable) 1344 s.Plan = &plan.Plan{ 1345 Plan: &plan.Plan_Ddl{ 1346 Ddl: &plan.DataDefinition{ 1347 DdlType: plan.DataDefinition_TRUNCATE_TABLE, 1348 Definition: &plan.DataDefinition_TruncateTable{ 1349 TruncateTable: n.DeleteCtx.TruncateTable, 1350 }, 1351 }, 1352 }, 1353 } 1354 ss = []*Scope{s} 1355 return ss, nil 1356 } 1357 c.appendMetaTables(n.DeleteCtx.Ref) 1358 curr := c.anal.curr 1359 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1360 1361 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1362 if err != nil { 1363 return nil, err 1364 } 1365 1366 n.NotCacheable = true 1367 nodeStats := ns[n.Children[0]].Stats 1368 1369 var arg *deletion.Argument 1370 arg, err = constructDeletion(n, c.e, c.proc) 1371 if err != nil { 1372 return nil, err 1373 } 1374 1375 if nodeStats.GetCost()*float64(SingleLineSizeEstimate) > 1376 
float64(DistributedThreshold) && 1377 !arg.DeleteCtx.CanTruncate { 1378 logutil.Infof("delete of '%s' write s3\n", c.sql) 1379 rs := c.newDeleteMergeScope(arg, ss) 1380 rs.Instructions = append(rs.Instructions, vm.Instruction{ 1381 Op: vm.MergeDelete, 1382 Arg: mergedelete.NewArgument(). 1383 WithDelSource(arg.DeleteCtx.Source). 1384 WithPartitionSources(arg.DeleteCtx.PartitionSources), 1385 }) 1386 rs.Magic = MergeDelete 1387 ss = []*Scope{rs} 1388 arg.Release() 1389 return ss, nil 1390 } 1391 rs := c.newMergeScope(ss) 1392 // updateScopesLastFlag([]*Scope{rs}) 1393 rs.Magic = Merge 1394 c.setAnalyzeCurrent([]*Scope{rs}, c.anal.curr) 1395 1396 rs.Instructions = append(rs.Instructions, vm.Instruction{ 1397 Op: vm.Deletion, 1398 Arg: arg, 1399 }) 1400 ss = []*Scope{rs} 1401 c.setAnalyzeCurrent(ss, curr) 1402 return ss, nil 1403 case plan.Node_ON_DUPLICATE_KEY: 1404 curr := c.anal.curr 1405 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1406 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1407 if err != nil { 1408 return nil, err 1409 } 1410 c.setAnalyzeCurrent(ss, curr) 1411 1412 rs := c.newMergeScope(ss) 1413 rs.Instructions[0].Arg.Release() 1414 rs.Instructions[0] = vm.Instruction{ 1415 Op: vm.OnDuplicateKey, 1416 Idx: c.anal.curr, 1417 Arg: constructOnduplicateKey(n, c.e), 1418 } 1419 ss = []*Scope{rs} 1420 return ss, nil 1421 case plan.Node_FUZZY_FILTER: 1422 curr := c.anal.curr 1423 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1424 left, err := c.compilePlanScope(ctx, step, n.Children[0], ns) 1425 if err != nil { 1426 return nil, err 1427 } 1428 c.setAnalyzeCurrent(left, int(n.Children[1])) 1429 right, err := c.compilePlanScope(ctx, step, n.Children[1], ns) 1430 if err != nil { 1431 return nil, err 1432 } 1433 c.setAnalyzeCurrent(right, curr) 1434 return c.compileFuzzyFilter(n, ns, left, right) 1435 case plan.Node_PRE_INSERT_UK, plan.Node_PRE_INSERT_SK: 1436 curr := c.anal.curr 1437 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1438 if err 
!= nil { 1439 return nil, err 1440 } 1441 currentFirstFlag := c.anal.isFirst 1442 for i := range ss { 1443 if n.NodeType == plan.Node_PRE_INSERT_UK { 1444 var preInsertUkArg *preinsertunique.Argument 1445 preInsertUkArg, err = constructPreInsertUk(n, c.proc) 1446 if err != nil { 1447 return nil, err 1448 } 1449 ss[i].appendInstruction(vm.Instruction{ 1450 Op: vm.PreInsertUnique, 1451 Idx: c.anal.curr, 1452 IsFirst: currentFirstFlag, 1453 Arg: preInsertUkArg, 1454 }) 1455 } else { 1456 var preInsertSkArg *preinsertsecondaryindex.Argument 1457 preInsertSkArg, err = constructPreInsertSk(n, c.proc) 1458 if err != nil { 1459 return nil, err 1460 } 1461 ss[i].appendInstruction(vm.Instruction{ 1462 Op: vm.PreInsertSecondaryIndex, 1463 Idx: c.anal.curr, 1464 IsFirst: currentFirstFlag, 1465 Arg: preInsertSkArg, 1466 }) 1467 } 1468 } 1469 c.setAnalyzeCurrent(ss, curr) 1470 return ss, nil 1471 case plan.Node_PRE_INSERT: 1472 curr := c.anal.curr 1473 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1474 if err != nil { 1475 return nil, err 1476 } 1477 currentFirstFlag := c.anal.isFirst 1478 for i := range ss { 1479 var preInsertArg *preinsert.Argument 1480 preInsertArg, err = constructPreInsert(ns, n, c.e, c.proc) 1481 if err != nil { 1482 return nil, err 1483 } 1484 ss[i].appendInstruction(vm.Instruction{ 1485 Op: vm.PreInsert, 1486 Idx: c.anal.curr, 1487 IsFirst: currentFirstFlag, 1488 Arg: preInsertArg, 1489 }) 1490 } 1491 c.setAnalyzeCurrent(ss, curr) 1492 return ss, nil 1493 case plan.Node_INSERT: 1494 c.appendMetaTables(n.ObjRef) 1495 curr := c.anal.curr 1496 n.NotCacheable = true 1497 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1498 if err != nil { 1499 return nil, err 1500 } 1501 1502 currentFirstFlag := c.anal.isFirst 1503 toWriteS3 := n.Stats.GetCost()*float64(SingleLineSizeEstimate) > 1504 float64(DistributedThreshold) || c.anal.qry.LoadTag 1505 1506 if toWriteS3 { 1507 logutil.Debugf("insert of '%s' write s3\n", c.sql) 1508 if 
!haveSinkScanInPlan(ns, n.Children[0]) && len(ss) != 1 { 1509 var insertArg *insert.Argument 1510 insertArg, err = constructInsert(n, c.e, c.proc) 1511 if err != nil { 1512 return nil, err 1513 } 1514 insertArg.ToWriteS3 = true 1515 rs := c.newInsertMergeScope(insertArg, ss) 1516 rs.Magic = MergeInsert 1517 rs.Instructions = append(rs.Instructions, vm.Instruction{ 1518 Op: vm.MergeBlock, 1519 Arg: mergeblock.NewArgument(). 1520 WithTbl(insertArg.InsertCtx.Rel). 1521 WithPartitionSources(insertArg.InsertCtx.PartitionSources). 1522 WithAddAffectedRows(insertArg.InsertCtx.AddAffectedRows), 1523 }) 1524 ss = []*Scope{rs} 1525 insertArg.Release() 1526 } else { 1527 dataScope := c.newMergeScope(ss) 1528 dataScope.IsEnd = true 1529 if c.anal.qry.LoadTag { 1530 dataScope.Proc.Reg.MergeReceivers[0].Ch = make(chan *batch.Batch, dataScope.NodeInfo.Mcpu) // reset the channel buffer of sink for load 1531 } 1532 parallelSize := c.getParallelSizeForExternalScan(n, dataScope.NodeInfo.Mcpu) 1533 scopes := make([]*Scope, 0, parallelSize) 1534 regs := make([]*process.WaitRegister, 0, parallelSize) 1535 for i := 0; i < parallelSize; i++ { 1536 s := newScope(Merge) 1537 s.Instructions = []vm.Instruction{{Op: vm.Merge, Arg: merge.NewArgument()}} 1538 scopes = append(scopes, s) 1539 scopes[i].Proc = process.NewFromProc(c.proc, c.ctx, 1) 1540 if c.anal.qry.LoadTag { 1541 for _, rr := range scopes[i].Proc.Reg.MergeReceivers { 1542 rr.Ch = make(chan *batch.Batch, shuffleChannelBufferSize) 1543 } 1544 } 1545 regs = append(regs, scopes[i].Proc.Reg.MergeReceivers...) 
1546 } 1547 1548 if c.anal.qry.LoadTag && n.Stats.HashmapStats != nil && n.Stats.HashmapStats.Shuffle && dataScope.NodeInfo.Mcpu == parallelSize { 1549 _, arg := constructDispatchLocalAndRemote(0, scopes, c.addr) 1550 arg.FuncId = dispatch.ShuffleToAllFunc 1551 arg.ShuffleType = plan2.ShuffleToLocalMatchedReg 1552 dataScope.Instructions = append(dataScope.Instructions, vm.Instruction{ 1553 Op: vm.Dispatch, 1554 Arg: arg, 1555 }) 1556 } else { 1557 dataScope.Instructions = append(dataScope.Instructions, vm.Instruction{ 1558 Op: vm.Dispatch, 1559 Arg: constructDispatchLocal(false, false, false, regs), 1560 }) 1561 } 1562 for i := range scopes { 1563 var insertArg *insert.Argument 1564 insertArg, err = constructInsert(n, c.e, c.proc) 1565 if err != nil { 1566 return nil, err 1567 } 1568 insertArg.ToWriteS3 = true 1569 scopes[i].appendInstruction(vm.Instruction{ 1570 Op: vm.Insert, 1571 Idx: c.anal.curr, 1572 IsFirst: currentFirstFlag, 1573 Arg: insertArg, 1574 }) 1575 } 1576 1577 var insertArg *insert.Argument 1578 insertArg, err = constructInsert(n, c.e, c.proc) 1579 if err != nil { 1580 return nil, err 1581 } 1582 insertArg.ToWriteS3 = true 1583 rs := c.newMergeScope(scopes) 1584 rs.PreScopes = append(rs.PreScopes, dataScope) 1585 rs.Magic = MergeInsert 1586 rs.Instructions = append(rs.Instructions, vm.Instruction{ 1587 Op: vm.MergeBlock, 1588 Arg: mergeblock.NewArgument(). 1589 WithTbl(insertArg.InsertCtx.Rel). 1590 WithPartitionSources(insertArg.InsertCtx.PartitionSources). 
1591 WithAddAffectedRows(insertArg.InsertCtx.AddAffectedRows), 1592 }) 1593 ss = []*Scope{rs} 1594 insertArg.Release() 1595 } 1596 } else { 1597 for i := range ss { 1598 var insertArg *insert.Argument 1599 insertArg, err = constructInsert(n, c.e, c.proc) 1600 if err != nil { 1601 return nil, err 1602 } 1603 ss[i].appendInstruction(vm.Instruction{ 1604 Op: vm.Insert, 1605 Idx: c.anal.curr, 1606 IsFirst: currentFirstFlag, 1607 Arg: insertArg, 1608 }) 1609 } 1610 } 1611 c.setAnalyzeCurrent(ss, curr) 1612 return ss, nil 1613 case plan.Node_LOCK_OP: 1614 curr := c.anal.curr 1615 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1616 if err != nil { 1617 return nil, err 1618 } 1619 1620 block := false 1621 // only pessimistic txn needs to block downstream operators. 1622 if c.proc.TxnOperator.Txn().IsPessimistic() { 1623 block = n.LockTargets[0].Block 1624 if block { 1625 ss = []*Scope{c.newMergeScope(ss)} 1626 } 1627 } 1628 currentFirstFlag := c.anal.isFirst 1629 for i := range ss { 1630 var lockOpArg *lockop.Argument 1631 lockOpArg, err = constructLockOp(n, c.e) 1632 if err != nil { 1633 return nil, err 1634 } 1635 lockOpArg.SetBlock(block) 1636 if block { 1637 ss[i].Instructions[len(ss[i].Instructions)-1].Arg.Release() 1638 ss[i].Instructions[len(ss[i].Instructions)-1] = vm.Instruction{ 1639 Op: vm.LockOp, 1640 Idx: c.anal.curr, 1641 IsFirst: currentFirstFlag, 1642 Arg: lockOpArg, 1643 } 1644 } else { 1645 ss[i].appendInstruction(vm.Instruction{ 1646 Op: vm.LockOp, 1647 Idx: c.anal.curr, 1648 IsFirst: currentFirstFlag, 1649 Arg: lockOpArg, 1650 }) 1651 } 1652 } 1653 ss = c.compileProjection(n, ss) 1654 c.setAnalyzeCurrent(ss, curr) 1655 return ss, nil 1656 case plan.Node_FUNCTION_SCAN: 1657 curr := c.anal.curr 1658 c.setAnalyzeCurrent(nil, int(n.Children[0])) 1659 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1660 if err != nil { 1661 return nil, err 1662 } 1663 c.setAnalyzeCurrent(ss, curr) 1664 ss = c.compileSort(n, c.compileProjection(n, 
c.compileRestrict(n, c.compileTableFunction(n, ss)))) 1665 return ss, nil 1666 case plan.Node_SINK_SCAN: 1667 receivers := make([]*process.WaitRegister, len(n.SourceStep)) 1668 for i, step := range n.SourceStep { 1669 receivers[i] = c.getNodeReg(step, curNodeIdx) 1670 if receivers[i] == nil { 1671 return nil, moerr.NewInternalError(c.ctx, "no data sender for sinkScan node") 1672 } 1673 } 1674 rs := newScope(Merge) 1675 rs.NodeInfo = engine.Node{Addr: c.addr, Mcpu: ncpu} 1676 rs.Proc = process.NewWithAnalyze(c.proc, c.ctx, 1, c.anal.Nodes()) 1677 rs.Instructions = []vm.Instruction{{Op: vm.Merge, Arg: merge.NewArgument().WithSinkScan(true)}} 1678 for _, r := range receivers { 1679 r.Ctx = rs.Proc.Ctx 1680 } 1681 rs.Proc.Reg.MergeReceivers = receivers 1682 ss = c.compileProjection(n, []*Scope{rs}) 1683 return ss, nil 1684 case plan.Node_RECURSIVE_SCAN: 1685 receivers := make([]*process.WaitRegister, len(n.SourceStep)) 1686 for i, step := range n.SourceStep { 1687 receivers[i] = c.getNodeReg(step, curNodeIdx) 1688 if receivers[i] == nil { 1689 return nil, moerr.NewInternalError(c.ctx, "no data sender for sinkScan node") 1690 } 1691 } 1692 rs := newScope(Merge) 1693 rs.NodeInfo = engine.Node{Addr: c.addr, Mcpu: 1} 1694 rs.Proc = process.NewWithAnalyze(c.proc, c.ctx, len(receivers), c.anal.Nodes()) 1695 rs.Instructions = []vm.Instruction{{Op: vm.MergeRecursive, Arg: mergerecursive.NewArgument()}} 1696 1697 for _, r := range receivers { 1698 r.Ctx = rs.Proc.Ctx 1699 } 1700 rs.Proc.Reg.MergeReceivers = receivers 1701 ss = []*Scope{rs} 1702 return ss, nil 1703 case plan.Node_RECURSIVE_CTE: 1704 receivers := make([]*process.WaitRegister, len(n.SourceStep)) 1705 for i, step := range n.SourceStep { 1706 receivers[i] = c.getNodeReg(step, curNodeIdx) 1707 if receivers[i] == nil { 1708 return nil, moerr.NewInternalError(c.ctx, "no data sender for sinkScan node") 1709 } 1710 } 1711 rs := newScope(Merge) 1712 rs.NodeInfo = engine.Node{Addr: c.addr, Mcpu: ncpu} 1713 rs.Proc = 
process.NewWithAnalyze(c.proc, c.ctx, len(receivers), c.anal.Nodes()) 1714 rs.Instructions = []vm.Instruction{{Op: vm.MergeCTE, Arg: mergecte.NewArgument()}} 1715 1716 for _, r := range receivers { 1717 r.Ctx = rs.Proc.Ctx 1718 } 1719 rs.Proc.Reg.MergeReceivers = receivers 1720 ss = c.compileSort(n, []*Scope{rs}) 1721 return ss, nil 1722 case plan.Node_SINK: 1723 receivers := c.getStepRegs(step) 1724 if len(receivers) == 0 { 1725 return nil, moerr.NewInternalError(c.ctx, "no data receiver for sink node") 1726 } 1727 ss, err = c.compilePlanScope(ctx, step, n.Children[0], ns) 1728 if err != nil { 1729 return nil, err 1730 } 1731 rs := c.newMergeScope(ss) 1732 rs.appendInstruction(vm.Instruction{ 1733 Op: vm.Dispatch, 1734 Arg: constructDispatchLocal(true, true, n.RecursiveSink, receivers), 1735 }) 1736 ss = []*Scope{rs} 1737 return ss, nil 1738 default: 1739 return nil, moerr.NewNYI(ctx, fmt.Sprintf("query '%s'", n)) 1740 } 1741 } 1742 1743 func (c *Compile) appendStepRegs(step, nodeId int32, reg *process.WaitRegister) { 1744 c.nodeRegs[[2]int32{step, nodeId}] = reg 1745 c.stepRegs[step] = append(c.stepRegs[step], [2]int32{step, nodeId}) 1746 } 1747 1748 func (c *Compile) getNodeReg(step, nodeId int32) *process.WaitRegister { 1749 return c.nodeRegs[[2]int32{step, nodeId}] 1750 } 1751 1752 func (c *Compile) getStepRegs(step int32) []*process.WaitRegister { 1753 wrs := make([]*process.WaitRegister, len(c.stepRegs[step])) 1754 for i, sn := range c.stepRegs[step] { 1755 wrs[i] = c.nodeRegs[sn] 1756 } 1757 return wrs 1758 } 1759 1760 func (c *Compile) constructScopeForExternal(addr string, parallel bool) *Scope { 1761 ds := newScope(Normal) 1762 if parallel { 1763 ds.Magic = Remote 1764 } 1765 ds.NodeInfo = engine.Node{Addr: addr, Mcpu: ncpu} 1766 ds.Proc = process.NewWithAnalyze(c.proc, c.ctx, 0, c.anal.Nodes()) 1767 c.proc.LoadTag = c.anal.qry.LoadTag 1768 ds.Proc.LoadTag = true 1769 ds.DataSource = &Source{isConst: true} 1770 return ds 1771 } 1772 1773 func (c 
// StreamMaxInterval is the smallest width calculatePartitions will give to a
// partition; narrower intervals are widened to this value.
const StreamMaxInterval = 8192

// calculatePartitions splits the half-open offset range [start, end) into at
// most n contiguous [l, r] pairs.
//
// Each partition is (end-start)/n wide, but never narrower than
// StreamMaxInterval, so short ranges produce fewer than n partitions. The
// final partition always ends exactly at end: integer division truncates, so
// without that rule up to n-1 trailing offsets would be left out of every
// partition whenever (end-start) is not a multiple of n.
//
// A value of n below 1 is treated as 1, which also avoids a division by zero.
// When end <= start a single {start, end} pair is returned.
func calculatePartitions(start, end, n int64) [][2]int64 {
	if n < 1 {
		n = 1
	}

	interval := (end - start) / n
	if interval < StreamMaxInterval {
		interval = StreamMaxInterval
	}

	var ps [][2]int64
	l := start
	for i := int64(0); i < n; i++ {
		r := l + interval
		// Close the range on the last slot as well, so the division
		// remainder is absorbed instead of being dropped.
		if r >= end || i == n-1 {
			ps = append(ps, [2]int64{l, end})
			break
		}
		ps = append(ps, [2]int64{l, r})
		l = r
	}
	return ps
}
error) { 1843 ctx, span := trace.Start(ctx, "compileExternScan") 1844 defer span.End() 1845 start := time.Now() 1846 defer func() { 1847 if t := time.Since(start); t > time.Second { 1848 logutil.Infof("compileExternScan cost %v", t) 1849 } 1850 }() 1851 1852 t := time.Now() 1853 // lock table's meta 1854 if n.ObjRef != nil && n.TableDef != nil { 1855 if err := lockMoTable(c, n.ObjRef.SchemaName, n.TableDef.Name, lock.LockMode_Shared); err != nil { 1856 return nil, err 1857 } 1858 } 1859 // lock table, for tables with no primary key, there is no need to lock the data 1860 if n.ObjRef != nil && c.proc.TxnOperator.Txn().IsPessimistic() && n.TableDef != nil && 1861 n.TableDef.Pkey.PkeyColName != catalog.FakePrimaryKeyColName { 1862 db, err := c.e.Database(ctx, n.ObjRef.SchemaName, c.proc.TxnOperator) 1863 if err != nil { 1864 panic(err) 1865 } 1866 rel, err := db.Relation(ctx, n.ObjRef.ObjName, c.proc) 1867 if err != nil { 1868 return nil, err 1869 } 1870 err = lockTable(c.ctx, c.e, c.proc, rel, n.ObjRef.SchemaName, nil, false) 1871 if err != nil { 1872 return nil, err 1873 } 1874 } 1875 if time.Since(t) > time.Second { 1876 logutil.Infof("lock table %s.%s cost %v", n.ObjRef.SchemaName, n.ObjRef.ObjName, time.Since(t)) 1877 } 1878 ID2Addr := make(map[int]int, 0) 1879 mcpu := 0 1880 for i := 0; i < len(c.cnList); i++ { 1881 tmp := mcpu 1882 mcpu += c.cnList[i].Mcpu 1883 ID2Addr[i] = mcpu - tmp 1884 } 1885 param := &tree.ExternParam{} 1886 if n.ExternScan == nil || n.ExternScan.Type != tree.INLINE { 1887 err := json.Unmarshal([]byte(n.TableDef.Createsql), param) 1888 if err != nil { 1889 return nil, err 1890 } 1891 } else { 1892 param.ScanType = int(n.ExternScan.Type) 1893 param.Data = n.ExternScan.Data 1894 param.Format = n.ExternScan.Format 1895 param.Tail = new(tree.TailParameter) 1896 param.Tail.IgnoredLines = n.ExternScan.IgnoredLines 1897 param.Tail.Fields = &tree.Fields{ 1898 Terminated: &tree.Terminated{ 1899 Value: n.ExternScan.Terminated, 1900 }, 1901 
EnclosedBy: &tree.EnclosedBy{ 1902 Value: n.ExternScan.EnclosedBy[0], 1903 }, 1904 EscapedBy: &tree.EscapedBy{ 1905 Value: n.ExternScan.EscapedBy[0], 1906 }, 1907 } 1908 param.JsonData = n.ExternScan.JsonType 1909 } 1910 if param.ScanType == tree.S3 { 1911 if !param.Init { 1912 if err := plan2.InitS3Param(param); err != nil { 1913 return nil, err 1914 } 1915 } 1916 if param.Parallel { 1917 mcpu = 0 1918 ID2Addr = make(map[int]int, 0) 1919 for i := 0; i < len(c.cnList); i++ { 1920 tmp := mcpu 1921 if c.cnList[i].Mcpu > external.S3ParallelMaxnum { 1922 mcpu += external.S3ParallelMaxnum 1923 } else { 1924 mcpu += c.cnList[i].Mcpu 1925 } 1926 ID2Addr[i] = mcpu - tmp 1927 } 1928 } 1929 } else if param.ScanType == tree.INLINE { 1930 return c.compileExternValueScan(n, param) 1931 } else { 1932 if err := plan2.InitInfileParam(param); err != nil { 1933 return nil, err 1934 } 1935 } 1936 1937 t = time.Now() 1938 param.FileService = c.proc.FileService 1939 param.Ctx = c.ctx 1940 var err error 1941 var fileList []string 1942 var fileSize []int64 1943 if !param.Local && !param.Init { 1944 if param.QueryResult { 1945 fileList = strings.Split(param.Filepath, ",") 1946 for i := range fileList { 1947 fileList[i] = strings.TrimSpace(fileList[i]) 1948 } 1949 } else { 1950 _, spanReadDir := trace.Start(ctx, "compileExternScan.ReadDir") 1951 fileList, fileSize, err = plan2.ReadDir(param) 1952 if err != nil { 1953 spanReadDir.End() 1954 return nil, err 1955 } 1956 spanReadDir.End() 1957 } 1958 fileList, fileSize, err = external.FilterFileList(ctx, n, c.proc, fileList, fileSize) 1959 if err != nil { 1960 return nil, err 1961 } 1962 if param.LoadFile && len(fileList) == 0 { 1963 return nil, moerr.NewInvalidInput(ctx, "the file does not exist in load flow") 1964 } 1965 } else { 1966 fileList = []string{param.Filepath} 1967 fileSize = []int64{param.FileSize} 1968 } 1969 if time.Since(t) > time.Second { 1970 logutil.Infof("read dir cost %v", time.Since(t)) 1971 } 1972 1973 if len(fileList) 
== 0 { 1974 ret := newScope(Normal) 1975 ret.DataSource = nil 1976 ret.Proc = process.NewWithAnalyze(c.proc, c.ctx, 0, c.anal.Nodes()) 1977 1978 return []*Scope{ret}, nil 1979 } 1980 if param.Parallel && (external.GetCompressType(param, fileList[0]) != tree.NOCOMPRESS || param.Local) { 1981 return c.compileExternScanParallel(n, param, fileList, fileSize) 1982 } 1983 1984 t = time.Now() 1985 var fileOffset [][]int64 1986 if param.Parallel { 1987 if param.Strict { 1988 visibleCols := make([]*plan.ColDef, 0) 1989 for _, col := range n.TableDef.Cols { 1990 if !col.Hidden { 1991 visibleCols = append(visibleCols, col) 1992 } 1993 } 1994 for i := 0; i < len(fileList); i++ { 1995 param.Filepath = fileList[i] 1996 arr, err := external.ReadFileOffsetStrict(param, mcpu, fileSize[i], visibleCols) 1997 fileOffset = append(fileOffset, arr) 1998 if err != nil { 1999 return nil, err 2000 } 2001 } 2002 } else { 2003 for i := 0; i < len(fileList); i++ { 2004 param.Filepath = fileList[i] 2005 arr, err := external.ReadFileOffsetNoStrict(param, mcpu, fileSize[i]) 2006 fileOffset = append(fileOffset, arr) 2007 if err != nil { 2008 return nil, err 2009 } 2010 } 2011 } 2012 2013 } else { 2014 for i := 0; i < len(fileList); i++ { 2015 param.Filepath = fileList[i] 2016 } 2017 } 2018 2019 if time.Since(t) > time.Second { 2020 logutil.Infof("read file offset cost %v", time.Since(t)) 2021 } 2022 ss := make([]*Scope, 1) 2023 if param.Parallel { 2024 ss = make([]*Scope, len(c.cnList)) 2025 } 2026 pre := 0 2027 for i := range ss { 2028 ss[i] = c.constructScopeForExternal(c.cnList[i].Addr, param.Parallel) 2029 ss[i].IsLoad = true 2030 count := ID2Addr[i] 2031 fileOffsetTmp := make([]*pipeline.FileOffset, len(fileList)) 2032 for j := range fileOffsetTmp { 2033 preIndex := pre 2034 fileOffsetTmp[j] = &pipeline.FileOffset{} 2035 fileOffsetTmp[j].Offset = make([]int64, 0) 2036 if param.Parallel { 2037 if param.Strict { 2038 if 2*preIndex+2*count < len(fileOffset[j]) { 2039 fileOffsetTmp[j].Offset = 
append(fileOffsetTmp[j].Offset, fileOffset[j][2*preIndex:2*preIndex+2*count]...) 2040 } else if 2*preIndex < len(fileOffset[j]) { 2041 fileOffsetTmp[j].Offset = append(fileOffsetTmp[j].Offset, fileOffset[j][2*preIndex:]...) 2042 } else { 2043 continue 2044 } 2045 } else { 2046 fileOffsetTmp[j].Offset = append(fileOffsetTmp[j].Offset, fileOffset[j][2*preIndex:2*preIndex+2*count]...) 2047 } 2048 } else { 2049 fileOffsetTmp[j].Offset = append(fileOffsetTmp[j].Offset, []int64{0, -1}...) 2050 } 2051 } 2052 ss[i].appendInstruction(vm.Instruction{ 2053 Op: vm.External, 2054 Idx: c.anal.curr, 2055 IsFirst: c.anal.isFirst, 2056 Arg: constructExternal(n, param, c.ctx, fileList, fileSize, fileOffsetTmp), 2057 }) 2058 pre += count 2059 } 2060 2061 return ss, nil 2062 } 2063 2064 func (c *Compile) getParallelSizeForExternalScan(n *plan.Node, cpuNum int) int { 2065 if n.Stats == nil { 2066 return cpuNum 2067 } 2068 totalSize := n.Stats.Cost * n.Stats.Rowsize 2069 parallelSize := int(totalSize / float64(colexec.WriteS3Threshold)) 2070 if parallelSize < 1 { 2071 return 1 2072 } else if parallelSize < cpuNum { 2073 return parallelSize 2074 } 2075 return cpuNum 2076 } 2077 2078 func (c *Compile) compileExternValueScan(n *plan.Node, param *tree.ExternParam) ([]*Scope, error) { 2079 parallelSize := c.getParallelSizeForExternalScan(n, ncpu) 2080 ss := make([]*Scope, parallelSize) 2081 for i := 0; i < parallelSize; i++ { 2082 ss[i] = c.constructLoadMergeScope() 2083 } 2084 s := c.constructScopeForExternal(c.addr, false) 2085 s.appendInstruction(vm.Instruction{ 2086 Op: vm.External, 2087 Idx: c.anal.curr, 2088 IsFirst: c.anal.isFirst, 2089 Arg: constructExternal(n, param, c.ctx, nil, nil, nil), 2090 }) 2091 _, arg := constructDispatchLocalAndRemote(0, ss, c.addr) 2092 arg.FuncId = dispatch.SendToAnyLocalFunc 2093 s.appendInstruction(vm.Instruction{ 2094 Op: vm.Dispatch, 2095 Arg: arg, 2096 }) 2097 ss[0].PreScopes = append(ss[0].PreScopes, s) 2098 c.anal.isFirst = false 2099 return ss, 
nil 2100 } 2101 2102 // construct one thread to read the file data, then dispatch to mcpu thread to get the filedata for insert 2103 func (c *Compile) compileExternScanParallel(n *plan.Node, param *tree.ExternParam, fileList []string, fileSize []int64) ([]*Scope, error) { 2104 param.Parallel = false 2105 mcpu := c.cnList[0].Mcpu 2106 ss := make([]*Scope, mcpu) 2107 for i := 0; i < mcpu; i++ { 2108 ss[i] = c.constructLoadMergeScope() 2109 } 2110 fileOffsetTmp := make([]*pipeline.FileOffset, len(fileList)) 2111 for i := 0; i < len(fileList); i++ { 2112 fileOffsetTmp[i] = &pipeline.FileOffset{} 2113 fileOffsetTmp[i].Offset = make([]int64, 0) 2114 fileOffsetTmp[i].Offset = append(fileOffsetTmp[i].Offset, []int64{0, -1}...) 2115 } 2116 extern := constructExternal(n, param, c.ctx, fileList, fileSize, fileOffsetTmp) 2117 extern.Es.ParallelLoad = true 2118 scope := c.constructScopeForExternal("", false) 2119 scope.appendInstruction(vm.Instruction{ 2120 Op: vm.External, 2121 Idx: c.anal.curr, 2122 IsFirst: c.anal.isFirst, 2123 Arg: extern, 2124 }) 2125 _, arg := constructDispatchLocalAndRemote(0, ss, c.addr) 2126 arg.FuncId = dispatch.SendToAnyLocalFunc 2127 scope.appendInstruction(vm.Instruction{ 2128 Op: vm.Dispatch, 2129 Arg: arg, 2130 }) 2131 ss[0].PreScopes = append(ss[0].PreScopes, scope) 2132 c.anal.isFirst = false 2133 return ss, nil 2134 } 2135 2136 func (c *Compile) compileTableFunction(n *plan.Node, ss []*Scope) []*Scope { 2137 currentFirstFlag := c.anal.isFirst 2138 for i := range ss { 2139 ss[i].appendInstruction(vm.Instruction{ 2140 Op: vm.TableFunction, 2141 Idx: c.anal.curr, 2142 IsFirst: currentFirstFlag, 2143 Arg: constructTableFunction(n), 2144 }) 2145 } 2146 c.anal.isFirst = false 2147 2148 return ss 2149 } 2150 2151 func (c *Compile) compileTableScan(n *plan.Node) ([]*Scope, error) { 2152 nodes, partialResults, partialResultTypes, err := c.generateNodes(n) 2153 if err != nil { 2154 return nil, err 2155 } 2156 ss := make([]*Scope, 0, len(nodes)) 2157 
// compileTableScanWithNode creates a Remote scope placed on node whose data
// source refers to plan node n and which owns a fresh process. The returned
// error is always nil in the current implementation.
func (c *Compile) compileTableScanWithNode(n *plan.Node, node engine.Node) (*Scope, error) {
	s := newScope(Remote)
	s.NodeInfo = node
	s.DataSource = &Source{
		node: n,
	}
	s.Proc = process.NewWithAnalyze(c.proc, c.ctx, 0, c.anal.Nodes())
	return s, nil
}

// compileTableScanDataSource fills in the remaining fields of s.DataSource
// for the table scan node stored in s.DataSource.node: snapshot timestamp,
// column names, resolved table definition, relation/schema names, partition
// relation names, the folded filter expression, runtime filter specs and
// ordering.
//
// It returns an error when the relation cannot be opened under a snapshot
// operator or when constant folding of the filter fails. Failures to open
// the database, or to open the temporary-table fallback, panic.
func (c *Compile) compileTableScanDataSource(s *Scope) error {
	var err error
	var tblDef *plan.TableDef
	var ts timestamp.Timestamp
	var db engine.Database
	var rel engine.Relation
	var txnOp client.TxnOperator

	n := s.DataSource.node
	// attrs holds the name of every column in the node's table definition.
	attrs := make([]string, len(n.TableDef.Cols))
	for j, col := range n.TableDef.Cols {
		attrs[j] = col.Name
	}

	//-----------------------------------------------------------------------------------------------------
	// Use the transaction's own operator unless the node asks for a non-zero
	// snapshot timestamp older than the transaction's snapshot; in that case
	// clone an operator at that timestamp and, if a tenant is given, attach
	// its ID to the context.
	ctx := c.ctx
	txnOp = c.proc.TxnOperator
	if n.ScanSnapshot != nil && n.ScanSnapshot.TS != nil {
		if !n.ScanSnapshot.TS.Equal(timestamp.Timestamp{LogicalTime: 0, PhysicalTime: 0}) &&
			n.ScanSnapshot.TS.Less(c.proc.TxnOperator.Txn().SnapshotTS) {
			txnOp = c.proc.TxnOperator.CloneSnapshotOp(*n.ScanSnapshot.TS)

			if n.ScanSnapshot.Tenant != nil {
				ctx = context.WithValue(ctx, defines.TenantIDKey{}, n.ScanSnapshot.Tenant.TenantID)
			}
		}
	}
	//-----------------------------------------------------------------------------------------------------

	// NOTE(review): c.proc.TxnOperator has already been dereferenced above,
	// so this nil check cannot protect the earlier accesses — confirm intent.
	if c.proc != nil && c.proc.TxnOperator != nil {
		ts = txnOp.Txn().SnapshotTS
	}
	{
		//ctx := c.ctx
		// Cluster tables are read under the system account; a publication
		// reference overrides the account with the publisher's tenant ID.
		if util.TableIsClusterTable(n.TableDef.GetTableType()) {
			ctx = defines.AttachAccountId(ctx, catalog.System_Account)
		}
		if n.ObjRef.PubInfo != nil {
			ctx = defines.AttachAccountId(ctx, uint32(n.ObjRef.PubInfo.TenantId))
		}
		db, err = c.e.Database(ctx, n.ObjRef.SchemaName, txnOp)
		if err != nil {
			panic(err)
		}
		rel, err = db.Relation(ctx, n.TableDef.Name, c.proc)
		if err != nil {
			// Under a snapshot operator the lookup error is returned as is;
			// otherwise fall back to the temporary database, using c.ctx
			// rather than the account-adjusted ctx.
			if txnOp.IsSnapOp() {
				return err
			}
			var e error // avoid contamination of error messages
			db, e = c.e.Database(c.ctx, defines.TEMPORARY_DBNAME, txnOp)
			if e != nil {
				panic(e)
			}
			rel, e = db.Relation(c.ctx, engine.GetTempTableName(n.ObjRef.SchemaName, n.TableDef.Name), c.proc)
			if e != nil {
				panic(e)
			}
		}
		tblDef = rel.GetTableDef(ctx)
	}

	// process partitioned table
	// Collect the partition relation names: only the selected partitions
	// when pruning has happened, otherwise every partition table.
	var partitionRelNames []string
	if n.TableDef.Partition != nil {
		if n.PartitionPrune != nil && n.PartitionPrune.IsPruned {
			for _, partition := range n.PartitionPrune.SelectedPartitions {
				partitionRelNames = append(partitionRelNames, partition.PartitionTableName)
			}
		} else {
			partitionRelNames = append(partitionRelNames, n.TableDef.Partition.PartitionTableNames...)
		}
	}

	// Combine the node's filters into one expression and constant-fold a
	// deep copy of it, leaving the plan's own expression untouched.
	var filterExpr *plan.Expr
	if len(n.FilterList) > 0 {
		filterExpr = colexec.RewriteFilterExprList(n.FilterList)
		filterExpr, err = plan2.ConstantFold(batch.EmptyForConstFoldBatch, plan2.DeepCopyExpr(filterExpr), c.proc, true)
		if err != nil {
			return err
		}
	}

	s.DataSource.Timestamp = ts
	s.DataSource.Attributes = attrs
	s.DataSource.TableDef = tblDef
	s.DataSource.RelationName = n.TableDef.Name
	s.DataSource.PartitionRelationNames = partitionRelNames
	s.DataSource.SchemaName = n.ObjRef.SchemaName
	s.DataSource.AccountId = n.ObjRef.GetPubInfo()
	s.DataSource.FilterExpr = filterExpr
	s.DataSource.RuntimeFilterSpecs = n.RuntimeFilterProbeList
	s.DataSource.OrderBy = n.OrderBy

	return nil
}
2251 } 2252 } 2253 2254 var filterExpr *plan.Expr 2255 if len(n.FilterList) > 0 { 2256 filterExpr = colexec.RewriteFilterExprList(n.FilterList) 2257 filterExpr, err = plan2.ConstantFold(batch.EmptyForConstFoldBatch, plan2.DeepCopyExpr(filterExpr), c.proc, true) 2258 if err != nil { 2259 return err 2260 } 2261 } 2262 2263 s.DataSource.Timestamp = ts 2264 s.DataSource.Attributes = attrs 2265 s.DataSource.TableDef = tblDef 2266 s.DataSource.RelationName = n.TableDef.Name 2267 s.DataSource.PartitionRelationNames = partitionRelNames 2268 s.DataSource.SchemaName = n.ObjRef.SchemaName 2269 s.DataSource.AccountId = n.ObjRef.GetPubInfo() 2270 s.DataSource.FilterExpr = filterExpr 2271 s.DataSource.RuntimeFilterSpecs = n.RuntimeFilterProbeList 2272 s.DataSource.OrderBy = n.OrderBy 2273 2274 return nil 2275 } 2276 2277 func (c *Compile) compileRestrict(n *plan.Node, ss []*Scope) []*Scope { 2278 if len(n.FilterList) == 0 && len(n.RuntimeFilterProbeList) == 0 { 2279 return ss 2280 } 2281 currentFirstFlag := c.anal.isFirst 2282 // for dynamic parameter, substitute param ref and const fold cast expression here to improve performance 2283 newFilters, err := plan2.ConstandFoldList(n.FilterList, c.proc, true) 2284 if err != nil { 2285 newFilters = n.FilterList 2286 } 2287 filterExpr := colexec.RewriteFilterExprList(newFilters) 2288 for i := range ss { 2289 ss[i].appendInstruction(vm.Instruction{ 2290 Op: vm.Restrict, 2291 Idx: c.anal.curr, 2292 IsFirst: currentFirstFlag, 2293 Arg: constructRestrict(n, filterExpr), 2294 }) 2295 } 2296 c.anal.isFirst = false 2297 return ss 2298 } 2299 2300 func (c *Compile) compileProjection(n *plan.Node, ss []*Scope) []*Scope { 2301 if len(n.ProjectList) == 0 { 2302 return ss 2303 } 2304 currentFirstFlag := c.anal.isFirst 2305 for i := range ss { 2306 ss[i].appendInstruction(vm.Instruction{ 2307 Op: vm.Projection, 2308 Idx: c.anal.curr, 2309 IsFirst: currentFirstFlag, 2310 Arg: constructProjection(n), 2311 }) 2312 } 2313 c.anal.isFirst = false 2314 
return ss 2315 } 2316 2317 func (c *Compile) compileUnion(n *plan.Node, ss []*Scope, children []*Scope) []*Scope { 2318 ss = append(ss, children...) 2319 rs := c.newScopeList(1, int(n.Stats.BlockNum)) 2320 gn := new(plan.Node) 2321 gn.GroupBy = make([]*plan.Expr, len(n.ProjectList)) 2322 for i := range gn.GroupBy { 2323 gn.GroupBy[i] = plan2.DeepCopyExpr(n.ProjectList[i]) 2324 gn.GroupBy[i].Typ.NotNullable = false 2325 } 2326 idx := 0 2327 for i := range rs { 2328 rs[i].Instructions = append(rs[i].Instructions, vm.Instruction{ 2329 Op: vm.Group, 2330 Idx: c.anal.curr, 2331 Arg: constructGroup(c.ctx, gn, n, i, len(rs), true, 0, c.proc), 2332 }) 2333 if isSameCN(rs[i].NodeInfo.Addr, c.addr) { 2334 idx = i 2335 } 2336 } 2337 mergeChildren := c.newMergeScope(ss) 2338 mergeChildren.appendInstruction(vm.Instruction{ 2339 Op: vm.Dispatch, 2340 Arg: constructDispatch(0, rs, c.addr, n, false), 2341 }) 2342 rs[idx].PreScopes = append(rs[idx].PreScopes, mergeChildren) 2343 return rs 2344 } 2345 2346 func (c *Compile) compileMinusAndIntersect(n *plan.Node, ss []*Scope, children []*Scope, nodeType plan.Node_NodeType) []*Scope { 2347 rs := c.newJoinScopeListWithBucket(c.newScopeList(2, int(n.Stats.BlockNum)), ss, children, n) 2348 switch nodeType { 2349 case plan.Node_MINUS: 2350 for i := range rs { 2351 rs[i].Instructions[0].Arg.Release() 2352 rs[i].Instructions[0] = vm.Instruction{ 2353 Op: vm.Minus, 2354 Idx: c.anal.curr, 2355 Arg: constructMinus(i, len(rs)), 2356 } 2357 } 2358 case plan.Node_INTERSECT: 2359 for i := range rs { 2360 rs[i].Instructions[0].Arg.Release() 2361 rs[i].Instructions[0] = vm.Instruction{ 2362 Op: vm.Intersect, 2363 Idx: c.anal.curr, 2364 Arg: constructIntersect(i, len(rs)), 2365 } 2366 } 2367 case plan.Node_INTERSECT_ALL: 2368 for i := range rs { 2369 rs[i].Instructions[0].Arg.Release() 2370 rs[i].Instructions[0] = vm.Instruction{ 2371 Op: vm.IntersectAll, 2372 Idx: c.anal.curr, 2373 Arg: constructIntersectAll(i, len(rs)), 2374 } 2375 } 2376 } 2377 
return rs 2378 } 2379 2380 func (c *Compile) compileUnionAll(ss []*Scope, children []*Scope) []*Scope { 2381 rs := c.newMergeScope(append(ss, children...)) 2382 rs.Instructions[0].Idx = c.anal.curr 2383 return []*Scope{rs} 2384 } 2385 2386 func (c *Compile) compileJoin(ctx context.Context, node, left, right *plan.Node, ss []*Scope, children []*Scope) []*Scope { 2387 if node.Stats.HashmapStats.Shuffle { 2388 return c.compileShuffleJoin(ctx, node, left, right, ss, children) 2389 } 2390 return c.compileBroadcastJoin(ctx, node, left, right, ss, children) 2391 } 2392 2393 func (c *Compile) compileShuffleJoin(ctx context.Context, node, left, right *plan.Node, lefts []*Scope, rights []*Scope) []*Scope { 2394 isEq := plan2.IsEquiJoin2(node.OnList) 2395 if !isEq { 2396 panic("shuffle join only support equal join for now!") 2397 } 2398 2399 rightTyps := make([]types.Type, len(right.ProjectList)) 2400 for i, expr := range right.ProjectList { 2401 rightTyps[i] = dupType(&expr.Typ) 2402 } 2403 2404 leftTyps := make([]types.Type, len(left.ProjectList)) 2405 for i, expr := range left.ProjectList { 2406 leftTyps[i] = dupType(&expr.Typ) 2407 } 2408 2409 parent, children := c.newShuffleJoinScopeList(lefts, rights, node) 2410 if parent != nil { 2411 lastOperator := make([]vm.Instruction, 0, len(children)) 2412 for i := range children { 2413 ilen := len(children[i].Instructions) - 1 2414 lastOperator = append(lastOperator, children[i].Instructions[ilen]) 2415 children[i].Instructions = children[i].Instructions[:ilen] 2416 } 2417 2418 defer func() { 2419 // recovery the children's last operator 2420 for i := range children { 2421 children[i].appendInstruction(lastOperator[i]) 2422 } 2423 }() 2424 } 2425 2426 switch node.JoinType { 2427 case plan.Node_INNER: 2428 for i := range children { 2429 children[i].appendInstruction(vm.Instruction{ 2430 Op: vm.Join, 2431 Idx: c.anal.curr, 2432 Arg: constructJoin(node, rightTyps, c.proc), 2433 }) 2434 } 2435 2436 case plan.Node_ANTI: 2437 if 
node.BuildOnLeft { 2438 for i := range children { 2439 children[i].appendInstruction(vm.Instruction{ 2440 Op: vm.RightAnti, 2441 Idx: c.anal.curr, 2442 Arg: constructRightAnti(node, rightTyps, 0, 0, c.proc), 2443 }) 2444 } 2445 } else { 2446 for i := range children { 2447 children[i].appendInstruction(vm.Instruction{ 2448 Op: vm.Anti, 2449 Idx: c.anal.curr, 2450 Arg: constructAnti(node, rightTyps, c.proc), 2451 }) 2452 } 2453 } 2454 2455 case plan.Node_SEMI: 2456 if node.BuildOnLeft { 2457 for i := range children { 2458 children[i].appendInstruction(vm.Instruction{ 2459 Op: vm.RightSemi, 2460 Idx: c.anal.curr, 2461 Arg: constructRightSemi(node, rightTyps, 0, 0, c.proc), 2462 }) 2463 } 2464 } else { 2465 for i := range children { 2466 children[i].appendInstruction(vm.Instruction{ 2467 Op: vm.Semi, 2468 Idx: c.anal.curr, 2469 Arg: constructSemi(node, rightTyps, c.proc), 2470 }) 2471 } 2472 } 2473 2474 case plan.Node_LEFT: 2475 for i := range children { 2476 children[i].appendInstruction(vm.Instruction{ 2477 Op: vm.Left, 2478 Idx: c.anal.curr, 2479 Arg: constructLeft(node, rightTyps, c.proc), 2480 }) 2481 } 2482 2483 case plan.Node_RIGHT: 2484 for i := range children { 2485 children[i].appendInstruction(vm.Instruction{ 2486 Op: vm.Right, 2487 Idx: c.anal.curr, 2488 Arg: constructRight(node, leftTyps, rightTyps, 0, 0, c.proc), 2489 }) 2490 } 2491 default: 2492 panic(moerr.NewNYI(ctx, fmt.Sprintf("shuffle join do not support join type '%v'", node.JoinType))) 2493 } 2494 2495 if parent != nil { 2496 return parent 2497 } 2498 return children 2499 } 2500 2501 func (c *Compile) compileBroadcastJoin(ctx context.Context, node, left, right *plan.Node, ss []*Scope, children []*Scope) []*Scope { 2502 var rs []*Scope 2503 isEq := plan2.IsEquiJoin2(node.OnList) 2504 2505 rightTyps := make([]types.Type, len(right.ProjectList)) 2506 for i, expr := range right.ProjectList { 2507 rightTyps[i] = dupType(&expr.Typ) 2508 } 2509 2510 leftTyps := make([]types.Type, len(left.ProjectList)) 
2511 for i, expr := range left.ProjectList { 2512 leftTyps[i] = dupType(&expr.Typ) 2513 } 2514 2515 switch node.JoinType { 2516 case plan.Node_INNER: 2517 rs = c.newBroadcastJoinScopeList(ss, children, node) 2518 if len(node.OnList) == 0 { 2519 for i := range rs { 2520 rs[i].appendInstruction(vm.Instruction{ 2521 Op: vm.Product, 2522 Idx: c.anal.curr, 2523 Arg: constructProduct(node, rightTyps, c.proc), 2524 }) 2525 } 2526 } else { 2527 for i := range rs { 2528 if isEq { 2529 rs[i].appendInstruction(vm.Instruction{ 2530 Op: vm.Join, 2531 Idx: c.anal.curr, 2532 Arg: constructJoin(node, rightTyps, c.proc), 2533 }) 2534 } else { 2535 rs[i].appendInstruction(vm.Instruction{ 2536 Op: vm.LoopJoin, 2537 Idx: c.anal.curr, 2538 Arg: constructLoopJoin(node, rightTyps, c.proc), 2539 }) 2540 } 2541 } 2542 } 2543 2544 case plan.Node_INDEX: 2545 rs = c.newBroadcastJoinScopeList(ss, children, node) 2546 for i := range rs { 2547 rs[i].appendInstruction(vm.Instruction{ 2548 Op: vm.IndexJoin, 2549 Idx: c.anal.curr, 2550 Arg: constructIndexJoin(node, rightTyps, c.proc), 2551 }) 2552 } 2553 2554 case plan.Node_SEMI: 2555 if isEq { 2556 if node.BuildOnLeft { 2557 rs = c.newJoinScopeListWithBucket(c.newScopeListForRightJoin(2, 1, ss), ss, children, node) 2558 for i := range rs { 2559 rs[i].appendInstruction(vm.Instruction{ 2560 Op: vm.RightSemi, 2561 Idx: c.anal.curr, 2562 Arg: constructRightSemi(node, rightTyps, uint64(i), uint64(len(rs)), c.proc), 2563 }) 2564 } 2565 } else { 2566 rs = c.newBroadcastJoinScopeList(ss, children, node) 2567 for i := range rs { 2568 rs[i].appendInstruction(vm.Instruction{ 2569 Op: vm.Semi, 2570 Idx: c.anal.curr, 2571 Arg: constructSemi(node, rightTyps, c.proc), 2572 }) 2573 } 2574 } 2575 } else { 2576 rs = c.newBroadcastJoinScopeList(ss, children, node) 2577 for i := range rs { 2578 rs[i].appendInstruction(vm.Instruction{ 2579 Op: vm.LoopSemi, 2580 Idx: c.anal.curr, 2581 Arg: constructLoopSemi(node, rightTyps, c.proc), 2582 }) 2583 } 2584 } 2585 case 
plan.Node_LEFT: 2586 rs = c.newBroadcastJoinScopeList(ss, children, node) 2587 for i := range rs { 2588 if isEq { 2589 rs[i].appendInstruction(vm.Instruction{ 2590 Op: vm.Left, 2591 Idx: c.anal.curr, 2592 Arg: constructLeft(node, rightTyps, c.proc), 2593 }) 2594 } else { 2595 rs[i].appendInstruction(vm.Instruction{ 2596 Op: vm.LoopLeft, 2597 Idx: c.anal.curr, 2598 Arg: constructLoopLeft(node, rightTyps, c.proc), 2599 }) 2600 } 2601 } 2602 case plan.Node_RIGHT: 2603 if isEq { 2604 rs = c.newJoinScopeListWithBucket(c.newScopeListForRightJoin(2, 1, ss), ss, children, node) 2605 for i := range rs { 2606 rs[i].appendInstruction(vm.Instruction{ 2607 Op: vm.Right, 2608 Idx: c.anal.curr, 2609 Arg: constructRight(node, leftTyps, rightTyps, uint64(i), uint64(len(rs)), c.proc), 2610 }) 2611 } 2612 } else { 2613 panic("dont pass any no-equal right join plan to this function,it should be changed to left join by the planner") 2614 } 2615 case plan.Node_SINGLE: 2616 rs = c.newBroadcastJoinScopeList(ss, children, node) 2617 for i := range rs { 2618 if isEq { 2619 rs[i].appendInstruction(vm.Instruction{ 2620 Op: vm.Single, 2621 Idx: c.anal.curr, 2622 Arg: constructSingle(node, rightTyps, c.proc), 2623 }) 2624 } else { 2625 rs[i].appendInstruction(vm.Instruction{ 2626 Op: vm.LoopSingle, 2627 Idx: c.anal.curr, 2628 Arg: constructLoopSingle(node, rightTyps, c.proc), 2629 }) 2630 } 2631 } 2632 case plan.Node_ANTI: 2633 if isEq { 2634 if node.BuildOnLeft { 2635 rs = c.newJoinScopeListWithBucket(c.newScopeListForRightJoin(2, 1, ss), ss, children, node) 2636 for i := range rs { 2637 rs[i].appendInstruction(vm.Instruction{ 2638 Op: vm.RightAnti, 2639 Idx: c.anal.curr, 2640 Arg: constructRightAnti(node, rightTyps, uint64(i), uint64(len(rs)), c.proc), 2641 }) 2642 } 2643 } else { 2644 rs = c.newBroadcastJoinScopeList(ss, children, node) 2645 for i := range rs { 2646 rs[i].appendInstruction(vm.Instruction{ 2647 Op: vm.Anti, 2648 Idx: c.anal.curr, 2649 Arg: constructAnti(node, rightTyps, 
c.proc), 2650 }) 2651 } 2652 } 2653 } else { 2654 rs = c.newBroadcastJoinScopeList(ss, children, node) 2655 for i := range rs { 2656 rs[i].appendInstruction(vm.Instruction{ 2657 Op: vm.LoopAnti, 2658 Idx: c.anal.curr, 2659 Arg: constructLoopAnti(node, rightTyps, c.proc), 2660 }) 2661 } 2662 } 2663 case plan.Node_MARK: 2664 rs = c.newBroadcastJoinScopeList(ss, children, node) 2665 for i := range rs { 2666 //if isEq { 2667 // rs[i].appendInstruction(vm.Instruction{ 2668 // Op: vm.Mark, 2669 // Idx: c.anal.curr, 2670 // Arg: constructMark(n, typs, c.proc), 2671 // }) 2672 //} else { 2673 rs[i].appendInstruction(vm.Instruction{ 2674 Op: vm.LoopMark, 2675 Idx: c.anal.curr, 2676 Arg: constructLoopMark(node, rightTyps, c.proc), 2677 }) 2678 //} 2679 } 2680 default: 2681 panic(moerr.NewNYI(ctx, fmt.Sprintf("join typ '%v'", node.JoinType))) 2682 } 2683 return rs 2684 } 2685 2686 func (c *Compile) compilePartition(n *plan.Node, ss []*Scope) []*Scope { 2687 currentFirstFlag := c.anal.isFirst 2688 for i := range ss { 2689 c.anal.isFirst = currentFirstFlag 2690 if containBrokenNode(ss[i]) { 2691 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 2692 } 2693 ss[i].appendInstruction(vm.Instruction{ 2694 Op: vm.Order, 2695 Idx: c.anal.curr, 2696 IsFirst: c.anal.isFirst, 2697 Arg: constructOrder(n), 2698 }) 2699 } 2700 c.anal.isFirst = false 2701 2702 rs := c.newMergeScope(ss) 2703 rs.Instructions[0].Arg.Release() 2704 rs.Instructions[0] = vm.Instruction{ 2705 Op: vm.Partition, 2706 Idx: c.anal.curr, 2707 Arg: constructPartition(n), 2708 } 2709 return []*Scope{rs} 2710 } 2711 2712 func (c *Compile) compileSort(n *plan.Node, ss []*Scope) []*Scope { 2713 switch { 2714 case n.Limit != nil && n.Offset == nil && len(n.OrderBy) > 0: // top 2715 vec, err := colexec.EvalExpressionOnce(c.proc, n.Limit, []*batch.Batch{constBat}) 2716 if err != nil { 2717 panic(err) 2718 } 2719 defer vec.Free(c.proc.Mp()) 2720 return c.compileTop(n, vector.MustFixedCol[int64](vec)[0], ss) 2721 2722 case n.Limit == nil 
&& n.Offset == nil && len(n.OrderBy) > 0: // top 2723 return c.compileOrder(n, ss) 2724 2725 case n.Limit != nil && n.Offset != nil && len(n.OrderBy) > 0: 2726 // get limit 2727 vec1, err := colexec.EvalExpressionOnce(c.proc, n.Limit, []*batch.Batch{constBat}) 2728 if err != nil { 2729 panic(err) 2730 } 2731 defer vec1.Free(c.proc.Mp()) 2732 2733 // get offset 2734 vec2, err := colexec.EvalExpressionOnce(c.proc, n.Offset, []*batch.Batch{constBat}) 2735 if err != nil { 2736 panic(err) 2737 } 2738 defer vec2.Free(c.proc.Mp()) 2739 2740 limit, offset := vector.MustFixedCol[int64](vec1)[0], vector.MustFixedCol[int64](vec2)[0] 2741 topN := limit + offset 2742 if topN <= 8192*2 { 2743 // if n is small, convert `order by col limit m offset n` to `top m+n offset n` 2744 return c.compileOffset(n, c.compileTop(n, topN, ss)) 2745 } 2746 return c.compileLimit(n, c.compileOffset(n, c.compileOrder(n, ss))) 2747 2748 case n.Limit == nil && n.Offset != nil && len(n.OrderBy) > 0: // order and offset 2749 return c.compileOffset(n, c.compileOrder(n, ss)) 2750 2751 case n.Limit != nil && n.Offset == nil && len(n.OrderBy) == 0: // limit 2752 return c.compileLimit(n, ss) 2753 2754 case n.Limit == nil && n.Offset != nil && len(n.OrderBy) == 0: // offset 2755 return c.compileOffset(n, ss) 2756 2757 case n.Limit != nil && n.Offset != nil && len(n.OrderBy) == 0: // limit and offset 2758 return c.compileLimit(n, c.compileOffset(n, ss)) 2759 2760 default: 2761 return ss 2762 } 2763 } 2764 2765 func containBrokenNode(s *Scope) bool { 2766 for i := range s.Instructions { 2767 if s.Instructions[i].IsBrokenNode() { 2768 return true 2769 } 2770 } 2771 return false 2772 } 2773 2774 func (c *Compile) compileTop(n *plan.Node, topN int64, ss []*Scope) []*Scope { 2775 // use topN TO make scope. 
2776 currentFirstFlag := c.anal.isFirst 2777 for i := range ss { 2778 c.anal.isFirst = currentFirstFlag 2779 if containBrokenNode(ss[i]) { 2780 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 2781 } 2782 ss[i].appendInstruction(vm.Instruction{ 2783 Op: vm.Top, 2784 Idx: c.anal.curr, 2785 IsFirst: c.anal.isFirst, 2786 Arg: constructTop(n, topN), 2787 }) 2788 } 2789 c.anal.isFirst = false 2790 2791 rs := c.newMergeScope(ss) 2792 rs.Instructions[0].Arg.Release() 2793 rs.Instructions[0] = vm.Instruction{ 2794 Op: vm.MergeTop, 2795 Idx: c.anal.curr, 2796 Arg: constructMergeTop(n, topN), 2797 } 2798 return []*Scope{rs} 2799 } 2800 2801 func (c *Compile) compileOrder(n *plan.Node, ss []*Scope) []*Scope { 2802 currentFirstFlag := c.anal.isFirst 2803 for i := range ss { 2804 c.anal.isFirst = currentFirstFlag 2805 if containBrokenNode(ss[i]) { 2806 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 2807 } 2808 ss[i].appendInstruction(vm.Instruction{ 2809 Op: vm.Order, 2810 Idx: c.anal.curr, 2811 IsFirst: c.anal.isFirst, 2812 Arg: constructOrder(n), 2813 }) 2814 } 2815 c.anal.isFirst = false 2816 2817 rs := c.newMergeScope(ss) 2818 rs.Instructions[0].Arg.Release() 2819 rs.Instructions[0] = vm.Instruction{ 2820 Op: vm.MergeOrder, 2821 Idx: c.anal.curr, 2822 Arg: constructMergeOrder(n), 2823 } 2824 return []*Scope{rs} 2825 } 2826 2827 func (c *Compile) compileWin(n *plan.Node, ss []*Scope) []*Scope { 2828 rs := c.newMergeScope(ss) 2829 rs.Instructions[0].Arg.Release() 2830 rs.Instructions[0] = vm.Instruction{ 2831 Op: vm.Window, 2832 Idx: c.anal.curr, 2833 Arg: constructWindow(c.ctx, n, c.proc), 2834 } 2835 return []*Scope{rs} 2836 } 2837 2838 func (c *Compile) compileTimeWin(n *plan.Node, ss []*Scope) []*Scope { 2839 rs := c.newMergeScope(ss) 2840 rs.Instructions[0].Arg.Release() 2841 rs.Instructions[0] = vm.Instruction{ 2842 Op: vm.TimeWin, 2843 Idx: c.anal.curr, 2844 Arg: constructTimeWindow(c.ctx, n), 2845 } 2846 return []*Scope{rs} 2847 } 2848 2849 func (c *Compile) compileFill(n 
*plan.Node, ss []*Scope) []*Scope { 2850 rs := c.newMergeScope(ss) 2851 rs.Instructions[0].Arg.Release() 2852 rs.Instructions[0] = vm.Instruction{ 2853 Op: vm.Fill, 2854 Idx: c.anal.curr, 2855 Arg: constructFill(n), 2856 } 2857 return []*Scope{rs} 2858 } 2859 2860 func (c *Compile) compileOffset(n *plan.Node, ss []*Scope) []*Scope { 2861 currentFirstFlag := c.anal.isFirst 2862 for i := range ss { 2863 if containBrokenNode(ss[i]) { 2864 c.anal.isFirst = currentFirstFlag 2865 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 2866 } 2867 } 2868 2869 rs := c.newMergeScope(ss) 2870 rs.Instructions[0].Arg.Release() 2871 rs.Instructions[0] = vm.Instruction{ 2872 Op: vm.MergeOffset, 2873 Idx: c.anal.curr, 2874 Arg: constructMergeOffset(n, c.proc), 2875 } 2876 return []*Scope{rs} 2877 } 2878 2879 func (c *Compile) compileLimit(n *plan.Node, ss []*Scope) []*Scope { 2880 currentFirstFlag := c.anal.isFirst 2881 for i := range ss { 2882 c.anal.isFirst = currentFirstFlag 2883 if containBrokenNode(ss[i]) { 2884 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 2885 } 2886 ss[i].appendInstruction(vm.Instruction{ 2887 Op: vm.Limit, 2888 Idx: c.anal.curr, 2889 IsFirst: c.anal.isFirst, 2890 Arg: constructLimit(n, c.proc), 2891 }) 2892 } 2893 c.anal.isFirst = false 2894 2895 rs := c.newMergeScope(ss) 2896 rs.Instructions[0].Arg.Release() 2897 rs.Instructions[0] = vm.Instruction{ 2898 Op: vm.MergeLimit, 2899 Idx: c.anal.curr, 2900 Arg: constructMergeLimit(n, c.proc), 2901 } 2902 return []*Scope{rs} 2903 } 2904 2905 func (c *Compile) compileFuzzyFilter(n *plan.Node, ns []*plan.Node, left []*Scope, right []*Scope) ([]*Scope, error) { 2906 l := c.newMergeScope(left) 2907 r := c.newMergeScope(right) 2908 all := []*Scope{l, r} 2909 rs := c.newMergeScope(all) 2910 2911 rs.Instructions[0].Idx = c.anal.curr 2912 2913 arg := constructFuzzyFilter(c, n, ns[n.Children[1]]) 2914 2915 rs.appendInstruction(vm.Instruction{ 2916 Op: vm.FuzzyFilter, 2917 Idx: c.anal.curr, 2918 Arg: arg, 2919 }) 2920 2921 outData, err := 
newFuzzyCheck(n) 2922 if err != nil { 2923 return nil, err 2924 } 2925 c.fuzzys = append(c.fuzzys, outData) 2926 // wrap the collision key into c.fuzzy, for more information, 2927 // please refer fuzzyCheck.go 2928 rs.appendInstruction(vm.Instruction{ 2929 Op: vm.Output, 2930 Arg: output.NewArgument(). 2931 WithFunc( 2932 func(bat *batch.Batch) error { 2933 if bat == nil || bat.IsEmpty() { 2934 return nil 2935 } 2936 // the batch will contain the key that fuzzyCheck 2937 if err := outData.fill(c.ctx, bat); err != nil { 2938 return err 2939 } 2940 2941 return nil 2942 }), 2943 }) 2944 2945 return []*Scope{rs}, nil 2946 } 2947 2948 func (c *Compile) compileSample(n *plan.Node, ss []*Scope) []*Scope { 2949 for i := range ss { 2950 if containBrokenNode(ss[i]) { 2951 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 2952 } 2953 ss[i].appendInstruction(vm.Instruction{ 2954 Op: vm.Sample, 2955 Idx: c.anal.curr, 2956 IsFirst: c.anal.isFirst, 2957 Arg: constructSample(n, len(ss) != 1), 2958 }) 2959 } 2960 c.anal.isFirst = false 2961 2962 rs := c.newMergeScope(ss) 2963 if len(ss) == 1 { 2964 return []*Scope{rs} 2965 } 2966 2967 // should sample again if sample by rows. 2968 if n.SampleFunc.Rows != plan2.NotSampleByRows { 2969 rs.appendInstruction(vm.Instruction{ 2970 Op: vm.Sample, 2971 Idx: c.anal.curr, 2972 IsFirst: c.anal.isFirst, 2973 Arg: sample.NewMergeSample(constructSample(n, true), false), 2974 }) 2975 } 2976 return []*Scope{rs} 2977 } 2978 2979 func (c *Compile) compileMergeGroup(n *plan.Node, ss []*Scope, ns []*plan.Node, hasDistinct bool) []*Scope { 2980 currentFirstFlag := c.anal.isFirst 2981 2982 // for less memory usage while merge group, 2983 // we do not run the group-operator in parallel once this has a distinct aggregation. 2984 // because the parallel need to store all the source data in the memory for merging. 
2985 // we construct a pipeline like the following description for this case: 2986 // 2987 // all the operators from ss[0] to ss[last] send the data to only one group-operator. 2988 // this group-operator sends its result to the merge-group-operator. 2989 // todo: I cannot remove the merge-group action directly, because the merge-group action is used to fill the partial result. 2990 if hasDistinct { 2991 for i := range ss { 2992 c.anal.isFirst = currentFirstFlag 2993 if containBrokenNode(ss[i]) { 2994 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 2995 } 2996 } 2997 c.anal.isFirst = false 2998 2999 mergeToGroup := c.newMergeScope(ss) 3000 mergeToGroup.appendInstruction( 3001 vm.Instruction{ 3002 Op: vm.Group, 3003 Idx: c.anal.curr, 3004 IsFirst: c.anal.isFirst, 3005 Arg: constructGroup(c.ctx, n, ns[n.Children[0]], 0, 0, false, 0, c.proc), 3006 }) 3007 3008 rs := c.newMergeScope([]*Scope{mergeToGroup}) 3009 arg := constructMergeGroup(true) 3010 if ss[0].PartialResults != nil { 3011 arg.PartialResults = ss[0].PartialResults 3012 arg.PartialResultTypes = ss[0].PartialResultTypes 3013 ss[0].PartialResults = nil 3014 ss[0].PartialResultTypes = nil 3015 } 3016 rs.Instructions[0].Arg.Release() 3017 rs.Instructions[0] = vm.Instruction{ 3018 Op: vm.MergeGroup, 3019 Idx: c.anal.curr, 3020 Arg: arg, 3021 } 3022 return []*Scope{rs} 3023 } 3024 3025 for i := range ss { 3026 c.anal.isFirst = currentFirstFlag 3027 if containBrokenNode(ss[i]) { 3028 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 3029 } 3030 ss[i].appendInstruction(vm.Instruction{ 3031 Op: vm.Group, 3032 Idx: c.anal.curr, 3033 IsFirst: c.anal.isFirst, 3034 Arg: constructGroup(c.ctx, n, ns[n.Children[0]], 0, 0, false, 0, c.proc), 3035 }) 3036 } 3037 c.anal.isFirst = false 3038 3039 rs := c.newMergeScope(ss) 3040 arg := constructMergeGroup(true) 3041 if ss[0].PartialResults != nil { 3042 arg.PartialResults = ss[0].PartialResults 3043 arg.PartialResultTypes = ss[0].PartialResultTypes 3044 ss[0].PartialResults = nil 3045 
ss[0].PartialResultTypes = nil 3046 } 3047 rs.Instructions[0].Arg.Release() 3048 rs.Instructions[0] = vm.Instruction{ 3049 Op: vm.MergeGroup, 3050 Idx: c.anal.curr, 3051 Arg: arg, 3052 } 3053 return []*Scope{rs} 3054 } 3055 3056 // shuffle and dispatch must stick together 3057 func (c *Compile) constructShuffleAndDispatch(ss, children []*Scope, n *plan.Node) { 3058 j := 0 3059 for i := range ss { 3060 if containBrokenNode(ss[i]) { 3061 isEnd := ss[i].IsEnd 3062 ss[i] = c.newMergeScope([]*Scope{ss[i]}) 3063 ss[i].IsEnd = isEnd 3064 } 3065 if !ss[i].IsEnd { 3066 ss[i].appendInstruction(vm.Instruction{ 3067 Op: vm.Shuffle, 3068 Arg: constructShuffleGroupArg(children, n), 3069 }) 3070 3071 ss[i].appendInstruction(vm.Instruction{ 3072 Op: vm.Dispatch, 3073 Arg: constructDispatch(j, children, ss[i].NodeInfo.Addr, n, false), 3074 }) 3075 j++ 3076 ss[i].IsEnd = true 3077 } 3078 } 3079 } 3080 3081 func (c *Compile) compileShuffleGroup(n *plan.Node, ss []*Scope, ns []*plan.Node) []*Scope { 3082 currentIsFirst := c.anal.isFirst 3083 c.anal.isFirst = false 3084 3085 if len(c.cnList) > 1 { 3086 n.Stats.HashmapStats.ShuffleMethod = plan.ShuffleMethod_Normal 3087 } 3088 3089 switch n.Stats.HashmapStats.ShuffleMethod { 3090 case plan.ShuffleMethod_Reuse: 3091 for i := range ss { 3092 ss[i].appendInstruction(vm.Instruction{ 3093 Op: vm.Group, 3094 Idx: c.anal.curr, 3095 IsFirst: c.anal.isFirst, 3096 Arg: constructGroup(c.ctx, n, ns[n.Children[0]], 0, 0, true, len(ss), c.proc), 3097 }) 3098 } 3099 ss = c.compileProjection(n, c.compileRestrict(n, ss)) 3100 return ss 3101 3102 case plan.ShuffleMethod_Reshuffle: 3103 3104 dop := plan2.GetShuffleDop() 3105 parent, children := c.newScopeListForShuffleGroup(1, dop) 3106 // saving the last operator of all children to make sure the connector setting in 3107 // the right place 3108 lastOperator := make([]vm.Instruction, 0, len(children)) 3109 for i := range children { 3110 ilen := len(children[i].Instructions) - 1 3111 lastOperator = 
append(lastOperator, children[i].Instructions[ilen]) 3112 children[i].Instructions = children[i].Instructions[:ilen] 3113 } 3114 3115 for i := range children { 3116 children[i].appendInstruction(vm.Instruction{ 3117 Op: vm.Group, 3118 Idx: c.anal.curr, 3119 IsFirst: currentIsFirst, 3120 Arg: constructGroup(c.ctx, n, ns[n.Children[0]], 0, 0, true, len(children), c.proc), 3121 }) 3122 } 3123 children = c.compileProjection(n, c.compileRestrict(n, children)) 3124 // recovery the children's last operator 3125 for i := range children { 3126 children[i].appendInstruction(lastOperator[i]) 3127 } 3128 3129 for i := range ss { 3130 ss[i].appendInstruction(vm.Instruction{ 3131 Op: vm.Shuffle, 3132 Idx: c.anal.curr, 3133 IsFirst: currentIsFirst, 3134 Arg: constructShuffleGroupArg(children, n), 3135 }) 3136 } 3137 3138 mergeScopes := c.newMergeScope(ss) 3139 mergeScopes.appendInstruction(vm.Instruction{ 3140 Op: vm.Dispatch, 3141 Idx: c.anal.curr, 3142 IsFirst: currentIsFirst, 3143 Arg: constructDispatch(0, children, c.addr, n, false), 3144 }) 3145 3146 appendIdx := 0 3147 for i := range children { 3148 if isSameCN(mergeScopes.NodeInfo.Addr, children[i].NodeInfo.Addr) { 3149 appendIdx = i 3150 break 3151 } 3152 } 3153 children[appendIdx].PreScopes = append(children[appendIdx].PreScopes, mergeScopes) 3154 3155 return parent 3156 default: 3157 dop := plan2.GetShuffleDop() 3158 parent, children := c.newScopeListForShuffleGroup(validScopeCount(ss), dop) 3159 c.constructShuffleAndDispatch(ss, children, n) 3160 3161 // saving the last operator of all children to make sure the connector setting in 3162 // the right place 3163 lastOperator := make([]vm.Instruction, 0, len(children)) 3164 for i := range children { 3165 ilen := len(children[i].Instructions) - 1 3166 lastOperator = append(lastOperator, children[i].Instructions[ilen]) 3167 children[i].Instructions = children[i].Instructions[:ilen] 3168 } 3169 3170 for i := range children { 3171 children[i].appendInstruction(vm.Instruction{ 
3172 Op: vm.Group, 3173 Idx: c.anal.curr, 3174 IsFirst: currentIsFirst, 3175 Arg: constructGroup(c.ctx, n, ns[n.Children[0]], 0, 0, true, len(children), c.proc), 3176 }) 3177 } 3178 children = c.compileProjection(n, c.compileRestrict(n, children)) 3179 // recovery the children's last operator 3180 for i := range children { 3181 children[i].appendInstruction(lastOperator[i]) 3182 } 3183 3184 for i := range ss { 3185 appended := false 3186 for j := range children { 3187 if isSameCN(children[j].NodeInfo.Addr, ss[i].NodeInfo.Addr) { 3188 children[j].PreScopes = append(children[j].PreScopes, ss[i]) 3189 appended = true 3190 break 3191 } 3192 } 3193 if !appended { 3194 children[0].PreScopes = append(children[0].PreScopes, ss[i]) 3195 } 3196 } 3197 3198 return parent 3199 // return []*Scope{c.newMergeScope(parent)} 3200 } 3201 } 3202 3203 // DeleteMergeScope need to assure this: 3204 // one block can be only deleted by one and the same 3205 // CN, so we need to transfer the rows from the 3206 // the same block to one and the same CN to perform 3207 // the deletion operators. 
3208 func (c *Compile) newDeleteMergeScope(arg *deletion.Argument, ss []*Scope) *Scope { 3209 // Todo: implemet delete merge 3210 ss2 := make([]*Scope, 0, len(ss)) 3211 // ends := make([]*Scope, 0, len(ss)) 3212 for _, s := range ss { 3213 if s.IsEnd { 3214 // ends = append(ends, s) 3215 continue 3216 } 3217 ss2 = append(ss2, s) 3218 } 3219 3220 rs := make([]*Scope, 0, len(ss2)) 3221 uuids := make([]uuid.UUID, 0, len(ss2)) 3222 var uid uuid.UUID 3223 for i := 0; i < len(ss2); i++ { 3224 rs = append(rs, newScope(Merge)) 3225 uid, _ = uuid.NewV7() 3226 uuids = append(uuids, uid) 3227 } 3228 3229 // for every scope, it should dispatch its 3230 // batch to other cn 3231 for i := 0; i < len(ss2); i++ { 3232 constructDeleteDispatchAndLocal(i, rs, ss2, uuids, c) 3233 } 3234 delete := &vm.Instruction{ 3235 Op: vm.Deletion, 3236 Arg: arg, 3237 } 3238 for i := range rs { 3239 // use distributed delete 3240 arg.RemoteDelete = true 3241 // maybe just copy only once? 3242 arg.SegmentMap = colexec.Get().GetCnSegmentMap() 3243 arg.IBucket = uint32(i) 3244 arg.Nbucket = uint32(len(rs)) 3245 rs[i].Instructions = append( 3246 rs[i].Instructions, 3247 dupInstruction(delete, nil, 0)) 3248 } 3249 return c.newMergeScope(rs) 3250 } 3251 3252 func (c *Compile) newMergeScope(ss []*Scope) *Scope { 3253 rs := newScope(Merge) 3254 rs.NodeInfo = engine.Node{Addr: c.addr, Mcpu: ncpu} 3255 rs.PreScopes = ss 3256 cnt := 0 3257 for _, s := range ss { 3258 if s.IsEnd { 3259 continue 3260 } 3261 cnt++ 3262 } 3263 rs.Proc = process.NewWithAnalyze(c.proc, c.ctx, cnt, c.anal.Nodes()) 3264 if len(ss) > 0 { 3265 rs.Proc.LoadTag = ss[0].Proc.LoadTag 3266 } 3267 rs.Instructions = append(rs.Instructions, vm.Instruction{ 3268 Op: vm.Merge, 3269 Idx: c.anal.curr, 3270 IsFirst: c.anal.isFirst, 3271 Arg: merge.NewArgument(), 3272 }) 3273 c.anal.isFirst = false 3274 3275 j := 0 3276 for i := range ss { 3277 if !ss[i].IsEnd { 3278 ss[i].appendInstruction(vm.Instruction{ 3279 Op: vm.Connector, 3280 Arg: 
connector.NewArgument(). 3281 WithReg(rs.Proc.Reg.MergeReceivers[j]), 3282 }) 3283 j++ 3284 } 3285 } 3286 return rs 3287 } 3288 3289 func (c *Compile) newMergeRemoteScope(ss []*Scope, nodeinfo engine.Node) *Scope { 3290 rs := c.newMergeScope(ss) 3291 // reset rs's info to remote 3292 rs.Magic = Remote 3293 rs.NodeInfo.Addr = nodeinfo.Addr 3294 rs.NodeInfo.Mcpu = nodeinfo.Mcpu 3295 3296 return rs 3297 } 3298 3299 func (c *Compile) newScopeList(childrenCount int, blocks int) []*Scope { 3300 var ss []*Scope 3301 3302 currentFirstFlag := c.anal.isFirst 3303 for _, n := range c.cnList { 3304 c.anal.isFirst = currentFirstFlag 3305 ss = append(ss, c.newScopeListWithNode(c.generateCPUNumber(n.Mcpu, blocks), childrenCount, n.Addr)...) 3306 } 3307 return ss 3308 } 3309 3310 func (c *Compile) newScopeListForShuffleGroup(childrenCount int, blocks int) ([]*Scope, []*Scope) { 3311 parent := make([]*Scope, 0, len(c.cnList)) 3312 children := make([]*Scope, 0, len(c.cnList)) 3313 3314 currentFirstFlag := c.anal.isFirst 3315 for _, n := range c.cnList { 3316 c.anal.isFirst = currentFirstFlag 3317 scopes := c.newScopeListWithNode(c.generateCPUNumber(n.Mcpu, blocks), childrenCount, n.Addr) 3318 for _, s := range scopes { 3319 for _, rr := range s.Proc.Reg.MergeReceivers { 3320 rr.Ch = make(chan *batch.Batch, shuffleChannelBufferSize) 3321 } 3322 } 3323 children = append(children, scopes...) 
3324 parent = append(parent, c.newMergeRemoteScope(scopes, n)) 3325 } 3326 return parent, children 3327 } 3328 3329 func (c *Compile) newScopeListWithNode(mcpu, childrenCount int, addr string) []*Scope { 3330 ss := make([]*Scope, mcpu) 3331 currentFirstFlag := c.anal.isFirst 3332 for i := range ss { 3333 ss[i] = newScope(Remote) 3334 ss[i].Magic = Remote 3335 ss[i].NodeInfo.Addr = addr 3336 ss[i].NodeInfo.Mcpu = 1 // ss is already the mcpu length so we don't need to parallel it 3337 ss[i].Proc = process.NewWithAnalyze(c.proc, c.ctx, childrenCount, c.anal.Nodes()) 3338 ss[i].Instructions = append(ss[i].Instructions, vm.Instruction{ 3339 Op: vm.Merge, 3340 Idx: c.anal.curr, 3341 IsFirst: currentFirstFlag, 3342 Arg: merge.NewArgument(), 3343 }) 3344 } 3345 c.anal.isFirst = false 3346 return ss 3347 } 3348 3349 func (c *Compile) newScopeListForRightJoin(childrenCount int, bIdx int, leftScopes []*Scope) []*Scope { 3350 /* 3351 ss := make([]*Scope, 0, len(leftScopes)) 3352 for i := range leftScopes { 3353 tmp := new(Scope) 3354 tmp.Magic = Remote 3355 tmp.IsJoin = true 3356 tmp.Proc = process.NewWithAnalyze(c.proc, c.ctx, childrenCount, c.anal.Nodes()) 3357 tmp.NodeInfo = leftScopes[i].NodeInfo 3358 ss = append(ss, tmp) 3359 } 3360 */ 3361 // Force right join to execute on one CN due to right join issue 3362 // Will fix in future 3363 maxCpuNum := 1 3364 for _, s := range leftScopes { 3365 if s.NodeInfo.Mcpu > maxCpuNum { 3366 maxCpuNum = s.NodeInfo.Mcpu 3367 } 3368 } 3369 3370 ss := make([]*Scope, 1) 3371 ss[0] = newScope(Remote) 3372 ss[0].IsJoin = true 3373 ss[0].Proc = process.NewWithAnalyze(c.proc, c.ctx, childrenCount, c.anal.Nodes()) 3374 ss[0].NodeInfo = engine.Node{Addr: c.addr, Mcpu: c.generateCPUNumber(ncpu, maxCpuNum)} 3375 ss[0].BuildIdx = bIdx 3376 return ss 3377 } 3378 3379 func (c *Compile) newJoinScopeListWithBucket(rs, ss, children []*Scope, n *plan.Node) []*Scope { 3380 currentFirstFlag := c.anal.isFirst 3381 // construct left 3382 leftMerge := 
c.newMergeScope(ss) 3383 leftMerge.appendInstruction(vm.Instruction{ 3384 Op: vm.Dispatch, 3385 Arg: constructDispatch(0, rs, c.addr, n, false), 3386 }) 3387 leftMerge.IsEnd = true 3388 3389 // construct right 3390 c.anal.isFirst = currentFirstFlag 3391 rightMerge := c.newMergeScope(children) 3392 rightMerge.appendInstruction(vm.Instruction{ 3393 Op: vm.Dispatch, 3394 Arg: constructDispatch(1, rs, c.addr, n, false), 3395 }) 3396 rightMerge.IsEnd = true 3397 3398 // append left and right to correspond rs 3399 idx := 0 3400 for i := range rs { 3401 if isSameCN(rs[i].NodeInfo.Addr, c.addr) { 3402 idx = i 3403 } 3404 } 3405 rs[idx].PreScopes = append(rs[idx].PreScopes, leftMerge, rightMerge) 3406 return rs 3407 } 3408 3409 func (c *Compile) newBroadcastJoinScopeList(ss []*Scope, children []*Scope, n *plan.Node) []*Scope { 3410 length := len(ss) 3411 rs := make([]*Scope, length) 3412 idx := 0 3413 for i := range ss { 3414 if ss[i].IsEnd { 3415 rs[i] = ss[i] 3416 continue 3417 } 3418 rs[i] = newScope(Remote) 3419 rs[i].IsJoin = true 3420 rs[i].NodeInfo = ss[i].NodeInfo 3421 rs[i].BuildIdx = 1 3422 if isSameCN(rs[i].NodeInfo.Addr, c.addr) { 3423 idx = i 3424 } 3425 rs[i].PreScopes = []*Scope{ss[i]} 3426 rs[i].Proc = process.NewWithAnalyze(c.proc, c.ctx, 2, c.anal.Nodes()) 3427 ss[i].appendInstruction(vm.Instruction{ 3428 Op: vm.Connector, 3429 Arg: connector.NewArgument(). 
3430 WithReg(rs[i].Proc.Reg.MergeReceivers[0]), 3431 }) 3432 } 3433 3434 // all join's first flag will setting in newLeftScope and newRightScope 3435 // so we set it to false now 3436 c.anal.isFirst = false 3437 mergeChildren := c.newMergeScope(children) 3438 3439 mergeChildren.appendInstruction(vm.Instruction{ 3440 Op: vm.Dispatch, 3441 Arg: constructDispatch(1, rs, c.addr, n, false), 3442 }) 3443 mergeChildren.IsEnd = true 3444 rs[idx].PreScopes = append(rs[idx].PreScopes, mergeChildren) 3445 3446 return rs 3447 } 3448 3449 func (c *Compile) newShuffleJoinScopeList(left, right []*Scope, n *plan.Node) ([]*Scope, []*Scope) { 3450 single := len(c.cnList) <= 1 3451 if single { 3452 n.Stats.HashmapStats.ShuffleTypeForMultiCN = plan.ShuffleTypeForMultiCN_Simple 3453 } 3454 3455 var parent []*Scope 3456 children := make([]*Scope, 0, len(c.cnList)) 3457 lnum := len(left) 3458 sum := lnum + len(right) 3459 for _, n := range c.cnList { 3460 dop := c.generateCPUNumber(n.Mcpu, plan2.GetShuffleDop()) 3461 ss := make([]*Scope, dop) 3462 for i := range ss { 3463 ss[i] = newScope(Remote) 3464 ss[i].IsJoin = true 3465 ss[i].NodeInfo.Addr = n.Addr 3466 ss[i].NodeInfo.Mcpu = 1 3467 ss[i].Proc = process.NewWithAnalyze(c.proc, c.ctx, sum, c.anal.Nodes()) 3468 ss[i].BuildIdx = lnum 3469 ss[i].ShuffleCnt = dop 3470 for _, rr := range ss[i].Proc.Reg.MergeReceivers { 3471 rr.Ch = make(chan *batch.Batch, shuffleChannelBufferSize) 3472 } 3473 } 3474 children = append(children, ss...) 
3475 if !single { 3476 parent = append(parent, c.newMergeRemoteScope(ss, n)) 3477 } 3478 } 3479 3480 currentFirstFlag := c.anal.isFirst 3481 for i, scp := range left { 3482 scp.appendInstruction(vm.Instruction{ 3483 Op: vm.Shuffle, 3484 Idx: c.anal.curr, 3485 Arg: constructShuffleJoinArg(children, n, true), 3486 }) 3487 scp.appendInstruction(vm.Instruction{ 3488 Op: vm.Dispatch, 3489 Arg: constructDispatch(i, children, scp.NodeInfo.Addr, n, true), 3490 }) 3491 scp.IsEnd = true 3492 3493 appended := false 3494 for _, js := range children { 3495 if isSameCN(js.NodeInfo.Addr, scp.NodeInfo.Addr) { 3496 js.PreScopes = append(js.PreScopes, scp) 3497 appended = true 3498 break 3499 } 3500 } 3501 if !appended { 3502 logutil.Errorf("no same addr scope to append left scopes") 3503 children[0].PreScopes = append(children[0].PreScopes, scp) 3504 } 3505 } 3506 3507 c.anal.isFirst = currentFirstFlag 3508 for i, scp := range right { 3509 scp.appendInstruction(vm.Instruction{ 3510 Op: vm.Shuffle, 3511 Idx: c.anal.curr, 3512 Arg: constructShuffleJoinArg(children, n, false), 3513 }) 3514 scp.appendInstruction(vm.Instruction{ 3515 Op: vm.Dispatch, 3516 Arg: constructDispatch(i+lnum, children, scp.NodeInfo.Addr, n, false), 3517 }) 3518 scp.IsEnd = true 3519 3520 appended := false 3521 for _, js := range children { 3522 if isSameCN(js.NodeInfo.Addr, scp.NodeInfo.Addr) { 3523 js.PreScopes = append(js.PreScopes, scp) 3524 appended = true 3525 break 3526 } 3527 } 3528 if !appended { 3529 logutil.Errorf("no same addr scope to append right scopes") 3530 children[0].PreScopes = append(children[0].PreScopes, scp) 3531 } 3532 } 3533 return parent, children 3534 } 3535 3536 func (c *Compile) newJoinProbeScope(s *Scope, ss []*Scope) *Scope { 3537 rs := newScope(Merge) 3538 rs.appendInstruction(vm.Instruction{ 3539 Op: vm.Merge, 3540 Idx: s.Instructions[0].Idx, 3541 IsFirst: true, 3542 Arg: merge.NewArgument(), 3543 }) 3544 rs.Proc = process.NewWithAnalyze(s.Proc, s.Proc.Ctx, s.BuildIdx, 
c.anal.Nodes()) 3545 for i := 0; i < s.BuildIdx; i++ { 3546 regTransplant(s, rs, i, i) 3547 } 3548 3549 if ss == nil { 3550 s.Proc.Reg.MergeReceivers[0] = &process.WaitRegister{ 3551 Ctx: s.Proc.Ctx, 3552 Ch: make(chan *batch.Batch, shuffleChannelBufferSize), 3553 } 3554 rs.appendInstruction(vm.Instruction{ 3555 Op: vm.Connector, 3556 Arg: connector.NewArgument(). 3557 WithReg(s.Proc.Reg.MergeReceivers[0]), 3558 }) 3559 s.Proc.Reg.MergeReceivers = append(s.Proc.Reg.MergeReceivers[:1], s.Proc.Reg.MergeReceivers[s.BuildIdx:]...) 3560 s.BuildIdx = 1 3561 } else { 3562 rs.appendInstruction(vm.Instruction{ 3563 Op: vm.Dispatch, 3564 Arg: constructDispatchLocal(false, false, false, extraRegisters(ss, 0)), 3565 }) 3566 } 3567 rs.IsEnd = true 3568 3569 return rs 3570 } 3571 3572 func (c *Compile) newJoinBuildScope(s *Scope, ss []*Scope) *Scope { 3573 rs := newScope(Merge) 3574 buildLen := len(s.Proc.Reg.MergeReceivers) - s.BuildIdx 3575 rs.Proc = process.NewWithAnalyze(s.Proc, s.Proc.Ctx, buildLen, c.anal.Nodes()) 3576 for i := 0; i < buildLen; i++ { 3577 regTransplant(s, rs, i+s.BuildIdx, i) 3578 } 3579 3580 rs.appendInstruction(constructJoinBuildInstruction(c, s.Instructions[0], s.ShuffleCnt, ss != nil)) 3581 3582 if ss == nil { // unparallel, send the hashtable to join scope directly 3583 s.Proc.Reg.MergeReceivers[s.BuildIdx] = &process.WaitRegister{ 3584 Ctx: s.Proc.Ctx, 3585 Ch: make(chan *batch.Batch, 1), 3586 } 3587 rs.appendInstruction(vm.Instruction{ 3588 Op: vm.Connector, 3589 Arg: connector.NewArgument(). 
3590 WithReg(s.Proc.Reg.MergeReceivers[s.BuildIdx]), 3591 }) 3592 s.Proc.Reg.MergeReceivers = s.Proc.Reg.MergeReceivers[:s.BuildIdx+1] 3593 } else { 3594 rs.appendInstruction(vm.Instruction{ 3595 Op: vm.Dispatch, 3596 Arg: constructDispatchLocal(true, false, false, extraRegisters(ss, s.BuildIdx)), 3597 }) 3598 } 3599 rs.IsEnd = true 3600 3601 return rs 3602 } 3603 3604 // Transplant the source's RemoteReceivRegInfos which index equal to sourceIdx to 3605 // target with new index targetIdx 3606 func regTransplant(source, target *Scope, sourceIdx, targetIdx int) { 3607 target.Proc.Reg.MergeReceivers[targetIdx] = source.Proc.Reg.MergeReceivers[sourceIdx] 3608 target.Proc.Reg.MergeReceivers[targetIdx].Ctx = target.Proc.Ctx 3609 i := 0 3610 for i < len(source.RemoteReceivRegInfos) { 3611 op := &source.RemoteReceivRegInfos[i] 3612 if op.Idx == sourceIdx { 3613 target.RemoteReceivRegInfos = append(target.RemoteReceivRegInfos, RemoteReceivRegInfo{ 3614 Idx: targetIdx, 3615 Uuid: op.Uuid, 3616 FromAddr: op.FromAddr, 3617 }) 3618 source.RemoteReceivRegInfos = append(source.RemoteReceivRegInfos[:i], source.RemoteReceivRegInfos[i+1:]...) 
3619 continue 3620 } 3621 i++ 3622 } 3623 } 3624 3625 func (c *Compile) generateCPUNumber(cpunum, blocks int) int { 3626 if cpunum <= 0 || blocks <= 0 { 3627 return 1 3628 } 3629 3630 if cpunum <= blocks { 3631 return cpunum 3632 } 3633 return blocks 3634 } 3635 3636 func (c *Compile) initAnalyze(qry *plan.Query) { 3637 if len(qry.Nodes) == 0 { 3638 panic("empty plan") 3639 } 3640 3641 anals := make([]*process.AnalyzeInfo, len(qry.Nodes)) 3642 for i := range anals { 3643 anals[i] = reuse.Alloc[process.AnalyzeInfo](nil) 3644 anals[i].NodeId = int32(i) 3645 } 3646 c.anal = newAnaylze() 3647 c.anal.qry = qry 3648 c.anal.analInfos = anals 3649 c.anal.curr = int(qry.Steps[0]) 3650 for _, node := range c.anal.qry.Nodes { 3651 if node.AnalyzeInfo == nil { 3652 node.AnalyzeInfo = new(plan.AnalyzeInfo) 3653 } 3654 } 3655 c.proc.AnalInfos = c.anal.analInfos 3656 } 3657 3658 func (c *Compile) fillAnalyzeInfo() { 3659 // record the number of s3 requests 3660 c.anal.S3IOInputCount(c.anal.curr, c.counterSet.FileService.S3.Put.Load()) 3661 c.anal.S3IOInputCount(c.anal.curr, c.counterSet.FileService.S3.List.Load()) 3662 3663 c.anal.S3IOOutputCount(c.anal.curr, c.counterSet.FileService.S3.Head.Load()) 3664 c.anal.S3IOOutputCount(c.anal.curr, c.counterSet.FileService.S3.Get.Load()) 3665 c.anal.S3IOOutputCount(c.anal.curr, c.counterSet.FileService.S3.Delete.Load()) 3666 c.anal.S3IOOutputCount(c.anal.curr, c.counterSet.FileService.S3.DeleteMulti.Load()) 3667 3668 for i, anal := range c.anal.analInfos { 3669 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.InputRows, atomic.LoadInt64(&anal.InputRows)) 3670 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.OutputRows, atomic.LoadInt64(&anal.OutputRows)) 3671 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.InputSize, atomic.LoadInt64(&anal.InputSize)) 3672 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.OutputSize, atomic.LoadInt64(&anal.OutputSize)) 3673 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.TimeConsumed, 
atomic.LoadInt64(&anal.TimeConsumed)) 3674 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.MemorySize, atomic.LoadInt64(&anal.MemorySize)) 3675 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.WaitTimeConsumed, atomic.LoadInt64(&anal.WaitTimeConsumed)) 3676 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.DiskIO, atomic.LoadInt64(&anal.DiskIO)) 3677 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.S3IOByte, atomic.LoadInt64(&anal.S3IOByte)) 3678 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.S3IOInputCount, atomic.LoadInt64(&anal.S3IOInputCount)) 3679 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.S3IOOutputCount, atomic.LoadInt64(&anal.S3IOOutputCount)) 3680 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.NetworkIO, atomic.LoadInt64(&anal.NetworkIO)) 3681 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.ScanTime, atomic.LoadInt64(&anal.ScanTime)) 3682 atomic.StoreInt64(&c.anal.qry.Nodes[i].AnalyzeInfo.InsertTime, atomic.LoadInt64(&anal.InsertTime)) 3683 anal.DeepCopyArray(c.anal.qry.Nodes[i].AnalyzeInfo) 3684 } 3685 } 3686 3687 func (c *Compile) determinExpandRanges(n *plan.Node, rel engine.Relation) bool { 3688 if n.TableDef.Partition != nil { 3689 return true 3690 } 3691 if len(n.RuntimeFilterProbeList) == 0 { 3692 return true 3693 } 3694 if n.Stats.BlockNum > plan2.BlockNumForceOneCN && len(c.cnList) > 1 { 3695 return true 3696 } 3697 if n.AggList != nil { //need to handle partial results 3698 return true 3699 } 3700 return false 3701 } 3702 3703 func (c *Compile) expandRanges(n *plan.Node, rel engine.Relation, blockFilterList []*plan.Expr) (engine.Ranges, error) { 3704 var err error 3705 var db engine.Database 3706 var ranges engine.Ranges 3707 var txnOp client.TxnOperator 3708 3709 //----------------------------------------------------------------------------------------------------- 3710 ctx := c.ctx 3711 txnOp = c.proc.TxnOperator 3712 if n.ScanSnapshot != nil && n.ScanSnapshot.TS != nil { 3713 if 
!n.ScanSnapshot.TS.Equal(timestamp.Timestamp{LogicalTime: 0, PhysicalTime: 0}) && 3714 n.ScanSnapshot.TS.Less(c.proc.TxnOperator.Txn().SnapshotTS) { 3715 txnOp = c.proc.TxnOperator.CloneSnapshotOp(*n.ScanSnapshot.TS) 3716 3717 if n.ScanSnapshot.Tenant != nil { 3718 ctx = context.WithValue(ctx, defines.TenantIDKey{}, n.ScanSnapshot.Tenant.TenantID) 3719 } 3720 } 3721 } 3722 //----------------------------------------------------------------------------------------------------- 3723 3724 if util.TableIsClusterTable(n.TableDef.GetTableType()) { 3725 ctx = defines.AttachAccountId(ctx, catalog.System_Account) 3726 } 3727 if n.ObjRef.PubInfo != nil { 3728 ctx = defines.AttachAccountId(ctx, uint32(n.ObjRef.PubInfo.GetTenantId())) 3729 } 3730 if util.TableIsLoggingTable(n.ObjRef.SchemaName, n.ObjRef.ObjName) { 3731 ctx = defines.AttachAccountId(ctx, catalog.System_Account) 3732 } 3733 3734 db, err = c.e.Database(ctx, n.ObjRef.SchemaName, txnOp) 3735 if err != nil { 3736 return nil, err 3737 } 3738 ranges, err = rel.Ranges(ctx, blockFilterList) 3739 if err != nil { 3740 return nil, err 3741 } 3742 3743 if n.TableDef.Partition != nil { 3744 if n.PartitionPrune != nil && n.PartitionPrune.IsPruned { 3745 for i, partitionItem := range n.PartitionPrune.SelectedPartitions { 3746 partTableName := partitionItem.PartitionTableName 3747 subrelation, err := db.Relation(ctx, partTableName, c.proc) 3748 if err != nil { 3749 return nil, err 3750 } 3751 subranges, err := subrelation.Ranges(ctx, n.BlockFilterList) 3752 if err != nil { 3753 return nil, err 3754 } 3755 // add partition number into objectio.BlockInfo. 
3756 blkSlice := subranges.(*objectio.BlockInfoSlice) 3757 for j := 1; j < subranges.Len(); j++ { 3758 blkInfo := blkSlice.Get(j) 3759 blkInfo.PartitionNum = i 3760 ranges.Append(blkSlice.GetBytes(j)) 3761 } 3762 } 3763 } else { 3764 partitionInfo := n.TableDef.Partition 3765 partitionNum := int(partitionInfo.PartitionNum) 3766 partitionTableNames := partitionInfo.PartitionTableNames 3767 for i := 0; i < partitionNum; i++ { 3768 partTableName := partitionTableNames[i] 3769 subrelation, err := db.Relation(ctx, partTableName, c.proc) 3770 if err != nil { 3771 return nil, err 3772 } 3773 subranges, err := subrelation.Ranges(ctx, n.BlockFilterList) 3774 if err != nil { 3775 return nil, err 3776 } 3777 // add partition number into objectio.BlockInfo. 3778 blkSlice := subranges.(*objectio.BlockInfoSlice) 3779 for j := 1; j < subranges.Len(); j++ { 3780 blkInfo := blkSlice.Get(j) 3781 blkInfo.PartitionNum = i 3782 ranges.Append(blkSlice.GetBytes(j)) 3783 } 3784 } 3785 } 3786 } 3787 3788 return ranges, nil 3789 } 3790 3791 func (c *Compile) generateNodes(n *plan.Node) (engine.Nodes, []any, []types.T, error) { 3792 var err error 3793 var db engine.Database 3794 var rel engine.Relation 3795 var ranges engine.Ranges 3796 var partialResults []any 3797 var partialResultTypes []types.T 3798 var nodes engine.Nodes 3799 var txnOp client.TxnOperator 3800 3801 //------------------------------------------------------------------------------------------------------------------ 3802 ctx := c.ctx 3803 txnOp = c.proc.TxnOperator 3804 if n.ScanSnapshot != nil && n.ScanSnapshot.TS != nil { 3805 if !n.ScanSnapshot.TS.Equal(timestamp.Timestamp{LogicalTime: 0, PhysicalTime: 0}) && 3806 n.ScanSnapshot.TS.Less(c.proc.TxnOperator.Txn().SnapshotTS) { 3807 txnOp = c.proc.TxnOperator.CloneSnapshotOp(*n.ScanSnapshot.TS) 3808 3809 if n.ScanSnapshot.Tenant != nil { 3810 ctx = context.WithValue(ctx, defines.TenantIDKey{}, n.ScanSnapshot.Tenant.TenantID) 3811 } 3812 } 3813 } 3814 
//------------------------------------------------------------------------------------------------------------- 3815 3816 isPartitionTable := false 3817 if n.TableDef.Partition != nil { 3818 isPartitionTable = true 3819 } 3820 3821 //ctx := c.ctx 3822 if util.TableIsClusterTable(n.TableDef.GetTableType()) { 3823 ctx = defines.AttachAccountId(ctx, catalog.System_Account) 3824 } 3825 if n.ObjRef.PubInfo != nil { 3826 ctx = defines.AttachAccountId(ctx, uint32(n.ObjRef.PubInfo.GetTenantId())) 3827 } 3828 if util.TableIsLoggingTable(n.ObjRef.SchemaName, n.ObjRef.ObjName) { 3829 ctx = defines.AttachAccountId(ctx, catalog.System_Account) 3830 } 3831 3832 db, err = c.e.Database(ctx, n.ObjRef.SchemaName, txnOp) 3833 if err != nil { 3834 return nil, nil, nil, err 3835 } 3836 rel, err = db.Relation(ctx, n.TableDef.Name, c.proc) 3837 if err != nil { 3838 if txnOp.IsSnapOp() { 3839 return nil, nil, nil, err 3840 } 3841 var e error // avoid contamination of error messages 3842 db, e = c.e.Database(ctx, defines.TEMPORARY_DBNAME, txnOp) 3843 if e != nil { 3844 return nil, nil, nil, err 3845 } 3846 3847 // if temporary table, just scan at local cn. 
3848 rel, e = db.Relation(ctx, engine.GetTempTableName(n.ObjRef.SchemaName, n.TableDef.Name), c.proc) 3849 if e != nil { 3850 return nil, nil, nil, err 3851 } 3852 c.cnList = engine.Nodes{ 3853 engine.Node{ 3854 Addr: c.addr, 3855 Rel: rel, 3856 Mcpu: 1, 3857 }, 3858 } 3859 } 3860 3861 if c.determinExpandRanges(n, rel) { 3862 ranges, err = c.expandRanges(n, rel, n.BlockFilterList) 3863 if err != nil { 3864 return nil, nil, nil, err 3865 } 3866 } else { 3867 // add current CN 3868 nodes = append(nodes, engine.Node{ 3869 Addr: c.addr, 3870 Rel: rel, 3871 Mcpu: c.generateCPUNumber(ncpu, int(n.Stats.BlockNum)), 3872 }) 3873 nodes[0].NeedExpandRanges = true 3874 return nodes, nil, nil, nil 3875 } 3876 3877 if len(n.AggList) > 0 && ranges.Len() > 1 { 3878 newranges := make([]byte, 0, ranges.Size()) 3879 newranges = append(newranges, ranges.GetBytes(0)...) 3880 partialResults = make([]any, 0, len(n.AggList)) 3881 partialResultTypes = make([]types.T, len(n.AggList)) 3882 3883 for i := range n.AggList { 3884 agg := n.AggList[i].Expr.(*plan.Expr_F) 3885 name := agg.F.Func.ObjName 3886 switch name { 3887 case "starcount": 3888 partialResults = append(partialResults, int64(0)) 3889 partialResultTypes[i] = types.T_int64 3890 case "count": 3891 if (uint64(agg.F.Func.Obj) & function.Distinct) != 0 { 3892 partialResults = nil 3893 } else { 3894 partialResults = append(partialResults, int64(0)) 3895 partialResultTypes[i] = types.T_int64 3896 } 3897 case "min", "max": 3898 partialResults = append(partialResults, nil) 3899 default: 3900 partialResults = nil 3901 } 3902 if partialResults == nil { 3903 break 3904 } 3905 } 3906 3907 if len(n.AggList) == 1 && n.AggList[0].Expr.(*plan.Expr_F).F.Func.ObjName == "starcount" { 3908 for i := 1; i < ranges.Len(); i++ { 3909 blk := ranges.(*objectio.BlockInfoSlice).Get(i) 3910 if !blk.CanRemote || !blk.DeltaLocation().IsEmpty() { 3911 newranges = append(newranges, ranges.(*objectio.BlockInfoSlice).GetBytes(i)...) 
3912 continue 3913 } 3914 partialResults[0] = partialResults[0].(int64) + int64(blk.MetaLocation().Rows()) 3915 } 3916 } else if partialResults != nil { 3917 columnMap := make(map[int]int) 3918 for i := range n.AggList { 3919 agg := n.AggList[i].Expr.(*plan.Expr_F) 3920 if agg.F.Func.ObjName == "starcount" { 3921 continue 3922 } 3923 args := agg.F.Args[0] 3924 col, ok := args.Expr.(*plan.Expr_Col) 3925 if !ok { 3926 if _, ok := args.Expr.(*plan.Expr_Lit); ok { 3927 if agg.F.Func.ObjName == "count" { 3928 agg.F.Func.ObjName = "starcount" 3929 continue 3930 } 3931 } 3932 partialResults = nil 3933 break 3934 } 3935 columnMap[int(col.Col.ColPos)] = int(n.TableDef.Cols[int(col.Col.ColPos)].Seqnum) 3936 } 3937 for i := 1; i < ranges.Len(); i++ { 3938 if partialResults == nil { 3939 break 3940 } 3941 blk := ranges.(*objectio.BlockInfoSlice).Get(i) 3942 if !blk.CanRemote || !blk.DeltaLocation().IsEmpty() { 3943 newranges = append(newranges, ranges.GetBytes(i)...) 3944 continue 3945 } 3946 var objMeta objectio.ObjectMeta 3947 location := blk.MetaLocation() 3948 var fs fileservice.FileService 3949 fs, err = fileservice.Get[fileservice.FileService](c.proc.FileService, defines.SharedFileServiceName) 3950 if err != nil { 3951 return nil, nil, nil, err 3952 } 3953 objMeta, err = objectio.FastLoadObjectMeta(ctx, &location, false, fs) 3954 if err != nil { 3955 partialResults = nil 3956 break 3957 } else { 3958 objDataMeta := objMeta.MustDataMeta() 3959 blkMeta := objDataMeta.GetBlockMeta(uint32(location.ID())) 3960 for i := range n.AggList { 3961 agg := n.AggList[i].Expr.(*plan.Expr_F) 3962 name := agg.F.Func.ObjName 3963 switch name { 3964 case "starcount": 3965 partialResults[i] = partialResults[i].(int64) + int64(blkMeta.GetRows()) 3966 case "count": 3967 partialResults[i] = partialResults[i].(int64) + int64(blkMeta.GetRows()) 3968 col := agg.F.Args[0].Expr.(*plan.Expr_Col) 3969 nullCnt := blkMeta.ColumnMeta(uint16(columnMap[int(col.Col.ColPos)])).NullCnt() 3970 
partialResults[i] = partialResults[i].(int64) - int64(nullCnt) 3971 case "min": 3972 col := agg.F.Args[0].Expr.(*plan.Expr_Col) 3973 zm := blkMeta.ColumnMeta(uint16(columnMap[int(col.Col.ColPos)])).ZoneMap() 3974 if zm.GetType().FixedLength() < 0 { 3975 partialResults = nil 3976 } else { 3977 if partialResults[i] == nil { 3978 partialResults[i] = zm.GetMin() 3979 partialResultTypes[i] = zm.GetType() 3980 } else { 3981 switch zm.GetType() { 3982 case types.T_bool: 3983 partialResults[i] = !partialResults[i].(bool) || !types.DecodeFixed[bool](zm.GetMinBuf()) 3984 case types.T_bit: 3985 min := types.DecodeFixed[uint64](zm.GetMinBuf()) 3986 if min < partialResults[i].(uint64) { 3987 partialResults[i] = min 3988 } 3989 case types.T_int8: 3990 min := types.DecodeFixed[int8](zm.GetMinBuf()) 3991 if min < partialResults[i].(int8) { 3992 partialResults[i] = min 3993 } 3994 case types.T_int16: 3995 min := types.DecodeFixed[int16](zm.GetMinBuf()) 3996 if min < partialResults[i].(int16) { 3997 partialResults[i] = min 3998 } 3999 case types.T_int32: 4000 min := types.DecodeFixed[int32](zm.GetMinBuf()) 4001 if min < partialResults[i].(int32) { 4002 partialResults[i] = min 4003 } 4004 case types.T_int64: 4005 min := types.DecodeFixed[int64](zm.GetMinBuf()) 4006 if min < partialResults[i].(int64) { 4007 partialResults[i] = min 4008 } 4009 case types.T_uint8: 4010 min := types.DecodeFixed[uint8](zm.GetMinBuf()) 4011 if min < partialResults[i].(uint8) { 4012 partialResults[i] = min 4013 } 4014 case types.T_uint16: 4015 min := types.DecodeFixed[uint16](zm.GetMinBuf()) 4016 if min < partialResults[i].(uint16) { 4017 partialResults[i] = min 4018 } 4019 case types.T_uint32: 4020 min := types.DecodeFixed[uint32](zm.GetMinBuf()) 4021 if min < partialResults[i].(uint32) { 4022 partialResults[i] = min 4023 } 4024 case types.T_uint64: 4025 min := types.DecodeFixed[uint64](zm.GetMinBuf()) 4026 if min < partialResults[i].(uint64) { 4027 partialResults[i] = min 4028 } 4029 case types.T_float32: 
4030 min := types.DecodeFixed[float32](zm.GetMinBuf()) 4031 if min < partialResults[i].(float32) { 4032 partialResults[i] = min 4033 } 4034 case types.T_float64: 4035 min := types.DecodeFixed[float64](zm.GetMinBuf()) 4036 if min < partialResults[i].(float64) { 4037 partialResults[i] = min 4038 } 4039 case types.T_date: 4040 min := types.DecodeFixed[types.Date](zm.GetMinBuf()) 4041 if min < partialResults[i].(types.Date) { 4042 partialResults[i] = min 4043 } 4044 case types.T_time: 4045 min := types.DecodeFixed[types.Time](zm.GetMinBuf()) 4046 if min < partialResults[i].(types.Time) { 4047 partialResults[i] = min 4048 } 4049 case types.T_datetime: 4050 min := types.DecodeFixed[types.Datetime](zm.GetMinBuf()) 4051 if min < partialResults[i].(types.Datetime) { 4052 partialResults[i] = min 4053 } 4054 case types.T_timestamp: 4055 min := types.DecodeFixed[types.Timestamp](zm.GetMinBuf()) 4056 if min < partialResults[i].(types.Timestamp) { 4057 partialResults[i] = min 4058 } 4059 case types.T_enum: 4060 min := types.DecodeFixed[types.Enum](zm.GetMinBuf()) 4061 if min < partialResults[i].(types.Enum) { 4062 partialResults[i] = min 4063 } 4064 case types.T_decimal64: 4065 min := types.DecodeFixed[types.Decimal64](zm.GetMinBuf()) 4066 if min < partialResults[i].(types.Decimal64) { 4067 partialResults[i] = min 4068 } 4069 case types.T_decimal128: 4070 min := types.DecodeFixed[types.Decimal128](zm.GetMinBuf()) 4071 if min.Compare(partialResults[i].(types.Decimal128)) < 0 { 4072 partialResults[i] = min 4073 } 4074 case types.T_uuid: 4075 min := types.DecodeFixed[types.Uuid](zm.GetMinBuf()) 4076 if min.Lt(partialResults[i].(types.Uuid)) { 4077 partialResults[i] = min 4078 } 4079 case types.T_TS: 4080 min := types.DecodeFixed[types.TS](zm.GetMinBuf()) 4081 ts := partialResults[i].(types.TS) 4082 if min.Less(&ts) { 4083 partialResults[i] = min 4084 } 4085 case types.T_Rowid: 4086 min := types.DecodeFixed[types.Rowid](zm.GetMinBuf()) 4087 if 
min.Less(partialResults[i].(types.Rowid)) { 4088 partialResults[i] = min 4089 } 4090 case types.T_Blockid: 4091 min := types.DecodeFixed[types.Blockid](zm.GetMinBuf()) 4092 if min.Less(partialResults[i].(types.Blockid)) { 4093 partialResults[i] = min 4094 } 4095 } 4096 } 4097 } 4098 case "max": 4099 col := agg.F.Args[0].Expr.(*plan.Expr_Col) 4100 zm := blkMeta.ColumnMeta(uint16(columnMap[int(col.Col.ColPos)])).ZoneMap() 4101 if zm.GetType().FixedLength() < 0 { 4102 partialResults = nil 4103 } else { 4104 if partialResults[i] == nil { 4105 partialResults[i] = zm.GetMax() 4106 partialResultTypes[i] = zm.GetType() 4107 } else { 4108 switch zm.GetType() { 4109 case types.T_bool: 4110 partialResults[i] = partialResults[i].(bool) || types.DecodeFixed[bool](zm.GetMaxBuf()) 4111 case types.T_bit: 4112 max := types.DecodeFixed[uint64](zm.GetMaxBuf()) 4113 if max > partialResults[i].(uint64) { 4114 partialResults[i] = max 4115 } 4116 case types.T_int8: 4117 max := types.DecodeFixed[int8](zm.GetMaxBuf()) 4118 if max > partialResults[i].(int8) { 4119 partialResults[i] = max 4120 } 4121 case types.T_int16: 4122 max := types.DecodeFixed[int16](zm.GetMaxBuf()) 4123 if max > partialResults[i].(int16) { 4124 partialResults[i] = max 4125 } 4126 case types.T_int32: 4127 max := types.DecodeFixed[int32](zm.GetMaxBuf()) 4128 if max > partialResults[i].(int32) { 4129 partialResults[i] = max 4130 } 4131 case types.T_int64: 4132 max := types.DecodeFixed[int64](zm.GetMaxBuf()) 4133 if max > partialResults[i].(int64) { 4134 partialResults[i] = max 4135 } 4136 case types.T_uint8: 4137 max := types.DecodeFixed[uint8](zm.GetMaxBuf()) 4138 if max > partialResults[i].(uint8) { 4139 partialResults[i] = max 4140 } 4141 case types.T_uint16: 4142 max := types.DecodeFixed[uint16](zm.GetMaxBuf()) 4143 if max > partialResults[i].(uint16) { 4144 partialResults[i] = max 4145 } 4146 case types.T_uint32: 4147 max := types.DecodeFixed[uint32](zm.GetMaxBuf()) 4148 if max > partialResults[i].(uint32) { 4149 
partialResults[i] = max 4150 } 4151 case types.T_uint64: 4152 max := types.DecodeFixed[uint64](zm.GetMaxBuf()) 4153 if max > partialResults[i].(uint64) { 4154 partialResults[i] = max 4155 } 4156 case types.T_float32: 4157 max := types.DecodeFixed[float32](zm.GetMaxBuf()) 4158 if max > partialResults[i].(float32) { 4159 partialResults[i] = max 4160 } 4161 case types.T_float64: 4162 max := types.DecodeFixed[float64](zm.GetMaxBuf()) 4163 if max > partialResults[i].(float64) { 4164 partialResults[i] = max 4165 } 4166 case types.T_date: 4167 max := types.DecodeFixed[types.Date](zm.GetMaxBuf()) 4168 if max > partialResults[i].(types.Date) { 4169 partialResults[i] = max 4170 } 4171 case types.T_time: 4172 max := types.DecodeFixed[types.Time](zm.GetMaxBuf()) 4173 if max > partialResults[i].(types.Time) { 4174 partialResults[i] = max 4175 } 4176 case types.T_datetime: 4177 max := types.DecodeFixed[types.Datetime](zm.GetMaxBuf()) 4178 if max > partialResults[i].(types.Datetime) { 4179 partialResults[i] = max 4180 } 4181 case types.T_timestamp: 4182 max := types.DecodeFixed[types.Timestamp](zm.GetMaxBuf()) 4183 if max > partialResults[i].(types.Timestamp) { 4184 partialResults[i] = max 4185 } 4186 case types.T_enum: 4187 max := types.DecodeFixed[types.Enum](zm.GetMaxBuf()) 4188 if max > partialResults[i].(types.Enum) { 4189 partialResults[i] = max 4190 } 4191 case types.T_decimal64: 4192 max := types.DecodeFixed[types.Decimal64](zm.GetMaxBuf()) 4193 if max > partialResults[i].(types.Decimal64) { 4194 partialResults[i] = max 4195 } 4196 case types.T_decimal128: 4197 max := types.DecodeFixed[types.Decimal128](zm.GetMaxBuf()) 4198 if max.Compare(partialResults[i].(types.Decimal128)) > 0 { 4199 partialResults[i] = max 4200 } 4201 case types.T_uuid: 4202 max := types.DecodeFixed[types.Uuid](zm.GetMaxBuf()) 4203 if max.Gt(partialResults[i].(types.Uuid)) { 4204 partialResults[i] = max 4205 } 4206 case types.T_TS: 4207 max := types.DecodeFixed[types.TS](zm.GetMaxBuf()) 4208 ts := 
partialResults[i].(types.TS) 4209 if max.Greater(&ts) { 4210 partialResults[i] = max 4211 } 4212 case types.T_Rowid: 4213 max := types.DecodeFixed[types.Rowid](zm.GetMaxBuf()) 4214 if max.Great(partialResults[i].(types.Rowid)) { 4215 partialResults[i] = max 4216 } 4217 case types.T_Blockid: 4218 max := types.DecodeFixed[types.Blockid](zm.GetMaxBuf()) 4219 if max.Great(partialResults[i].(types.Blockid)) { 4220 partialResults[i] = max 4221 } 4222 } 4223 } 4224 } 4225 default: 4226 } 4227 if partialResults == nil { 4228 break 4229 } 4230 } 4231 if partialResults == nil { 4232 break 4233 } 4234 } 4235 } 4236 } 4237 if ranges.Size() == len(newranges) { 4238 partialResults = nil 4239 } else if partialResults != nil { 4240 ranges.SetBytes(newranges) 4241 } 4242 if partialResults == nil { 4243 partialResultTypes = nil 4244 } 4245 } 4246 // n.AggList = nil 4247 4248 // some log for finding a bug. 4249 tblId := rel.GetTableID(ctx) 4250 expectedLen := ranges.Len() 4251 logutil.Debugf("cn generateNodes, tbl %d ranges is %d", tblId, expectedLen) 4252 4253 // if len(ranges) == 0 indicates that it's a temporary table. 
4254 if ranges.Len() == 0 && n.TableDef.TableType != catalog.SystemOrdinaryRel { 4255 nodes = make(engine.Nodes, len(c.cnList)) 4256 for i, node := range c.cnList { 4257 if isPartitionTable { 4258 nodes[i] = engine.Node{ 4259 Id: node.Id, 4260 Addr: node.Addr, 4261 Mcpu: c.generateCPUNumber(node.Mcpu, int(n.Stats.BlockNum)), 4262 } 4263 } else { 4264 nodes[i] = engine.Node{ 4265 Rel: rel, 4266 Id: node.Id, 4267 Addr: node.Addr, 4268 Mcpu: c.generateCPUNumber(node.Mcpu, int(n.Stats.BlockNum)), 4269 } 4270 } 4271 } 4272 return nodes, partialResults, partialResultTypes, nil 4273 } 4274 4275 engineType := rel.GetEngineType() 4276 if isPartitionTable { 4277 rel = nil 4278 } 4279 // for multi cn in launch mode, put all payloads in current CN, maybe delete this in the future 4280 // for an ordered scan, put all paylonds in current CN 4281 // or sometimes force on one CN 4282 if isLaunchMode(c.cnList) || len(n.OrderBy) > 0 || ranges.Len() < plan2.BlockNumForceOneCN || n.Stats.ForceOneCN { 4283 return putBlocksInCurrentCN(c, ranges.GetAllBytes(), rel, n), partialResults, partialResultTypes, nil 4284 } 4285 // disttae engine 4286 if engineType == engine.Disttae { 4287 nodes, err := shuffleBlocksToMultiCN(c, ranges.(*objectio.BlockInfoSlice), rel, n) 4288 return nodes, partialResults, partialResultTypes, err 4289 } 4290 // maybe temp table on memengine , just put payloads in average 4291 return putBlocksInAverage(c, ranges, rel, n), partialResults, partialResultTypes, nil 4292 } 4293 4294 func putBlocksInAverage(c *Compile, ranges engine.Ranges, rel engine.Relation, n *plan.Node) engine.Nodes { 4295 var nodes engine.Nodes 4296 step := (ranges.Len() + len(c.cnList) - 1) / len(c.cnList) 4297 for i := 0; i < ranges.Len(); i += step { 4298 j := i / step 4299 if i+step >= ranges.Len() { 4300 if isSameCN(c.cnList[j].Addr, c.addr) { 4301 if len(nodes) == 0 { 4302 nodes = append(nodes, engine.Node{ 4303 Addr: c.addr, 4304 Rel: rel, 4305 Mcpu: c.generateCPUNumber(ncpu, 
int(n.Stats.BlockNum)), 4306 }) 4307 } 4308 nodes[0].Data = append(nodes[0].Data, ranges.Slice(i, ranges.Len())...) 4309 } else { 4310 nodes = append(nodes, engine.Node{ 4311 Rel: rel, 4312 Id: c.cnList[j].Id, 4313 Addr: c.cnList[j].Addr, 4314 Mcpu: c.generateCPUNumber(c.cnList[j].Mcpu, int(n.Stats.BlockNum)), 4315 Data: ranges.Slice(i, ranges.Len()), 4316 }) 4317 } 4318 } else { 4319 if isSameCN(c.cnList[j].Addr, c.addr) { 4320 if len(nodes) == 0 { 4321 nodes = append(nodes, engine.Node{ 4322 Rel: rel, 4323 Addr: c.addr, 4324 Mcpu: c.generateCPUNumber(ncpu, int(n.Stats.BlockNum)), 4325 }) 4326 } 4327 nodes[0].Data = append(nodes[0].Data, ranges.Slice(i, i+step)...) 4328 } else { 4329 nodes = append(nodes, engine.Node{ 4330 Rel: rel, 4331 Id: c.cnList[j].Id, 4332 Addr: c.cnList[j].Addr, 4333 Mcpu: c.generateCPUNumber(c.cnList[j].Mcpu, int(n.Stats.BlockNum)), 4334 Data: ranges.Slice(i, i+step), 4335 }) 4336 } 4337 } 4338 } 4339 return nodes 4340 } 4341 4342 func shuffleBlocksToMultiCN(c *Compile, ranges *objectio.BlockInfoSlice, rel engine.Relation, n *plan.Node) (engine.Nodes, error) { 4343 var nodes engine.Nodes 4344 // add current CN 4345 nodes = append(nodes, engine.Node{ 4346 Addr: c.addr, 4347 Rel: rel, 4348 Mcpu: c.generateCPUNumber(ncpu, int(n.Stats.BlockNum)), 4349 }) 4350 // add memory table block 4351 nodes[0].Data = append(nodes[0].Data, ranges.GetBytes(0)...) 4352 *ranges = ranges.Slice(1, ranges.Len()) 4353 // only memory table block 4354 if ranges.Len() == 0 { 4355 return nodes, nil 4356 } 4357 // only one cn 4358 if len(c.cnList) == 1 { 4359 nodes[0].Data = append(nodes[0].Data, ranges.GetAllBytes()...) 4360 return nodes, nil 4361 } 4362 // put dirty blocks which can't be distributed remotely in current CN. 4363 newRanges := make(objectio.BlockInfoSlice, 0, ranges.Len()) 4364 for i := 0; i < ranges.Len(); i++ { 4365 if ranges.Get(i).CanRemote { 4366 newRanges = append(newRanges, ranges.GetBytes(i)...) 
4367 } else { 4368 nodes[0].Data = append(nodes[0].Data, ranges.GetBytes(i)...) 4369 } 4370 } 4371 4372 // add the rest of CNs in list 4373 for i := range c.cnList { 4374 if c.cnList[i].Addr != c.addr { 4375 nodes = append(nodes, engine.Node{ 4376 Rel: rel, 4377 Id: c.cnList[i].Id, 4378 Addr: c.cnList[i].Addr, 4379 Mcpu: c.generateCPUNumber(c.cnList[i].Mcpu, int(n.Stats.BlockNum)), 4380 }) 4381 } 4382 } 4383 4384 sort.Slice(nodes, func(i, j int) bool { return nodes[i].Addr < nodes[j].Addr }) 4385 4386 if n.Stats.HashmapStats != nil && n.Stats.HashmapStats.Shuffle && n.Stats.HashmapStats.ShuffleType == plan.ShuffleType_Range { 4387 err := shuffleBlocksByRange(c, newRanges, n, nodes) 4388 if err != nil { 4389 return nil, err 4390 } 4391 } else { 4392 shuffleBlocksByHash(c, newRanges, nodes) 4393 } 4394 4395 minWorkLoad := math.MaxInt32 4396 maxWorkLoad := 0 4397 // remove empty node from nodes 4398 var newNodes engine.Nodes 4399 for i := range nodes { 4400 if len(nodes[i].Data) > maxWorkLoad { 4401 maxWorkLoad = len(nodes[i].Data) / objectio.BlockInfoSize 4402 } 4403 if len(nodes[i].Data) < minWorkLoad { 4404 minWorkLoad = len(nodes[i].Data) / objectio.BlockInfoSize 4405 } 4406 if len(nodes[i].Data) > 0 { 4407 newNodes = append(newNodes, nodes[i]) 4408 } 4409 } 4410 if minWorkLoad*2 < maxWorkLoad { 4411 logstring := fmt.Sprintf("read table %v ,workload %v blocks among %v nodes not balanced, max %v, min %v,", n.TableDef.Name, ranges.Len(), len(newNodes), maxWorkLoad, minWorkLoad) 4412 logstring = logstring + " cnlist: " 4413 for i := range c.cnList { 4414 logstring = logstring + c.cnList[i].Addr + " " 4415 } 4416 logutil.Warnf(logstring) 4417 } 4418 return newNodes, nil 4419 } 4420 4421 func shuffleBlocksByHash(c *Compile, ranges objectio.BlockInfoSlice, nodes engine.Nodes) { 4422 for i := 0; i < ranges.Len(); i++ { 4423 unmarshalledBlockInfo := ranges.Get(i) 4424 // get timestamp in objName to make sure it is random enough 4425 objTimeStamp := 
unmarshalledBlockInfo.MetaLocation().Name()[:7] 4426 index := plan2.SimpleCharHashToRange(objTimeStamp, uint64(len(c.cnList))) 4427 nodes[index].Data = append(nodes[index].Data, ranges.GetBytes(i)...) 4428 } 4429 } 4430 4431 func shuffleBlocksByRange(c *Compile, ranges objectio.BlockInfoSlice, n *plan.Node, nodes engine.Nodes) error { 4432 var objDataMeta objectio.ObjectDataMeta 4433 var objMeta objectio.ObjectMeta 4434 4435 var shuffleRangeUint64 []uint64 4436 var shuffleRangeInt64 []int64 4437 var init bool 4438 var index uint64 4439 for i := 0; i < ranges.Len(); i++ { 4440 unmarshalledBlockInfo := ranges.Get(i) 4441 location := unmarshalledBlockInfo.MetaLocation() 4442 fs, err := fileservice.Get[fileservice.FileService](c.proc.FileService, defines.SharedFileServiceName) 4443 if err != nil { 4444 return err 4445 } 4446 if !objectio.IsSameObjectLocVsMeta(location, objDataMeta) { 4447 if objMeta, err = objectio.FastLoadObjectMeta(c.ctx, &location, false, fs); err != nil { 4448 return err 4449 } 4450 objDataMeta = objMeta.MustDataMeta() 4451 } 4452 blkMeta := objDataMeta.GetBlockMeta(uint32(location.ID())) 4453 zm := blkMeta.MustGetColumn(uint16(n.Stats.HashmapStats.ShuffleColIdx)).ZoneMap() 4454 if !zm.IsInited() { 4455 // a block with all null will send to first CN 4456 nodes[0].Data = append(nodes[0].Data, ranges.GetBytes(i)...) 
4457 continue 4458 } 4459 if !init { 4460 init = true 4461 switch zm.GetType() { 4462 case types.T_int64, types.T_int32, types.T_int16: 4463 shuffleRangeInt64 = plan2.ShuffleRangeReEvalSigned(n.Stats.HashmapStats.Ranges, len(c.cnList), n.Stats.HashmapStats.Nullcnt, int64(n.Stats.TableCnt)) 4464 case types.T_uint64, types.T_uint32, types.T_uint16, types.T_varchar, types.T_char, types.T_text, types.T_bit: 4465 shuffleRangeUint64 = plan2.ShuffleRangeReEvalUnsigned(n.Stats.HashmapStats.Ranges, len(c.cnList), n.Stats.HashmapStats.Nullcnt, int64(n.Stats.TableCnt)) 4466 } 4467 } 4468 if shuffleRangeUint64 != nil { 4469 index = plan2.GetRangeShuffleIndexForZMUnsignedSlice(shuffleRangeUint64, zm) 4470 } else if shuffleRangeInt64 != nil { 4471 index = plan2.GetRangeShuffleIndexForZMSignedSlice(shuffleRangeInt64, zm) 4472 } else { 4473 index = plan2.GetRangeShuffleIndexForZM(n.Stats.HashmapStats.ShuffleColMin, n.Stats.HashmapStats.ShuffleColMax, zm, uint64(len(c.cnList))) 4474 } 4475 nodes[index].Data = append(nodes[index].Data, ranges.GetBytes(i)...) 4476 } 4477 return nil 4478 } 4479 4480 func putBlocksInCurrentCN(c *Compile, ranges []byte, rel engine.Relation, n *plan.Node) engine.Nodes { 4481 var nodes engine.Nodes 4482 // add current CN 4483 nodes = append(nodes, engine.Node{ 4484 Addr: c.addr, 4485 Rel: rel, 4486 Mcpu: c.generateCPUNumber(ncpu, int(n.Stats.BlockNum)), 4487 }) 4488 nodes[0].Data = append(nodes[0].Data, ranges...) 
4489 return nodes 4490 } 4491 4492 func validScopeCount(ss []*Scope) int { 4493 var cnt int 4494 4495 for _, s := range ss { 4496 if s.IsEnd { 4497 continue 4498 } 4499 cnt++ 4500 } 4501 return cnt 4502 } 4503 4504 func extraRegisters(ss []*Scope, i int) []*process.WaitRegister { 4505 regs := make([]*process.WaitRegister, 0, len(ss)) 4506 for _, s := range ss { 4507 if s.IsEnd { 4508 continue 4509 } 4510 regs = append(regs, s.Proc.Reg.MergeReceivers[i]) 4511 } 4512 return regs 4513 } 4514 4515 func dupType(typ *plan.Type) types.Type { 4516 return types.New(types.T(typ.Id), typ.Width, typ.Scale) 4517 } 4518 4519 // Update the specific scopes's instruction to true 4520 // then update the current idx 4521 func (c *Compile) setAnalyzeCurrent(updateScopes []*Scope, nextId int) { 4522 if updateScopes != nil { 4523 updateScopesLastFlag(updateScopes) 4524 } 4525 4526 c.anal.curr = nextId 4527 c.anal.isFirst = true 4528 } 4529 4530 func updateScopesLastFlag(updateScopes []*Scope) { 4531 for _, s := range updateScopes { 4532 if len(s.Instructions) == 0 { 4533 continue 4534 } 4535 last := len(s.Instructions) - 1 4536 s.Instructions[last].IsLast = true 4537 } 4538 } 4539 4540 func isLaunchMode(cnlist engine.Nodes) bool { 4541 for i := range cnlist { 4542 if !isSameCN(cnlist[0].Addr, cnlist[i].Addr) { 4543 return false 4544 } 4545 } 4546 return true 4547 } 4548 4549 func isSameCN(addr string, currentCNAddr string) bool { 4550 // just a defensive judgment. In fact, we shouldn't have received such data. 
4551 4552 parts1 := strings.Split(addr, ":") 4553 if len(parts1) != 2 { 4554 logutil.Debugf("compileScope received a malformed cn address '%s', expected 'ip:port'", addr) 4555 return true 4556 } 4557 parts2 := strings.Split(currentCNAddr, ":") 4558 if len(parts2) != 2 { 4559 logutil.Debugf("compileScope received a malformed current-cn address '%s', expected 'ip:port'", currentCNAddr) 4560 return true 4561 } 4562 return parts1[0] == parts2[0] 4563 } 4564 4565 func (s *Scope) affectedRows() uint64 { 4566 affectedRows := uint64(0) 4567 for _, in := range s.Instructions { 4568 if arg, ok := in.Arg.(vm.ModificationArgument); ok { 4569 if marg, ok := arg.(*mergeblock.Argument); ok { 4570 return marg.AffectedRows() 4571 } 4572 affectedRows += arg.AffectedRows() 4573 } 4574 } 4575 return affectedRows 4576 } 4577 4578 func (c *Compile) runSql(sql string) error { 4579 if sql == "" { 4580 return nil 4581 } 4582 res, err := c.runSqlWithResult(sql) 4583 if err != nil { 4584 return err 4585 } 4586 res.Close() 4587 return nil 4588 } 4589 4590 func (c *Compile) runSqlWithResult(sql string) (executor.Result, error) { 4591 v, ok := moruntime.ProcessLevelRuntime().GetGlobalVariables(moruntime.InternalSQLExecutor) 4592 if !ok { 4593 panic("missing lock service") 4594 } 4595 exec := v.(executor.SQLExecutor) 4596 opts := executor.Options{}. 4597 // All runSql and runSqlWithResult is a part of input sql, can not incr statement. 4598 // All these sub-sql's need to be rolled back and retried en masse when they conflict in pessimistic mode 4599 WithDisableIncrStatement(). 4600 WithTxn(c.proc.TxnOperator). 4601 WithDatabase(c.db). 
4602 WithTimeZone(c.proc.SessionInfo.TimeZone) 4603 return exec.Exec(c.proc.Ctx, sql, opts) 4604 } 4605 4606 func evalRowsetData(proc *process.Process, 4607 exprs []*plan.RowsetExpr, vec *vector.Vector, exprExecs []colexec.ExpressionExecutor, 4608 ) error { 4609 var bats []*batch.Batch 4610 4611 vec.ResetArea() 4612 bats = []*batch.Batch{batch.EmptyForConstFoldBatch} 4613 if len(exprExecs) > 0 { 4614 for i, expr := range exprExecs { 4615 val, err := expr.Eval(proc, bats) 4616 if err != nil { 4617 return err 4618 } 4619 if err := vec.Copy(val, int64(exprs[i].RowPos), 0, proc.Mp()); err != nil { 4620 return err 4621 } 4622 } 4623 } else { 4624 for _, expr := range exprs { 4625 if expr.Pos >= 0 { 4626 continue 4627 } 4628 val, err := colexec.EvalExpressionOnce(proc, expr.Expr, bats) 4629 if err != nil { 4630 return err 4631 } 4632 if err := vec.Copy(val, int64(expr.RowPos), 0, proc.Mp()); err != nil { 4633 val.Free(proc.Mp()) 4634 return err 4635 } 4636 val.Free(proc.Mp()) 4637 } 4638 } 4639 return nil 4640 } 4641 4642 func (c *Compile) newInsertMergeScope(arg *insert.Argument, ss []*Scope) *Scope { 4643 // see errors.Join() 4644 n := 0 4645 for _, s := range ss { 4646 if !s.IsEnd { 4647 n++ 4648 } 4649 } 4650 ss2 := make([]*Scope, 0, n) 4651 for _, s := range ss { 4652 if !s.IsEnd { 4653 ss2 = append(ss2, s) 4654 } 4655 } 4656 insert := &vm.Instruction{ 4657 Op: vm.Insert, 4658 Arg: arg, 4659 } 4660 for i := range ss2 { 4661 ss2[i].Instructions = append(ss2[i].Instructions, dupInstruction(insert, nil, i)) 4662 } 4663 return c.newMergeScope(ss2) 4664 } 4665 4666 func (c *Compile) fatalLog(retry int, err error) { 4667 if err == nil { 4668 return 4669 } 4670 fatal := moerr.IsMoErrCode(err, moerr.ErrTxnNeedRetry) || 4671 moerr.IsMoErrCode(err, moerr.ErrTxnNeedRetryWithDefChanged) || 4672 moerr.IsMoErrCode(err, moerr.ErrTxnWWConflict) || 4673 moerr.IsMoErrCode(err, moerr.ErrDuplicateEntry) || 4674 moerr.IsMoErrCode(err, moerr.ER_DUP_ENTRY) || 4675 moerr.IsMoErrCode(err, 
moerr.ER_DUP_ENTRY_WITH_KEY_NAME) 4676 if !fatal { 4677 return 4678 } 4679 4680 if retry == 0 && 4681 (moerr.IsMoErrCode(err, moerr.ErrTxnNeedRetry) || 4682 moerr.IsMoErrCode(err, moerr.ErrTxnNeedRetryWithDefChanged)) { 4683 return 4684 } 4685 4686 txnTrace.GetService().TxnError(c.proc.TxnOperator, err) 4687 4688 v, ok := moruntime.ProcessLevelRuntime(). 4689 GetGlobalVariables(moruntime.EnableCheckInvalidRCErrors) 4690 if !ok || !v.(bool) { 4691 return 4692 } 4693 4694 logutil.Fatalf("BUG(RC): txn %s retry %d, error %+v\n", 4695 hex.EncodeToString(c.proc.TxnOperator.Txn().ID), 4696 retry, 4697 err.Error()) 4698 } 4699 4700 func (c *Compile) SetOriginSQL(sql string) { 4701 c.originSQL = sql 4702 } 4703 4704 func (c *Compile) SetBuildPlanFunc(buildPlanFunc func() (*plan2.Plan, error)) { 4705 c.buildPlanFunc = buildPlanFunc 4706 } 4707 4708 // detectFkSelfRefer checks if foreign key self refer confirmed 4709 func detectFkSelfRefer(c *Compile, detectSqls []string) error { 4710 if len(detectSqls) == 0 { 4711 return nil 4712 } 4713 for _, sql := range detectSqls { 4714 err := runDetectSql(c, sql) 4715 if err != nil { 4716 return err 4717 } 4718 } 4719 4720 return nil 4721 } 4722 4723 // runDetectSql runs the fk detecting sql 4724 func runDetectSql(c *Compile, sql string) error { 4725 res, err := c.runSqlWithResult(sql) 4726 if err != nil { 4727 logutil.Errorf("The sql that caused the fk self refer check failed is %s, and generated background sql is %s", c.sql, sql) 4728 return err 4729 } 4730 defer res.Close() 4731 4732 if res.Batches != nil { 4733 vs := res.Batches[0].Vecs 4734 if vs != nil && vs[0].Length() > 0 { 4735 yes := vector.GetFixedAt[bool](vs[0], 0) 4736 if !yes { 4737 return moerr.NewErrFKNoReferencedRow2(c.ctx) 4738 } 4739 } 4740 } 4741 return nil 4742 } 4743 4744 // runDetectFkReferToDBSql runs the fk detecting sql 4745 func runDetectFkReferToDBSql(c *Compile, sql string) error { 4746 res, err := c.runSqlWithResult(sql) 4747 if err != nil { 4748 
logutil.Errorf("The sql that caused the fk self refer check failed is %s, and generated background sql is %s", c.sql, sql) 4749 return err 4750 } 4751 defer res.Close() 4752 4753 if res.Batches != nil { 4754 vs := res.Batches[0].Vecs 4755 if vs != nil && vs[0].Length() > 0 { 4756 yes := vector.GetFixedAt[bool](vs[0], 0) 4757 if yes { 4758 return moerr.NewInternalError(c.ctx, 4759 "can not drop database. It has been referenced by foreign keys") 4760 } 4761 } 4762 } 4763 return nil 4764 }