github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/compile/types.go (about)

     1  // Copyright 2023 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //      http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package compile
    16  
    17  import (
    18  	"context"
    19  	"sync"
    20  	"sync/atomic"
    21  	"time"
    22  
    23  	"github.com/google/uuid"
    24  	"github.com/matrixorigin/matrixone/pkg/common/reuse"
    25  	"github.com/matrixorigin/matrixone/pkg/container/batch"
    26  	"github.com/matrixorigin/matrixone/pkg/container/types"
    27  	"github.com/matrixorigin/matrixone/pkg/pb/pipeline"
    28  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    29  	"github.com/matrixorigin/matrixone/pkg/pb/timestamp"
    30  	"github.com/matrixorigin/matrixone/pkg/perfcounter"
    31  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    32  	plan2 "github.com/matrixorigin/matrixone/pkg/sql/plan"
    33  	"github.com/matrixorigin/matrixone/pkg/txn/client"
    34  	"github.com/matrixorigin/matrixone/pkg/vm"
    35  	"github.com/matrixorigin/matrixone/pkg/vm/engine"
    36  	"github.com/matrixorigin/matrixone/pkg/vm/process"
    37  )
    38  
type (
	// TxnOperator is an alias for client.TxnOperator, so the type can be
	// referred to from this package without the client qualifier.
	TxnOperator = client.TxnOperator
)
    42  
// magicType is the integer type used for Scope.Magic to tell scope kinds apart.
type magicType int

// Kinds of scope.
// The values are assigned by iota starting from Merge = 0, so the position of
// a constant in this list determines its numeric value; do not reorder.
const (
	Merge magicType = iota
	Normal
	Remote
	Parallel
	CreateDatabase
	CreateTable
	CreateIndex
	DropDatabase
	DropTable
	DropIndex
	TruncateTable
	AlterView
	AlterTable
	MergeInsert
	MergeDelete
	CreateSequence
	DropSequence
	AlterSequence
	Replace
)
    67  
// Source describes a relation that will be used during execution: where it
// lives (schema / relation / partition names), which attributes are read, and
// the plan-level metadata (filter, table definition, runtime filters, ordering)
// attached to it.
type Source struct {
	isConst bool

	PushdownId             uint64
	PushdownAddr           string
	SchemaName             string
	RelationName           string
	PartitionRelationNames []string
	Attributes             []string
	R                      engine.Reader
	Bat                    *batch.Batch
	FilterExpr             *plan.Expr // TODO: change this to []*plan.Expr
	node                   *plan.Node
	TableDef               *plan.TableDef
	Timestamp              timestamp.Timestamp
	AccountId              *plan.PubInfo

	RuntimeFilterSpecs []*plan.RuntimeFilterSpec
	OrderBy            []*plan.OrderBySpec // used for ordered scans
}
    89  
// Col describes an attribute by its type (Typ) and its name (Name).
type Col struct {
	Typ  types.T
	Name string
}
    95  
// Scope is the output of the compile process.
// Each SQL statement is compiled into one or more execution-unit scopes.
type Scope struct {
	// Magic specifies the type of Scope; its values are the magicType constants.
	// The historical description of the first three values is:
	// 0 -  execution unit for reading data.
	// 1 -  execution unit for processing intermediate results.
	// 2 -  execution unit that requires remote call.
	// NOTE(review): the constants start with Merge, Normal, Remote, which does
	// not obviously match this ordering — confirm which description is current.
	Magic magicType

	// IsJoin marks the pipeline as a join pipeline.
	IsJoin bool

	// IsEnd marks the pipeline as an end pipeline.
	IsEnd bool

	// IsRemote marks the pipeline as a remote pipeline.
	IsRemote bool

	// IsLoad marks the pipeline as a load pipeline.
	IsLoad bool

	Plan *plan.Plan
	// DataSource stores information about the data source.
	DataSource *Source
	// PreScopes contains the child scopes of this scope.
	// canRemote walks them recursively when deciding whether the scope can be
	// executed remotely.
	PreScopes []*Scope
	// NodeInfo contains the information about the remote node.
	NodeInfo engine.Node
	// Instructions contains the command list of this scope.
	Instructions vm.Instructions
	// Proc contains the execution context.
	Proc *process.Process

	Reg *process.WaitRegister

	RemoteReceivRegInfos []RemoteReceivRegInfo

	BuildIdx   int
	ShuffleCnt int

	PartialResults     []any
	PartialResultTypes []types.T
}
   139  
   140  // canRemote checks whether the current scope can be executed remotely.
   141  func (s *Scope) canRemote(c *Compile, checkAddr bool) bool {
   142  	// check the remote address.
   143  	// if it was empty or equal to the current address, return false.
   144  	if checkAddr {
   145  		if len(s.NodeInfo.Addr) == 0 || len(c.addr) == 0 {
   146  			return false
   147  		}
   148  		if isSameCN(c.addr, s.NodeInfo.Addr) {
   149  			return false
   150  		}
   151  	}
   152  
   153  	// some operators cannot be remote.
   154  	// todo: it is not a good way to check the operator type here.
   155  	//  cannot generate this remote pipeline if the operator type is not supported.
   156  	for _, op := range s.Instructions {
   157  		if op.CannotRemote() {
   158  			return false
   159  		}
   160  	}
   161  	for _, pre := range s.PreScopes {
   162  		if !pre.canRemote(c, false) {
   163  			return false
   164  		}
   165  	}
   166  	return true
   167  }
   168  
// scopeContext holds contextual information that assists in generating a
// pipeline.Pipeline from a Scope. Contexts are linked to each other through
// the root, parent and children fields.
type scopeContext struct {
	id       int32
	plan     *plan.Plan
	scope    *Scope
	root     *scopeContext
	parent   *scopeContext
	children []*scopeContext
	pipe     *pipeline.Pipeline
	regs     map[*process.WaitRegister]int32
}
   180  
// anaylze holds the analyze information of a query: the query itself and one
// process.AnalyzeInfo entry per index used by the S3IO* counters.
// Instances are obtained from and returned to the reuse pool (see newAnaylze
// and release).
type anaylze struct {
	// curr is the current index of plan
	curr      int
	isFirst   bool
	qry       *plan.Query
	analInfos []*process.AnalyzeInfo
}
   189  
   190  func (a *anaylze) S3IOInputCount(idx int, count int64) {
   191  	atomic.AddInt64(&a.analInfos[idx].S3IOInputCount, count)
   192  }
   193  
   194  func (a *anaylze) S3IOOutputCount(idx int, count int64) {
   195  	atomic.AddInt64(&a.analInfos[idx].S3IOOutputCount, count)
   196  }
   197  
   198  func (a *anaylze) Nodes() []*process.AnalyzeInfo {
   199  	return a.analInfos
   200  }
   201  
   202  func (a anaylze) TypeName() string {
   203  	return "compile.anaylze"
   204  }
   205  
   206  func newAnaylze() *anaylze {
   207  	return reuse.Alloc[anaylze](nil)
   208  }
   209  
   210  func (a *anaylze) release() {
   211  	// there are 3 situations to release analyzeInfo
   212  	// 1 is free analyzeInfo of Local CN when release analyze
   213  	// 2 is free analyzeInfo of remote CN before transfer back
   214  	// 3 is free analyzeInfo of remote CN when errors happen before transfer back
   215  	// this is situation 1
   216  	for i := range a.analInfos {
   217  		reuse.Free[process.AnalyzeInfo](a.analInfos[i], nil)
   218  	}
   219  	reuse.Free[anaylze](a, nil)
   220  }
   221  
// Compile contains all the information needed for compilation.
type Compile struct {
	scope []*Scope

	pn   *plan.Plan
	info plan2.ExecInfo

	// fill is the result-writer callback.
	// It is called when result data is ready.
	fill func(*batch.Batch) error
	// affectRows stores the number of rows affected by insert / update / delete.
	affectRows *atomic.Uint64
	// addr is the CN address.
	addr string
	// db is the current database name.
	db string
	// tenant is the account name.
	tenant string
	// uid is the user who initiated the SQL.
	uid string
	// sql is the SQL text.
	sql       string
	originSQL string

	anal *anaylze
	// e is the db engine instance.
	e   engine.Engine
	ctx context.Context
	// proc stores the execution context.
	proc *process.Process

	MessageBoard *process.MessageBoard

	cnList engine.Nodes
	// stmt is the AST of the statement.
	stmt tree.Statement

	counterSet *perfcounter.CounterSet

	nodeRegs map[[2]int32]*process.WaitRegister
	stepRegs map[int32][][2]int32

	lock *sync.RWMutex

	isInternal bool

	// cnLabel holds the CN labels received from the proxy when the connection
	// is built.
	cnLabel map[string]string

	buildPlanFunc func() (*plan2.Plan, error)
	startAt       time.Time
	// fuzzys is used for duplicate checks.
	fuzzys []*fuzzyCheck

	needLockMeta bool
	metaTables   map[string]struct{}
	disableRetry bool

	lastAllocID int32
}
   282  
// RemoteReceivRegInfo is one entry of Scope.RemoteReceivRegInfos. It groups an
// index, a UUID and a source address.
// NOTE(review): presumably Idx selects the register that receives data sent
// from FromAddr and Uuid identifies that transfer — confirm against the users
// of this type.
type RemoteReceivRegInfo struct {
	Idx      int
	Uuid     uuid.UUID
	FromAddr string
}
   288  
// fuzzyCheck carries the state of one duplicate check (Compile.fuzzys holds a
// list of them): the database, table and attribute involved, plus the column
// definitions of the key being checked.
type fuzzyCheck struct {
	db        string
	tbl       string
	attr      string
	condition string

	// isCompound is set when the key is a compound primary key (a, b, ...)
	// or a compound unique key (a, b, ...).
	isCompound bool

	// onlyInsertHidden handles cases such as creating a unique index on an
	// existing table, or ALTER ... ADD UNIQUE KEY, where the unique key is
	// compound.
	onlyInsertHidden bool

	col          *plan.ColDef
	compoundCols []*plan.ColDef

	cnt int
}
   307  
// MultiTableIndex groups several index definitions under one index algorithm
// name.
// NOTE(review): the meaning of the IndexDefs map key is not visible here —
// confirm against the code that populates it.
type MultiTableIndex struct {
	IndexAlgo string
	IndexDefs map[string]*plan.IndexDef
}