github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/build_sample.go (about)

     1  // Copyright 2022 Matrix Origin
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package plan
    16  
    17  import (
    18  	"github.com/matrixorigin/matrixone/pkg/common/moerr"
    19  	"github.com/matrixorigin/matrixone/pkg/pb/plan"
    20  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/dialect"
    21  	"github.com/matrixorigin/matrixone/pkg/sql/parsers/tree"
    22  )
    23  
    24  const (
    25  	NotSampleByRows     = -1
    26  	NotSampleByPercents = -1.0
    27  )
    28  
    29  type SampleFuncCtx struct {
    30  	hasSampleFunc bool
    31  
    32  	sRows    bool // if true, sample rows.
    33  	rows     int32
    34  	percents float64
    35  	columns  []*plan.Expr
    36  
    37  	// sampleUsingRow will scan all the blocks to avoid the centroids skewed.
    38  	// but this may cost much time.
    39  	sampleUsingRow bool
    40  	// start and offset in the select clause.
    41  	start  int
    42  	offset int
    43  }
    44  
    45  func (s *SampleFuncCtx) GenerateSampleFunc(se *tree.SampleExpr) error {
    46  	if err := se.Valid(); err != nil {
    47  		return err
    48  	}
    49  
    50  	if s.hasSampleFunc {
    51  		return moerr.NewSyntaxErrorNoCtx("cannot use more than one sample function at select clause.")
    52  	}
    53  	s.hasSampleFunc = true
    54  	s.sRows, s.sampleUsingRow, s.rows, s.percents = se.GetSampleDetail()
    55  
    56  	return nil
    57  }
    58  
    59  func (s *SampleFuncCtx) BindSampleColumn(ctx *BindContext, binder *ProjectionBinder, sampleList tree.SelectExprs) ([]*plan.Expr, error) {
    60  	s.columns = make([]*plan.Expr, 0, s.offset)
    61  
    62  	pList := make([]*plan.Expr, 0, len(sampleList))
    63  	for _, se := range sampleList {
    64  		astStr := tree.String(se.Expr, dialect.MYSQL)
    65  
    66  		if _, ok := ctx.groupByAst[astStr]; ok {
    67  			return nil, moerr.NewInternalErrorNoCtx("cannot sample the group by column.")
    68  		}
    69  
    70  		if colPos, ok := ctx.sampleByAst[astStr]; ok {
    71  			expr := &plan.Expr{
    72  				Typ: ctx.sampleFunc.columns[colPos].Typ,
    73  				Expr: &plan.Expr_Col{
    74  					Col: &plan.ColRef{
    75  						RelPos: ctx.sampleTag,
    76  						ColPos: colPos,
    77  					},
    78  				},
    79  			}
    80  			ctx.projects = append(ctx.projects, expr)
    81  			continue
    82  		}
    83  		expr, err := binder.baseBindExpr(se.Expr, 0, true)
    84  		if err != nil {
    85  			return nil, err
    86  		}
    87  		colPos := int32(len(s.columns))
    88  		ctx.sampleByAst[astStr] = colPos
    89  		s.columns = append(s.columns, expr)
    90  
    91  		pList = append(pList, &plan.Expr{
    92  			Typ: ctx.sampleFunc.columns[colPos].Typ,
    93  			Expr: &plan.Expr_Col{
    94  				Col: &plan.ColRef{
    95  					RelPos: ctx.sampleTag,
    96  					ColPos: colPos,
    97  				},
    98  			},
    99  		})
   100  	}
   101  	return pList, nil
   102  }
   103  
   104  func (s *SampleFuncCtx) SetStartOffset(start, offset int) {
   105  	s.start = start
   106  	s.offset = offset
   107  }
   108  
   109  func validSample(ctx *BindContext, builder *QueryBuilder) error {
   110  	if ctx.sampleFunc.hasSampleFunc {
   111  		if len(ctx.aggregates) > 0 {
   112  			return moerr.NewSyntaxError(builder.GetContext(), "cannot fixed non-scalar function and scalar function in the same query")
   113  		}
   114  		if ctx.recSelect || builder.isForUpdate {
   115  			return moerr.NewInternalError(builder.GetContext(), "not support sample function recursive cte or for update")
   116  		}
   117  		if len(ctx.windows) > 0 {
   118  			return moerr.NewNYI(builder.GetContext(), "sample for window function not support now")
   119  		}
   120  	}
   121  	return nil
   122  }
   123  
   124  //type SampleClauseCtx struct {
   125  //	hasSampleClause bool
   126  //
   127  //	sRows    bool // if true, sample rows.
   128  //	rows     int32
   129  //	percents float64
   130  //}
   131  
   132  func generateSamplePlanNode(ctx *BindContext, childNodeID int32) *plan.Node {
   133  	sampleNode := &plan.Node{
   134  		NodeType:    plan.Node_SAMPLE,
   135  		Children:    []int32{childNodeID},
   136  		GroupBy:     ctx.groups,
   137  		AggList:     ctx.sampleFunc.columns,
   138  		BindingTags: []int32{ctx.groupTag, ctx.sampleTag},
   139  		SampleFunc:  &plan.SampleFuncSpec{Rows: NotSampleByRows, Percent: NotSampleByPercents},
   140  	}
   141  	if ctx.sampleFunc.sRows {
   142  		sampleNode.SampleFunc.Rows = ctx.sampleFunc.rows
   143  		sampleNode.SampleFunc.UsingRow = ctx.sampleFunc.sampleUsingRow
   144  	} else {
   145  		sampleNode.SampleFunc.Percent = ctx.sampleFunc.percents
   146  		sampleNode.SampleFunc.UsingRow = true
   147  	}
   148  	return sampleNode
   149  }