github.com/matrixorigin/matrixone@v1.2.0/pkg/sql/plan/build_sample.go (about) 1 // Copyright 2022 Matrix Origin 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package plan 16 17 import ( 18 "github.com/matrixorigin/matrixone/pkg/common/moerr" 19 "github.com/matrixorigin/matrixone/pkg/pb/plan" 20 "github.com/matrixorigin/matrixone/pkg/sql/parsers/dialect" 21 "github.com/matrixorigin/matrixone/pkg/sql/parsers/tree" 22 ) 23 24 const ( 25 NotSampleByRows = -1 26 NotSampleByPercents = -1.0 27 ) 28 29 type SampleFuncCtx struct { 30 hasSampleFunc bool 31 32 sRows bool // if true, sample rows. 33 rows int32 34 percents float64 35 columns []*plan.Expr 36 37 // sampleUsingRow will scan all the blocks to avoid the centroids skewed. 38 // but this may cost much time. 39 sampleUsingRow bool 40 // start and offset in the select clause. 41 start int 42 offset int 43 } 44 45 func (s *SampleFuncCtx) GenerateSampleFunc(se *tree.SampleExpr) error { 46 if err := se.Valid(); err != nil { 47 return err 48 } 49 50 if s.hasSampleFunc { 51 return moerr.NewSyntaxErrorNoCtx("cannot use more than one sample function at select clause.") 52 } 53 s.hasSampleFunc = true 54 s.sRows, s.sampleUsingRow, s.rows, s.percents = se.GetSampleDetail() 55 56 return nil 57 } 58 59 func (s *SampleFuncCtx) BindSampleColumn(ctx *BindContext, binder *ProjectionBinder, sampleList tree.SelectExprs) ([]*plan.Expr, error) { 60 s.columns = make([]*plan.Expr, 0, s.offset) 61 62 pList := make([]*plan.Expr, 0, len(sampleList)) 63 for _, se := range sampleList { 64 astStr := tree.String(se.Expr, dialect.MYSQL) 65 66 if _, ok := ctx.groupByAst[astStr]; ok { 67 return nil, moerr.NewInternalErrorNoCtx("cannot sample the group by column.") 68 } 69 70 if colPos, ok := ctx.sampleByAst[astStr]; ok { 71 expr := &plan.Expr{ 72 Typ: ctx.sampleFunc.columns[colPos].Typ, 73 Expr: &plan.Expr_Col{ 74 Col: &plan.ColRef{ 75 RelPos: ctx.sampleTag, 76 ColPos: colPos, 77 }, 78 }, 79 } 80 ctx.projects = append(ctx.projects, expr) 81 continue 82 } 83 expr, err := binder.baseBindExpr(se.Expr, 0, true) 84 if err != nil { 85 return nil, err 86 } 87 colPos := int32(len(s.columns)) 88 ctx.sampleByAst[astStr] = colPos 89 s.columns = append(s.columns, expr) 90 91 pList = append(pList, &plan.Expr{ 92 Typ: ctx.sampleFunc.columns[colPos].Typ, 93 Expr: &plan.Expr_Col{ 94 Col: &plan.ColRef{ 95 RelPos: ctx.sampleTag, 96 ColPos: colPos, 97 }, 98 }, 99 }) 100 } 101 return pList, nil 102 } 103 104 func (s *SampleFuncCtx) SetStartOffset(start, offset int) { 105 s.start = start 106 s.offset = offset 107 } 108 109 func validSample(ctx *BindContext, builder *QueryBuilder) error { 110 if ctx.sampleFunc.hasSampleFunc { 111 if len(ctx.aggregates) > 0 { 112 return moerr.NewSyntaxError(builder.GetContext(), "cannot fixed non-scalar function and scalar function in the same query") 113 } 114 if ctx.recSelect || builder.isForUpdate { 115 return moerr.NewInternalError(builder.GetContext(), "not support sample function recursive cte or for update") 116 } 117 if len(ctx.windows) > 0 { 118 return moerr.NewNYI(builder.GetContext(), "sample for window function not support now") 119 } 120 } 121 return nil 122 } 123 124 //type SampleClauseCtx struct { 125 // hasSampleClause bool 126 // 127 // sRows bool // if true, sample rows. 128 // rows int32 129 // percents float64 130 //} 131 132 func generateSamplePlanNode(ctx *BindContext, childNodeID int32) *plan.Node { 133 sampleNode := &plan.Node{ 134 NodeType: plan.Node_SAMPLE, 135 Children: []int32{childNodeID}, 136 GroupBy: ctx.groups, 137 AggList: ctx.sampleFunc.columns, 138 BindingTags: []int32{ctx.groupTag, ctx.sampleTag}, 139 SampleFunc: &plan.SampleFuncSpec{Rows: NotSampleByRows, Percent: NotSampleByPercents}, 140 } 141 if ctx.sampleFunc.sRows { 142 sampleNode.SampleFunc.Rows = ctx.sampleFunc.rows 143 sampleNode.SampleFunc.UsingRow = ctx.sampleFunc.sampleUsingRow 144 } else { 145 sampleNode.SampleFunc.Percent = ctx.sampleFunc.percents 146 sampleNode.SampleFunc.UsingRow = true 147 } 148 return sampleNode 149 }