github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/parallelize.go (about) 1 // Copyright 2020-2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package analyzer 16 17 import ( 18 "os" 19 "strconv" 20 21 "github.com/go-kit/kit/metrics/discard" 22 23 "github.com/dolthub/go-mysql-server/sql" 24 "github.com/dolthub/go-mysql-server/sql/plan" 25 "github.com/dolthub/go-mysql-server/sql/transform" 26 ) 27 28 func init() { 29 // check for single-threaded feature flag 30 if v, ok := os.LookupEnv(singleThreadFlag); ok && v != "" { 31 SingleThreadFeatureFlag = true 32 } 33 } 34 35 const ( 36 singleThreadFlag = "GMS_SINGLE_THREAD" 37 ) 38 39 var ( 40 // ParallelQueryCounter describes a metric that accumulates 41 // number of parallel queries monotonically. 42 ParallelQueryCounter = discard.NewCounter() 43 44 SingleThreadFeatureFlag = false 45 ) 46 47 func shouldParallelize(node sql.Node, scope *plan.Scope) bool { 48 if SingleThreadFeatureFlag { 49 return false 50 } 51 52 // Don't parallelize subqueries, this can blow up the execution graph quickly 53 if !scope.IsEmpty() { 54 return false 55 } 56 57 if tc, ok := node.(*plan.TransactionCommittingNode); ok { 58 return shouldParallelize(tc.Child(), scope) 59 } 60 61 // Do not try to parallelize DDL or descriptive operations 62 return !plan.IsNoRowNode(node) 63 } 64 65 func parallelize(ctx *sql.Context, a *Analyzer, node sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) { 66 if a.Parallelism <= 1 || !node.Resolved() { 67 return node, transform.SameTree, nil 68 } 69 70 proc, ok := node.(*plan.QueryProcess) 71 if (ok && !shouldParallelize(proc.Child(), nil)) || !shouldParallelize(node, scope) { 72 return node, transform.SameTree, nil 73 } 74 75 foundOrderedDistinct := false 76 newNode, same, err := transform.NodeWithCtx(node, nil, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) { 77 if _, ok := c.Node.(*plan.OrderedDistinct); ok { 78 foundOrderedDistinct = true 79 } else if !isParallelizable(c.Node) { 80 return c.Node, transform.SameTree, nil 81 } else if _, ok := c.Parent.(*plan.Max1Row); ok { 82 return c.Node, transform.SameTree, nil 83 } 84 ParallelQueryCounter.With("parallelism", strconv.Itoa(a.Parallelism)).Add(1) 85 86 return plan.NewExchange(a.Parallelism, c.Node), transform.NewTree, nil 87 }) 88 if err != nil || bool(same) || foundOrderedDistinct { 89 return node, transform.SameTree, err 90 } 91 92 newNode, _, err = transform.Node(newNode, removeRedundantExchanges) 93 if err != nil { 94 return nil, transform.SameTree, err 95 } 96 97 return newNode, transform.NewTree, nil 98 } 99 100 // removeRedundantExchanges removes all the exchanges except for the topmost 101 // of all. 102 func removeRedundantExchanges(node sql.Node) (sql.Node, transform.TreeIdentity, error) { 103 exchange, ok := node.(*plan.Exchange) 104 if !ok { 105 return node, transform.SameTree, nil 106 } 107 108 var seenIta bool 109 child, same, err := transform.Node(exchange.Child, func(node sql.Node) (sql.Node, transform.TreeIdentity, error) { 110 if exchange, ok := node.(*plan.Exchange); ok { 111 return exchange.Child, transform.NewTree, nil 112 } else if ita, ok := node.(*plan.IndexedTableAccess); ok { 113 if !ita.IsStatic() { 114 // do not parallelize lookup join 115 // todo(max): more graceful top-down exchange application 116 seenIta = true 117 } 118 } 119 return node, transform.SameTree, nil 120 }) 121 if err != nil { 122 return nil, transform.SameTree, err 123 } 124 if seenIta { 125 return child, transform.NewTree, nil 126 } 127 if same { 128 return node, transform.SameTree, nil 129 } 130 node, err = exchange.WithChildren(child) 131 return node, transform.NewTree, err 132 } 133 134 func isParallelizable(node sql.Node) bool { 135 var parallelizable = true 136 var tableSeen bool 137 var lastWasTable bool 138 139 transform.Inspect(node, func(node sql.Node) bool { 140 if node == nil { 141 return true 142 } 143 144 lastWasTable = false 145 if plan.IsBinary(node) { 146 parallelizable = false 147 return false 148 } 149 150 switch node := node.(type) { 151 // These are the only unary nodes that can be parallelized. Any other 152 // unary nodes will not. 153 case *plan.TableAlias, *plan.Exchange: 154 // Some nodes may have subquery expressions that make them unparallelizable 155 case *plan.Project, *plan.Filter: 156 for _, e := range node.(sql.Expressioner).Expressions() { 157 sql.Inspect(e, func(e sql.Expression) bool { 158 if q, ok := e.(*plan.Subquery); ok { 159 subqueryParallelizable := true 160 transform.Inspect(q.Query, func(node sql.Node) bool { 161 if node == nil { 162 return true 163 } 164 subqueryParallelizable = isParallelizable(node) 165 return subqueryParallelizable 166 }) 167 if !subqueryParallelizable { 168 parallelizable = false 169 } 170 return true 171 } 172 return true 173 }) 174 } 175 // IndexedTablesAccess already uses an index for lookups, so parallelizing it won't help in most cases (and can 176 // blow up the query execution graph) 177 case *plan.IndexedTableAccess: 178 parallelizable = false 179 return false 180 // Foreign keys expect specific nodes as children and face issues when they're swapped with Exchange nodes 181 case *plan.ForeignKeyHandler: 182 parallelizable = false 183 return false 184 case *plan.JSONTable: 185 parallelizable = false 186 return false 187 case *plan.RecursiveCte: 188 parallelizable = false 189 return false 190 case sql.Table: 191 lastWasTable = true 192 tableSeen = true 193 case *plan.JoinNode: 194 if node.Op.IsFullOuter() { 195 parallelizable = false 196 lastWasTable = true 197 tableSeen = true 198 return false 199 } 200 default: 201 parallelizable = false 202 } 203 return true 204 }) 205 206 return parallelizable && tableSeen && lastWasTable 207 }