github.com/dolthub/go-mysql-server@v0.18.0/sql/analyzer/parallelize.go (about)

     1  // Copyright 2020-2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package analyzer
    16  
    17  import (
    18  	"os"
    19  	"strconv"
    20  
    21  	"github.com/go-kit/kit/metrics/discard"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  	"github.com/dolthub/go-mysql-server/sql/plan"
    25  	"github.com/dolthub/go-mysql-server/sql/transform"
    26  )
    27  
    28  func init() {
    29  	// check for single-threaded feature flag
    30  	if v, ok := os.LookupEnv(singleThreadFlag); ok && v != "" {
    31  		SingleThreadFeatureFlag = true
    32  	}
    33  }
    34  
    35  const (
    36  	singleThreadFlag = "GMS_SINGLE_THREAD"
    37  )
    38  
    39  var (
    40  	// ParallelQueryCounter describes a metric that accumulates
    41  	// number of parallel queries monotonically.
    42  	ParallelQueryCounter = discard.NewCounter()
    43  
    44  	SingleThreadFeatureFlag = false
    45  )
    46  
    47  func shouldParallelize(node sql.Node, scope *plan.Scope) bool {
    48  	if SingleThreadFeatureFlag {
    49  		return false
    50  	}
    51  
    52  	// Don't parallelize subqueries, this can blow up the execution graph quickly
    53  	if !scope.IsEmpty() {
    54  		return false
    55  	}
    56  
    57  	if tc, ok := node.(*plan.TransactionCommittingNode); ok {
    58  		return shouldParallelize(tc.Child(), scope)
    59  	}
    60  
    61  	// Do not try to parallelize DDL or descriptive operations
    62  	return !plan.IsNoRowNode(node)
    63  }
    64  
    65  func parallelize(ctx *sql.Context, a *Analyzer, node sql.Node, scope *plan.Scope, sel RuleSelector) (sql.Node, transform.TreeIdentity, error) {
    66  	if a.Parallelism <= 1 || !node.Resolved() {
    67  		return node, transform.SameTree, nil
    68  	}
    69  
    70  	proc, ok := node.(*plan.QueryProcess)
    71  	if (ok && !shouldParallelize(proc.Child(), nil)) || !shouldParallelize(node, scope) {
    72  		return node, transform.SameTree, nil
    73  	}
    74  
    75  	foundOrderedDistinct := false
    76  	newNode, same, err := transform.NodeWithCtx(node, nil, func(c transform.Context) (sql.Node, transform.TreeIdentity, error) {
    77  		if _, ok := c.Node.(*plan.OrderedDistinct); ok {
    78  			foundOrderedDistinct = true
    79  		} else if !isParallelizable(c.Node) {
    80  			return c.Node, transform.SameTree, nil
    81  		} else if _, ok := c.Parent.(*plan.Max1Row); ok {
    82  			return c.Node, transform.SameTree, nil
    83  		}
    84  		ParallelQueryCounter.With("parallelism", strconv.Itoa(a.Parallelism)).Add(1)
    85  
    86  		return plan.NewExchange(a.Parallelism, c.Node), transform.NewTree, nil
    87  	})
    88  	if err != nil || bool(same) || foundOrderedDistinct {
    89  		return node, transform.SameTree, err
    90  	}
    91  
    92  	newNode, _, err = transform.Node(newNode, removeRedundantExchanges)
    93  	if err != nil {
    94  		return nil, transform.SameTree, err
    95  	}
    96  
    97  	return newNode, transform.NewTree, nil
    98  }
    99  
   100  // removeRedundantExchanges removes all the exchanges except for the topmost
   101  // of all.
   102  func removeRedundantExchanges(node sql.Node) (sql.Node, transform.TreeIdentity, error) {
   103  	exchange, ok := node.(*plan.Exchange)
   104  	if !ok {
   105  		return node, transform.SameTree, nil
   106  	}
   107  
   108  	var seenIta bool
   109  	child, same, err := transform.Node(exchange.Child, func(node sql.Node) (sql.Node, transform.TreeIdentity, error) {
   110  		if exchange, ok := node.(*plan.Exchange); ok {
   111  			return exchange.Child, transform.NewTree, nil
   112  		} else if ita, ok := node.(*plan.IndexedTableAccess); ok {
   113  			if !ita.IsStatic() {
   114  				// do not parallelize lookup join
   115  				// todo(max): more graceful top-down exchange application
   116  				seenIta = true
   117  			}
   118  		}
   119  		return node, transform.SameTree, nil
   120  	})
   121  	if err != nil {
   122  		return nil, transform.SameTree, err
   123  	}
   124  	if seenIta {
   125  		return child, transform.NewTree, nil
   126  	}
   127  	if same {
   128  		return node, transform.SameTree, nil
   129  	}
   130  	node, err = exchange.WithChildren(child)
   131  	return node, transform.NewTree, err
   132  }
   133  
   134  func isParallelizable(node sql.Node) bool {
   135  	var parallelizable = true
   136  	var tableSeen bool
   137  	var lastWasTable bool
   138  
   139  	transform.Inspect(node, func(node sql.Node) bool {
   140  		if node == nil {
   141  			return true
   142  		}
   143  
   144  		lastWasTable = false
   145  		if plan.IsBinary(node) {
   146  			parallelizable = false
   147  			return false
   148  		}
   149  
   150  		switch node := node.(type) {
   151  		// These are the only unary nodes that can be parallelized. Any other
   152  		// unary nodes will not.
   153  		case *plan.TableAlias, *plan.Exchange:
   154  		// Some nodes may have subquery expressions that make them unparallelizable
   155  		case *plan.Project, *plan.Filter:
   156  			for _, e := range node.(sql.Expressioner).Expressions() {
   157  				sql.Inspect(e, func(e sql.Expression) bool {
   158  					if q, ok := e.(*plan.Subquery); ok {
   159  						subqueryParallelizable := true
   160  						transform.Inspect(q.Query, func(node sql.Node) bool {
   161  							if node == nil {
   162  								return true
   163  							}
   164  							subqueryParallelizable = isParallelizable(node)
   165  							return subqueryParallelizable
   166  						})
   167  						if !subqueryParallelizable {
   168  							parallelizable = false
   169  						}
   170  						return true
   171  					}
   172  					return true
   173  				})
   174  			}
   175  		// IndexedTablesAccess already uses an index for lookups, so parallelizing it won't help in most cases (and can
   176  		// blow up the query execution graph)
   177  		case *plan.IndexedTableAccess:
   178  			parallelizable = false
   179  			return false
   180  		// Foreign keys expect specific nodes as children and face issues when they're swapped with Exchange nodes
   181  		case *plan.ForeignKeyHandler:
   182  			parallelizable = false
   183  			return false
   184  		case *plan.JSONTable:
   185  			parallelizable = false
   186  			return false
   187  		case *plan.RecursiveCte:
   188  			parallelizable = false
   189  			return false
   190  		case sql.Table:
   191  			lastWasTable = true
   192  			tableSeen = true
   193  		case *plan.JoinNode:
   194  			if node.Op.IsFullOuter() {
   195  				parallelizable = false
   196  				lastWasTable = true
   197  				tableSeen = true
   198  				return false
   199  			}
   200  		default:
   201  			parallelizable = false
   202  		}
   203  		return true
   204  	})
   205  
   206  	return parallelizable && tableSeen && lastWasTable
   207  }