github.com/dolthub/go-mysql-server@v0.18.0/sql/memo/select_hints.go (about)

     1  // Copyright 2023 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package memo
    16  
    17  import (
    18  	"fmt"
    19  	"regexp"
    20  	"strings"
    21  
    22  	"github.com/dolthub/go-mysql-server/sql"
    23  	"github.com/dolthub/go-mysql-server/sql/plan"
    24  )
    25  
    26  //go:generate stringer -type=HintType -linecomment
    27  
// HintType identifies the kind of optimizer hint named in a query comment.
//
// The trailing line comment on each constant below is significant: the
// go:generate directive above runs stringer with -linecomment, so that
// comment text becomes the value returned by String() for the constant.
// HintTypeUnknown deliberately has an empty line comment.
type HintType uint8

// TODO implement NO_ICP and JOIN_FIXED_ORDER
const (
	HintTypeUnknown                  HintType = iota //
	HintTypeJoinOrder                                // JOIN_ORDER
	HintTypeJoinFixedOrder                           // JOIN_FIXED_ORDER
	HintTypeMergeJoin                                // MERGE_JOIN
	HintTypeLookupJoin                               // LOOKUP_JOIN
	HintTypeHashJoin                                 // HASH_JOIN
	HintTypeSemiJoin                                 // SEMI_JOIN
	HintTypeAntiJoin                                 // ANTI_JOIN
	HintTypeInnerJoin                                // INNER_JOIN
	HintTypeLeftOuterLookupJoin                      // LEFT_OUTER_LOOKUP_JOIN
	HintTypeNoIndexConditionPushDown                 // NO_ICP
	HintTypeLeftDeep                                 // LEFT_DEEP
)
    45  
// Hint is a single parsed optimizer hint. Typ says which hint it is and
// Args holds the arguments that were written between the hint's
// parentheses, in order; Args is empty for hints written without
// parentheses.
type Hint struct {
	Typ  HintType
	Args []string
}
    50  
    51  func (h Hint) String() string {
    52  	if len(h.Args) > 0 {
    53  		return fmt.Sprintf("%s(%s)", h.Typ, strings.Join(h.Args, ","))
    54  	} else {
    55  		return h.Typ.String()
    56  	}
    57  }
    58  
    59  func newHint(joinTyp string, args []string) Hint {
    60  	var typ HintType
    61  	switch joinTyp {
    62  	case "join_order":
    63  		typ = HintTypeJoinOrder
    64  	case "join_fixed_order":
    65  		typ = HintTypeJoinFixedOrder
    66  	case "merge_join":
    67  		typ = HintTypeMergeJoin
    68  	case "lookup_join":
    69  		typ = HintTypeLookupJoin
    70  	case "hash_join":
    71  		typ = HintTypeHashJoin
    72  	case "inner_join":
    73  		typ = HintTypeInnerJoin
    74  	case "semi_join":
    75  		typ = HintTypeSemiJoin
    76  	case "anti_join":
    77  		typ = HintTypeAntiJoin
    78  	case "left_outer_lookup_join":
    79  		typ = HintTypeLeftOuterLookupJoin
    80  	case "no_icp":
    81  		typ = HintTypeNoIndexConditionPushDown
    82  	case "left_deep":
    83  		typ = HintTypeLeftDeep
    84  	default:
    85  		typ = HintTypeUnknown
    86  	}
    87  	return Hint{Typ: typ, Args: args}
    88  }
    89  
    90  func (h Hint) valid() bool {
    91  	switch h.Typ {
    92  	case HintTypeJoinOrder:
    93  		return len(h.Args) > 0
    94  	case HintTypeJoinFixedOrder:
    95  		return len(h.Args) == 0
    96  	case HintTypeMergeJoin:
    97  		return len(h.Args) == 2
    98  	case HintTypeLookupJoin:
    99  		return len(h.Args) == 2
   100  	case HintTypeHashJoin:
   101  		return len(h.Args) == 2
   102  	case HintTypeInnerJoin:
   103  		return len(h.Args) == 2
   104  	case HintTypeSemiJoin:
   105  		return len(h.Args) == 2
   106  	case HintTypeAntiJoin:
   107  		return len(h.Args) == 2
   108  	case HintTypeLeftOuterLookupJoin:
   109  		return len(h.Args) == 2
   110  	case HintTypeNoIndexConditionPushDown:
   111  		return len(h.Args) == 0
   112  	case HintTypeLeftDeep:
   113  		return len(h.Args) == 0
   114  	case HintTypeUnknown:
   115  		return false
   116  	default:
   117  	}
   118  	return true
   119  }
   120  
// hintRegex matches one candidate hint: a run of lower-case letters and
// underscores (submatch 1), optionally followed by a parenthesized
// argument list whose inner text (submatch 3) contains no '('.
var hintRegex = regexp.MustCompile("([a-z_]+)(\\(([^\\(]+)\\))?")

// argsRegex picks individual arguments out of an argument list. Submatch 1
// is a run of characters that are not '(', ',' or whitespace. Surrounding
// whitespace and at most one trailing ',', whitespace or '*' character are
// consumed by the match but are not part of the submatch.
var argsRegex = regexp.MustCompile("\\s*([^\\(,\\s]+)\\s*[,\\s*]?")
   123  
   124  func ExtractJoinHint(n *plan.JoinNode) []Hint {
   125  	if n.Comment() != "" {
   126  		return parseJoinHints(n.Comment())
   127  	}
   128  	return nil
   129  }
   130  
   131  // TODO: this is pretty nasty. Should be done in the parser instead.
   132  func parseJoinHints(comment string) []Hint {
   133  	if !strings.HasPrefix(comment, "/*+") {
   134  		return nil
   135  	}
   136  	var hints []Hint
   137  	comments := hintRegex.FindAllStringSubmatch(strings.ToLower(comment), -1)
   138  	for _, c := range comments {
   139  		var args []string
   140  		if c[3] != "" {
   141  			argsParsed := argsRegex.FindAllStringSubmatch(c[3], -1)
   142  			for _, arg := range argsParsed {
   143  				args = append(args, arg[1])
   144  			}
   145  		}
   146  		hint := newHint(c[1], args)
   147  		if hint.valid() {
   148  			hints = append(hints, hint)
   149  		}
   150  	}
   151  	return hints
   152  }
   153  
// joinOrderHint encodes a group's relational dependencies in a bitset
// by mapping group ids into join_order ordinals. Remapping source
// relations from group -> join_order ordinal makes it easy to perform
// ordering and compactness checks (see isOrdered and isCompact).
//
// Example:
//
//	G1 -> A
//	G2 -> B
//	G3 -> C
//	G4 -> [G2 G1]
//	G5 -> [G4 G3]
//	JOIN_ORDER(B,A,C) = B = 1, A = 2, C = 3
//	=>
//	{1: 010, 2: 100, 3: 001, 4: 110, 5: 111}
//
// groups is filled in by build and holds, per group id, the set of hint
// ordinals of the tables that group reads. order maps a table id to its
// ordinal in the hint and is supplied by the caller of newJoinOrderHint.
type joinOrderHint struct {
	groups map[GroupId]vertexSet
	order  map[sql.TableId]uint64
	// cache avoids recomputing satisfiability for a RelExpr; it is keyed
	// by relKey(n) and written by satisfiesOrder for join expressions.
	cache map[uint64]bool
}
   175  
   176  func newJoinOrderHint(order map[sql.TableId]uint64) *joinOrderHint {
   177  	return &joinOrderHint{
   178  		groups: make(map[GroupId]vertexSet),
   179  		cache:  make(map[uint64]bool),
   180  		order:  order,
   181  	}
   182  }
   183  
   184  func (o joinOrderHint) build(grp *ExprGroup) {
   185  	s := vertexSet(0)
   186  	// convert global table order to hint order
   187  	inputs := grp.RelProps.InputTables()
   188  	for idx, ok := inputs.Next(0); ok; idx, ok = inputs.Next(idx + 1) {
   189  		if i, ok := o.order[sql.TableId(idx)]; ok {
   190  			// If group |idx+1| is a dependency of this table, record the
   191  			// ordinal position of that group given by the hint order.
   192  			s = s.add(i)
   193  		}
   194  	}
   195  	o.groups[grp.Id] = s
   196  
   197  	for _, g := range grp.children() {
   198  		if _, ok := o.groups[g.Id]; !ok {
   199  			// avoid duplicate work
   200  			o.build(g)
   201  		}
   202  	}
   203  }
   204  
   205  // isValid returns true if the hint parsed correctly
   206  func (o joinOrderHint) isValid() bool {
   207  	for _, v := range o.groups {
   208  		if v == vertexSet(0) {
   209  			// invalid hint table name, fallback
   210  			return false
   211  		}
   212  	}
   213  	return true
   214  }
   215  
   216  func (o joinOrderHint) satisfiesOrder(n RelExpr) bool {
   217  	key := relKey(n)
   218  	if v, ok := o.cache[key]; ok {
   219  		return v
   220  	}
   221  	switch n := n.(type) {
   222  	case JoinRel:
   223  		base := n.JoinPrivate()
   224  		if !base.Left.HintOk || !base.Right.HintOk {
   225  			return false
   226  		}
   227  		l := o.groups[base.Left.Id]
   228  		r := o.groups[base.Right.Id]
   229  		valid := o.isOrdered(l, r) && o.isCompact(l, r)
   230  		o.cache[key] = valid
   231  		return valid
   232  	case *Project:
   233  		return o.satisfiesOrder(n.Child.Best)
   234  	case *Distinct:
   235  		return o.satisfiesOrder(n.Child.Best)
   236  	case *Filter:
   237  		return o.satisfiesOrder(n.Child.Best)
   238  	case SourceRel:
   239  		return true
   240  	default:
   241  		panic(fmt.Sprintf("missed type: %T", n))
   242  	}
   243  }
   244  
   245  // isOrdered returns true if the vertex sets obey the table
   246  // order requested by the hint.
   247  //
   248  // Ex: JOIN_ORDER(a,b,c) is ordered on [b]x[c], and
   249  // not on on [c]x[b].
   250  func (o joinOrderHint) isOrdered(s1, s2 vertexSet) bool {
   251  	return s1 < s2
   252  }
   253  
   254  // isCompact returns true if the tables in the joined result
   255  // set are a contiguous subsection of the order hint.
   256  //
   257  // Ex: JOIN_ORDER(a,b,c) is compact on [b]x[c], and not
   258  // on [a]x[c].
   259  func (o joinOrderHint) isCompact(s1, s2 vertexSet) bool {
   260  	if s1 == 0 || s2 == 0 {
   261  		panic("unexpected nil vertex set")
   262  	}
   263  	union := s1.union(s2)
   264  	last, _ := union.next(0)
   265  	next, ok := union.next(last + 1)
   266  	for ok {
   267  		if last+1 != next {
   268  			return false
   269  		}
   270  		last = next
   271  		next, ok = union.next(next + 1)
   272  	}
   273  
   274  	// sets are compact, s1 higher than s2
   275  	return true
   276  }
   277  
// joinOpHint encodes a hint for a physical operator between
// two relations. op is the hinted operator; l and r are the table id
// sets built from the hint's left and right table arguments.
type joinOpHint struct {
	op   HintType
	l, r sql.FastIntSet
}
   284  
   285  func newjoinOpHint(op HintType, left, right sql.TableId) joinOpHint {
   286  	return joinOpHint{
   287  		op: op,
   288  		l:  sql.NewFastIntSet(int(left)),
   289  		r:  sql.NewFastIntSet(int(right)),
   290  	}
   291  }
   292  
   293  // isValid returns true if the hint parsed correctly
   294  func (o joinOpHint) isValid() bool {
   295  	return !o.l.Empty() && !o.r.Empty()
   296  }
   297  
   298  // depsMatch returns whether this RelExpr is a join with left/right inputs
   299  // that match the join hint.
   300  //
   301  // Ex: LOOKUP_JOIN(a,b) will match [a] x [b], and [ac] x [b],
   302  // but not [ab] x [c].
   303  func (o joinOpHint) depsMatch(n RelExpr) bool {
   304  	switch n := n.(type) {
   305  	case *Project:
   306  		return o.depsMatch(n.Child.Best)
   307  	case *Filter:
   308  		return o.depsMatch(n.Child.Best)
   309  	case *Distinct:
   310  		return o.depsMatch(n.Child.Best)
   311  	case JoinRel:
   312  		jp := n.JoinPrivate()
   313  		if !jp.Left.Best.Group().HintOk || !jp.Right.Best.Group().HintOk {
   314  			// equiv closures can generate child plans that bypass hints
   315  			return false
   316  		}
   317  
   318  		leftTab := jp.Left.RelProps.InputTables()
   319  		rightTab := jp.Right.RelProps.InputTables()
   320  		deps := o.l.Union(o.r)
   321  		if deps.SubsetOf(leftTab.Union(rightTab)) &&
   322  			!deps.SubsetOf(leftTab) &&
   323  			!deps.SubsetOf(rightTab) {
   324  			// join tables satisfy but partition the hint rels
   325  			return true
   326  		}
   327  	default:
   328  		return true
   329  	}
   330  	return false
   331  }
   332  
   333  // typeMatches returns whether a RelExpr implements
   334  // the physical join operator indicated by the hint.
   335  //
   336  // Ex: MERGE_JOIN(a,b) will match merge and left-merge joins.
   337  func (o joinOpHint) typeMatches(n RelExpr) bool {
   338  	switch n := n.(type) {
   339  	case JoinRel:
   340  		base := n.JoinPrivate()
   341  		switch o.op {
   342  		case HintTypeLookupJoin:
   343  			return base.Op.IsLookup()
   344  		case HintTypeMergeJoin:
   345  			return base.Op.IsMerge()
   346  		case HintTypeInnerJoin:
   347  			return !base.Op.IsPhysical()
   348  		case HintTypeHashJoin:
   349  			return base.Op.IsHash()
   350  		case HintTypeSemiJoin:
   351  			return base.Op.IsSemi() && !base.Op.IsPhysical()
   352  		case HintTypeAntiJoin:
   353  			return base.Op.IsAnti() && !base.Op.IsPhysical()
   354  		case HintTypeLeftOuterLookupJoin:
   355  			return base.Op == plan.JoinTypeLeftOuterLookup
   356  		default:
   357  			return false
   358  		}
   359  	case *Project:
   360  		return o.typeMatches(n.Child.Best)
   361  	case *Filter:
   362  		return o.typeMatches(n.Child.Best)
   363  	case *Distinct:
   364  		return o.typeMatches(n.Child.Best)
   365  	default:
   366  	}
   367  	return true
   368  }
   369  
// joinHints wraps a collection of join hints. The memo
// interfaces with this object during costing.
//
// ops holds the physical operator hints, order is the optional join order
// hint (nil when absent), and leftDeep records whether a left-deep plan
// was requested.
type joinHints struct {
	ops      []joinOpHint
	order    *joinOrderHint
	leftDeep bool
}
   377  
   378  func (h joinHints) isEmpty() bool {
   379  	return len(h.ops) == 0 && h.order == nil && !h.leftDeep
   380  }
   381  
   382  // satisfiedBy returns whether a RelExpr satisfies every join hint. This
   383  // is binary, an expr that satisfies most of the join hints but fails one
   384  // returns |false| and is subject to genpop costing.
   385  func (h joinHints) satisfiedBy(n RelExpr) bool {
   386  	if h.order != nil && !h.order.satisfiesOrder(n) {
   387  		return false
   388  	}
   389  
   390  	if h.leftDeep {
   391  		if j, ok := n.(JoinRel); ok {
   392  			if j.JoinPrivate().Right.RelProps.InputTables().Len() > 1 {
   393  				return false
   394  			}
   395  		}
   396  	}
   397  
   398  	if h.ops == nil {
   399  		return true
   400  	}
   401  
   402  	var foundMatch bool
   403  	for _, op := range h.ops {
   404  		if op.depsMatch(n) {
   405  			foundMatch = true
   406  			if !op.typeMatches(n) {
   407  				return false
   408  			}
   409  		}
   410  	}
   411  	return foundMatch
   412  }