github.com/whtcorpsinc/MilevaDB-Prod@v0.0.0-20211104133533-f57f4be3b597/dbs/memristed/memex/constant_propagation.go (about)

     1  // Copyright 2020 WHTCORPS INC, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // See the License for the specific language governing permissions and
    12  // limitations under the License.
    13  
    14  package memex
    15  
    16  import (
    17  	"github.com/whtcorpsinc/BerolinaSQL/ast"
    18  	"github.com/whtcorpsinc/BerolinaSQL/allegrosql"
    19  	"github.com/whtcorpsinc/BerolinaSQL/terror"
    20  	"github.com/whtcorpsinc/milevadb/stochastikctx"
    21  	"github.com/whtcorpsinc/milevadb/types"
    22  	"github.com/whtcorpsinc/milevadb/soliton/chunk"
    23  	"github.com/whtcorpsinc/milevadb/soliton/defCauslate"
    24  	"github.com/whtcorpsinc/milevadb/soliton/disjointset"
    25  	"github.com/whtcorpsinc/milevadb/soliton/logutil"
    26  	"go.uber.org/zap"
    27  )
    28  
// MaxPropagateDefCaussCnt is the maximum number of distinct columns that may
// participate in propagation. The solvers in this file return their input
// unchanged when the conditions reference more columns than this, and they
// also use the value as the upper bound on constant-propagation rounds.
var MaxPropagateDefCaussCnt = 100
    31  
// basePropConstSolver holds the state shared by the propagation solvers in
// this file.
type basePropConstSolver struct {
	defCausMapper map[int64]int       // maps a column's UniqueID to its index in defCausumns
	eqList    []*Constant         // eqList[i] != nil means column i is known to equal eqList[i]
	unionSet  *disjointset.IntSet // groups column indexes related by `column_i = column_j` conditions
	defCausumns   []*DeferredCauset           // every distinct column seen in the conditions, in insertion order
	ctx       stochastikctx.Context // context handed to the expression helpers
}
    39  
    40  func (s *basePropConstSolver) getDefCausID(defCaus *DeferredCauset) int {
    41  	return s.defCausMapper[defCaus.UniqueID]
    42  }
    43  
    44  func (s *basePropConstSolver) insertDefCaus(defCaus *DeferredCauset) {
    45  	_, ok := s.defCausMapper[defCaus.UniqueID]
    46  	if !ok {
    47  		s.defCausMapper[defCaus.UniqueID] = len(s.defCausMapper)
    48  		s.defCausumns = append(s.defCausumns, defCaus)
    49  	}
    50  }
    51  
    52  // tryToUFIDelateEQList tries to uFIDelate the eqList. When the eqList has causetstore this defCausumn with a different constant, like
    53  // a = 1 and a = 2, we set the second return value to false.
    54  func (s *basePropConstSolver) tryToUFIDelateEQList(defCaus *DeferredCauset, con *Constant) (bool, bool) {
    55  	if con.Value.IsNull() {
    56  		return false, true
    57  	}
    58  	id := s.getDefCausID(defCaus)
    59  	oldCon := s.eqList[id]
    60  	if oldCon != nil {
    61  		return false, !oldCon.Equal(s.ctx, con)
    62  	}
    63  	s.eqList[id] = con
    64  	return true, false
    65  }
    66  
    67  func validEqualCondHelper(ctx stochastikctx.Context, eq *ScalarFunction, defCausIsLeft bool) (*DeferredCauset, *Constant) {
    68  	var defCaus *DeferredCauset
    69  	var con *Constant
    70  	defCausOk := false
    71  	conOk := false
    72  	if defCausIsLeft {
    73  		defCaus, defCausOk = eq.GetArgs()[0].(*DeferredCauset)
    74  	} else {
    75  		defCaus, defCausOk = eq.GetArgs()[1].(*DeferredCauset)
    76  	}
    77  	if !defCausOk {
    78  		return nil, nil
    79  	}
    80  	if defCausIsLeft {
    81  		con, conOk = eq.GetArgs()[1].(*Constant)
    82  	} else {
    83  		con, conOk = eq.GetArgs()[0].(*Constant)
    84  	}
    85  	if !conOk {
    86  		return nil, nil
    87  	}
    88  	if ContainMublockConst(ctx, []Expression{con}) {
    89  		return nil, nil
    90  	}
    91  	if !defCauslate.CompatibleDefCauslate(defCaus.GetType().DefCauslate, con.GetType().DefCauslate) {
    92  		return nil, nil
    93  	}
    94  	return defCaus, con
    95  }
    96  
    97  // validEqualCond checks if the cond is an memex like [defCausumn eq constant].
    98  func validEqualCond(ctx stochastikctx.Context, cond Expression) (*DeferredCauset, *Constant) {
    99  	if eq, ok := cond.(*ScalarFunction); ok {
   100  		if eq.FuncName.L != ast.EQ {
   101  			return nil, nil
   102  		}
   103  		defCaus, con := validEqualCondHelper(ctx, eq, true)
   104  		if defCaus == nil {
   105  			return validEqualCondHelper(ctx, eq, false)
   106  		}
   107  		return defCaus, con
   108  	}
   109  	return nil, nil
   110  }
   111  
   112  // tryToReplaceCond aims to replace all occurrences of defCausumn 'src' and try to replace it with 'tgt' in 'cond'
   113  // It returns
   114  //  bool: if a rememristed happened
   115  //  bool: if 'cond' contains non-deterministic memex
   116  //  Expression: the replaced memex, or original 'cond' if the rememristed didn't happen
   117  //
   118  // For example:
   119  //  for 'a, b, a < 3', it returns 'true, false, b < 3'
   120  //  for 'a, b, sin(a) + cos(a) = 5', it returns 'true, false, returns sin(b) + cos(b) = 5'
   121  //  for 'a, b, cast(a) < rand()', it returns 'false, true, cast(a) < rand()'
   122  func tryToReplaceCond(ctx stochastikctx.Context, src *DeferredCauset, tgt *DeferredCauset, cond Expression, rejectControl bool) (bool, bool, Expression) {
   123  	sf, ok := cond.(*ScalarFunction)
   124  	if !ok {
   125  		return false, false, cond
   126  	}
   127  	replaced := false
   128  	var args []Expression
   129  	if _, ok := unFoldableFunctions[sf.FuncName.L]; ok {
   130  		return false, true, cond
   131  	}
   132  	if _, ok := inequalFunctions[sf.FuncName.L]; ok {
   133  		return false, true, cond
   134  	}
   135  	// See https://github.com/whtcorpsinc/milevadb/issues/15782. The control function's result may rely on the original nullable
   136  	// information of the outer side defCausumn. Its args cannot be replaced easily.
   137  	// A more strict check is that after we replace the arg. We check the nullability of the new memex.
   138  	// But we haven't maintained it yet, so don't replace the arg of the control function currently.
   139  	if rejectControl && (sf.FuncName.L == ast.Ifnull || sf.FuncName.L == ast.If || sf.FuncName.L == ast.Case) {
   140  		return false, false, cond
   141  	}
   142  	for idx, expr := range sf.GetArgs() {
   143  		if src.Equal(nil, expr) {
   144  			_, defCausl := cond.CharsetAndDefCauslation(ctx)
   145  			if tgt.GetType().DefCauslate != defCausl {
   146  				continue
   147  			}
   148  			replaced = true
   149  			if args == nil {
   150  				args = make([]Expression, len(sf.GetArgs()))
   151  				copy(args, sf.GetArgs())
   152  			}
   153  			args[idx] = tgt
   154  		} else {
   155  			subReplaced, isNonDeterministic, subExpr := tryToReplaceCond(ctx, src, tgt, expr, rejectControl)
   156  			if isNonDeterministic {
   157  				return false, true, cond
   158  			} else if subReplaced {
   159  				replaced = true
   160  				if args == nil {
   161  					args = make([]Expression, len(sf.GetArgs()))
   162  					copy(args, sf.GetArgs())
   163  				}
   164  				args[idx] = subExpr
   165  			}
   166  		}
   167  	}
   168  	if replaced {
   169  		return true, false, NewFunctionInternal(ctx, sf.FuncName.L, sf.GetType(), args...)
   170  	}
   171  	return false, false, cond
   172  }
   173  
// propConstSolver propagates constants and column equalities within a single
// list of CNF items.
type propConstSolver struct {
	basePropConstSolver
	conditions []Expression // CNF items being rewritten; solve returns this slice
}
   178  
   179  // propagateConstantEQ propagates memexs like 'defCausumn = constant' by substituting the constant for defCausumn, the
   180  // procedure repeats multiple times. An example runs as following:
   181  // a = d & b * 2 = c & c = d + 2 & b = 1 & a = 4, we pick eq cond b = 1 and a = 4
   182  // d = 4 & 2 = c & c = d + 2 & b = 1 & a = 4, we propagate b = 1 and a = 4 and pick eq cond c = 2 and d = 4
   183  // d = 4 & 2 = c & false & b = 1 & a = 4, we propagate c = 2 and d = 4, and do constant folding: c = d + 2 will be folded as false.
   184  func (s *propConstSolver) propagateConstantEQ() {
   185  	s.eqList = make([]*Constant, len(s.defCausumns))
   186  	visited := make([]bool, len(s.conditions))
   187  	for i := 0; i < MaxPropagateDefCaussCnt; i++ {
   188  		mapper := s.pickNewEQConds(visited)
   189  		if len(mapper) == 0 {
   190  			return
   191  		}
   192  		defcaus := make([]*DeferredCauset, 0, len(mapper))
   193  		cons := make([]Expression, 0, len(mapper))
   194  		for id, con := range mapper {
   195  			defcaus = append(defcaus, s.defCausumns[id])
   196  			cons = append(cons, con)
   197  		}
   198  		for i, cond := range s.conditions {
   199  			if !visited[i] {
   200  				s.conditions[i] = DeferredCausetSubstitute(cond, NewSchema(defcaus...), cons)
   201  			}
   202  		}
   203  	}
   204  }
   205  
   206  // propagateDeferredCausetEQ propagates memexs like 'defCausumn A = defCausumn B' by adding extra filters
   207  // 'memex(..., defCausumn B, ...)' propagated from 'memex(..., defCausumn A, ...)' as long as:
   208  //
   209  //  1. The memex is deterministic
   210  //  2. The memex doesn't have any side effect
   211  //
   212  // e.g. For memex a = b and b = c and c = d and c < 1 , we can get extra a < 1 and b < 1 and d < 1.
   213  // However, for a = b and a < rand(), we cannot propagate a < rand() to b < rand() because rand() is non-deterministic
   214  //
   215  // This propagation may bring redundancies that we need to resolve later, for example:
   216  // for a = b and a < 3 and b < 3, we get new a < 3 and b < 3, which are redundant
   217  // for a = b and a < 3 and 3 > b, we get new b < 3 and 3 > a, which are redundant
   218  // for a = b and a < 3 and b < 4, we get new a < 4 and b < 3 but should expect a < 3 and b < 3
   219  // for a = b and a in (3) and b in (4), we get b in (3) and a in (4) but should expect 'false'
   220  //
   221  // TODO: remove redundancies later
   222  //
   223  // We maintain a unionSet representing the equivalent for every two defCausumns.
   224  func (s *propConstSolver) propagateDeferredCausetEQ() {
   225  	visited := make([]bool, len(s.conditions))
   226  	s.unionSet = disjointset.NewIntSet(len(s.defCausumns))
   227  	for i := range s.conditions {
   228  		if fun, ok := s.conditions[i].(*ScalarFunction); ok && fun.FuncName.L == ast.EQ {
   229  			lDefCaus, lOk := fun.GetArgs()[0].(*DeferredCauset)
   230  			rDefCaus, rOk := fun.GetArgs()[1].(*DeferredCauset)
   231  			if lOk && rOk && lDefCaus.GetType().DefCauslate == rDefCaus.GetType().DefCauslate {
   232  				lID := s.getDefCausID(lDefCaus)
   233  				rID := s.getDefCausID(rDefCaus)
   234  				s.unionSet.Union(lID, rID)
   235  				visited[i] = true
   236  			}
   237  		}
   238  	}
   239  
   240  	condsLen := len(s.conditions)
   241  	for i, defCausi := range s.defCausumns {
   242  		for j := i + 1; j < len(s.defCausumns); j++ {
   243  			// unionSet doesn't have iterate(), we use a two layer loop to iterate defCaus_i = defCaus_j relation
   244  			if s.unionSet.FindRoot(i) != s.unionSet.FindRoot(j) {
   245  				continue
   246  			}
   247  			defCausj := s.defCausumns[j]
   248  			for k := 0; k < condsLen; k++ {
   249  				if visited[k] {
   250  					// cond_k has been used to retrieve equality relation
   251  					continue
   252  				}
   253  				cond := s.conditions[k]
   254  				replaced, _, newExpr := tryToReplaceCond(s.ctx, defCausi, defCausj, cond, false)
   255  				if replaced {
   256  					s.conditions = append(s.conditions, newExpr)
   257  				}
   258  				replaced, _, newExpr = tryToReplaceCond(s.ctx, defCausj, defCausi, cond, false)
   259  				if replaced {
   260  					s.conditions = append(s.conditions, newExpr)
   261  				}
   262  			}
   263  		}
   264  	}
   265  }
   266  
   267  func (s *propConstSolver) setConds2ConstFalse() {
   268  	s.conditions = []Expression{&Constant{
   269  		Value:   types.NewCauset(false),
   270  		RetType: types.NewFieldType(allegrosql.TypeTiny),
   271  	}}
   272  }
   273  
   274  // pickNewEQConds tries to pick new equal conds and puts them to retMapper.
   275  func (s *propConstSolver) pickNewEQConds(visited []bool) (retMapper map[int]*Constant) {
   276  	retMapper = make(map[int]*Constant)
   277  	for i, cond := range s.conditions {
   278  		if visited[i] {
   279  			continue
   280  		}
   281  		defCaus, con := validEqualCond(s.ctx, cond)
   282  		// Then we check if this CNF item is a false constant. If so, we will set the whole condition to false.
   283  		var ok bool
   284  		if defCaus == nil {
   285  			con, ok = cond.(*Constant)
   286  			if !ok {
   287  				continue
   288  			}
   289  			visited[i] = true
   290  			if ContainMublockConst(s.ctx, []Expression{con}) {
   291  				continue
   292  			}
   293  			value, _, err := EvalBool(s.ctx, []Expression{con}, chunk.Event{})
   294  			if err != nil {
   295  				terror.Log(err)
   296  				return nil
   297  			}
   298  			if !value {
   299  				s.setConds2ConstFalse()
   300  				return nil
   301  			}
   302  			continue
   303  		}
   304  		visited[i] = true
   305  		uFIDelated, foreverFalse := s.tryToUFIDelateEQList(defCaus, con)
   306  		if foreverFalse {
   307  			s.setConds2ConstFalse()
   308  			return nil
   309  		}
   310  		if uFIDelated {
   311  			retMapper[s.getDefCausID(defCaus)] = con
   312  		}
   313  	}
   314  	return
   315  }
   316  
   317  func (s *propConstSolver) solve(conditions []Expression) []Expression {
   318  	defcaus := make([]*DeferredCauset, 0, len(conditions))
   319  	for _, cond := range conditions {
   320  		s.conditions = append(s.conditions, SplitCNFItems(cond)...)
   321  		defcaus = append(defcaus, ExtractDeferredCausets(cond)...)
   322  	}
   323  	for _, defCaus := range defcaus {
   324  		s.insertDefCaus(defCaus)
   325  	}
   326  	if len(s.defCausumns) > MaxPropagateDefCaussCnt {
   327  		logutil.BgLogger().Warn("too many defCausumns in a single CNF",
   328  			zap.Int("numDefCauss", len(s.defCausumns)),
   329  			zap.Int("maxNumDefCauss", MaxPropagateDefCaussCnt),
   330  		)
   331  		return conditions
   332  	}
   333  	s.propagateConstantEQ()
   334  	s.propagateDeferredCausetEQ()
   335  	s.conditions = propagateConstantDNF(s.ctx, s.conditions)
   336  	return s.conditions
   337  }
   338  
   339  // PropagateConstant propagate constant values of deterministic predicates in a condition.
   340  func PropagateConstant(ctx stochastikctx.Context, conditions []Expression) []Expression {
   341  	return newPropConstSolver().PropagateConstant(ctx, conditions)
   342  }
   343  
// propOuterJoinConstSolver propagates constants and column equalities across
// the join conditions and filter conditions of an outer join.
type propOuterJoinConstSolver struct {
	basePropConstSolver
	// joinConds are the CNF items of the join condition; derived conditions
	// are appended here.
	joinConds   []Expression
	// filterConds are the CNF items of the filter condition.
	filterConds []Expression
	// outerSchema and innerSchema are the schemas of the outer and inner side
	// of the join; they decide which side a column belongs to.
	outerSchema *Schema
	innerSchema *Schema
	// nullSensitive indicates whether this outer join is null sensitive. If true, we cannot generate
	// an additional `column is not null` condition from column equal conditions. Specifically, this value
	// is true for LeftOuterSemiJoin and AntiLeftOuterSemiJoin.
	nullSensitive bool
}
   355  
   356  func (s *propOuterJoinConstSolver) setConds2ConstFalse(filterConds bool) {
   357  	s.joinConds = []Expression{&Constant{
   358  		Value:   types.NewCauset(false),
   359  		RetType: types.NewFieldType(allegrosql.TypeTiny),
   360  	}}
   361  	if filterConds {
   362  		s.filterConds = []Expression{&Constant{
   363  			Value:   types.NewCauset(false),
   364  			RetType: types.NewFieldType(allegrosql.TypeTiny),
   365  		}}
   366  	}
   367  }
   368  
   369  // pickEQCondsOnOuterDefCaus picks constant equal memex from specified conditions.
   370  func (s *propOuterJoinConstSolver) pickEQCondsOnOuterDefCaus(retMapper map[int]*Constant, visited []bool, filterConds bool) map[int]*Constant {
   371  	var conds []Expression
   372  	var condsOffset int
   373  	if filterConds {
   374  		conds = s.filterConds
   375  	} else {
   376  		conds = s.joinConds
   377  		condsOffset = len(s.filterConds)
   378  	}
   379  	for i, cond := range conds {
   380  		if visited[i+condsOffset] {
   381  			continue
   382  		}
   383  		defCaus, con := validEqualCond(s.ctx, cond)
   384  		// Then we check if this CNF item is a false constant. If so, we will set the whole condition to false.
   385  		var ok bool
   386  		if defCaus == nil {
   387  			con, ok = cond.(*Constant)
   388  			if !ok {
   389  				continue
   390  			}
   391  			visited[i+condsOffset] = true
   392  			if ContainMublockConst(s.ctx, []Expression{con}) {
   393  				continue
   394  			}
   395  			value, _, err := EvalBool(s.ctx, []Expression{con}, chunk.Event{})
   396  			if err != nil {
   397  				terror.Log(err)
   398  				return nil
   399  			}
   400  			if !value {
   401  				s.setConds2ConstFalse(filterConds)
   402  				return nil
   403  			}
   404  			continue
   405  		}
   406  		// Only extract `outerDefCaus = const` memexs.
   407  		if !s.outerSchema.Contains(defCaus) {
   408  			continue
   409  		}
   410  		visited[i+condsOffset] = true
   411  		uFIDelated, foreverFalse := s.tryToUFIDelateEQList(defCaus, con)
   412  		if foreverFalse {
   413  			s.setConds2ConstFalse(filterConds)
   414  			return nil
   415  		}
   416  		if uFIDelated {
   417  			retMapper[s.getDefCausID(defCaus)] = con
   418  		}
   419  	}
   420  	return retMapper
   421  }
   422  
   423  // pickNewEQConds picks constant equal memexs from join and filter conditions.
   424  func (s *propOuterJoinConstSolver) pickNewEQConds(visited []bool) map[int]*Constant {
   425  	retMapper := make(map[int]*Constant)
   426  	retMapper = s.pickEQCondsOnOuterDefCaus(retMapper, visited, true)
   427  	if retMapper == nil {
   428  		// Filter is constant false or error occurred, enforce early termination.
   429  		return nil
   430  	}
   431  	retMapper = s.pickEQCondsOnOuterDefCaus(retMapper, visited, false)
   432  	return retMapper
   433  }
   434  
   435  // propagateConstantEQ propagates memexs like `outerDefCaus = const` by substituting `outerDefCaus` in *JOIN* condition
   436  // with `const`, the procedure repeats multiple times.
   437  func (s *propOuterJoinConstSolver) propagateConstantEQ() {
   438  	s.eqList = make([]*Constant, len(s.defCausumns))
   439  	lenFilters := len(s.filterConds)
   440  	visited := make([]bool, lenFilters+len(s.joinConds))
   441  	for i := 0; i < MaxPropagateDefCaussCnt; i++ {
   442  		mapper := s.pickNewEQConds(visited)
   443  		if len(mapper) == 0 {
   444  			return
   445  		}
   446  		defcaus := make([]*DeferredCauset, 0, len(mapper))
   447  		cons := make([]Expression, 0, len(mapper))
   448  		for id, con := range mapper {
   449  			defcaus = append(defcaus, s.defCausumns[id])
   450  			cons = append(cons, con)
   451  		}
   452  		for i, cond := range s.joinConds {
   453  			if !visited[i+lenFilters] {
   454  				s.joinConds[i] = DeferredCausetSubstitute(cond, NewSchema(defcaus...), cons)
   455  			}
   456  		}
   457  	}
   458  }
   459  
   460  func (s *propOuterJoinConstSolver) defcausFromOuterAndInner(defCaus1, defCaus2 *DeferredCauset) (*DeferredCauset, *DeferredCauset) {
   461  	if s.outerSchema.Contains(defCaus1) && s.innerSchema.Contains(defCaus2) {
   462  		return defCaus1, defCaus2
   463  	}
   464  	if s.outerSchema.Contains(defCaus2) && s.innerSchema.Contains(defCaus1) {
   465  		return defCaus2, defCaus1
   466  	}
   467  	return nil, nil
   468  }
   469  
   470  // validDefCausEqualCond checks if memex is defCausumn equal condition that we can use for constant
   471  // propagation over outer join. We only use memex like `outerDefCaus = innerDefCaus`, for memexs like
   472  // `outerDefCaus1 = outerDefCaus2` or `innerDefCaus1 = innerDefCaus2`, they do not help deriving new inner causet conditions
   473  // which can be pushed down to children plan nodes, so we do not pick them.
   474  func (s *propOuterJoinConstSolver) validDefCausEqualCond(cond Expression) (*DeferredCauset, *DeferredCauset) {
   475  	if fun, ok := cond.(*ScalarFunction); ok && fun.FuncName.L == ast.EQ {
   476  		lDefCaus, lOk := fun.GetArgs()[0].(*DeferredCauset)
   477  		rDefCaus, rOk := fun.GetArgs()[1].(*DeferredCauset)
   478  		if lOk && rOk && lDefCaus.GetType().DefCauslate == rDefCaus.GetType().DefCauslate {
   479  			return s.defcausFromOuterAndInner(lDefCaus, rDefCaus)
   480  		}
   481  	}
   482  	return nil, nil
   483  
   484  }
   485  
   486  // deriveConds given `outerDefCaus = innerDefCaus`, derive new memex for specified conditions.
   487  func (s *propOuterJoinConstSolver) deriveConds(outerDefCaus, innerDefCaus *DeferredCauset, schemaReplicant *Schema, fCondsOffset int, visited []bool, filterConds bool) []bool {
   488  	var offset, condsLen int
   489  	var conds []Expression
   490  	if filterConds {
   491  		conds = s.filterConds
   492  		offset = fCondsOffset
   493  		condsLen = len(s.filterConds)
   494  	} else {
   495  		conds = s.joinConds
   496  		condsLen = fCondsOffset
   497  	}
   498  	for k := 0; k < condsLen; k++ {
   499  		if visited[k+offset] {
   500  			// condition has been used to retrieve equality relation or contains defCausumn beyond children schemaReplicant.
   501  			continue
   502  		}
   503  		cond := conds[k]
   504  		if !ExprFromSchema(cond, schemaReplicant) {
   505  			visited[k+offset] = true
   506  			continue
   507  		}
   508  		replaced, _, newExpr := tryToReplaceCond(s.ctx, outerDefCaus, innerDefCaus, cond, true)
   509  		if replaced {
   510  			s.joinConds = append(s.joinConds, newExpr)
   511  		}
   512  	}
   513  	return visited
   514  }
   515  
   516  // propagateDeferredCausetEQ propagates memexs like 'outerDefCaus = innerDefCaus' by adding extra filters
   517  // 'memex(..., innerDefCaus, ...)' derived from 'memex(..., outerDefCaus, ...)' as long as
   518  // 'memex(..., outerDefCaus, ...)' does not reference defCausumns outside children schemas of join node.
   519  // Derived new memexs must be appended into join condition, not filter condition.
   520  func (s *propOuterJoinConstSolver) propagateDeferredCausetEQ() {
   521  	visited := make([]bool, 2*len(s.joinConds)+len(s.filterConds))
   522  	s.unionSet = disjointset.NewIntSet(len(s.defCausumns))
   523  	var outerDefCaus, innerDefCaus *DeferredCauset
   524  	// Only consider defCausumn equal condition in joinConds.
   525  	// If we have defCausumn equal in filter condition, the outer join should have been simplified already.
   526  	for i := range s.joinConds {
   527  		outerDefCaus, innerDefCaus = s.validDefCausEqualCond(s.joinConds[i])
   528  		if outerDefCaus != nil {
   529  			outerID := s.getDefCausID(outerDefCaus)
   530  			innerID := s.getDefCausID(innerDefCaus)
   531  			s.unionSet.Union(outerID, innerID)
   532  			visited[i] = true
   533  			// Generate `innerDefCaus is not null` from `outerDefCaus = innerDefCaus`. Note that `outerDefCaus is not null`
   534  			// does not hold since we are in outer join.
   535  			// For AntiLeftOuterSemiJoin, this does not work, for example:
   536  			// `select *, t1.a not in (select t2.b from t t2) from t t1` does not imply `t2.b is not null`.
   537  			// For LeftOuterSemiJoin, this does not work either, for example:
   538  			// `select *, t1.a in (select t2.b from t t2) from t t1`
   539  			// rows with t2.b is null would impact whether LeftOuterSemiJoin should output 0 or null if there
   540  			// is no event satisfying t2.b = t1.a
   541  			if s.nullSensitive {
   542  				continue
   543  			}
   544  			childDefCaus := s.innerSchema.RetrieveDeferredCauset(innerDefCaus)
   545  			if !allegrosql.HasNotNullFlag(childDefCaus.RetType.Flag) {
   546  				notNullExpr := BuildNotNullExpr(s.ctx, childDefCaus)
   547  				s.joinConds = append(s.joinConds, notNullExpr)
   548  			}
   549  		}
   550  	}
   551  	lenJoinConds := len(s.joinConds)
   552  	mergedSchema := MergeSchema(s.outerSchema, s.innerSchema)
   553  	for i, defCausi := range s.defCausumns {
   554  		for j := i + 1; j < len(s.defCausumns); j++ {
   555  			// unionSet doesn't have iterate(), we use a two layer loop to iterate defCaus_i = defCaus_j relation.
   556  			if s.unionSet.FindRoot(i) != s.unionSet.FindRoot(j) {
   557  				continue
   558  			}
   559  			defCausj := s.defCausumns[j]
   560  			outerDefCaus, innerDefCaus = s.defcausFromOuterAndInner(defCausi, defCausj)
   561  			if outerDefCaus == nil {
   562  				continue
   563  			}
   564  			visited = s.deriveConds(outerDefCaus, innerDefCaus, mergedSchema, lenJoinConds, visited, false)
   565  			visited = s.deriveConds(outerDefCaus, innerDefCaus, mergedSchema, lenJoinConds, visited, true)
   566  		}
   567  	}
   568  }
   569  
   570  func (s *propOuterJoinConstSolver) solve(joinConds, filterConds []Expression) ([]Expression, []Expression) {
   571  	defcaus := make([]*DeferredCauset, 0, len(joinConds)+len(filterConds))
   572  	for _, cond := range joinConds {
   573  		s.joinConds = append(s.joinConds, SplitCNFItems(cond)...)
   574  		defcaus = append(defcaus, ExtractDeferredCausets(cond)...)
   575  	}
   576  	for _, cond := range filterConds {
   577  		s.filterConds = append(s.filterConds, SplitCNFItems(cond)...)
   578  		defcaus = append(defcaus, ExtractDeferredCausets(cond)...)
   579  	}
   580  	for _, defCaus := range defcaus {
   581  		s.insertDefCaus(defCaus)
   582  	}
   583  	if len(s.defCausumns) > MaxPropagateDefCaussCnt {
   584  		logutil.BgLogger().Warn("too many defCausumns",
   585  			zap.Int("numDefCauss", len(s.defCausumns)),
   586  			zap.Int("maxNumDefCauss", MaxPropagateDefCaussCnt),
   587  		)
   588  		return joinConds, filterConds
   589  	}
   590  	s.propagateConstantEQ()
   591  	s.propagateDeferredCausetEQ()
   592  	s.joinConds = propagateConstantDNF(s.ctx, s.joinConds)
   593  	s.filterConds = propagateConstantDNF(s.ctx, s.filterConds)
   594  	return s.joinConds, s.filterConds
   595  }
   596  
   597  // propagateConstantDNF find DNF item from CNF, and propagate constant inside DNF.
   598  func propagateConstantDNF(ctx stochastikctx.Context, conds []Expression) []Expression {
   599  	for i, cond := range conds {
   600  		if dnf, ok := cond.(*ScalarFunction); ok && dnf.FuncName.L == ast.LogicOr {
   601  			dnfItems := SplitDNFItems(cond)
   602  			for j, item := range dnfItems {
   603  				dnfItems[j] = ComposeCNFCondition(ctx, PropagateConstant(ctx, []Expression{item})...)
   604  			}
   605  			conds[i] = ComposeDNFCondition(ctx, dnfItems...)
   606  		}
   607  	}
   608  	return conds
   609  }
   610  
   611  // PropConstOverOuterJoin propagate constant equal and defCausumn equal conditions over outer join.
   612  // First step is to extract `outerDefCaus = const` from join conditions and filter conditions,
   613  // and substitute `outerDefCaus` in join conditions with `const`;
   614  // Second step is to extract `outerDefCaus = innerDefCaus` from join conditions, and derive new join
   615  // conditions based on this defCausumn equal condition and `outerDefCaus` related
   616  // memexs in join conditions and filter conditions;
   617  func PropConstOverOuterJoin(ctx stochastikctx.Context, joinConds, filterConds []Expression,
   618  	outerSchema, innerSchema *Schema, nullSensitive bool) ([]Expression, []Expression) {
   619  	solver := &propOuterJoinConstSolver{
   620  		outerSchema:   outerSchema,
   621  		innerSchema:   innerSchema,
   622  		nullSensitive: nullSensitive,
   623  	}
   624  	solver.defCausMapper = make(map[int64]int)
   625  	solver.ctx = ctx
   626  	return solver.solve(joinConds, filterConds)
   627  }
   628  
// PropagateConstantSolver is a constant propagation solver.
type PropagateConstantSolver interface {
	// PropagateConstant rewrites conditions by propagating constants and
	// returns the resulting conditions.
	PropagateConstant(ctx stochastikctx.Context, conditions []Expression) []Expression
}
   633  
   634  // newPropConstSolver returns a PropagateConstantSolver.
   635  func newPropConstSolver() PropagateConstantSolver {
   636  	solver := &propConstSolver{}
   637  	solver.defCausMapper = make(map[int64]int)
   638  	return solver
   639  }
   640  
   641  // PropagateConstant propagate constant values of deterministic predicates in a condition.
   642  func (s *propConstSolver) PropagateConstant(ctx stochastikctx.Context, conditions []Expression) []Expression {
   643  	s.ctx = ctx
   644  	return s.solve(conditions)
   645  }