github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/inline/inlheur/scoring.go (about)

     1  // Copyright 2023 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package inlheur
     6  
     7  import (
     8  	"fmt"
     9  	"os"
    10  	"sort"
    11  	"strconv"
    12  	"strings"
    13  
    14  	"github.com/go-asm/go/cmd/compile/base"
    15  	"github.com/go-asm/go/cmd/compile/ir"
    16  	"github.com/go-asm/go/cmd/compile/pgo"
    17  	"github.com/go-asm/go/cmd/compile/types"
    18  )
    19  
// scoreAdjustTyp identifies one kind of callsite score adjustment.
// Each adjustment constant of this type is a distinct single bit, so
// the set of adjustments applied to a callsite can be recorded as a
// mask of the same type.
type scoreAdjustTyp uint
    23  
// These constants capture the various ways in which the inliner's
// scoring phase can adjust a callsite score based on heuristics. They
// fall broadly into three categories:
//
// 1) adjustments based solely on the callsite context (ex: call
// appears on panic path)
//
// 2) adjustments that take into account specific interesting values
// passed at a call site (ex: passing a constant that could result in
// cprop/deadcode in the caller)
//
// 3) adjustments that take into account values returned from the call
// at a callsite (ex: call always returns the same inlinable function,
// and return value flows unmodified into an indirect call)
//
// For categories 2 and 3 above, an adjustment can come in a "must"
// version and a "may" version (of which at most one is meant to be
// applied to a given callsite). Here the idea is that in the "must"
// version the value flow is unconditional: if the callsite executes,
// then the condition we're interested in (ex: param feeding call) is
// guaranteed to happen. For the "may" version, there may be control
// flow that could cause the benefit to be bypassed.
//
// Each constant occupies its own bit (1 << iota), which lets a set of
// applied adjustments be stored as a single mask value.
const (
	// Category 1 adjustments (see above)
	panicPathAdj scoreAdjustTyp = (1 << iota)
	initFuncAdj
	inLoopAdj

	// Category 2 adjustments (see above).
	passConstToIfAdj
	passConstToNestedIfAdj
	passConcreteToItfCallAdj
	passConcreteToNestedItfCallAdj
	passFuncToIndCallAdj
	passFuncToNestedIndCallAdj
	passInlinableFuncToIndCallAdj
	passInlinableFuncToNestedIndCallAdj

	// Category 3 adjustments.
	returnFeedsConstToIfAdj
	returnFeedsFuncToIndCallAdj
	returnFeedsInlinableFuncToIndCallAdj
	returnFeedsConcreteToInterfaceCallAdj

	sentinelScoreAdj // sentinel; not a real adjustment
)
    70  
// This table records the specific values we use to adjust call
// site scores in a given scenario. A positive value raises the score
// (making inlining less attractive) and a negative value lowers it.
// Entries may be overwritten at startup by parseScoreAdj when the
// -d=inlscoreadj debugging option is set.
//
// NOTE: these numbers are chosen very arbitrarily; ideally
// we will go through some sort of tuning process to decide
// what value for each one produces the best performance.
var adjValues = map[scoreAdjustTyp]int{
	panicPathAdj:                          40,
	initFuncAdj:                           20,
	inLoopAdj:                             -5,
	passConstToIfAdj:                      -20,
	passConstToNestedIfAdj:                -15,
	passConcreteToItfCallAdj:              -30,
	passConcreteToNestedItfCallAdj:        -25,
	passFuncToIndCallAdj:                  -25,
	passFuncToNestedIndCallAdj:            -20,
	passInlinableFuncToIndCallAdj:         -45,
	passInlinableFuncToNestedIndCallAdj:   -40,
	returnFeedsConstToIfAdj:               -15,
	returnFeedsFuncToIndCallAdj:           -25,
	returnFeedsInlinableFuncToIndCallAdj:  -40,
	returnFeedsConcreteToInterfaceCallAdj: -25,
}
    94  
// SetupScoreAdjustments interprets the value of the -d=inlscoreadj
// debugging option, if set. The value of this flag is expected to be
// a series of "/"-separated clauses of the form adj1:value1. Example:
// -d=inlscoreadj=inLoopAdj:0/passConstToIfAdj:-99
//
// If the option is empty this is a no-op; if it cannot be parsed the
// compiler is aborted via base.Fatalf.
func SetupScoreAdjustments() {
	if base.Debug.InlScoreAdj == "" {
		return
	}
	if err := parseScoreAdj(base.Debug.InlScoreAdj); err != nil {
		base.Fatalf("malformed -d=inlscoreadj argument %q: %v",
			base.Debug.InlScoreAdj, err)
	}
}
   108  
   109  func adjStringToVal(s string) (scoreAdjustTyp, bool) {
   110  	for adj := scoreAdjustTyp(1); adj < sentinelScoreAdj; adj <<= 1 {
   111  		if adj.String() == s {
   112  			return adj, true
   113  		}
   114  	}
   115  	return 0, false
   116  }
   117  
   118  func parseScoreAdj(val string) error {
   119  	clauses := strings.Split(val, "/")
   120  	if len(clauses) == 0 {
   121  		return fmt.Errorf("no clauses")
   122  	}
   123  	for _, clause := range clauses {
   124  		elems := strings.Split(clause, ":")
   125  		if len(elems) < 2 {
   126  			return fmt.Errorf("clause %q: expected colon", clause)
   127  		}
   128  		if len(elems) != 2 {
   129  			return fmt.Errorf("clause %q has %d elements, wanted 2", clause,
   130  				len(elems))
   131  		}
   132  		adj, ok := adjStringToVal(elems[0])
   133  		if !ok {
   134  			return fmt.Errorf("clause %q: unknown adjustment", clause)
   135  		}
   136  		val, err := strconv.Atoi(elems[1])
   137  		if err != nil {
   138  			return fmt.Errorf("clause %q: malformed value: %v", clause, err)
   139  		}
   140  		adjValues[adj] = val
   141  	}
   142  	return nil
   143  }
   144  
   145  func adjValue(x scoreAdjustTyp) int {
   146  	if val, ok := adjValues[x]; ok {
   147  		return val
   148  	} else {
   149  		panic("internal error unregistered adjustment type")
   150  	}
   151  }
   152  
   153  var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{
   154  	{may: passConstToNestedIfAdj, must: passConstToIfAdj},
   155  	{may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj},
   156  	{may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj},
   157  	{may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj},
   158  }
   159  
   160  func isMay(x scoreAdjustTyp) bool {
   161  	return mayToMust(x) != 0
   162  }
   163  
   164  func isMust(x scoreAdjustTyp) bool {
   165  	return mustToMay(x) != 0
   166  }
   167  
   168  func mayToMust(x scoreAdjustTyp) scoreAdjustTyp {
   169  	for _, v := range mayMustAdj {
   170  		if x == v.may {
   171  			return v.must
   172  		}
   173  	}
   174  	return 0
   175  }
   176  
   177  func mustToMay(x scoreAdjustTyp) scoreAdjustTyp {
   178  	for _, v := range mayMustAdj {
   179  		if x == v.must {
   180  			return v.may
   181  		}
   182  	}
   183  	return 0
   184  }
   185  
// computeCallSiteScore computes a score for the call site 'cs' and
// stores the score and the mask of applied adjustments in cs.Score
// and cs.ScoreMask. The score starts from the size cost of the callee
// (cs.Callee.Inl.Cost) and is then adjusted using heuristics based on
// the previously computed call site flags (cs.Flags), the per-argument
// properties (cs.ArgProps), and the callee's function properties
// 'calleeProps'.
//
// If calleeProps is nil only the callsite-context adjustments (panic
// path, init function, in loop) are applied. The 'csa' parameter is
// not referenced by this method.
func (cs *CallSite) computeCallSiteScore(csa *callSiteAnalyzer, calleeProps *FuncProps) {
	callee := cs.Callee
	csflags := cs.Flags
	call := cs.Call

	// Start with the size-based score for the callee.
	score := int(callee.Inl.Cost)
	var tmask scoreAdjustTyp

	if debugTrace&debugTraceScoring != 0 {
		fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n",
			callee.Sym().Name, fmtFullPos(call.Pos()), score)
	}

	// First some score adjustments to discourage inlining in selected cases.
	if csflags&CallSiteOnPanicPath != 0 {
		score, tmask = adjustScore(panicPathAdj, score, tmask)
	}
	if csflags&CallSiteInInitFunc != 0 {
		score, tmask = adjustScore(initFuncAdj, score, tmask)
	}

	// Then adjustments to encourage inlining in selected cases.
	if csflags&CallSiteInLoop != 0 {
		score, tmask = adjustScore(inLoopAdj, score, tmask)
	}

	// Stop here if no callee props.
	if calleeProps == nil {
		cs.Score, cs.ScoreMask = score, tmask
		return
	}

	// Walk through the actual expressions being passed at the call.
	// The code below indexes the callee's receiver+params list,
	// calleeProps.ParamFlags and cs.ArgProps with the same argument
	// index, so it relies on these lining up with call.Args.
	calleeRecvrParms := callee.Type().RecvParams()
	for idx := range call.Args {
		// ignore blanks
		if calleeRecvrParms[idx].Sym == nil ||
			calleeRecvrParms[idx].Sym.IsBlank() {
			continue
		}
		arg := call.Args[idx]
		pflag := calleeProps.ParamFlags[idx]
		if debugTrace&debugTraceScoring != 0 {
			fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n",
				idx, len(call.Args), arg, pflag.String())
		}

		// No argument properties were recorded for this call site, so
		// there is nothing to base an argument adjustment on.
		if len(cs.ArgProps) == 0 {
			continue
		}
		argProps := cs.ArgProps[idx]

		if debugTrace&debugTraceScoring != 0 {
			fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n",
				idx, argProps.String(), arg)
		}

		// In each group below the "may" (nested) adjustment is tried
		// before the "must" one; adjustScore replaces an already
		// applied "may" when the corresponding "must" is applied.
		if argProps&ActualExprConstant != 0 {
			if pflag&ParamMayFeedIfOrSwitch != 0 {
				score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask)
			}
			if pflag&ParamFeedsIfOrSwitch != 0 {
				score, tmask = adjustScore(passConstToIfAdj, score, tmask)
			}
		}

		if argProps&ActualExprIsConcreteConvIface != 0 {
			// FIXME: ideally here it would be nice to make a
			// distinction between the inlinable case and the
			// non-inlinable case, but this is hard to do. Example:
			//
			//    type I interface { Tiny() int; Giant() }
			//    type Conc struct { x int }
			//    func (c *Conc) Tiny() int { return 42 }
			//    func (c *Conc) Giant() { <huge amounts of code> }
			//
			//    func passConcToItf(c *Conc) {
			//        makesItfMethodCall(c)
			//    }
			//
			// In the code above, function properties will only tell
			// us that 'makesItfMethodCall' invokes a method on its
			// interface parameter, but we don't know whether it calls
			// "Tiny" or "Giant". If we knew it called "Tiny", then in
			// theory in addition to converting the interface call to
			// a direct call, we could also inline (in which case
			// we'd want to decrease the score even more).
			//
			// One thing we could do (not yet implemented) is iterate
			// through all of the methods of "*Conc" that allow it to
			// satisfy I, and if all are inlinable, then exploit that.
			if pflag&ParamMayFeedInterfaceMethodCall != 0 {
				score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask)
			}
			if pflag&ParamFeedsInterfaceMethodCall != 0 {
				score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask)
			}
		}

		if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 {
			// Use the stronger "inlinable function" adjustments when
			// the actual is known to be an inlinable function.
			mayadj := passFuncToNestedIndCallAdj
			mustadj := passFuncToIndCallAdj
			if argProps&ActualExprIsInlinableFunc != 0 {
				mayadj = passInlinableFuncToNestedIndCallAdj
				mustadj = passInlinableFuncToIndCallAdj
			}
			if pflag&ParamMayFeedIndirectCall != 0 {
				score, tmask = adjustScore(mayadj, score, tmask)
			}
			if pflag&ParamFeedsIndirectCall != 0 {
				score, tmask = adjustScore(mustadj, score, tmask)
			}
		}
	}

	cs.Score, cs.ScoreMask = score, tmask
}
   311  
   312  func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) {
   313  
   314  	if isMust(typ) {
   315  		if mask&typ != 0 {
   316  			return score, mask
   317  		}
   318  		may := mustToMay(typ)
   319  		if mask&may != 0 {
   320  			// promote may to must, so undo may
   321  			score -= adjValue(may)
   322  			mask &^= may
   323  		}
   324  	} else if isMay(typ) {
   325  		must := mayToMust(typ)
   326  		if mask&(must|typ) != 0 {
   327  			return score, mask
   328  		}
   329  	}
   330  	if mask&typ == 0 {
   331  		if debugTrace&debugTraceScoring != 0 {
   332  			fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n",
   333  				adjValue(typ), typ.String())
   334  		}
   335  		score += adjValue(typ)
   336  		mask |= typ
   337  	}
   338  	return score, mask
   339  }
   340  
// resultFlagToPositiveAdj and paramFlagToPositiveAdj map a result or
// parameter property value to the score adjustment that it could
// enable at a call site. Both are nil until setupFlagToAdjMaps runs.
var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp
var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp
   343  
   344  func setupFlagToAdjMaps() {
   345  	resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{
   346  		ResultIsAllocatedMem:     returnFeedsConcreteToInterfaceCallAdj,
   347  		ResultAlwaysSameFunc:     returnFeedsFuncToIndCallAdj,
   348  		ResultAlwaysSameConstant: returnFeedsConstToIfAdj,
   349  	}
   350  	paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{
   351  		ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj,
   352  		ParamFeedsInterfaceMethodCall:   passConcreteToItfCallAdj,
   353  		ParamMayFeedIndirectCall:        passInlinableFuncToNestedIndCallAdj,
   354  		ParamFeedsIndirectCall:          passInlinableFuncToIndCallAdj,
   355  	}
   356  }
   357  
   358  // LargestNegativeScoreAdjustment tries to estimate the largest possible
   359  // negative score adjustment that could be applied to a call of the
   360  // function with the specified props. Example:
   361  //
   362  //	func foo() {                  func bar(x int, p *int) int {
   363  //	   ...                          if x < 0 { *p = x }
   364  //	}                               return 99
   365  //	                              }
   366  //
   367  // Function 'foo' above on the left has no interesting properties,
   368  // thus as a result the most we'll adjust any call to is the value for
   369  // "call in loop". If the calculated cost of the function is 150, and
   370  // the in-loop adjustment is 5 (for example), then there is not much
   371  // point treating it as inlinable. On the other hand "bar" has a param
   372  // property (parameter "x" feeds unmodified to an "if" statement") and
   373  // a return property (always returns same constant) meaning that a
   374  // given call _could_ be rescored down as much as -35 points-- thus if
   375  // the size of "bar" is 100 (for example) then there is at least a
   376  // chance that scoring will enable inlining.
   377  func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int {
   378  	if resultFlagToPositiveAdj == nil {
   379  		setupFlagToAdjMaps()
   380  	}
   381  	var tmask scoreAdjustTyp
   382  	score := adjValues[inLoopAdj] // any call can be in a loop
   383  	for _, pf := range props.ParamFlags {
   384  		if adj, ok := paramFlagToPositiveAdj[pf]; ok {
   385  			score, tmask = adjustScore(adj, score, tmask)
   386  		}
   387  	}
   388  	for _, rf := range props.ResultFlags {
   389  		if adj, ok := resultFlagToPositiveAdj[rf]; ok {
   390  			score, tmask = adjustScore(adj, score, tmask)
   391  		}
   392  	}
   393  
   394  	if debugTrace&debugTraceScoring != 0 {
   395  		fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n",
   396  			fn, score)
   397  	}
   398  
   399  	return score
   400  }
   401  
   402  // LargestPositiveScoreAdjustment tries to estimate the largest possible
   403  // positive score adjustment that could be applied to a given callsite.
   404  // At the moment we don't have very many positive score adjustments, so
   405  // this is just hard-coded, not table-driven.
   406  func LargestPositiveScoreAdjustment(fn *ir.Func) int {
   407  	return adjValues[panicPathAdj] + adjValues[initFuncAdj]
   408  }
   409  
// callSiteTab contains entries for each call in the function
// currently being processed by InlineCalls; this variable will either
// be set to the table built with 'scoreCallsCache.tab' below (for
// non-inlinable routines) or to the local 'cstab' entry in the
// fnInlHeur object for inlinable routines.
//
// NOTE: this assumes that inlining operations are happening in a serial,
// single-threaded fashion, which is true today but probably won't hold
// in the future (for example, we might want to score the callsites
// in multiple functions in parallel); if the inliner evolves in this
// direction we'll need to come up with a different approach here.
var callSiteTab CallSiteTab
   421  
// scoreCallsCache caches a call site table and call site list between
// invocations of ScoreCalls so that we can reuse previously allocated
// storage.
var scoreCallsCache scoreCallsCacheType

// scoreCallsCacheType holds the storage reused across ScoreCalls
// invocations.
type scoreCallsCacheType struct {
	// tab is the table handed to computeCallSiteTable for functions
	// with no precomputed table; ScoreCallsCleanup empties it.
	tab CallSiteTab
	// csl is scratch storage for the sorted call site list built in
	// scoreCallsRegion.
	csl []*CallSite
}
   431  
   432  // ScoreCalls assigns numeric scores to each of the callsites in
   433  // function 'fn'; the lower the score, the more helpful we think it
   434  // will be to inline.
   435  //
   436  // Unlike a lot of the other inline heuristics machinery, callsite
   437  // scoring can't be done as part of the CanInline call for a function,
   438  // due to fact that we may be working on a non-trivial SCC. So for
   439  // example with this SCC:
   440  //
   441  //	func foo(x int) {           func bar(x int, f func()) {
   442  //	  if x != 0 {                  f()
   443  //	    bar(x, func(){})           foo(x-1)
   444  //	  }                         }
   445  //	}
   446  //
   447  // We don't want to perform scoring for the 'foo' call in "bar" until
   448  // after foo has been analyzed, but it's conceivable that CanInline
   449  // might visit bar before foo for this SCC.
   450  func ScoreCalls(fn *ir.Func) {
   451  	if len(fn.Body) == 0 {
   452  		return
   453  	}
   454  	enableDebugTraceIfEnv()
   455  
   456  	nameFinder := newNameFinder(fn)
   457  
   458  	if debugTrace&debugTraceScoring != 0 {
   459  		fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn))
   460  	}
   461  
   462  	// If this is an inlinable function, use the precomputed
   463  	// call site table for it. If the function wasn't an inline
   464  	// candidate, collect a callsite table for it now.
   465  	var cstab CallSiteTab
   466  	if funcInlHeur, ok := fpmap[fn]; ok {
   467  		cstab = funcInlHeur.cstab
   468  	} else {
   469  		if len(scoreCallsCache.tab) != 0 {
   470  			panic("missing call to ScoreCallsCleanup")
   471  		}
   472  		if scoreCallsCache.tab == nil {
   473  			scoreCallsCache.tab = make(CallSiteTab)
   474  		}
   475  		if debugTrace&debugTraceScoring != 0 {
   476  			fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n",
   477  				ir.FuncName(fn))
   478  		}
   479  		cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0,
   480  			nameFinder)
   481  	}
   482  
   483  	csa := makeCallSiteAnalyzer(fn)
   484  	const doCallResults = true
   485  	csa.scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil)
   486  
   487  	disableDebugTrace()
   488  }
   489  
   490  // scoreCallsRegion assigns numeric scores to each of the callsites in
   491  // region 'region' within function 'fn'. This can be called on
   492  // an entire function, or with 'region' set to a chunk of
   493  // code corresponding to an inlined call.
   494  func (csa *callSiteAnalyzer) scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) {
   495  	if debugTrace&debugTraceScoring != 0 {
   496  		fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s) len(cstab)=%d\n",
   497  			ir.FuncName(fn), region[0].Op().String(), len(cstab))
   498  	}
   499  
   500  	// Sort callsites to avoid any surprises with non deterministic
   501  	// map iteration order (this is probably not needed, but here just
   502  	// in case).
   503  	csl := scoreCallsCache.csl[:0]
   504  	for _, cs := range cstab {
   505  		csl = append(csl, cs)
   506  	}
   507  	scoreCallsCache.csl = csl[:0]
   508  	sort.Slice(csl, func(i, j int) bool {
   509  		return csl[i].ID < csl[j].ID
   510  	})
   511  
   512  	// Score each call site.
   513  	var resultNameTab map[*ir.Name]resultPropAndCS
   514  	for _, cs := range csl {
   515  		var cprops *FuncProps
   516  		fihcprops := false
   517  		desercprops := false
   518  		if funcInlHeur, ok := fpmap[cs.Callee]; ok {
   519  			cprops = funcInlHeur.props
   520  			fihcprops = true
   521  		} else if cs.Callee.Inl != nil {
   522  			cprops = DeserializeFromString(cs.Callee.Inl.Properties)
   523  			desercprops = true
   524  		} else {
   525  			if base.Debug.DumpInlFuncProps != "" {
   526  				fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos()))
   527  				panic("should never happen")
   528  			} else {
   529  				continue
   530  			}
   531  		}
   532  		cs.computeCallSiteScore(csa, cprops)
   533  
   534  		if doCallResults {
   535  			if debugTrace&debugTraceScoring != 0 {
   536  				fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
   537  			}
   538  			resultNameTab = csa.examineCallResults(cs, resultNameTab)
   539  		}
   540  
   541  		if debugTrace&debugTraceScoring != 0 {
   542  			fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops)
   543  		}
   544  	}
   545  
   546  	if resultNameTab != nil {
   547  		csa.rescoreBasedOnCallResultUses(fn, resultNameTab, cstab)
   548  	}
   549  
   550  	disableDebugTrace()
   551  
   552  	if ic != nil && callSiteTab != nil {
   553  		// Integrate the calls from this cstab into the table for the caller.
   554  		if err := callSiteTab.merge(cstab); err != nil {
   555  			base.FatalfAt(ic.Pos(), "%v", err)
   556  		}
   557  	} else {
   558  		callSiteTab = cstab
   559  	}
   560  }
   561  
   562  // ScoreCallsCleanup resets the state of the callsite cache
   563  // once ScoreCalls is done with a function.
   564  func ScoreCallsCleanup() {
   565  	if base.Debug.DumpInlCallSiteScores != 0 {
   566  		if allCallSites == nil {
   567  			allCallSites = make(CallSiteTab)
   568  		}
   569  		for call, cs := range callSiteTab {
   570  			allCallSites[call] = cs
   571  		}
   572  	}
   573  	for k := range scoreCallsCache.tab {
   574  		delete(scoreCallsCache.tab, k)
   575  	}
   576  }
   577  
   578  // GetCallSiteScore returns the previously calculated score for call
   579  // within fn.
   580  func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) {
   581  	if funcInlHeur, ok := fpmap[fn]; ok {
   582  		if cs, ok := funcInlHeur.cstab[call]; ok {
   583  			return cs.Score, true
   584  		}
   585  	}
   586  	if cs, ok := callSiteTab[call]; ok {
   587  		return cs.Score, true
   588  	}
   589  	return 0, false
   590  }
   591  
   592  // BudgetExpansion returns the amount to relax/expand the base
   593  // inlining budget when the new inliner is turned on; the inliner
   594  // will add the returned value to the hairyness budget.
   595  //
   596  // Background: with the new inliner, the score for a given callsite
   597  // can be adjusted down by some amount due to heuristics, however we
   598  // won't know whether this is going to happen until much later after
   599  // the CanInline call. This function returns the amount to relax the
   600  // budget initially (to allow for a large score adjustment); later on
   601  // in RevisitInlinability we'll look at each individual function to
   602  // demote it if needed.
   603  func BudgetExpansion(maxBudget int32) int32 {
   604  	if base.Debug.InlBudgetSlack != 0 {
   605  		return int32(base.Debug.InlBudgetSlack)
   606  	}
   607  	// In the default case, return maxBudget, which will effectively
   608  	// double the budget from 80 to 160; this should be good enough
   609  	// for most cases.
   610  	return maxBudget
   611  }
   612  
// allCallSites accumulates the call sites seen across functions when
// call site score dumping is enabled; it is filled in by
// ScoreCallsCleanup and read by DumpInlCallSiteScores.
var allCallSites CallSiteTab
   614  
   615  // DumpInlCallSiteScores is invoked by the inliner if the debug flag
   616  // "-d=dumpinlcallsitescores" is set; it dumps out a human-readable
   617  // summary of all (potentially) inlinable callsites in the package,
   618  // along with info on call site scoring and the adjustments made to a
   619  // given score. Here profile is the PGO profile in use (may be
   620  // nil), budgetCallback is a callback that can be invoked to find out
   621  // the original pre-adjustment hairyness limit for the function, and
   622  // inlineHotMaxBudget is the constant of the same name used in the
   623  // inliner. Sample output lines:
   624  //
   625  // Score  Adjustment  Status  Callee  CallerPos ScoreFlags
   626  // 115    40          DEMOTED github.com/go-asm/go/cmd/compile/abi.(*ABIParamAssignment).Offset     expand_calls.go:1679:14|6       panicPathAdj
   627  // 76     -5n         PROMOTED runtime.persistentalloc   mcheckmark.go:48:45|3   inLoopAdj
   628  // 201    0           --- PGO  unicode.DecodeRuneInString        utf8.go:312:30|1
   629  // 7      -5          --- PGO  github.com/go-asm/go/abi.Name.DataChecked     type.go:625:22|0        inLoopAdj
   630  //
   631  // In the dump above, "Score" is the final score calculated for the
   632  // callsite, "Adjustment" is the amount added to or subtracted from
   633  // the original hairyness estimate to form the score. "Status" shows
   634  // whether anything changed with the site -- did the adjustment bump
   635  // it down just below the threshold ("PROMOTED") or instead bump it
   636  // above the threshold ("DEMOTED"); this will be blank ("---") if no
   637  // threshold was crossed as a result of the heuristics. Note that
   638  // "Status" also shows whether PGO was involved. "Callee" is the name
   639  // of the function called, "CallerPos" is the position of the
   640  // callsite, and "ScoreFlags" is a digest of the specific properties
   641  // we used to make adjustments to callsite score via heuristics.
   642  func DumpInlCallSiteScores(profile *pgo.Profile, budgetCallback func(fn *ir.Func, profile *pgo.Profile) (int32, bool)) {
   643  
   644  	var indirectlyDueToPromotion func(cs *CallSite) bool
   645  	indirectlyDueToPromotion = func(cs *CallSite) bool {
   646  		bud, _ := budgetCallback(cs.Callee, profile)
   647  		hairyval := cs.Callee.Inl.Cost
   648  		score := int32(cs.Score)
   649  		if hairyval > bud && score <= bud {
   650  			return true
   651  		}
   652  		if cs.parent != nil {
   653  			return indirectlyDueToPromotion(cs.parent)
   654  		}
   655  		return false
   656  	}
   657  
   658  	genstatus := func(cs *CallSite) string {
   659  		hairyval := cs.Callee.Inl.Cost
   660  		bud, isPGO := budgetCallback(cs.Callee, profile)
   661  		score := int32(cs.Score)
   662  		st := "---"
   663  		expinl := false
   664  		switch {
   665  		case hairyval <= bud && score <= bud:
   666  			// "Normal" inlined case: hairy val sufficiently low that
   667  			// it would have been inlined anyway without heuristics.
   668  			expinl = true
   669  		case hairyval > bud && score > bud:
   670  			// "Normal" not inlined case: hairy val sufficiently high
   671  			// and scoring didn't lower it.
   672  		case hairyval > bud && score <= bud:
   673  			// Promoted: we would not have inlined it before, but
   674  			// after score adjustment we decided to inline.
   675  			st = "PROMOTED"
   676  			expinl = true
   677  		case hairyval <= bud && score > bud:
   678  			// Demoted: we would have inlined it before, but after
   679  			// score adjustment we decided not to inline.
   680  			st = "DEMOTED"
   681  		}
   682  		inlined := cs.aux&csAuxInlined != 0
   683  		indprom := false
   684  		if cs.parent != nil {
   685  			indprom = indirectlyDueToPromotion(cs.parent)
   686  		}
   687  		if inlined && indprom {
   688  			st += "|INDPROM"
   689  		}
   690  		if inlined && !expinl {
   691  			st += "|[NI?]"
   692  		} else if !inlined && expinl {
   693  			st += "|[IN?]"
   694  		}
   695  		if isPGO {
   696  			st += "|PGO"
   697  		}
   698  		return st
   699  	}
   700  
   701  	if base.Debug.DumpInlCallSiteScores != 0 {
   702  		var sl []*CallSite
   703  		for _, cs := range allCallSites {
   704  			sl = append(sl, cs)
   705  		}
   706  		sort.Slice(sl, func(i, j int) bool {
   707  			if sl[i].Score != sl[j].Score {
   708  				return sl[i].Score < sl[j].Score
   709  			}
   710  			fni := ir.PkgFuncName(sl[i].Callee)
   711  			fnj := ir.PkgFuncName(sl[j].Callee)
   712  			if fni != fnj {
   713  				return fni < fnj
   714  			}
   715  			ecsi := EncodeCallSiteKey(sl[i])
   716  			ecsj := EncodeCallSiteKey(sl[j])
   717  			return ecsi < ecsj
   718  		})
   719  
   720  		mkname := func(fn *ir.Func) string {
   721  			var n string
   722  			if fn == nil || fn.Nname == nil {
   723  				return "<nil>"
   724  			}
   725  			if fn.Sym().Pkg == types.LocalPkg {
   726  				n = "ยท" + fn.Sym().Name
   727  			} else {
   728  				n = ir.PkgFuncName(fn)
   729  			}
   730  			// don't try to print super-long names
   731  			if len(n) <= 64 {
   732  				return n
   733  			}
   734  			return n[:32] + "..." + n[len(n)-32:]
   735  		}
   736  
   737  		if len(sl) != 0 {
   738  			fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path)
   739  			fmt.Fprintf(os.Stdout, "# Score  Adjustment  Status  Callee  CallerPos Flags ScoreFlags\n")
   740  		}
   741  		for _, cs := range sl {
   742  			hairyval := cs.Callee.Inl.Cost
   743  			adj := int32(cs.Score) - hairyval
   744  			nm := mkname(cs.Callee)
   745  			ecc := EncodeCallSiteKey(cs)
   746  			fmt.Fprintf(os.Stdout, "%d  %d\t%s\t%s\t%s\t%s\n",
   747  				cs.Score, adj, genstatus(cs),
   748  				nm, ecc,
   749  				cs.ScoreMask.String())
   750  		}
   751  	}
   752  }