// Source: github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/inline/inlheur/scoring.go

// Copyright 2023 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package inlheur

import (
	"fmt"
	"os"
	"sort"
	"strconv"
	"strings"

	"github.com/go-asm/go/cmd/compile/base"
	"github.com/go-asm/go/cmd/compile/ir"
	"github.com/go-asm/go/cmd/compile/pgo"
	"github.com/go-asm/go/cmd/compile/types"
)

// scoreAdjustTyp is the type of the constants that enumerate the set
// of possible ways/scenarios in which we'll adjust the score of a
// given callsite. Values are combined into masks with bitwise
// operators, so each constant occupies its own bit.
type scoreAdjustTyp uint

// These constants capture the various ways in which the inliner's
// scoring phase can adjust a callsite score based on heuristics. They
// fall broadly into three categories:
//
// 1) adjustments based solely on the callsite context (ex: call
// appears on panic path)
//
// 2) adjustments that take into account specific interesting values
// passed at a call site (ex: passing a constant that could result in
// cprop/deadcode in the caller)
//
// 3) adjustments that take into account values returned from the call
// at a callsite (ex: call always returns the same inlinable function,
// and return value flows unmodified into an indirect call)
//
// For categories 2 and 3 above, each adjustment can have either a
// "must" version and a "may" version (but not both). Here the idea is
// that in the "must" version the value flow is unconditional: if the
// callsite executes, then the condition we're interested in (ex:
// param feeding call) is guaranteed to happen. For the "may" version,
// there may be control flow that could cause the benefit to be
// bypassed.
46 const ( 47 // Category 1 adjustments (see above) 48 panicPathAdj scoreAdjustTyp = (1 << iota) 49 initFuncAdj 50 inLoopAdj 51 52 // Category 2 adjustments (see above). 53 passConstToIfAdj 54 passConstToNestedIfAdj 55 passConcreteToItfCallAdj 56 passConcreteToNestedItfCallAdj 57 passFuncToIndCallAdj 58 passFuncToNestedIndCallAdj 59 passInlinableFuncToIndCallAdj 60 passInlinableFuncToNestedIndCallAdj 61 62 // Category 3 adjustments. 63 returnFeedsConstToIfAdj 64 returnFeedsFuncToIndCallAdj 65 returnFeedsInlinableFuncToIndCallAdj 66 returnFeedsConcreteToInterfaceCallAdj 67 68 sentinelScoreAdj // sentinel; not a real adjustment 69 ) 70 71 // This table records the specific values we use to adjust call 72 // site scores in a given scenario. 73 // NOTE: these numbers are chosen very arbitrarily; ideally 74 // we will go through some sort of turning process to decide 75 // what value for each one produces the best performance. 76 77 var adjValues = map[scoreAdjustTyp]int{ 78 panicPathAdj: 40, 79 initFuncAdj: 20, 80 inLoopAdj: -5, 81 passConstToIfAdj: -20, 82 passConstToNestedIfAdj: -15, 83 passConcreteToItfCallAdj: -30, 84 passConcreteToNestedItfCallAdj: -25, 85 passFuncToIndCallAdj: -25, 86 passFuncToNestedIndCallAdj: -20, 87 passInlinableFuncToIndCallAdj: -45, 88 passInlinableFuncToNestedIndCallAdj: -40, 89 returnFeedsConstToIfAdj: -15, 90 returnFeedsFuncToIndCallAdj: -25, 91 returnFeedsInlinableFuncToIndCallAdj: -40, 92 returnFeedsConcreteToInterfaceCallAdj: -25, 93 } 94 95 // SetupScoreAdjustments interprets the value of the -d=inlscoreadj 96 // debugging option, if set. The value of this flag is expected to be 97 // a series of "/"-separated clauses of the form adj1:value1. 
Example: 98 // -d=inlscoreadj=inLoopAdj=0/passConstToIfAdj=-99 99 func SetupScoreAdjustments() { 100 if base.Debug.InlScoreAdj == "" { 101 return 102 } 103 if err := parseScoreAdj(base.Debug.InlScoreAdj); err != nil { 104 base.Fatalf("malformed -d=inlscoreadj argument %q: %v", 105 base.Debug.InlScoreAdj, err) 106 } 107 } 108 109 func adjStringToVal(s string) (scoreAdjustTyp, bool) { 110 for adj := scoreAdjustTyp(1); adj < sentinelScoreAdj; adj <<= 1 { 111 if adj.String() == s { 112 return adj, true 113 } 114 } 115 return 0, false 116 } 117 118 func parseScoreAdj(val string) error { 119 clauses := strings.Split(val, "/") 120 if len(clauses) == 0 { 121 return fmt.Errorf("no clauses") 122 } 123 for _, clause := range clauses { 124 elems := strings.Split(clause, ":") 125 if len(elems) < 2 { 126 return fmt.Errorf("clause %q: expected colon", clause) 127 } 128 if len(elems) != 2 { 129 return fmt.Errorf("clause %q has %d elements, wanted 2", clause, 130 len(elems)) 131 } 132 adj, ok := adjStringToVal(elems[0]) 133 if !ok { 134 return fmt.Errorf("clause %q: unknown adjustment", clause) 135 } 136 val, err := strconv.Atoi(elems[1]) 137 if err != nil { 138 return fmt.Errorf("clause %q: malformed value: %v", clause, err) 139 } 140 adjValues[adj] = val 141 } 142 return nil 143 } 144 145 func adjValue(x scoreAdjustTyp) int { 146 if val, ok := adjValues[x]; ok { 147 return val 148 } else { 149 panic("internal error unregistered adjustment type") 150 } 151 } 152 153 var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{ 154 {may: passConstToNestedIfAdj, must: passConstToIfAdj}, 155 {may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj}, 156 {may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj}, 157 {may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj}, 158 } 159 160 func isMay(x scoreAdjustTyp) bool { 161 return mayToMust(x) != 0 162 } 163 164 func isMust(x scoreAdjustTyp) bool { 165 return mustToMay(x) != 0 166 } 167 168 
func mayToMust(x scoreAdjustTyp) scoreAdjustTyp { 169 for _, v := range mayMustAdj { 170 if x == v.may { 171 return v.must 172 } 173 } 174 return 0 175 } 176 177 func mustToMay(x scoreAdjustTyp) scoreAdjustTyp { 178 for _, v := range mayMustAdj { 179 if x == v.must { 180 return v.may 181 } 182 } 183 return 0 184 } 185 186 // computeCallSiteScore takes a given call site whose ir node is 187 // 'call' and callee function is 'callee' and with previously computed 188 // call site properties 'csflags', then computes a score for the 189 // callsite that combines the size cost of the callee with heuristics 190 // based on previously computed argument and function properties, 191 // then stores the score and the adjustment mask in the appropriate 192 // fields in 'cs' 193 func (cs *CallSite) computeCallSiteScore(csa *callSiteAnalyzer, calleeProps *FuncProps) { 194 callee := cs.Callee 195 csflags := cs.Flags 196 call := cs.Call 197 198 // Start with the size-based score for the callee. 199 score := int(callee.Inl.Cost) 200 var tmask scoreAdjustTyp 201 202 if debugTrace&debugTraceScoring != 0 { 203 fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n", 204 callee.Sym().Name, fmtFullPos(call.Pos()), score) 205 } 206 207 // First some score adjustments to discourage inlining in selected cases. 208 if csflags&CallSiteOnPanicPath != 0 { 209 score, tmask = adjustScore(panicPathAdj, score, tmask) 210 } 211 if csflags&CallSiteInInitFunc != 0 { 212 score, tmask = adjustScore(initFuncAdj, score, tmask) 213 } 214 215 // Then adjustments to encourage inlining in selected cases. 216 if csflags&CallSiteInLoop != 0 { 217 score, tmask = adjustScore(inLoopAdj, score, tmask) 218 } 219 220 // Stop here if no callee props. 221 if calleeProps == nil { 222 cs.Score, cs.ScoreMask = score, tmask 223 return 224 } 225 226 // Walk through the actual expressions being passed at the call. 
227 calleeRecvrParms := callee.Type().RecvParams() 228 for idx := range call.Args { 229 // ignore blanks 230 if calleeRecvrParms[idx].Sym == nil || 231 calleeRecvrParms[idx].Sym.IsBlank() { 232 continue 233 } 234 arg := call.Args[idx] 235 pflag := calleeProps.ParamFlags[idx] 236 if debugTrace&debugTraceScoring != 0 { 237 fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n", 238 idx, len(call.Args), arg, pflag.String()) 239 } 240 241 if len(cs.ArgProps) == 0 { 242 continue 243 } 244 argProps := cs.ArgProps[idx] 245 246 if debugTrace&debugTraceScoring != 0 { 247 fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n", 248 idx, argProps.String(), arg) 249 } 250 251 if argProps&ActualExprConstant != 0 { 252 if pflag&ParamMayFeedIfOrSwitch != 0 { 253 score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask) 254 } 255 if pflag&ParamFeedsIfOrSwitch != 0 { 256 score, tmask = adjustScore(passConstToIfAdj, score, tmask) 257 } 258 } 259 260 if argProps&ActualExprIsConcreteConvIface != 0 { 261 // FIXME: ideally here it would be nice to make a 262 // distinction between the inlinable case and the 263 // non-inlinable case, but this is hard to do. Example: 264 // 265 // type I interface { Tiny() int; Giant() } 266 // type Conc struct { x int } 267 // func (c *Conc) Tiny() int { return 42 } 268 // func (c *Conc) Giant() { <huge amounts of code> } 269 // 270 // func passConcToItf(c *Conc) { 271 // makesItfMethodCall(c) 272 // } 273 // 274 // In the code above, function properties will only tell 275 // us that 'makesItfMethodCall' invokes a method on its 276 // interface parameter, but we don't know whether it calls 277 // "Tiny" or "Giant". If we knew if called "Tiny", then in 278 // theory in addition to converting the interface call to 279 // a direct call, we could also inline (in which case 280 // we'd want to decrease the score even more). 
281 // 282 // One thing we could do (not yet implemented) is iterate 283 // through all of the methods of "*Conc" that allow it to 284 // satisfy I, and if all are inlinable, then exploit that. 285 if pflag&ParamMayFeedInterfaceMethodCall != 0 { 286 score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask) 287 } 288 if pflag&ParamFeedsInterfaceMethodCall != 0 { 289 score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask) 290 } 291 } 292 293 if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 { 294 mayadj := passFuncToNestedIndCallAdj 295 mustadj := passFuncToIndCallAdj 296 if argProps&ActualExprIsInlinableFunc != 0 { 297 mayadj = passInlinableFuncToNestedIndCallAdj 298 mustadj = passInlinableFuncToIndCallAdj 299 } 300 if pflag&ParamMayFeedIndirectCall != 0 { 301 score, tmask = adjustScore(mayadj, score, tmask) 302 } 303 if pflag&ParamFeedsIndirectCall != 0 { 304 score, tmask = adjustScore(mustadj, score, tmask) 305 } 306 } 307 } 308 309 cs.Score, cs.ScoreMask = score, tmask 310 } 311 312 func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) { 313 314 if isMust(typ) { 315 if mask&typ != 0 { 316 return score, mask 317 } 318 may := mustToMay(typ) 319 if mask&may != 0 { 320 // promote may to must, so undo may 321 score -= adjValue(may) 322 mask &^= may 323 } 324 } else if isMay(typ) { 325 must := mayToMust(typ) 326 if mask&(must|typ) != 0 { 327 return score, mask 328 } 329 } 330 if mask&typ == 0 { 331 if debugTrace&debugTraceScoring != 0 { 332 fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n", 333 adjValue(typ), typ.String()) 334 } 335 score += adjValue(typ) 336 mask |= typ 337 } 338 return score, mask 339 } 340 341 var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp 342 var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp 343 344 func setupFlagToAdjMaps() { 345 resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{ 346 ResultIsAllocatedMem: 
returnFeedsConcreteToInterfaceCallAdj, 347 ResultAlwaysSameFunc: returnFeedsFuncToIndCallAdj, 348 ResultAlwaysSameConstant: returnFeedsConstToIfAdj, 349 } 350 paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{ 351 ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj, 352 ParamFeedsInterfaceMethodCall: passConcreteToItfCallAdj, 353 ParamMayFeedIndirectCall: passInlinableFuncToNestedIndCallAdj, 354 ParamFeedsIndirectCall: passInlinableFuncToIndCallAdj, 355 } 356 } 357 358 // LargestNegativeScoreAdjustment tries to estimate the largest possible 359 // negative score adjustment that could be applied to a call of the 360 // function with the specified props. Example: 361 // 362 // func foo() { func bar(x int, p *int) int { 363 // ... if x < 0 { *p = x } 364 // } return 99 365 // } 366 // 367 // Function 'foo' above on the left has no interesting properties, 368 // thus as a result the most we'll adjust any call to is the value for 369 // "call in loop". If the calculated cost of the function is 150, and 370 // the in-loop adjustment is 5 (for example), then there is not much 371 // point treating it as inlinable. On the other hand "bar" has a param 372 // property (parameter "x" feeds unmodified to an "if" statement") and 373 // a return property (always returns same constant) meaning that a 374 // given call _could_ be rescored down as much as -35 points-- thus if 375 // the size of "bar" is 100 (for example) then there is at least a 376 // chance that scoring will enable inlining. 
377 func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int { 378 if resultFlagToPositiveAdj == nil { 379 setupFlagToAdjMaps() 380 } 381 var tmask scoreAdjustTyp 382 score := adjValues[inLoopAdj] // any call can be in a loop 383 for _, pf := range props.ParamFlags { 384 if adj, ok := paramFlagToPositiveAdj[pf]; ok { 385 score, tmask = adjustScore(adj, score, tmask) 386 } 387 } 388 for _, rf := range props.ResultFlags { 389 if adj, ok := resultFlagToPositiveAdj[rf]; ok { 390 score, tmask = adjustScore(adj, score, tmask) 391 } 392 } 393 394 if debugTrace&debugTraceScoring != 0 { 395 fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n", 396 fn, score) 397 } 398 399 return score 400 } 401 402 // LargestPositiveScoreAdjustment tries to estimate the largest possible 403 // positive score adjustment that could be applied to a given callsite. 404 // At the moment we don't have very many positive score adjustments, so 405 // this is just hard-coded, not table-driven. 406 func LargestPositiveScoreAdjustment(fn *ir.Func) int { 407 return adjValues[panicPathAdj] + adjValues[initFuncAdj] 408 } 409 410 // callSiteTab contains entries for each call in the function 411 // currently being processed by InlineCalls; this variable will either 412 // be set to 'cstabCache' below (for non-inlinable routines) or to the 413 // local 'cstab' entry in the fnInlHeur object for inlinable routines. 414 // 415 // NOTE: this assumes that inlining operations are happening in a serial, 416 // single-threaded fashion,f which is true today but probably won't hold 417 // in the future (for example, we might want to score the callsites 418 // in multiple functions in parallel); if the inliner evolves in this 419 // direction we'll need to come up with a different approach here. 420 var callSiteTab CallSiteTab 421 422 // scoreCallsCache caches a call site table and call site list between 423 // invocations of ScoreCalls so that we can reuse previously allocated 424 // storage. 
425 var scoreCallsCache scoreCallsCacheType 426 427 type scoreCallsCacheType struct { 428 tab CallSiteTab 429 csl []*CallSite 430 } 431 432 // ScoreCalls assigns numeric scores to each of the callsites in 433 // function 'fn'; the lower the score, the more helpful we think it 434 // will be to inline. 435 // 436 // Unlike a lot of the other inline heuristics machinery, callsite 437 // scoring can't be done as part of the CanInline call for a function, 438 // due to fact that we may be working on a non-trivial SCC. So for 439 // example with this SCC: 440 // 441 // func foo(x int) { func bar(x int, f func()) { 442 // if x != 0 { f() 443 // bar(x, func(){}) foo(x-1) 444 // } } 445 // } 446 // 447 // We don't want to perform scoring for the 'foo' call in "bar" until 448 // after foo has been analyzed, but it's conceivable that CanInline 449 // might visit bar before foo for this SCC. 450 func ScoreCalls(fn *ir.Func) { 451 if len(fn.Body) == 0 { 452 return 453 } 454 enableDebugTraceIfEnv() 455 456 nameFinder := newNameFinder(fn) 457 458 if debugTrace&debugTraceScoring != 0 { 459 fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn)) 460 } 461 462 // If this is an inlinable function, use the precomputed 463 // call site table for it. If the function wasn't an inline 464 // candidate, collect a callsite table for it now. 
465 var cstab CallSiteTab 466 if funcInlHeur, ok := fpmap[fn]; ok { 467 cstab = funcInlHeur.cstab 468 } else { 469 if len(scoreCallsCache.tab) != 0 { 470 panic("missing call to ScoreCallsCleanup") 471 } 472 if scoreCallsCache.tab == nil { 473 scoreCallsCache.tab = make(CallSiteTab) 474 } 475 if debugTrace&debugTraceScoring != 0 { 476 fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n", 477 ir.FuncName(fn)) 478 } 479 cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0, 480 nameFinder) 481 } 482 483 csa := makeCallSiteAnalyzer(fn) 484 const doCallResults = true 485 csa.scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil) 486 487 disableDebugTrace() 488 } 489 490 // scoreCallsRegion assigns numeric scores to each of the callsites in 491 // region 'region' within function 'fn'. This can be called on 492 // an entire function, or with 'region' set to a chunk of 493 // code corresponding to an inlined call. 494 func (csa *callSiteAnalyzer) scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) { 495 if debugTrace&debugTraceScoring != 0 { 496 fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s) len(cstab)=%d\n", 497 ir.FuncName(fn), region[0].Op().String(), len(cstab)) 498 } 499 500 // Sort callsites to avoid any surprises with non deterministic 501 // map iteration order (this is probably not needed, but here just 502 // in case). 503 csl := scoreCallsCache.csl[:0] 504 for _, cs := range cstab { 505 csl = append(csl, cs) 506 } 507 scoreCallsCache.csl = csl[:0] 508 sort.Slice(csl, func(i, j int) bool { 509 return csl[i].ID < csl[j].ID 510 }) 511 512 // Score each call site. 
513 var resultNameTab map[*ir.Name]resultPropAndCS 514 for _, cs := range csl { 515 var cprops *FuncProps 516 fihcprops := false 517 desercprops := false 518 if funcInlHeur, ok := fpmap[cs.Callee]; ok { 519 cprops = funcInlHeur.props 520 fihcprops = true 521 } else if cs.Callee.Inl != nil { 522 cprops = DeserializeFromString(cs.Callee.Inl.Properties) 523 desercprops = true 524 } else { 525 if base.Debug.DumpInlFuncProps != "" { 526 fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos())) 527 panic("should never happen") 528 } else { 529 continue 530 } 531 } 532 cs.computeCallSiteScore(csa, cprops) 533 534 if doCallResults { 535 if debugTrace&debugTraceScoring != 0 { 536 fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops) 537 } 538 resultNameTab = csa.examineCallResults(cs, resultNameTab) 539 } 540 541 if debugTrace&debugTraceScoring != 0 { 542 fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops) 543 } 544 } 545 546 if resultNameTab != nil { 547 csa.rescoreBasedOnCallResultUses(fn, resultNameTab, cstab) 548 } 549 550 disableDebugTrace() 551 552 if ic != nil && callSiteTab != nil { 553 // Integrate the calls from this cstab into the table for the caller. 554 if err := callSiteTab.merge(cstab); err != nil { 555 base.FatalfAt(ic.Pos(), "%v", err) 556 } 557 } else { 558 callSiteTab = cstab 559 } 560 } 561 562 // ScoreCallsCleanup resets the state of the callsite cache 563 // once ScoreCalls is done with a function. 
564 func ScoreCallsCleanup() { 565 if base.Debug.DumpInlCallSiteScores != 0 { 566 if allCallSites == nil { 567 allCallSites = make(CallSiteTab) 568 } 569 for call, cs := range callSiteTab { 570 allCallSites[call] = cs 571 } 572 } 573 for k := range scoreCallsCache.tab { 574 delete(scoreCallsCache.tab, k) 575 } 576 } 577 578 // GetCallSiteScore returns the previously calculated score for call 579 // within fn. 580 func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) { 581 if funcInlHeur, ok := fpmap[fn]; ok { 582 if cs, ok := funcInlHeur.cstab[call]; ok { 583 return cs.Score, true 584 } 585 } 586 if cs, ok := callSiteTab[call]; ok { 587 return cs.Score, true 588 } 589 return 0, false 590 } 591 592 // BudgetExpansion returns the amount to relax/expand the base 593 // inlining budget when the new inliner is turned on; the inliner 594 // will add the returned value to the hairyness budget. 595 // 596 // Background: with the new inliner, the score for a given callsite 597 // can be adjusted down by some amount due to heuristics, however we 598 // won't know whether this is going to happen until much later after 599 // the CanInline call. This function returns the amount to relax the 600 // budget initially (to allow for a large score adjustment); later on 601 // in RevisitInlinability we'll look at each individual function to 602 // demote it if needed. 603 func BudgetExpansion(maxBudget int32) int32 { 604 if base.Debug.InlBudgetSlack != 0 { 605 return int32(base.Debug.InlBudgetSlack) 606 } 607 // In the default case, return maxBudget, which will effectively 608 // double the budget from 80 to 160; this should be good enough 609 // for most cases. 
610 return maxBudget 611 } 612 613 var allCallSites CallSiteTab 614 615 // DumpInlCallSiteScores is invoked by the inliner if the debug flag 616 // "-d=dumpinlcallsitescores" is set; it dumps out a human-readable 617 // summary of all (potentially) inlinable callsites in the package, 618 // along with info on call site scoring and the adjustments made to a 619 // given score. Here profile is the PGO profile in use (may be 620 // nil), budgetCallback is a callback that can be invoked to find out 621 // the original pre-adjustment hairyness limit for the function, and 622 // inlineHotMaxBudget is the constant of the same name used in the 623 // inliner. Sample output lines: 624 // 625 // Score Adjustment Status Callee CallerPos ScoreFlags 626 // 115 40 DEMOTED github.com/go-asm/go/cmd/compile/abi.(*ABIParamAssignment).Offset expand_calls.go:1679:14|6 panicPathAdj 627 // 76 -5n PROMOTED runtime.persistentalloc mcheckmark.go:48:45|3 inLoopAdj 628 // 201 0 --- PGO unicode.DecodeRuneInString utf8.go:312:30|1 629 // 7 -5 --- PGO github.com/go-asm/go/abi.Name.DataChecked type.go:625:22|0 inLoopAdj 630 // 631 // In the dump above, "Score" is the final score calculated for the 632 // callsite, "Adjustment" is the amount added to or subtracted from 633 // the original hairyness estimate to form the score. "Status" shows 634 // whether anything changed with the site -- did the adjustment bump 635 // it down just below the threshold ("PROMOTED") or instead bump it 636 // above the threshold ("DEMOTED"); this will be blank ("---") if no 637 // threshold was crossed as a result of the heuristics. Note that 638 // "Status" also shows whether PGO was involved. "Callee" is the name 639 // of the function called, "CallerPos" is the position of the 640 // callsite, and "ScoreFlags" is a digest of the specific properties 641 // we used to make adjustments to callsite score via heuristics. 
642 func DumpInlCallSiteScores(profile *pgo.Profile, budgetCallback func(fn *ir.Func, profile *pgo.Profile) (int32, bool)) { 643 644 var indirectlyDueToPromotion func(cs *CallSite) bool 645 indirectlyDueToPromotion = func(cs *CallSite) bool { 646 bud, _ := budgetCallback(cs.Callee, profile) 647 hairyval := cs.Callee.Inl.Cost 648 score := int32(cs.Score) 649 if hairyval > bud && score <= bud { 650 return true 651 } 652 if cs.parent != nil { 653 return indirectlyDueToPromotion(cs.parent) 654 } 655 return false 656 } 657 658 genstatus := func(cs *CallSite) string { 659 hairyval := cs.Callee.Inl.Cost 660 bud, isPGO := budgetCallback(cs.Callee, profile) 661 score := int32(cs.Score) 662 st := "---" 663 expinl := false 664 switch { 665 case hairyval <= bud && score <= bud: 666 // "Normal" inlined case: hairy val sufficiently low that 667 // it would have been inlined anyway without heuristics. 668 expinl = true 669 case hairyval > bud && score > bud: 670 // "Normal" not inlined case: hairy val sufficiently high 671 // and scoring didn't lower it. 672 case hairyval > bud && score <= bud: 673 // Promoted: we would not have inlined it before, but 674 // after score adjustment we decided to inline. 675 st = "PROMOTED" 676 expinl = true 677 case hairyval <= bud && score > bud: 678 // Demoted: we would have inlined it before, but after 679 // score adjustment we decided not to inline. 
680 st = "DEMOTED" 681 } 682 inlined := cs.aux&csAuxInlined != 0 683 indprom := false 684 if cs.parent != nil { 685 indprom = indirectlyDueToPromotion(cs.parent) 686 } 687 if inlined && indprom { 688 st += "|INDPROM" 689 } 690 if inlined && !expinl { 691 st += "|[NI?]" 692 } else if !inlined && expinl { 693 st += "|[IN?]" 694 } 695 if isPGO { 696 st += "|PGO" 697 } 698 return st 699 } 700 701 if base.Debug.DumpInlCallSiteScores != 0 { 702 var sl []*CallSite 703 for _, cs := range allCallSites { 704 sl = append(sl, cs) 705 } 706 sort.Slice(sl, func(i, j int) bool { 707 if sl[i].Score != sl[j].Score { 708 return sl[i].Score < sl[j].Score 709 } 710 fni := ir.PkgFuncName(sl[i].Callee) 711 fnj := ir.PkgFuncName(sl[j].Callee) 712 if fni != fnj { 713 return fni < fnj 714 } 715 ecsi := EncodeCallSiteKey(sl[i]) 716 ecsj := EncodeCallSiteKey(sl[j]) 717 return ecsi < ecsj 718 }) 719 720 mkname := func(fn *ir.Func) string { 721 var n string 722 if fn == nil || fn.Nname == nil { 723 return "<nil>" 724 } 725 if fn.Sym().Pkg == types.LocalPkg { 726 n = "ยท" + fn.Sym().Name 727 } else { 728 n = ir.PkgFuncName(fn) 729 } 730 // don't try to print super-long names 731 if len(n) <= 64 { 732 return n 733 } 734 return n[:32] + "..." + n[len(n)-32:] 735 } 736 737 if len(sl) != 0 { 738 fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path) 739 fmt.Fprintf(os.Stdout, "# Score Adjustment Status Callee CallerPos Flags ScoreFlags\n") 740 } 741 for _, cs := range sl { 742 hairyval := cs.Callee.Inl.Cost 743 adj := int32(cs.Score) - hairyval 744 nm := mkname(cs.Callee) 745 ecc := EncodeCallSiteKey(cs) 746 fmt.Fprintf(os.Stdout, "%d %d\t%s\t%s\t%s\t%s\n", 747 cs.Score, adj, genstatus(cs), 748 nm, ecc, 749 cs.ScoreMask.String()) 750 } 751 } 752 }