github.com/bir3/gocompiler@v0.9.2202/src/cmd/compile/internal/inline/inlheur/scoring.go (about) 1 // Copyright 2023 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package inlheur 6 7 import ( 8 "github.com/bir3/gocompiler/src/cmd/compile/internal/base" 9 "github.com/bir3/gocompiler/src/cmd/compile/internal/ir" 10 "github.com/bir3/gocompiler/src/cmd/compile/internal/pgo" 11 "github.com/bir3/gocompiler/src/cmd/compile/internal/types" 12 "fmt" 13 "os" 14 "sort" 15 "strconv" 16 "strings" 17 ) 18 19 // These constants enumerate the set of possible ways/scenarios 20 // in which we'll adjust the score of a given callsite. 21 type scoreAdjustTyp uint 22 23 // These constants capture the various ways in which the inliner's 24 // scoring phase can adjust a callsite score based on heuristics. They 25 // fall broadly into three categories: 26 // 27 // 1) adjustments based solely on the callsite context (ex: call 28 // appears on panic path) 29 // 30 // 2) adjustments that take into account specific interesting values 31 // passed at a call site (ex: passing a constant that could result in 32 // cprop/deadcode in the caller) 33 // 34 // 3) adjustments that take into account values returned from the call 35 // at a callsite (ex: call always returns the same inlinable function, 36 // and return value flows unmodified into an indirect call) 37 // 38 // For categories 2 and 3 above, each adjustment can have either a 39 // "must" version and a "may" version (but not both). Here the idea is 40 // that in the "must" version the value flow is unconditional: if the 41 // callsite executes, then the condition we're interested in (ex: 42 // param feeding call) is guaranteed to happen. For the "may" version, 43 // there may be control flow that could cause the benefit to be 44 // bypassed. 45 const ( 46 // Category 1 adjustments (see above) 47 panicPathAdj scoreAdjustTyp = (1 << iota) 48 initFuncAdj 49 inLoopAdj 50 51 // Category 2 adjustments (see above). 52 passConstToIfAdj 53 passConstToNestedIfAdj 54 passConcreteToItfCallAdj 55 passConcreteToNestedItfCallAdj 56 passFuncToIndCallAdj 57 passFuncToNestedIndCallAdj 58 passInlinableFuncToIndCallAdj 59 passInlinableFuncToNestedIndCallAdj 60 61 // Category 3 adjustments. 62 returnFeedsConstToIfAdj 63 returnFeedsFuncToIndCallAdj 64 returnFeedsInlinableFuncToIndCallAdj 65 returnFeedsConcreteToInterfaceCallAdj 66 67 sentinelScoreAdj // sentinel; not a real adjustment 68 ) 69 70 // This table records the specific values we use to adjust call 71 // site scores in a given scenario. 72 // NOTE: these numbers are chosen very arbitrarily; ideally 73 // we will go through some sort of turning process to decide 74 // what value for each one produces the best performance. 75 76 var adjValues = map[scoreAdjustTyp]int{ 77 panicPathAdj: 40, 78 initFuncAdj: 20, 79 inLoopAdj: -5, 80 passConstToIfAdj: -20, 81 passConstToNestedIfAdj: -15, 82 passConcreteToItfCallAdj: -30, 83 passConcreteToNestedItfCallAdj: -25, 84 passFuncToIndCallAdj: -25, 85 passFuncToNestedIndCallAdj: -20, 86 passInlinableFuncToIndCallAdj: -45, 87 passInlinableFuncToNestedIndCallAdj: -40, 88 returnFeedsConstToIfAdj: -15, 89 returnFeedsFuncToIndCallAdj: -25, 90 returnFeedsInlinableFuncToIndCallAdj: -40, 91 returnFeedsConcreteToInterfaceCallAdj: -25, 92 } 93 94 // SetupScoreAdjustments interprets the value of the -d=inlscoreadj 95 // debugging option, if set. The value of this flag is expected to be 96 // a series of "/"-separated clauses of the form adj1:value1. Example: 97 // -d=inlscoreadj=inLoopAdj=0/passConstToIfAdj=-99 98 func SetupScoreAdjustments() { 99 if base.Debug.InlScoreAdj == "" { 100 return 101 } 102 if err := parseScoreAdj(base.Debug.InlScoreAdj); err != nil { 103 base.Fatalf("malformed -d=inlscoreadj argument %q: %v", 104 base.Debug.InlScoreAdj, err) 105 } 106 } 107 108 func adjStringToVal(s string) (scoreAdjustTyp, bool) { 109 for adj := scoreAdjustTyp(1); adj < sentinelScoreAdj; adj <<= 1 { 110 if adj.String() == s { 111 return adj, true 112 } 113 } 114 return 0, false 115 } 116 117 func parseScoreAdj(val string) error { 118 clauses := strings.Split(val, "/") 119 if len(clauses) == 0 { 120 return fmt.Errorf("no clauses") 121 } 122 for _, clause := range clauses { 123 elems := strings.Split(clause, ":") 124 if len(elems) < 2 { 125 return fmt.Errorf("clause %q: expected colon", clause) 126 } 127 if len(elems) != 2 { 128 return fmt.Errorf("clause %q has %d elements, wanted 2", clause, 129 len(elems)) 130 } 131 adj, ok := adjStringToVal(elems[0]) 132 if !ok { 133 return fmt.Errorf("clause %q: unknown adjustment", clause) 134 } 135 val, err := strconv.Atoi(elems[1]) 136 if err != nil { 137 return fmt.Errorf("clause %q: malformed value: %v", clause, err) 138 } 139 adjValues[adj] = val 140 } 141 return nil 142 } 143 144 func adjValue(x scoreAdjustTyp) int { 145 if val, ok := adjValues[x]; ok { 146 return val 147 } else { 148 panic("internal error unregistered adjustment type") 149 } 150 } 151 152 var mayMustAdj = [...]struct{ may, must scoreAdjustTyp }{ 153 {may: passConstToNestedIfAdj, must: passConstToIfAdj}, 154 {may: passConcreteToNestedItfCallAdj, must: passConcreteToItfCallAdj}, 155 {may: passFuncToNestedIndCallAdj, must: passFuncToNestedIndCallAdj}, 156 {may: passInlinableFuncToNestedIndCallAdj, must: passInlinableFuncToIndCallAdj}, 157 } 158 159 func isMay(x scoreAdjustTyp) bool { 160 return mayToMust(x) != 0 161 } 162 163 func isMust(x scoreAdjustTyp) bool { 164 return mustToMay(x) != 0 165 } 166 167 func mayToMust(x scoreAdjustTyp) scoreAdjustTyp { 168 for _, v := range mayMustAdj { 169 if x == v.may { 170 return v.must 171 } 172 } 173 return 0 174 } 175 176 func mustToMay(x scoreAdjustTyp) scoreAdjustTyp { 177 for _, v := range mayMustAdj { 178 if x == v.must { 179 return v.may 180 } 181 } 182 return 0 183 } 184 185 // computeCallSiteScore takes a given call site whose ir node is 186 // 'call' and callee function is 'callee' and with previously computed 187 // call site properties 'csflags', then computes a score for the 188 // callsite that combines the size cost of the callee with heuristics 189 // based on previously computed argument and function properties, 190 // then stores the score and the adjustment mask in the appropriate 191 // fields in 'cs' 192 func (cs *CallSite) computeCallSiteScore(csa *callSiteAnalyzer, calleeProps *FuncProps) { 193 callee := cs.Callee 194 csflags := cs.Flags 195 call := cs.Call 196 197 // Start with the size-based score for the callee. 198 score := int(callee.Inl.Cost) 199 var tmask scoreAdjustTyp 200 201 if debugTrace&debugTraceScoring != 0 { 202 fmt.Fprintf(os.Stderr, "=-= scoring call to %s at %s , initial=%d\n", 203 callee.Sym().Name, fmtFullPos(call.Pos()), score) 204 } 205 206 // First some score adjustments to discourage inlining in selected cases. 207 if csflags&CallSiteOnPanicPath != 0 { 208 score, tmask = adjustScore(panicPathAdj, score, tmask) 209 } 210 if csflags&CallSiteInInitFunc != 0 { 211 score, tmask = adjustScore(initFuncAdj, score, tmask) 212 } 213 214 // Then adjustments to encourage inlining in selected cases. 215 if csflags&CallSiteInLoop != 0 { 216 score, tmask = adjustScore(inLoopAdj, score, tmask) 217 } 218 219 // Stop here if no callee props. 220 if calleeProps == nil { 221 cs.Score, cs.ScoreMask = score, tmask 222 return 223 } 224 225 // Walk through the actual expressions being passed at the call. 226 calleeRecvrParms := callee.Type().RecvParams() 227 for idx := range call.Args { 228 // ignore blanks 229 if calleeRecvrParms[idx].Sym == nil || 230 calleeRecvrParms[idx].Sym.IsBlank() { 231 continue 232 } 233 arg := call.Args[idx] 234 pflag := calleeProps.ParamFlags[idx] 235 if debugTrace&debugTraceScoring != 0 { 236 fmt.Fprintf(os.Stderr, "=-= arg %d of %d: val %v flags=%s\n", 237 idx, len(call.Args), arg, pflag.String()) 238 } 239 240 if len(cs.ArgProps) == 0 { 241 continue 242 } 243 argProps := cs.ArgProps[idx] 244 245 if debugTrace&debugTraceScoring != 0 { 246 fmt.Fprintf(os.Stderr, "=-= arg %d props %s value %v\n", 247 idx, argProps.String(), arg) 248 } 249 250 if argProps&ActualExprConstant != 0 { 251 if pflag&ParamMayFeedIfOrSwitch != 0 { 252 score, tmask = adjustScore(passConstToNestedIfAdj, score, tmask) 253 } 254 if pflag&ParamFeedsIfOrSwitch != 0 { 255 score, tmask = adjustScore(passConstToIfAdj, score, tmask) 256 } 257 } 258 259 if argProps&ActualExprIsConcreteConvIface != 0 { 260 // FIXME: ideally here it would be nice to make a 261 // distinction between the inlinable case and the 262 // non-inlinable case, but this is hard to do. Example: 263 // 264 // type I interface { Tiny() int; Giant() } 265 // type Conc struct { x int } 266 // func (c *Conc) Tiny() int { return 42 } 267 // func (c *Conc) Giant() { <huge amounts of code> } 268 // 269 // func passConcToItf(c *Conc) { 270 // makesItfMethodCall(c) 271 // } 272 // 273 // In the code above, function properties will only tell 274 // us that 'makesItfMethodCall' invokes a method on its 275 // interface parameter, but we don't know whether it calls 276 // "Tiny" or "Giant". If we knew if called "Tiny", then in 277 // theory in addition to converting the interface call to 278 // a direct call, we could also inline (in which case 279 // we'd want to decrease the score even more). 280 // 281 // One thing we could do (not yet implemented) is iterate 282 // through all of the methods of "*Conc" that allow it to 283 // satisfy I, and if all are inlinable, then exploit that. 284 if pflag&ParamMayFeedInterfaceMethodCall != 0 { 285 score, tmask = adjustScore(passConcreteToNestedItfCallAdj, score, tmask) 286 } 287 if pflag&ParamFeedsInterfaceMethodCall != 0 { 288 score, tmask = adjustScore(passConcreteToItfCallAdj, score, tmask) 289 } 290 } 291 292 if argProps&(ActualExprIsFunc|ActualExprIsInlinableFunc) != 0 { 293 mayadj := passFuncToNestedIndCallAdj 294 mustadj := passFuncToIndCallAdj 295 if argProps&ActualExprIsInlinableFunc != 0 { 296 mayadj = passInlinableFuncToNestedIndCallAdj 297 mustadj = passInlinableFuncToIndCallAdj 298 } 299 if pflag&ParamMayFeedIndirectCall != 0 { 300 score, tmask = adjustScore(mayadj, score, tmask) 301 } 302 if pflag&ParamFeedsIndirectCall != 0 { 303 score, tmask = adjustScore(mustadj, score, tmask) 304 } 305 } 306 } 307 308 cs.Score, cs.ScoreMask = score, tmask 309 } 310 311 func adjustScore(typ scoreAdjustTyp, score int, mask scoreAdjustTyp) (int, scoreAdjustTyp) { 312 313 if isMust(typ) { 314 if mask&typ != 0 { 315 return score, mask 316 } 317 may := mustToMay(typ) 318 if mask&may != 0 { 319 // promote may to must, so undo may 320 score -= adjValue(may) 321 mask &^= may 322 } 323 } else if isMay(typ) { 324 must := mayToMust(typ) 325 if mask&(must|typ) != 0 { 326 return score, mask 327 } 328 } 329 if mask&typ == 0 { 330 if debugTrace&debugTraceScoring != 0 { 331 fmt.Fprintf(os.Stderr, "=-= applying adj %d for %s\n", 332 adjValue(typ), typ.String()) 333 } 334 score += adjValue(typ) 335 mask |= typ 336 } 337 return score, mask 338 } 339 340 var resultFlagToPositiveAdj map[ResultPropBits]scoreAdjustTyp 341 var paramFlagToPositiveAdj map[ParamPropBits]scoreAdjustTyp 342 343 func setupFlagToAdjMaps() { 344 resultFlagToPositiveAdj = map[ResultPropBits]scoreAdjustTyp{ 345 ResultIsAllocatedMem: returnFeedsConcreteToInterfaceCallAdj, 346 ResultAlwaysSameFunc: returnFeedsFuncToIndCallAdj, 347 ResultAlwaysSameConstant: returnFeedsConstToIfAdj, 348 } 349 paramFlagToPositiveAdj = map[ParamPropBits]scoreAdjustTyp{ 350 ParamMayFeedInterfaceMethodCall: passConcreteToNestedItfCallAdj, 351 ParamFeedsInterfaceMethodCall: passConcreteToItfCallAdj, 352 ParamMayFeedIndirectCall: passInlinableFuncToNestedIndCallAdj, 353 ParamFeedsIndirectCall: passInlinableFuncToIndCallAdj, 354 } 355 } 356 357 // LargestNegativeScoreAdjustment tries to estimate the largest possible 358 // negative score adjustment that could be applied to a call of the 359 // function with the specified props. Example: 360 // 361 // func foo() { func bar(x int, p *int) int { 362 // ... if x < 0 { *p = x } 363 // } return 99 364 // } 365 // 366 // Function 'foo' above on the left has no interesting properties, 367 // thus as a result the most we'll adjust any call to is the value for 368 // "call in loop". If the calculated cost of the function is 150, and 369 // the in-loop adjustment is 5 (for example), then there is not much 370 // point treating it as inlinable. On the other hand "bar" has a param 371 // property (parameter "x" feeds unmodified to an "if" statement") and 372 // a return property (always returns same constant) meaning that a 373 // given call _could_ be rescored down as much as -35 points-- thus if 374 // the size of "bar" is 100 (for example) then there is at least a 375 // chance that scoring will enable inlining. 376 func LargestNegativeScoreAdjustment(fn *ir.Func, props *FuncProps) int { 377 if resultFlagToPositiveAdj == nil { 378 setupFlagToAdjMaps() 379 } 380 var tmask scoreAdjustTyp 381 score := adjValues[inLoopAdj] // any call can be in a loop 382 for _, pf := range props.ParamFlags { 383 if adj, ok := paramFlagToPositiveAdj[pf]; ok { 384 score, tmask = adjustScore(adj, score, tmask) 385 } 386 } 387 for _, rf := range props.ResultFlags { 388 if adj, ok := resultFlagToPositiveAdj[rf]; ok { 389 score, tmask = adjustScore(adj, score, tmask) 390 } 391 } 392 393 if debugTrace&debugTraceScoring != 0 { 394 fmt.Fprintf(os.Stderr, "=-= largestScore(%v) is %d\n", 395 fn, score) 396 } 397 398 return score 399 } 400 401 // LargestPositiveScoreAdjustment tries to estimate the largest possible 402 // positive score adjustment that could be applied to a given callsite. 403 // At the moment we don't have very many positive score adjustments, so 404 // this is just hard-coded, not table-driven. 405 func LargestPositiveScoreAdjustment(fn *ir.Func) int { 406 return adjValues[panicPathAdj] + adjValues[initFuncAdj] 407 } 408 409 // callSiteTab contains entries for each call in the function 410 // currently being processed by InlineCalls; this variable will either 411 // be set to 'cstabCache' below (for non-inlinable routines) or to the 412 // local 'cstab' entry in the fnInlHeur object for inlinable routines. 413 // 414 // NOTE: this assumes that inlining operations are happening in a serial, 415 // single-threaded fashion,f which is true today but probably won't hold 416 // in the future (for example, we might want to score the callsites 417 // in multiple functions in parallel); if the inliner evolves in this 418 // direction we'll need to come up with a different approach here. 419 var callSiteTab CallSiteTab 420 421 // scoreCallsCache caches a call site table and call site list between 422 // invocations of ScoreCalls so that we can reuse previously allocated 423 // storage. 424 var scoreCallsCache scoreCallsCacheType 425 426 type scoreCallsCacheType struct { 427 tab CallSiteTab 428 csl []*CallSite 429 } 430 431 // ScoreCalls assigns numeric scores to each of the callsites in 432 // function 'fn'; the lower the score, the more helpful we think it 433 // will be to inline. 434 // 435 // Unlike a lot of the other inline heuristics machinery, callsite 436 // scoring can't be done as part of the CanInline call for a function, 437 // due to fact that we may be working on a non-trivial SCC. So for 438 // example with this SCC: 439 // 440 // func foo(x int) { func bar(x int, f func()) { 441 // if x != 0 { f() 442 // bar(x, func(){}) foo(x-1) 443 // } } 444 // } 445 // 446 // We don't want to perform scoring for the 'foo' call in "bar" until 447 // after foo has been analyzed, but it's conceivable that CanInline 448 // might visit bar before foo for this SCC. 449 func ScoreCalls(fn *ir.Func) { 450 if len(fn.Body) == 0 { 451 return 452 } 453 enableDebugTraceIfEnv() 454 455 nameFinder := newNameFinder(fn) 456 457 if debugTrace&debugTraceScoring != 0 { 458 fmt.Fprintf(os.Stderr, "=-= ScoreCalls(%v)\n", ir.FuncName(fn)) 459 } 460 461 // If this is an inlinable function, use the precomputed 462 // call site table for it. If the function wasn't an inline 463 // candidate, collect a callsite table for it now. 464 var cstab CallSiteTab 465 if funcInlHeur, ok := fpmap[fn]; ok { 466 cstab = funcInlHeur.cstab 467 } else { 468 if len(scoreCallsCache.tab) != 0 { 469 panic("missing call to ScoreCallsCleanup") 470 } 471 if scoreCallsCache.tab == nil { 472 scoreCallsCache.tab = make(CallSiteTab) 473 } 474 if debugTrace&debugTraceScoring != 0 { 475 fmt.Fprintf(os.Stderr, "=-= building cstab for non-inl func %s\n", 476 ir.FuncName(fn)) 477 } 478 cstab = computeCallSiteTable(fn, fn.Body, scoreCallsCache.tab, nil, 0, 479 nameFinder) 480 } 481 482 csa := makeCallSiteAnalyzer(fn) 483 const doCallResults = true 484 csa.scoreCallsRegion(fn, fn.Body, cstab, doCallResults, nil) 485 486 disableDebugTrace() 487 } 488 489 // scoreCallsRegion assigns numeric scores to each of the callsites in 490 // region 'region' within function 'fn'. This can be called on 491 // an entire function, or with 'region' set to a chunk of 492 // code corresponding to an inlined call. 493 func (csa *callSiteAnalyzer) scoreCallsRegion(fn *ir.Func, region ir.Nodes, cstab CallSiteTab, doCallResults bool, ic *ir.InlinedCallExpr) { 494 if debugTrace&debugTraceScoring != 0 { 495 fmt.Fprintf(os.Stderr, "=-= scoreCallsRegion(%v, %s) len(cstab)=%d\n", 496 ir.FuncName(fn), region[0].Op().String(), len(cstab)) 497 } 498 499 // Sort callsites to avoid any surprises with non deterministic 500 // map iteration order (this is probably not needed, but here just 501 // in case). 502 csl := scoreCallsCache.csl[:0] 503 for _, cs := range cstab { 504 csl = append(csl, cs) 505 } 506 scoreCallsCache.csl = csl[:0] 507 sort.Slice(csl, func(i, j int) bool { 508 return csl[i].ID < csl[j].ID 509 }) 510 511 // Score each call site. 512 var resultNameTab map[*ir.Name]resultPropAndCS 513 for _, cs := range csl { 514 var cprops *FuncProps 515 fihcprops := false 516 desercprops := false 517 if funcInlHeur, ok := fpmap[cs.Callee]; ok { 518 cprops = funcInlHeur.props 519 fihcprops = true 520 } else if cs.Callee.Inl != nil { 521 cprops = DeserializeFromString(cs.Callee.Inl.Properties) 522 desercprops = true 523 } else { 524 if base.Debug.DumpInlFuncProps != "" { 525 fmt.Fprintf(os.Stderr, "=-= *** unable to score call to %s from %s\n", cs.Callee.Sym().Name, fmtFullPos(cs.Call.Pos())) 526 panic("should never happen") 527 } else { 528 continue 529 } 530 } 531 cs.computeCallSiteScore(csa, cprops) 532 533 if doCallResults { 534 if debugTrace&debugTraceScoring != 0 { 535 fmt.Fprintf(os.Stderr, "=-= examineCallResults at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops) 536 } 537 resultNameTab = csa.examineCallResults(cs, resultNameTab) 538 } 539 540 if debugTrace&debugTraceScoring != 0 { 541 fmt.Fprintf(os.Stderr, "=-= scoring call at %s: flags=%d score=%d funcInlHeur=%v deser=%v\n", fmtFullPos(cs.Call.Pos()), cs.Flags, cs.Score, fihcprops, desercprops) 542 } 543 } 544 545 if resultNameTab != nil { 546 csa.rescoreBasedOnCallResultUses(fn, resultNameTab, cstab) 547 } 548 549 disableDebugTrace() 550 551 if ic != nil && callSiteTab != nil { 552 // Integrate the calls from this cstab into the table for the caller. 553 if err := callSiteTab.merge(cstab); err != nil { 554 base.FatalfAt(ic.Pos(), "%v", err) 555 } 556 } else { 557 callSiteTab = cstab 558 } 559 } 560 561 // ScoreCallsCleanup resets the state of the callsite cache 562 // once ScoreCalls is done with a function. 563 func ScoreCallsCleanup() { 564 if base.Debug.DumpInlCallSiteScores != 0 { 565 if allCallSites == nil { 566 allCallSites = make(CallSiteTab) 567 } 568 for call, cs := range callSiteTab { 569 allCallSites[call] = cs 570 } 571 } 572 for k := range scoreCallsCache.tab { 573 delete(scoreCallsCache.tab, k) 574 } 575 } 576 577 // GetCallSiteScore returns the previously calculated score for call 578 // within fn. 579 func GetCallSiteScore(fn *ir.Func, call *ir.CallExpr) (int, bool) { 580 if funcInlHeur, ok := fpmap[fn]; ok { 581 if cs, ok := funcInlHeur.cstab[call]; ok { 582 return cs.Score, true 583 } 584 } 585 if cs, ok := callSiteTab[call]; ok { 586 return cs.Score, true 587 } 588 return 0, false 589 } 590 591 // BudgetExpansion returns the amount to relax/expand the base 592 // inlining budget when the new inliner is turned on; the inliner 593 // will add the returned value to the hairyness budget. 594 // 595 // Background: with the new inliner, the score for a given callsite 596 // can be adjusted down by some amount due to heuristics, however we 597 // won't know whether this is going to happen until much later after 598 // the CanInline call. This function returns the amount to relax the 599 // budget initially (to allow for a large score adjustment); later on 600 // in RevisitInlinability we'll look at each individual function to 601 // demote it if needed. 602 func BudgetExpansion(maxBudget int32) int32 { 603 if base.Debug.InlBudgetSlack != 0 { 604 return int32(base.Debug.InlBudgetSlack) 605 } 606 // In the default case, return maxBudget, which will effectively 607 // double the budget from 80 to 160; this should be good enough 608 // for most cases. 609 return maxBudget 610 } 611 612 var allCallSites CallSiteTab 613 614 // DumpInlCallSiteScores is invoked by the inliner if the debug flag 615 // "-d=dumpinlcallsitescores" is set; it dumps out a human-readable 616 // summary of all (potentially) inlinable callsites in the package, 617 // along with info on call site scoring and the adjustments made to a 618 // given score. Here profile is the PGO profile in use (may be 619 // nil), budgetCallback is a callback that can be invoked to find out 620 // the original pre-adjustment hairyness limit for the function, and 621 // inlineHotMaxBudget is the constant of the same name used in the 622 // inliner. Sample output lines: 623 // 624 // Score Adjustment Status Callee CallerPos ScoreFlags 625 // 115 40 DEMOTED cmd/compile/internal/abi.(*ABIParamAssignment).Offset expand_calls.go:1679:14|6 panicPathAdj 626 // 76 -5n PROMOTED runtime.persistentalloc mcheckmark.go:48:45|3 inLoopAdj 627 // 201 0 --- PGO unicode.DecodeRuneInString utf8.go:312:30|1 628 // 7 -5 --- PGO internal/abi.Name.DataChecked type.go:625:22|0 inLoopAdj 629 // 630 // In the dump above, "Score" is the final score calculated for the 631 // callsite, "Adjustment" is the amount added to or subtracted from 632 // the original hairyness estimate to form the score. "Status" shows 633 // whether anything changed with the site -- did the adjustment bump 634 // it down just below the threshold ("PROMOTED") or instead bump it 635 // above the threshold ("DEMOTED"); this will be blank ("---") if no 636 // threshold was crossed as a result of the heuristics. Note that 637 // "Status" also shows whether PGO was involved. "Callee" is the name 638 // of the function called, "CallerPos" is the position of the 639 // callsite, and "ScoreFlags" is a digest of the specific properties 640 // we used to make adjustments to callsite score via heuristics. 641 func DumpInlCallSiteScores(profile *pgo.Profile, budgetCallback func(fn *ir.Func, profile *pgo.Profile) (int32, bool)) { 642 643 var indirectlyDueToPromotion func(cs *CallSite) bool 644 indirectlyDueToPromotion = func(cs *CallSite) bool { 645 bud, _ := budgetCallback(cs.Callee, profile) 646 hairyval := cs.Callee.Inl.Cost 647 score := int32(cs.Score) 648 if hairyval > bud && score <= bud { 649 return true 650 } 651 if cs.parent != nil { 652 return indirectlyDueToPromotion(cs.parent) 653 } 654 return false 655 } 656 657 genstatus := func(cs *CallSite) string { 658 hairyval := cs.Callee.Inl.Cost 659 bud, isPGO := budgetCallback(cs.Callee, profile) 660 score := int32(cs.Score) 661 st := "---" 662 expinl := false 663 switch { 664 case hairyval <= bud && score <= bud: 665 // "Normal" inlined case: hairy val sufficiently low that 666 // it would have been inlined anyway without heuristics. 667 expinl = true 668 case hairyval > bud && score > bud: 669 // "Normal" not inlined case: hairy val sufficiently high 670 // and scoring didn't lower it. 671 case hairyval > bud && score <= bud: 672 // Promoted: we would not have inlined it before, but 673 // after score adjustment we decided to inline. 674 st = "PROMOTED" 675 expinl = true 676 case hairyval <= bud && score > bud: 677 // Demoted: we would have inlined it before, but after 678 // score adjustment we decided not to inline. 679 st = "DEMOTED" 680 } 681 inlined := cs.aux&csAuxInlined != 0 682 indprom := false 683 if cs.parent != nil { 684 indprom = indirectlyDueToPromotion(cs.parent) 685 } 686 if inlined && indprom { 687 st += "|INDPROM" 688 } 689 if inlined && !expinl { 690 st += "|[NI?]" 691 } else if !inlined && expinl { 692 st += "|[IN?]" 693 } 694 if isPGO { 695 st += "|PGO" 696 } 697 return st 698 } 699 700 if base.Debug.DumpInlCallSiteScores != 0 { 701 var sl []*CallSite 702 for _, cs := range allCallSites { 703 sl = append(sl, cs) 704 } 705 sort.Slice(sl, func(i, j int) bool { 706 if sl[i].Score != sl[j].Score { 707 return sl[i].Score < sl[j].Score 708 } 709 fni := ir.PkgFuncName(sl[i].Callee) 710 fnj := ir.PkgFuncName(sl[j].Callee) 711 if fni != fnj { 712 return fni < fnj 713 } 714 ecsi := EncodeCallSiteKey(sl[i]) 715 ecsj := EncodeCallSiteKey(sl[j]) 716 return ecsi < ecsj 717 }) 718 719 mkname := func(fn *ir.Func) string { 720 var n string 721 if fn == nil || fn.Nname == nil { 722 return "<nil>" 723 } 724 if fn.Sym().Pkg == types.LocalPkg { 725 n = "ยท" + fn.Sym().Name 726 } else { 727 n = ir.PkgFuncName(fn) 728 } 729 // don't try to print super-long names 730 if len(n) <= 64 { 731 return n 732 } 733 return n[:32] + "..." + n[len(n)-32:len(n)] 734 } 735 736 if len(sl) != 0 { 737 fmt.Fprintf(os.Stdout, "# scores for package %s\n", types.LocalPkg.Path) 738 fmt.Fprintf(os.Stdout, "# Score Adjustment Status Callee CallerPos Flags ScoreFlags\n") 739 } 740 for _, cs := range sl { 741 hairyval := cs.Callee.Inl.Cost 742 adj := int32(cs.Score) - hairyval 743 nm := mkname(cs.Callee) 744 ecc := EncodeCallSiteKey(cs) 745 fmt.Fprintf(os.Stdout, "%d %d\t%s\t%s\t%s\t%s\n", 746 cs.Score, adj, genstatus(cs), 747 nm, ecc, 748 cs.ScoreMask.String()) 749 } 750 } 751 }