github.com/go-asm/go@v1.21.1-0.20240213172139-40c5ead50c48/cmd/compile/inline/inl.go

// Copyright 2011 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
//
// The inlining facility makes 2 passes: first CanInline determines which
// functions are suitable for inlining, and for those that are it
// saves a copy of the body. Then InlineCalls walks each function body to
// expand calls to inlinable functions.
//
// The Debug.l flag controls the aggressiveness. Note that main() swaps level 0 and 1,
// making 1 the default and -l disable inlining. Additional levels (beyond -l) may be buggy and
// are not supported.
//	0: disabled
//	1: 80-node leaf functions, oneliners, panic, lazy typechecking (default)
//	2: (unassigned)
//	3: (unassigned)
//	4: allow non-leaf functions
//
// At some point this may get another default and become switch-offable with -N.
//
// The -d typecheckinl flag enables early typechecking of all imported bodies,
// which is useful to flush out bugs.
//
// The Debug.m flag enables diagnostic output. A single -m is useful for verifying
// which calls get inlined or not; more is for debugging, and may go away at any point.

package inline

import (
	"fmt"
	"go/constant"
	"strconv"

	"github.com/go-asm/go/buildcfg"

	"github.com/go-asm/go/cmd/compile/base"
	"github.com/go-asm/go/cmd/compile/inline/inlheur"
	"github.com/go-asm/go/cmd/compile/ir"
	"github.com/go-asm/go/cmd/compile/logopt"
	"github.com/go-asm/go/cmd/compile/pgo"
	"github.com/go-asm/go/cmd/compile/typecheck"
	"github.com/go-asm/go/cmd/compile/types"
	"github.com/go-asm/go/cmd/obj"
)

// Inlining budget parameters, gathered in one place
const (
	inlineMaxBudget       = 80
	inlineExtraAppendCost = 0
	// default is to inline if there's at most one call. -l=4 overrides this by using 1 instead.
	inlineExtraCallCost  = 57              // 57 was benchmarked to provide the most benefit with no bad surprises; see https://github.com/golang/go/issues/19348#issuecomment-439370742
	inlineExtraPanicCost = 1               // do not penalize inlining panics.
	inlineExtraThrowCost = inlineMaxBudget // with current (2018-05/1.11) code, inlining runtime.throw does not help.

	inlineBigFunctionNodes   = 5000 // Functions with this many nodes are considered "big".
	inlineBigFunctionMaxCost = 20   // Max cost of inlinee when inlining into a "big" function.
)

var (
	// List of all hot callee nodes.
	// TODO(prattmic): Make this non-global.
	candHotCalleeMap = make(map[*pgo.IRNode]struct{})

	// List of all hot call sites. CallSiteInfo.Callee is always nil.
	// TODO(prattmic): Make this non-global.
	candHotEdgeMap = make(map[pgo.CallSiteInfo]struct{})

	// Threshold in percentage for hot callsite inlining.
	inlineHotCallSiteThresholdPercent float64

	// Threshold in CDF percentage for hot callsite inlining,
	// that is, for a threshold of X the hottest callsites that
	// make up the top X% of total edge weight will be
	// considered hot for inlining candidates.
	inlineCDFHotCallSiteThresholdPercent = float64(99)

	// Budget increased due to hotness.
	inlineHotMaxBudget int32 = 2000
)

// PGOInlinePrologue records the hot callsites from the IR graph.
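// Candidate callees and call sites are those on the hottest edges that
// together account for inlineCDFHotCallSiteThresholdPercent (99% by
// default) of the profile's total edge weight; see hotNodesFromCDF below.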
func PGOInlinePrologue(p *pgo.Profile, funcs []*ir.Func) {
	if base.Debug.PGOInlineCDFThreshold != "" {
		if s, err := strconv.ParseFloat(base.Debug.PGOInlineCDFThreshold, 64); err == nil && s >= 0 && s <= 100 {
			inlineCDFHotCallSiteThresholdPercent = s
		} else {
			base.Fatalf("invalid PGOInlineCDFThreshold, must be between 0 and 100")
		}
	}
	var hotCallsites []pgo.NamedCallEdge
	inlineHotCallSiteThresholdPercent, hotCallsites = hotNodesFromCDF(p)
	if base.Debug.PGODebug > 0 {
		fmt.Printf("hot-callsite-thres-from-CDF=%v\n", inlineHotCallSiteThresholdPercent)
	}

	if x := base.Debug.PGOInlineBudget; x != 0 {
		inlineHotMaxBudget = int32(x)
	}

	for _, n := range hotCallsites {
		// mark inlineable callees from hot edges
		if callee := p.WeightedCG.IRNodes[n.CalleeName]; callee != nil {
			candHotCalleeMap[callee] = struct{}{}
		}
		// mark hot call sites
		if caller := p.WeightedCG.IRNodes[n.CallerName]; caller != nil && caller.AST != nil {
			csi := pgo.CallSiteInfo{LineOffset: n.CallSiteOffset, Caller: caller.AST}
			candHotEdgeMap[csi] = struct{}{}
		}
	}

	if base.Debug.PGODebug >= 3 {
		fmt.Printf("hot-cg before inline in dot format:")
		p.PrintWeightedCallGraphDOT(inlineHotCallSiteThresholdPercent)
	}
}

// hotNodesFromCDF computes an edge weight threshold and the list of hot
// nodes that make up the given percentage of the CDF. The threshold, as
// a percent, is the lower bound of weight for nodes to be considered hot
// (currently only used in debug prints; in case of equal weights,
// comparing with the threshold may not accurately reflect which nodes are
// considered hot).
func hotNodesFromCDF(p *pgo.Profile) (float64, []pgo.NamedCallEdge) {
	cum := int64(0)
	for i, n := range p.NamedEdgeMap.ByWeight {
		w := p.NamedEdgeMap.Weight[n]
		cum += w
		if pgo.WeightInPercentage(cum, p.TotalWeight) > inlineCDFHotCallSiteThresholdPercent {
			// nodes[:i+1] to include the very last node that pushes the cumulative
			// weight over the threshold. (Say, if the CDF threshold is 50% and one
			// hot node takes 60% of weight, we want to include that node instead of
			// excluding it.)
			return pgo.WeightInPercentage(w, p.TotalWeight), p.NamedEdgeMap.ByWeight[:i+1]
		}
	}
	return 0, p.NamedEdgeMap.ByWeight
}

// CanInlineFuncs computes whether the functions in a batch are inlinable.
func CanInlineFuncs(funcs []*ir.Func, profile *pgo.Profile) {
	if profile != nil {
		PGOInlinePrologue(profile, funcs)
	}

	ir.VisitFuncsBottomUp(funcs, func(list []*ir.Func, recursive bool) {
		CanInlineSCC(list, recursive, profile)
	})
}

// CanInlineSCC computes the inlinability of functions within an SCC
// (strongly connected component).
//
// CanInlineSCC is designed to be used by ir.VisitFuncsBottomUp
// callbacks.
func CanInlineSCC(funcs []*ir.Func, recursive bool, profile *pgo.Profile) {
	if base.Flag.LowerL == 0 {
		return
	}

	numfns := numNonClosures(funcs)

	for _, fn := range funcs {
		if !recursive || numfns > 1 {
			// We allow inlining if there is no
			// recursion, or the recursion cycle is
			// across more than one function.
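			// (Mutual recursion across two or more functions is still
			// considered; only cycles confined to a single non-closure
			// function are rejected here.)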
			CanInline(fn, profile)
		} else {
			if base.Flag.LowerM > 1 && fn.OClosure == nil {
				fmt.Printf("%v: cannot inline %v: recursive\n", ir.Line(fn), fn.Nname)
			}
		}
		if inlheur.Enabled() {
			analyzeFuncProps(fn, profile)
		}
	}
}

// GarbageCollectUnreferencedHiddenClosures makes a pass over all the
// top-level (non-hidden-closure) functions looking for nested closure
// functions that are reachable, then sweeps through the Target.Decls
// list and marks any non-reachable hidden closure function as dead.
// See issues #59404 and #59638 for more context.
func GarbageCollectUnreferencedHiddenClosures() {

	liveFuncs := make(map[*ir.Func]bool)

	var markLiveFuncs func(fn *ir.Func)
	markLiveFuncs = func(fn *ir.Func) {
		if liveFuncs[fn] {
			return
		}
		liveFuncs[fn] = true
		ir.Visit(fn, func(n ir.Node) {
			if clo, ok := n.(*ir.ClosureExpr); ok {
				markLiveFuncs(clo.Func)
			}
		})
	}

	for i := 0; i < len(typecheck.Target.Funcs); i++ {
		fn := typecheck.Target.Funcs[i]
		if fn.IsHiddenClosure() {
			continue
		}
		markLiveFuncs(fn)
	}

	for i := 0; i < len(typecheck.Target.Funcs); i++ {
		fn := typecheck.Target.Funcs[i]
		if !fn.IsHiddenClosure() {
			continue
		}
		if fn.IsDeadcodeClosure() {
			continue
		}
		if liveFuncs[fn] {
			continue
		}
		fn.SetIsDeadcodeClosure(true)
		if base.Flag.LowerM > 2 {
			fmt.Printf("%v: unreferenced closure %v marked as dead\n", ir.Line(fn), fn)
		}
		if fn.Inl != nil && fn.LSym == nil {
			ir.InitLSym(fn, true)
		}
	}
}

// inlineBudget determines the max budget for function 'fn' prior to
// analyzing the hairiness of the body of 'fn'. We pass in the pgo
// profile if available (which can change the budget), and also a
// 'relaxed' flag, which expands the budget slightly to allow for the
// possibility that a call to the function might have its score
// adjusted downwards. If 'verbose' is set, then print a remark when
// we boost the budget due to PGO.
func inlineBudget(fn *ir.Func, profile *pgo.Profile, relaxed bool, verbose bool) int32 {
	// Update the budget for profile-guided inlining.
	budget := int32(inlineMaxBudget)
	if profile != nil {
		if n, ok := profile.WeightedCG.IRNodes[ir.LinkFuncName(fn)]; ok {
			if _, ok := candHotCalleeMap[n]; ok {
				budget = int32(inlineHotMaxBudget)
				if verbose {
					fmt.Printf("hot-node enabled increased budget=%v for func=%v\n", budget, ir.PkgFuncName(fn))
				}
			}
		}
	}
	if relaxed {
		budget += inlheur.BudgetExpansion(inlineMaxBudget)
	}
	return budget
}

// CanInline determines whether fn is inlineable.
// If so, CanInline saves copies of fn.Body and fn.Dcl in fn.Inl.
// fn and fn.Body will already have been typechecked.
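// The recorded fn.Inl.Cost is what callers later compare against their
// per-callsite budget in inlineCostOK (inlineMaxBudget, or
// inlineBigFunctionMaxCost when inlining into a "big" caller).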
func CanInline(fn *ir.Func, profile *pgo.Profile) {
	if fn.Nname == nil {
		base.Fatalf("CanInline no nname %+v", fn)
	}

	var reason string // reason, if any, that the function was not inlined
	if base.Flag.LowerM > 1 || logopt.Enabled() {
		defer func() {
			if reason != "" {
				if base.Flag.LowerM > 1 {
					fmt.Printf("%v: cannot inline %v: %s\n", ir.Line(fn), fn.Nname, reason)
				}
				if logopt.Enabled() {
					logopt.LogOpt(fn.Pos(), "cannotInlineFunction", "inline", ir.FuncName(fn), reason)
				}
			}
		}()
	}

	reason = InlineImpossible(fn)
	if reason != "" {
		return
	}
	if fn.Typecheck() == 0 {
		base.Fatalf("CanInline on non-typechecked function %v", fn)
	}

	n := fn.Nname
	if n.Func.InlinabilityChecked() {
		return
	}
	defer n.Func.SetInlinabilityChecked(true)

	cc := int32(inlineExtraCallCost)
	if base.Flag.LowerL == 4 {
		cc = 1 // this appears to yield better performance than 0.
	}

	// Use a "relaxed" inline budget if the new inliner is enabled.
	relaxed := inlheur.Enabled()

	// Compute the inline budget for this func.
	budget := inlineBudget(fn, profile, relaxed, base.Debug.PGODebug > 0)

	// At this point in the game the function we're looking at may
	// have "stale" autos, vars that still appear in the Dcl list, but
	// which no longer have any uses in the function body (due to
	// elimination by deadcode). We'd like to exclude these dead vars
	// when creating the "Inline.Dcl" field below; to accomplish this,
	// the hairyVisitor below builds up a map of used/referenced
	// locals, and we use this map to produce a pruned Inline.Dcl
	// list. See issue 25459 for more context.

	visitor := hairyVisitor{
		curFunc:       fn,
		isBigFunc:     IsBigFunc(fn),
		budget:        budget,
		maxBudget:     budget,
		extraCallCost: cc,
		profile:       profile,
	}
	if visitor.tooHairy(fn) {
		reason = visitor.reason
		return
	}

	n.Func.Inl = &ir.Inline{
		Cost:    budget - visitor.budget,
		Dcl:     pruneUnusedAutos(n.Func.Dcl, &visitor),
		HaveDcl: true,

		CanDelayResults: canDelayResults(fn),
	}
	if base.Flag.LowerM != 0 || logopt.Enabled() {
		noteInlinableFunc(n, fn, budget-visitor.budget)
	}
}

// noteInlinableFunc issues a message to the user that the specified
// function is inlinable.
func noteInlinableFunc(n *ir.Name, fn *ir.Func, cost int32) {
	if base.Flag.LowerM > 1 {
		fmt.Printf("%v: can inline %v with cost %d as: %v { %v }\n", ir.Line(fn), n, cost, fn.Type(), ir.Nodes(fn.Body))
	} else if base.Flag.LowerM != 0 {
		fmt.Printf("%v: can inline %v\n", ir.Line(fn), n)
	}
	// JSON optimization log output.
	if logopt.Enabled() {
		logopt.LogOpt(fn.Pos(), "canInlineFunction", "inline", ir.FuncName(fn), fmt.Sprintf("cost: %d", cost))
	}
}

// InlineImpossible returns a non-empty reason string if fn is impossible to
// inline regardless of cost or contents.
func InlineImpossible(fn *ir.Func) string {
	var reason string // reason, if any, that the function cannot be inlined.
	if fn.Nname == nil {
		reason = "no name"
		return reason
	}

	// If marked "go:noinline", don't inline.
	if fn.Pragma&ir.Noinline != 0 {
		reason = "marked go:noinline"
		return reason
	}

	// If marked "go:norace" and compiling with -race, don't inline.
	if base.Flag.Race && fn.Pragma&ir.Norace != 0 {
		reason = "marked go:norace with -race compilation"
		return reason
	}

	// If marked "go:nocheckptr" and compiling with -d checkptr, don't inline.
	if base.Debug.Checkptr != 0 && fn.Pragma&ir.NoCheckPtr != 0 {
		reason = "marked go:nocheckptr"
		return reason
	}

	// If marked "go:cgo_unsafe_args", don't inline, since the function
	// makes assumptions about its argument frame layout.
	if fn.Pragma&ir.CgoUnsafeArgs != 0 {
		reason = "marked go:cgo_unsafe_args"
		return reason
	}

	// If marked as "go:uintptrkeepalive", don't inline, since the keep
	// alive information is lost during inlining.
	//
	// TODO(prattmic): This is handled on calls during escape analysis,
	// which is after inlining. Move prior to inlining so the keep-alive is
	// maintained after inlining.
	if fn.Pragma&ir.UintptrKeepAlive != 0 {
		reason = "marked as having a keep-alive uintptr argument"
		return reason
	}

	// If marked as "go:uintptrescapes", don't inline, since the escape
	// information is lost during inlining.
	if fn.Pragma&ir.UintptrEscapes != 0 {
		reason = "marked as having an escaping uintptr argument"
		return reason
	}

	// The nowritebarrierrec checker currently works at function
	// granularity, so inlining yeswritebarrierrec functions can confuse it
	// (#22342). As a workaround, disallow inlining them for now.
	if fn.Pragma&ir.Yeswritebarrierrec != 0 {
		reason = "marked go:yeswritebarrierrec"
		return reason
	}

	// If a local function has no fn.Body (is defined outside of Go), it cannot be inlined.
	// Imported functions don't have fn.Body but might have an inline body in fn.Inl.
	if len(fn.Body) == 0 && !typecheck.HaveInlineBody(fn) {
		reason = "no function body"
		return reason
	}

	return ""
}

// canDelayResults reports whether inlined calls to fn can delay
// declaring the result parameters until the "return" statement.
func canDelayResults(fn *ir.Func) bool {
	// We can delay declaring+initializing result parameters if:
	// (1) there's exactly one "return" statement in the inlined function;
	// (2) it's not an empty return statement (#44355); and
	// (3) the result parameters aren't named.

	nreturns := 0
	ir.VisitList(fn.Body, func(n ir.Node) {
		if n, ok := n.(*ir.ReturnStmt); ok {
			nreturns++
			if len(n.Results) == 0 {
				nreturns++ // empty return statement (case 2)
			}
		}
	})

	if nreturns != 1 {
		return false // not exactly one return statement (case 1)
	}

	// temporaries for return values.
	for _, param := range fn.Type().Results() {
		if sym := param.Sym; sym != nil && !sym.IsBlank() {
			return false // found a named result parameter (case 3)
		}
	}

	return true
}

// hairyVisitor visits a function body to determine its inlining
// hairiness and whether or not it can be inlined.
type hairyVisitor struct {
	// This is needed to access the current caller in the doNode function.
	curFunc       *ir.Func
	isBigFunc     bool
	budget        int32
	maxBudget     int32
	reason        string
	extraCallCost int32
	usedLocals    ir.NameSet
	do            func(ir.Node) bool
	profile       *pgo.Profile
}

func (v *hairyVisitor) tooHairy(fn *ir.Func) bool {
	v.do = v.doNode // cache closure
	if ir.DoChildren(fn, v.do) {
		return true
	}
	if v.budget < 0 {
		v.reason = fmt.Sprintf("function too complex: cost %d exceeds budget %d", v.maxBudget-v.budget, v.maxBudget)
		return true
	}
	return false
}

// doNode visits n and its children, updates the state in v, and returns true if
// n makes the current function too hairy for inlining.
func (v *hairyVisitor) doNode(n ir.Node) bool {
	if n == nil {
		return false
	}
opSwitch:
	switch n.Op() {
	// Call is okay if inlinable and we have the budget for the body.
	case ir.OCALLFUNC:
		n := n.(*ir.CallExpr)
		// Functions that call runtime.getcaller{pc,sp} cannot be inlined
		// because getcaller{pc,sp} expect a pointer to the caller's first argument.
		//
		// runtime.throw is a "cheap call" like panic in normal code.
		var cheap bool
		if n.Fun.Op() == ir.ONAME {
			name := n.Fun.(*ir.Name)
			if name.Class == ir.PFUNC {
				switch fn := types.RuntimeSymName(name.Sym()); fn {
				case "getcallerpc", "getcallersp":
					v.reason = "call to " + fn
					return true
				case "throw":
					v.budget -= inlineExtraThrowCost
					break opSwitch
				case "panicrangeexit":
					cheap = true
				}
				// Special case for reflect.noescape. It does just type
				// conversions to appease the escape analysis, and doesn't
				// generate code.
				if types.ReflectSymName(name.Sym()) == "noescape" {
					cheap = true
				}
			}
			// Special case for coverage counter updates; although
			// these correspond to real operations, we treat them as
			// zero cost for the moment. This is due to the existence
			// of tests that are sensitive to inlining -- if the
			// insertion of coverage instrumentation happens to tip a
			// given function over the threshold and move it from
			// "inlinable" to "not-inlinable", this can cause changes
			// in allocation behavior, which can then result in test
			// failures (a good example is the TestAllocations in
			// crypto/ed25519).
			if isAtomicCoverageCounterUpdate(n) {
				return false
			}
		}
		if n.Fun.Op() == ir.OMETHEXPR {
			if meth := ir.MethodExprName(n.Fun); meth != nil {
				if fn := meth.Func; fn != nil {
					s := fn.Sym()
					if types.RuntimeSymName(s) == "heapBits.nextArena" {
						// Special case: explicitly allow mid-stack inlining of
						// runtime.heapBits.next even though it calls slow-path
						// runtime.heapBits.nextArena.
						cheap = true
					}
					// Special case: on architectures that can do unaligned loads,
					// explicitly mark encoding/binary methods as cheap,
					// because in practice they are, even though our inlining
					// budgeting system does not see that. See issue 42958.
					if base.Ctxt.Arch.CanMergeLoads && s.Pkg.Path == "encoding/binary" {
						switch s.Name {
						case "littleEndian.Uint64", "littleEndian.Uint32", "littleEndian.Uint16",
							"bigEndian.Uint64", "bigEndian.Uint32", "bigEndian.Uint16",
							"littleEndian.PutUint64", "littleEndian.PutUint32", "littleEndian.PutUint16",
							"bigEndian.PutUint64", "bigEndian.PutUint32", "bigEndian.PutUint16",
							"littleEndian.AppendUint64", "littleEndian.AppendUint32", "littleEndian.AppendUint16",
							"bigEndian.AppendUint64", "bigEndian.AppendUint32", "bigEndian.AppendUint16":
							cheap = true
						}
					}
				}
			}
		}
		if cheap {
			break // treat like any other node, that is, cost of 1
		}

		if ir.IsIntrinsicCall(n) {
			// Treat like any other node.
			break
		}

		if callee := inlCallee(v.curFunc, n.Fun, v.profile); callee != nil && typecheck.HaveInlineBody(callee) {
			// Check whether we'd actually inline this call. Set
			// log == false since we aren't actually doing inlining
			// yet.
			if ok, _ := canInlineCallExpr(v.curFunc, n, callee, v.isBigFunc, false); ok {
				// mkinlcall would inline this call [1], so use
				// the cost of the inline body as the cost of
				// the call, as that is what will actually
				// appear in the code.
				//
				// [1] This is almost a perfect match to the
				// mkinlcall logic, except that
				// canInlineCallExpr considers inlining cycles
				// by looking at what has already been inlined.
				// Since we haven't done any inlining yet we
				// will miss those.
				v.budget -= callee.Inl.Cost
				break
			}
		}

		// Call cost for non-leaf inlining.
		v.budget -= v.extraCallCost

	case ir.OCALLMETH:
		base.FatalfAt(n.Pos(), "OCALLMETH missed by typecheck")

	// Things that are too hairy, irrespective of the budget
	case ir.OCALL, ir.OCALLINTER:
		// Call cost for non-leaf inlining.
		v.budget -= v.extraCallCost

	case ir.OPANIC:
		n := n.(*ir.UnaryExpr)
		if n.X.Op() == ir.OCONVIFACE && n.X.(*ir.ConvExpr).Implicit() {
			// Hack to keep reflect.flag.mustBe inlinable for TestIntendedInlining.
			// Before CL 284412, these conversions were introduced later in the
			// compiler, so they didn't count against inlining budget.
			v.budget++
		}
		v.budget -= inlineExtraPanicCost

	case ir.ORECOVER:
		base.FatalfAt(n.Pos(), "ORECOVER missed typecheck")
	case ir.ORECOVERFP:
		// recover matches the argument frame pointer to find
		// the right panic value, so it needs an argument frame.
		v.reason = "call to recover"
		return true

	case ir.OCLOSURE:
		if base.Debug.InlFuncsWithClosures == 0 {
			v.reason = "not inlining functions with closures"
			return true
		}

		// TODO(danscales): Maybe make budget proportional to number of closure
		// variables, e.g.:
		//v.budget -= int32(len(n.(*ir.ClosureExpr).Func.ClosureVars) * 3)
		// TODO(austin): However, if we're able to inline this closure into
		// v.curFunc, then we actually pay nothing for the closure captures. We
		// should try to account for that if we're going to account for captures.
		v.budget -= 15

	case ir.OGO, ir.ODEFER, ir.OTAILCALL:
		v.reason = "unhandled op " + n.Op().String()
		return true

	case ir.OAPPEND:
		v.budget -= inlineExtraAppendCost

	case ir.OADDR:
		n := n.(*ir.AddrExpr)
		// Make "&s.f" cost 0 when f's offset is zero.
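		// (Taking the address of the first field compiles to the same
		// code as taking the address of the struct itself, so the
		// ODOT/ODOTPTR under the OADDR adds nothing.)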
		if dot, ok := n.X.(*ir.SelectorExpr); ok && (dot.Op() == ir.ODOT || dot.Op() == ir.ODOTPTR) {
			if _, ok := dot.X.(*ir.Name); ok && dot.Selection.Offset == 0 {
				v.budget += 2 // undo ir.OADDR+ir.ODOT/ir.ODOTPTR
			}
		}

	case ir.ODEREF:
		// *(*X)(unsafe.Pointer(&x)) is low-cost
		n := n.(*ir.StarExpr)

		ptr := n.X
		for ptr.Op() == ir.OCONVNOP {
			ptr = ptr.(*ir.ConvExpr).X
		}
		if ptr.Op() == ir.OADDR {
			v.budget += 1 // undo half of default cost of ir.ODEREF+ir.OADDR
		}

	case ir.OCONVNOP:
		// This doesn't produce code, but the children might.
		v.budget++ // undo default cost

	case ir.OFALL, ir.OTYPE:
		// These nodes don't produce code; omit from inlining budget.
		return false

	case ir.OIF:
		n := n.(*ir.IfStmt)
		if ir.IsConst(n.Cond, constant.Bool) {
			// This if and the condition cost nothing.
			if doList(n.Init(), v.do) {
				return true
			}
			if ir.BoolVal(n.Cond) {
				return doList(n.Body, v.do)
			} else {
				return doList(n.Else, v.do)
			}
		}

	case ir.ONAME:
		n := n.(*ir.Name)
		if n.Class == ir.PAUTO {
			v.usedLocals.Add(n)
		}

	case ir.OBLOCK:
		// The only OBLOCK we should see at this point is an empty one.
		// In any event, let the visitList(n.List()) below take care of the statements,
		// and don't charge for the OBLOCK itself. The ++ undoes the -- below.
		v.budget++

	case ir.OMETHVALUE, ir.OSLICELIT:
		v.budget-- // Hack for toolstash -cmp.

	case ir.OMETHEXPR:
		v.budget++ // Hack for toolstash -cmp.

	case ir.OAS2:
		n := n.(*ir.AssignListStmt)

		// Unified IR unconditionally rewrites:
		//
		//	a, b = f()
		//
		// into:
		//
		//	DCL tmp1
		//	DCL tmp2
		//	tmp1, tmp2 = f()
		//	a, b = tmp1, tmp2
		//
		// so that it can insert implicit conversions as necessary. To
		// minimize impact to the existing inlining heuristics (in
		// particular, to avoid breaking the existing inlinability regress
		// tests), we need to compensate for this here.
		//
		// See also identical logic in IsBigFunc.
		if len(n.Rhs) > 0 {
			if init := n.Rhs[0].Init(); len(init) == 1 {
				if _, ok := init[0].(*ir.AssignListStmt); ok {
					// 4 for each value, because each temporary variable now
					// appears 3 times (DCL, LHS, RHS), plus an extra DCL node.
					//
					// 1 for the extra "tmp1, tmp2 = f()" assignment statement.
					v.budget += 4*int32(len(n.Lhs)) + 1
				}
			}
		}

	case ir.OAS:
		// Special case for coverage counter updates and coverage
		// function registrations. Although these correspond to real
		// operations, we treat them as zero cost for the moment. This
		// is primarily due to the existence of tests that are
		// sensitive to inlining -- if the insertion of coverage
		// instrumentation happens to tip a given function over the
		// threshold and move it from "inlinable" to "not-inlinable",
		// this can cause changes in allocation behavior, which can
		// then result in test failures (a good example is the
		// TestAllocations in crypto/ed25519).
		n := n.(*ir.AssignStmt)
		if n.X.Op() == ir.OINDEX && isIndexingCoverageCounter(n.X) {
			return false
		}
	}

	v.budget--

	// When debugging, don't stop early, to get full cost of inlining this function
	if v.budget < 0 && base.Flag.LowerM < 2 && !logopt.Enabled() {
		v.reason = "too expensive"
		return true
	}

	return ir.DoChildren(n, v.do)
}

// IsBigFunc reports whether fn is a "big" function.
//
// Note: The criteria for "big" are heuristic and subject to change.
func IsBigFunc(fn *ir.Func) bool {
	budget := inlineBigFunctionNodes
	return ir.Any(fn, func(n ir.Node) bool {
		// See logic in hairyVisitor.doNode, explaining unified IR's
		// handling of "a, b = f()" assignments.
		if n, ok := n.(*ir.AssignListStmt); ok && n.Op() == ir.OAS2 && len(n.Rhs) > 0 {
			if init := n.Rhs[0].Init(); len(init) == 1 {
				if _, ok := init[0].(*ir.AssignListStmt); ok {
					budget += 4*len(n.Lhs) + 1
				}
			}
		}

		budget--
		return budget <= 0
	})
}

// TryInlineCall returns an inlined call expression for call, or nil
// if inlining is not possible.
func TryInlineCall(callerfn *ir.Func, call *ir.CallExpr, bigCaller bool, profile *pgo.Profile) *ir.InlinedCallExpr {
	if base.Flag.LowerL == 0 {
		return nil
	}
	if call.Op() != ir.OCALLFUNC {
		return nil
	}
	if call.GoDefer || call.NoInline {
		return nil
	}

	// Prevent inlining some reflect.Value methods when using checkptr,
	// even when package reflect was compiled without it (#35073).
	if base.Debug.Checkptr != 0 && call.Fun.Op() == ir.OMETHEXPR {
		if method := ir.MethodExprName(call.Fun); method != nil {
			switch types.ReflectSymName(method.Sym()) {
			case "Value.UnsafeAddr", "Value.Pointer":
				return nil
			}
		}
	}

	if base.Flag.LowerM > 3 {
		fmt.Printf("%v:call to func %+v\n", ir.Line(call), call.Fun)
	}
	if ir.IsIntrinsicCall(call) {
		return nil
	}
	if fn := inlCallee(callerfn, call.Fun, profile); fn != nil && typecheck.HaveInlineBody(fn) {
		return mkinlcall(callerfn, call, fn, bigCaller)
	}
	return nil
}

// inlCallee takes a function-typed expression and returns the underlying function ONAME
// that it refers to if statically known. Otherwise, it returns nil.
func inlCallee(caller *ir.Func, fn ir.Node, profile *pgo.Profile) (res *ir.Func) {
	fn = ir.StaticValue(fn)
	switch fn.Op() {
	case ir.OMETHEXPR:
		fn := fn.(*ir.SelectorExpr)
		n := ir.MethodExprName(fn)
		// Check that receiver type matches fn.X.
		// TODO(mdempsky): Handle implicit dereference
		// of pointer receiver argument?
		if n == nil || !types.Identical(n.Type().Recv().Type, fn.X.Type()) {
			return nil
		}
		return n.Func
	case ir.ONAME:
		fn := fn.(*ir.Name)
		if fn.Class == ir.PFUNC {
			return fn.Func
		}
	case ir.OCLOSURE:
		fn := fn.(*ir.ClosureExpr)
		c := fn.Func
		if len(c.ClosureVars) != 0 && c.ClosureVars[0].Outer.Curfn != caller {
			return nil // inliner doesn't support inlining across closure frames
		}
		CanInline(c, profile)
		return c
	}
	return nil
}

var inlgen int

// SSADumpInline gives the SSA back end a chance to dump the function
// when producing output for debugging the compiler itself.
var SSADumpInline = func(*ir.Func) {}

// InlineCall allows the inliner implementation to be overridden.
// If it returns nil, the function will not be inlined.
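// The default value below is only a placeholder that reports a fatal
// error; a real implementation must be installed before inlining runs.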
var InlineCall = func(callerfn *ir.Func, call *ir.CallExpr, fn *ir.Func, inlIndex int) *ir.InlinedCallExpr {
	base.Fatalf("inline.InlineCall not overridden")
	panic("unreachable")
}

// inlineCostOK returns true if call n from caller to callee is cheap enough to
// inline. bigCaller indicates that caller is a big function.
//
// In addition to the "cost OK" boolean, it also returns the "max
// cost" limit used to make the decision (which may differ depending
// on func size), and the score assigned to this specific callsite.
func inlineCostOK(n *ir.CallExpr, caller, callee *ir.Func, bigCaller bool) (bool, int32, int32) {
	maxCost := int32(inlineMaxBudget)
	if bigCaller {
		// We use this to restrict inlining into very big functions.
		// See issues 26546 and 17566.
		maxCost = inlineBigFunctionMaxCost
	}

	metric := callee.Inl.Cost
	if inlheur.Enabled() {
		score, ok := inlheur.GetCallSiteScore(caller, n)
		if ok {
			metric = int32(score)
		}
	}

	if metric <= maxCost {
		// Simple case. Function is already cheap enough.
		return true, 0, metric
	}

	// We'll also allow inlining of hot functions below inlineHotMaxBudget,
	// but only in small functions.

	lineOffset := pgo.NodeLineOffset(n, caller)
	csi := pgo.CallSiteInfo{LineOffset: lineOffset, Caller: caller}
	if _, ok := candHotEdgeMap[csi]; !ok {
		// Cold
		return false, maxCost, metric
	}

	// Hot

	if bigCaller {
		if base.Debug.PGODebug > 0 {
			fmt.Printf("hot-big check disallows inlining for call %s (cost %d) at %v in big function %s\n", ir.PkgFuncName(callee), callee.Inl.Cost, ir.Line(n), ir.PkgFuncName(caller))
		}
		return false, maxCost, metric
	}

	if metric > inlineHotMaxBudget {
		return false, inlineHotMaxBudget, metric
	}

	if !base.PGOHash.MatchPosWithInfo(n.Pos(), "inline", nil) {
		// De-selected by PGO Hash.
		return false, maxCost, metric
	}

	if base.Debug.PGODebug > 0 {
		fmt.Printf("hot-budget check allows inlining for call %s (cost %d) at %v in function %s\n", ir.PkgFuncName(callee), callee.Inl.Cost, ir.Line(n), ir.PkgFuncName(caller))
	}

	return true, 0, metric
}

// canInlineCallExpr returns true if the call n from caller to callee
// can be inlined, plus the score computed for the call expr in
// question. bigCaller indicates that caller is a big function. log
// indicates that the 'cannot inline' reason should be logged.
//
// Preconditions: CanInline(callee) has already been called.
func canInlineCallExpr(callerfn *ir.Func, n *ir.CallExpr, callee *ir.Func, bigCaller bool, log bool) (bool, int32) {
	if callee.Inl == nil {
		// callee is never inlinable.
		if log && logopt.Enabled() {
			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
				fmt.Sprintf("%s cannot be inlined", ir.PkgFuncName(callee)))
		}
		return false, 0
	}

	ok, maxCost, callSiteScore := inlineCostOK(n, callerfn, callee, bigCaller)
	if !ok {
		// callee cost too high for this call site.
		if log && logopt.Enabled() {
			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
				fmt.Sprintf("cost %d of %s exceeds max caller cost %d", callee.Inl.Cost, ir.PkgFuncName(callee), maxCost))
		}
		return false, 0
	}

	if callee == callerfn {
		// Can't recursively inline a function into itself.
		if log && logopt.Enabled() {
			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", fmt.Sprintf("recursive call to %s", ir.FuncName(callerfn)))
		}
		return false, 0
	}

	if base.Flag.Cfg.Instrumenting && types.IsNoInstrumentPkg(callee.Sym().Pkg) {
		// Runtime package must not be instrumented.
		// Instrument skips runtime package. However, some runtime code can be
		// inlined into other packages and instrumented there. To avoid this,
		// we disable inlining of runtime functions when instrumenting.
		// The example that we observed is inlining of LockOSThread,
		// which led to false race reports on m contents.
		if log && logopt.Enabled() {
			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
				fmt.Sprintf("call to runtime function %s in instrumented build", ir.PkgFuncName(callee)))
		}
		return false, 0
	}

	if base.Flag.Race && types.IsNoRacePkg(callee.Sym().Pkg) {
		if log && logopt.Enabled() {
			logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
				fmt.Sprintf(`call into "no-race" package function %s in race build`, ir.PkgFuncName(callee)))
		}
		return false, 0
	}

	// Check if we've already inlined this function at this particular
	// call site, in order to stop inlining when we reach the beginning
	// of a recursion cycle again. We don't inline immediately recursive
	// functions, but allow inlining if there is a recursion cycle of
	// many functions. Most likely, the inlining will stop before we
	// even hit the beginning of the cycle again, but this catches the
	// unusual case.
	parent := base.Ctxt.PosTable.Pos(n.Pos()).Base().InliningIndex()
	sym := callee.Linksym()
	for inlIndex := parent; inlIndex >= 0; inlIndex = base.Ctxt.InlTree.Parent(inlIndex) {
		if base.Ctxt.InlTree.InlinedFunction(inlIndex) == sym {
			if log {
				if base.Flag.LowerM > 1 {
					fmt.Printf("%v: cannot inline %v into %v: repeated recursive cycle\n", ir.Line(n), callee, ir.FuncName(callerfn))
				}
				if logopt.Enabled() {
					logopt.LogOpt(n.Pos(), "cannotInlineCall", "inline", ir.FuncName(callerfn),
						fmt.Sprintf("repeated recursive cycle to %s", ir.PkgFuncName(callee)))
				}
			}
			return false, 0
		}
	}

	return true, callSiteScore
}

// mkinlcall returns an OINLCALL node that can replace OCALLFUNC n, or
// nil if it cannot be inlined. callerfn is the function that contains
// n, and fn is the function being called.
//
// The result of mkinlcall MUST be assigned back to n, e.g.
//
//	n.Left = mkinlcall(n.Left, fn, isddd)
func mkinlcall(callerfn *ir.Func, n *ir.CallExpr, fn *ir.Func, bigCaller bool) *ir.InlinedCallExpr {
	ok, score := canInlineCallExpr(callerfn, n, fn, bigCaller, true)
	if !ok {
		return nil
	}
	typecheck.AssertFixedCall(n)

	parent := base.Ctxt.PosTable.Pos(n.Pos()).Base().InliningIndex()
	sym := fn.Linksym()
	inlIndex := base.Ctxt.InlTree.Add(parent, n.Pos(), sym, ir.FuncName(fn))

	closureInitLSym := func(n *ir.CallExpr, fn *ir.Func) {
		// The linker needs FuncInfo metadata for all inlined
		// functions. This is typically handled by gc.enqueueFunc
		// calling ir.InitLSym for all function declarations in
		// typecheck.Target.Decls (ir.UseClosure adds all closures to
		// Decls).
		//
		// However, non-trivial closures in Decls are ignored, and are
		// instead enqueued when walk of the calling function
		// discovers them.
		//
		// This presents a problem for direct calls to closures.
		// Inlining will replace the entire closure definition with its
		// body, which hides the closure from walk and thus suppresses
		// symbol creation.
		//
		// Explicitly create a symbol early in this edge case to ensure
		// we keep this metadata.
		//
		// TODO: Refactor to keep a reference so this can all be done
		// by enqueueFunc.

		if n.Op() != ir.OCALLFUNC {
			// Not a standard call.
			return
		}
		if n.Fun.Op() != ir.OCLOSURE {
			// Not a direct closure call.
			return
		}

		clo := n.Fun.(*ir.ClosureExpr)
		if ir.IsTrivialClosure(clo) {
			// enqueueFunc will handle trivial closures anyway.
			return
		}

		ir.InitLSym(fn, true)
	}

	closureInitLSym(n, fn)

	if base.Flag.GenDwarfInl > 0 {
		if !sym.WasInlined() {
			base.Ctxt.DwFixups.SetPrecursorFunc(sym, fn)
			sym.Set(obj.AttrWasInlined, true)
		}
	}

	if base.Flag.LowerM != 0 {
		if buildcfg.Experiment.NewInliner {
			fmt.Printf("%v: inlining call to %v with score %d\n",
				ir.Line(n), fn, score)
		} else {
			fmt.Printf("%v: inlining call to %v\n", ir.Line(n), fn)
		}
	}
	if base.Flag.LowerM > 2 {
		fmt.Printf("%v: Before inlining: %+v\n", ir.Line(n), n)
	}

	res := InlineCall(callerfn, n, fn, inlIndex)

	if res == nil {
		base.FatalfAt(n.Pos(), "inlining call to %v failed", fn)
	}

	if base.Flag.LowerM > 2 {
		fmt.Printf("%v: After inlining %+v\n\n", ir.Line(res), res)
	}

	if inlheur.Enabled() {
		inlheur.UpdateCallsiteTable(callerfn, n, res)
	}

	return res
}

// CalleeEffects appends any side effects from evaluating callee to init.
func CalleeEffects(init *ir.Nodes, callee ir.Node) {
	for {
		init.Append(ir.TakeInit(callee)...)

		switch callee.Op() {
		case ir.ONAME, ir.OCLOSURE, ir.OMETHEXPR:
			return // done

		case ir.OCONVNOP:
			conv := callee.(*ir.ConvExpr)
			callee = conv.X

		case ir.OINLCALL:
			ic := callee.(*ir.InlinedCallExpr)
			init.Append(ic.Body.Take()...)
			callee = ic.SingleResult()

		default:
			base.FatalfAt(callee.Pos(), "unexpected callee expression: %v", callee)
		}
	}
}

func pruneUnusedAutos(ll []*ir.Name, vis *hairyVisitor) []*ir.Name {
	s := make([]*ir.Name, 0, len(ll))
	for _, n := range ll {
		if n.Class == ir.PAUTO {
			if !vis.usedLocals.Has(n) {
				// TODO(mdempsky): Simplify code once we're confident that this
				// never happens anymore.
				base.FatalfAt(n.Pos(), "unused auto: %v", n)
				continue
			}
		}
		s = append(s, n)
	}
	return s
}

// numNonClosures returns the number of functions in list which are not closures.
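// (A function is considered a closure if its OClosure field is non-nil.)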
func numNonClosures(list []*ir.Func) int {
	count := 0
	for _, fn := range list {
		if fn.OClosure == nil {
			count++
		}
	}
	return count
}

func doList(list []ir.Node, do func(ir.Node) bool) bool {
	for _, x := range list {
		if x != nil {
			if do(x) {
				return true
			}
		}
	}
	return false
}

// isIndexingCoverageCounter returns true if the specified node 'n' is indexing
// into a coverage counter array.
func isIndexingCoverageCounter(n ir.Node) bool {
	if n.Op() != ir.OINDEX {
		return false
	}
	ixn := n.(*ir.IndexExpr)
	if ixn.X.Op() != ir.ONAME || !ixn.X.Type().IsArray() {
		return false
	}
	nn := ixn.X.(*ir.Name)
	return nn.CoverageCounter()
}

// isAtomicCoverageCounterUpdate examines the specified node to
// determine whether it represents a call to sync/atomic.AddUint32 or
// sync/atomic.StoreUint32 that updates a coverage counter.
func isAtomicCoverageCounterUpdate(cn *ir.CallExpr) bool {
	if cn.Fun.Op() != ir.ONAME {
		return false
	}
	name := cn.Fun.(*ir.Name)
	if name.Class != ir.PFUNC {
		return false
	}
	fn := name.Sym().Name
	if name.Sym().Pkg.Path != "sync/atomic" ||
		(fn != "AddUint32" && fn != "StoreUint32") {
		return false
	}
	if len(cn.Args) != 2 || cn.Args[0].Op() != ir.OADDR {
		return false
	}
	adn := cn.Args[0].(*ir.AddrExpr)
	v := isIndexingCoverageCounter(adn.X)
	return v
}

func PostProcessCallSites(profile *pgo.Profile) {
	if base.Debug.DumpInlCallSiteScores != 0 {
		budgetCallback := func(fn *ir.Func, prof *pgo.Profile) (int32, bool) {
			v := inlineBudget(fn, prof, false, false)
			return v, v == inlineHotMaxBudget
		}
		inlheur.DumpInlCallSiteScores(profile, budgetCallback)
	}
}

func analyzeFuncProps(fn *ir.Func, p *pgo.Profile) {
	canInline := func(fn *ir.Func) { CanInline(fn, p) }
	budgetForFunc := func(fn *ir.Func) int32 {
		return inlineBudget(fn, p, true, false)
	}
	inlheur.AnalyzeFunc(fn, canInline, budgetForFunc, inlineMaxBudget)
}